+++ /dev/null
-/**
- *******************************************************************************
- * Copyright (C) 2004-2012, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-package com.ibm.icu.impl;
-
-/**
- * For generation of Implicit CEs
- * @author Mark Davis
- *
- * Cleaned up so that changes can be made more easily.
- * Old values:
-# First Implicit: E26A792D
-# Last Implicit: E3DC70C0
-# First CJK: E0030300
-# Last CJK: E0A9DD00
-# First CJK_A: E0A9DF00
-# Last CJK_A: E0DE3100
-@internal
- */
-public class ImplicitCEGenerator {
-
- /**
- * constants
- */
- static final boolean DEBUG = false;
-
- static final long topByte = 0xFF000000L;
- static final long bottomByte = 0xFFL;
- static final long fourBytes = 0xFFFFFFFFL;
-
- static final int MAX_INPUT = 0x220001; // 2 * Unicode range + 2
-
-// public static final int CJK_BASE = 0x4E00;
-// public static final int CJK_LIMIT = 0x9FFF+1;
-// public static final int CJK_COMPAT_USED_BASE = 0xFA0E;
-// public static final int CJK_COMPAT_USED_LIMIT = 0xFA2F+1;
-// public static final int CJK_A_BASE = 0x3400;
-// public static final int CJK_A_LIMIT = 0x4DBF+1;
-// public static final int CJK_B_BASE = 0x20000;
-// public static final int CJK_B_LIMIT = 0x2A6DF+1;
-
- public static final int
- // 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
- // 9FCC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; (Unicode 6.1)
- CJK_BASE = 0x4E00,
- CJK_LIMIT = 0x9FCC+1,
-
- CJK_COMPAT_USED_BASE = 0xFA0E,
- CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
-
- //3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
- //4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
-
- CJK_A_BASE = 0x3400,
- CJK_A_LIMIT = 0x4DB5+1,
-
- //20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
- //2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
-
- CJK_B_BASE = 0x20000,
- CJK_B_LIMIT = 0x2A6D6+1,
-
- //2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;
- //2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;
-
- CJK_C_BASE = 0x2A700,
- CJK_C_LIMIT = 0x2B734+1,
-
- //2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;
- //2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;
-
- CJK_D_BASE = 0x2B740,
- CJK_D_LIMIT = 0x2B81D+1
-
- // when adding to this list, look for all occurrences (in project) of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing!!!!
- ;
-
-// private void throwError(String title, int cp) {
-// throw new IllegalArgumentException(title + "\t" + Utility.hex(cp, 6) + "\t" +
-// Utility.hex(getImplicitFromRaw(cp) & fourBytes));
-// }
-//
-// private void throwError(String title, long ce) {
-// throw new IllegalArgumentException(title + "\t" + Utility.hex(ce & fourBytes));
-// }
-//
-// private void show(int i) {
-// if (i >= 0 && i <= MAX_INPUT) {
-// System.out.println(Utility.hex(i) + "\t" + Utility.hex(getImplicitFromRaw(i) & fourBytes));
-// }
-// }
-
- /**
- * Precomputed by constructor
- */
- int final3Multiplier;
- int final4Multiplier;
- int final3Count;
- int final4Count;
- int medialCount;
- int min3Primary;
- int min4Primary;
- int max4Primary;
- int minTrail;
- int maxTrail;
- int max3Trail;
- int max4Trail;
- int min4Boundary;
-
- public int getGap4() {
- return final4Multiplier - 1;
- }
-
- public int getGap3() {
- return final3Multiplier - 1;
- }
-
- // old comment
- // we must skip all 00, 01, 02, FF bytes, so most bytes have 252 values
- // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
- // we shift so that HAN all has the same first primary, for compression.
- // for the 4 byte case, we make the gap as large as we can fit.
-
- /**
- * Supply parameters for generating implicit CEs
- */
- public ImplicitCEGenerator(int minPrimary, int maxPrimary) {
- // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
- this(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1);
- }
-
- /**
- * Set up to generate implicits.
- * @param minPrimary The minimum primary value.
- * @param maxPrimary The maximum primary value.
- * @param minTrail final byte
- * @param maxTrail final byte
- * @param gap3 the gap we leave for tailoring for 3-byte forms
- * @param primaries3count number of 3-byte primarys we can use (normally 1)
- */
- public ImplicitCEGenerator(int minPrimary, int maxPrimary, int minTrail, int maxTrail, int gap3, int primaries3count) {
- // some simple parameter checks
- if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) {
- throw new IllegalArgumentException("bad lead bytes");
- }
- if (minTrail < 0 || minTrail >= maxTrail || maxTrail > 0xFF) {
- throw new IllegalArgumentException("bad trail bytes");
- }
- if (primaries3count < 1) {
- throw new IllegalArgumentException("bad three-byte primaries");
- }
-
- this.minTrail = minTrail;
- this.maxTrail = maxTrail;
-
- min3Primary = minPrimary;
- max4Primary = maxPrimary;
- // compute constants for use later.
- // number of values we can use in trailing bytes
- // leave room for empty values between AND above, e.g. if gap = 2
- // range 3..7 => +3 -4 -5 -6 -7: so 1 value
- // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
- // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
- final3Multiplier = gap3 + 1;
- final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
- max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
-
- // medials can use full range
- medialCount = (maxTrail - minTrail + 1);
- // find out how many values fit in each form
- int threeByteCount = medialCount * final3Count;
- // now determine where the 3/4 boundary is.
- // we use 3 bytes below the boundary, and 4 above
- int primariesAvailable = maxPrimary - minPrimary + 1;
- int primaries4count = primariesAvailable - primaries3count;
-
- int min3ByteCoverage = primaries3count * threeByteCount;
- min4Primary = minPrimary + primaries3count;
- min4Boundary = min3ByteCoverage;
- // Now expand out the multiplier for the 4 bytes, and redo.
-
- int totalNeeded = MAX_INPUT - min4Boundary;
- int neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
- if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte);
-
- int neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
- if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte);
-
- int gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
- if (DEBUG) System.out.println("expandedGap: " + gap4);
- if (gap4 < 1) throw new IllegalArgumentException("must have larger gap4s");
-
- final4Multiplier = gap4 + 1;
- final4Count = neededPerFinalByte;
- max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
-
- if (primaries4count * medialCount * medialCount * final4Count < MAX_INPUT) {
- throw new IllegalArgumentException("internal error");
- }
- if (DEBUG) {
- System.out.println("final4Count: " + final4Count);
- for (int counter = 0; counter < final4Count; ++counter) {
- int value = minTrail + (1 + counter)*final4Multiplier;
- System.out.println(counter + "\t" + value + "\t" + Utility.hex(value));
- }
- }
- }
-
- static public int divideAndRoundUp(int a, int b) {
- return 1 + (a-1)/b;
- }
-
- /**
- * Converts implicit CE into raw integer
- * @param implicit The implicit value passed.
- * @return -1 if illegal format
- */
- public int getRawFromImplicit(int implicit) {
- int result;
- int b3 = implicit & 0xFF;
- implicit >>= 8;
- int b2 = implicit & 0xFF;
- implicit >>= 8;
- int b1 = implicit & 0xFF;
- implicit >>= 8;
- int b0 = implicit & 0xFF;
-
- // simple parameter checks
- if (b0 < min3Primary || b0 > max4Primary
- || b1 < minTrail || b1 > maxTrail) return -1;
- // normal offsets
- b1 -= minTrail;
-
- // take care of the final values, and compose
- if (b0 < min4Primary) {
- if (b2 < minTrail || b2 > max3Trail || b3 != 0) return -1;
- b2 -= minTrail;
- int remainder = b2 % final3Multiplier;
- if (remainder != 0) return -1;
- b0 -= min3Primary;
- b2 /= final3Multiplier;
- result = ((b0 * medialCount) + b1) * final3Count + b2;
- } else {
- if (b2 < minTrail || b2 > maxTrail
- || b3 < minTrail || b3 > max4Trail) return -1;
- b2 -= minTrail;
- b3 -= minTrail;
- int remainder = b3 % final4Multiplier;
- if (remainder != 0) return -1;
- b3 /= final4Multiplier;
- b0 -= min4Primary;
- result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
- }
- // final check
- if (result < 0 || result > MAX_INPUT) return -1;
- return result;
- }
-
- /**
- * Generate the implicit CE, from raw integer.
- * Left shifted to put the first byte at the top of an int.
- * @param cp code point
- * @return Primary implicit weight
- */
- public int getImplicitFromRaw(int cp) {
- if (cp < 0 || cp > MAX_INPUT) {
- throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
- }
- int last0 = cp - min4Boundary;
- if (last0 < 0) {
- int last1 = cp / final3Count;
- last0 = cp % final3Count;
-
- int last2 = last1 / medialCount;
- last1 %= medialCount;
-
- last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
- last1 = minTrail + last1; // offset
- last2 = min3Primary + last2; // offset
-
- if (last2 >= min4Primary) {
- throw new IllegalArgumentException("4-byte out of range: " +
- Utility.hex(cp) + ", " + Utility.hex(last2));
- }
-
- return (last2 << 24) + (last1 << 16) + (last0 << 8);
- } else {
- int last1 = last0 / final4Count;
- last0 %= final4Count;
-
- int last2 = last1 / medialCount;
- last1 %= medialCount;
-
- int last3 = last2 / medialCount;
- last2 %= medialCount;
-
- last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
- last1 = minTrail + last1; // offset
- last2 = minTrail + last2; // offset
- last3 = min4Primary + last3; // offset
-
- if (last3 > max4Primary) {
- throw new IllegalArgumentException("4-byte out of range: " +
- Utility.hex(cp) + ", " + Utility.hex(last3));
- }
-
- return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
- }
- }
-
- /**
- * Gets an Implicit from a code point. Internally,
- * swaps (which produces a raw value 0..220000,
- * then converts raw to implicit.
- * @param cp The code point to convert to implicit.
- * @return Primary implicit weight
- */
- public int getImplicitFromCodePoint(int cp) {
- if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
-
- // Produce Raw value
- // note, we add 1 so that the first value is always empty!!
- cp = ImplicitCEGenerator.swapCJK(cp) + 1;
- // we now have a range of numbers from 0 to 220000.
-
- if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
-
- return getImplicitFromRaw(cp);
- }
-
- /**
- * Function used to:
- * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
- * b) bump any non-CJK characters by 10FFFF.
- * The relevant blocks are:
- * A: 4E00..9FFF; CJK Unified Ideographs
- * F900..FAFF; CJK Compatibility Ideographs
- * B: 3400..4DBF; CJK Unified Ideographs Extension A
- * 20000..XX; CJK Unified Ideographs Extension B (and others later on)
- * As long as
- * no new B characters are allocated between 4E00 and FAFF, and
- * no new A characters are outside of this range,
- * (very high probability) this simple code will work.
- * The reordered blocks are:
- * Block1 is CJK
- * Block2 is CJK_COMPAT_USED
- * Block3 is CJK_A
- * (all contiguous)
- * Any other CJK gets its normal code point
- * Any non-CJK gets +10FFFF
- * When we reorder Block1, we make sure that it is at the very start,
- * so that it will use a 3-byte form.
- * Warning: the we only pick up the compatibility characters that are
- * NOT decomposed, so that block is smaller!
- */
-
- static int NON_CJK_OFFSET = 0x110000;
-
- public static int swapCJK(int i) {
-
- if (i >= CJK_BASE) {
- if (i < CJK_LIMIT) return i - CJK_BASE;
-
- if (i < CJK_COMPAT_USED_BASE) return i + NON_CJK_OFFSET;
-
- if (i < CJK_COMPAT_USED_LIMIT) return i - CJK_COMPAT_USED_BASE
- + (CJK_LIMIT - CJK_BASE);
- if (i < CJK_B_BASE) return i + NON_CJK_OFFSET;
-
- if (i < CJK_B_LIMIT) return i; // non-BMP-CJK
-
- if (i < CJK_C_BASE) return i + NON_CJK_OFFSET;
-
- if (i < CJK_C_LIMIT) return i; // non-BMP-CJK
-
- if (i < CJK_D_BASE) return i + NON_CJK_OFFSET;
-
- if (i < CJK_D_LIMIT) return i; // non-BMP-CJK
-
- return i + NON_CJK_OFFSET; // non-CJK
- }
- if (i < CJK_A_BASE) return i + NON_CJK_OFFSET;
-
- if (i < CJK_A_LIMIT) return i - CJK_A_BASE
- + (CJK_LIMIT - CJK_BASE)
- + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
- return i + NON_CJK_OFFSET; // non-CJK
- }
-
-
- /**
- * @return Minimal trail value
- */
- public int getMinTrail() {
- return minTrail;
- }
-
- /**
- * @return Maximal trail value
- */
- public int getMaxTrail() {
- return maxTrail;
- }
-
- public int getCodePointFromRaw(int i) {
- i--;
- int result = 0;
- if(i >= NON_CJK_OFFSET) {
- result = i - NON_CJK_OFFSET;
- } else if(i >= CJK_B_BASE) {
- result = i;
- } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
- // rest of CJKs, compacted
- if(i < CJK_LIMIT - CJK_BASE) {
- result = i + CJK_BASE;
- } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
- result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);
- } else {
- result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
- }
- } else {
- result = -1;
- }
- return result;
- }
-
- public int getRawFromCodePoint(int i) {
- return swapCJK(i)+1;
- }
-}
/**
*******************************************************************************
-* Copyright (C) 1996-2009, International Business Machines Corporation and *
-* others. All Rights Reserved. *
+* Copyright (C) 1996-2014, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
*/
-package com.ibm.icu.impl;
+package com.ibm.icu.impl.coll;
-import com.ibm.icu.text.UCharacterIterator;
+import com.ibm.icu.util.ByteArrayWrapper;
/**
- * <p>Binary Ordered Compression for Unicode</p>
+ * <p>Binary Ordered Compression Scheme for Unicode</p>
*
* <p>Users are strongly encouraged to read the ICU paper on
* <a href="http://www.icu-project.org/docs/papers/binary_ordered_compression_for_unicode.html">
* @author Syn Wee Quek
* @since release 2.2, May 3rd 2002
*/
-public class BOCU
+public class BOCSU
{
- // public constructors --------------------------------------------------
-
// public methods -------------------------------------------------------
-
+
/**
- * <p>Encode the code points of a string as a sequence of bytes,
- * preserving lexical order.</p>
- * <p>The minimum size of buffer required for the compression can be
- * preflighted by getCompressionLength(String).</p>
- * @param source text source
- * @param buffer output buffer
- * @param offset to start writing to
- * @return end offset where the writing stopped
- * @see #getCompressionLength(String)
- * @exception ArrayIndexOutOfBoundsException thrown if size of buffer is
- * too small for the output.
+ * Encode the code points of a string as
+ * a sequence of byte-encoded differences (slope detection),
+ * preserving lexical order.
+ *
+ * <p>Optimize the difference-taking for runs of Unicode text within
+ * small scripts:
+ *
+ * <p>Most small scripts are allocated within aligned 128-blocks of Unicode
+ * code points. Lexical order is preserved if "prev" is always moved
+ * into the middle of such a block.
+ *
+ * <p>Additionally, "prev" is moved from anywhere in the Unihan
+ * area into the middle of that area.
+ * Note that the identical-level run in a sort key is generated from
+ * NFD text - there are never Hangul characters included.
*/
- public static int compress(String source, byte buffer[], int offset)
- {
- int prev = 0;
- UCharacterIterator iterator = UCharacterIterator.getInstance(source);
- int codepoint = iterator.nextCodePoint();
- while (codepoint != UCharacterIterator.DONE) {
- if (prev < 0x4e00 || prev >= 0xa000) {
- prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
- }
- else {
- // Unihan U+4e00..U+9fa5:
- // double-bytes down from the upper end
- prev = 0x9fff - SLOPE_REACH_POS_2_;
+ public static int writeIdenticalLevelRun(int prev, CharSequence s, int i, int length, ByteArrayWrapper sink) {
+ while (i < length) {
+ // We must have capacity>=SLOPE_MAX_BYTES in case writeDiff() writes that much,
+ // but we do not want to force the sink to allocate
+ // for a large min_capacity because we might actually only write one byte.
+ ensureAppendCapacity(sink, 16, s.length() * 2);
+ byte[] buffer = sink.bytes;
+ int capacity = buffer.length;
+ int p = sink.size;
+ int lastSafe = capacity - SLOPE_MAX_BYTES_;
+ while (i < length && p <= lastSafe) {
+ if (prev < 0x4e00 || prev >= 0xa000) {
+ prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
+ } else {
+ // Unihan U+4e00..U+9fa5:
+ // double-bytes down from the upper end
+ prev = 0x9fff - SLOPE_REACH_POS_2_;
+ }
+
+ int c = Character.codePointAt(s, i);
+ i += Character.charCount(c);
+ if (c == 0xfffe) {
+ buffer[p++] = 2; // merge separator
+ prev = 0;
+ } else {
+ p = writeDiff(c - prev, buffer, p);
+ prev = c;
+ }
}
-
- offset = writeDiff(codepoint - prev, buffer, offset);
- prev = codepoint;
- codepoint = iterator.nextCodePoint();
+ sink.size = p;
}
- return offset;
+ return prev;
}
-
- /**
- * Return the number of bytes that compress() would write.
- * @param source text source string
- * @return the length of the BOCU result
- * @see #compress(String, byte[], int)
- */
- public static int getCompressionLength(String source)
- {
- int prev = 0;
- int result = 0;
- UCharacterIterator iterator = UCharacterIterator.getInstance(source);
- int codepoint = iterator.nextCodePoint();
- while (codepoint != UCharacterIterator.DONE) {
- if (prev < 0x4e00 || prev >= 0xa000) {
- prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
- }
- else {
- // Unihan U+4e00..U+9fa5:
- // double-bytes down from the upper end
- prev = 0x9fff - SLOPE_REACH_POS_2_;
- }
-
- codepoint = iterator.nextCodePoint();
- result += lengthOfDiff(codepoint - prev);
- prev = codepoint;
- }
- return result;
+
+ private static void ensureAppendCapacity(ByteArrayWrapper sink, int minCapacity, int desiredCapacity) {
+ int remainingCapacity = sink.bytes.length - sink.size;
+ if (remainingCapacity >= minCapacity) { return; }
+ if (desiredCapacity < minCapacity) { desiredCapacity = minCapacity; }
+ sink.ensureCapacity(sink.size + desiredCapacity);
}
- // public setter methods -------------------------------------------------
-
- // public getter methods ------------------------------------------------
-
- // public other methods -------------------------------------------------
-
- // protected constructor ------------------------------------------------
-
- // protected data members ------------------------------------------------
-
- // protected methods -----------------------------------------------------
-
// private data members --------------------------------------------------
-
+
/**
* Do not use byte values 0, 1, 2 because they are separators in sort keys.
*/
private static final int SLOPE_MAX_ = 0xff;
private static final int SLOPE_MIDDLE_ = 0x81;
private static final int SLOPE_TAIL_COUNT_ = SLOPE_MAX_ - SLOPE_MIN_ + 1;
- //private static final int SLOPE_MAX_BYTES_ = 4;
+ private static final int SLOPE_MAX_BYTES_ = 4;
/**
* Number of lead bytes:
* Constructor private to prevent initialization
*/
///CLOVER:OFF
- private BOCU()
+ private BOCSU()
{
}
///CLOVER:ON
}
/**
- * Encode one difference value -0x10ffff..+0x10ffff in 1..3 bytes,
+ * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
* preserving lexical order
* @param diff
* @param buffer byte buffer to append to
buffer[offset + 3] = (byte)(SLOPE_MIN_
+ diff % SLOPE_TAIL_COUNT_);
diff /= SLOPE_TAIL_COUNT_;
- buffer[offset] = (byte)(SLOPE_MIN_
+ buffer[offset + 2] = (byte)(SLOPE_MIN_
+ diff % SLOPE_TAIL_COUNT_);
diff /= SLOPE_TAIL_COUNT_;
buffer[offset + 1] = (byte)(SLOPE_MIN_
}
return offset;
}
-
- /**
- * How many bytes would writeDiff() write?
- * @param diff
- */
- private static final int lengthOfDiff(int diff)
- {
- if (diff >= SLOPE_REACH_NEG_1_) {
- if (diff <= SLOPE_REACH_POS_1_) {
- return 1;
- }
- else if (diff <= SLOPE_REACH_POS_2_) {
- return 2;
- }
- else if(diff <= SLOPE_REACH_POS_3_) {
- return 3;
- }
- else {
- return 4;
- }
- }
- else {
- if (diff >= SLOPE_REACH_NEG_2_) {
- return 2;
- }
- else if (diff >= SLOPE_REACH_NEG_3_) {
- return 3;
- }
- else {
- return 4;
- }
- }
- }
}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2010-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* Collation.java, ported from collation.h/.cpp
+*
+* C++ version created on: 2010oct27
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+/**
+ * Collation v2 basic definitions and static helper functions.
+ *
+ * Data structures except for expansion tables store 32-bit CEs which are
+ * either specials (see tags below) or are compact forms of 64-bit CEs.
+ */
+public final class Collation {
+ /** UChar32 U_SENTINEL.
+ * TODO: Create a common, public constant?
+ */
+ public static final int SENTINEL_CP = -1;
+
+ // ICU4C compare() API returns enum UCollationResult values (with UCOL_ prefix).
+ // ICU4J just returns int. We use these constants for ease of porting.
+ public static final int LESS = -1;
+ public static final int EQUAL = 0;
+ public static final int GREATER = 1;
+
+ // Special sort key bytes for all levels.
+ public static final int TERMINATOR_BYTE = 0;
+ public static final int LEVEL_SEPARATOR_BYTE = 1;
+ /**
+ * Merge-sort-key separator.
+ * Must not be used as the lead byte of any CE weight,
+ * nor as primary compression low terminator.
+ * Otherwise usable.
+ */
+ public static final int MERGE_SEPARATOR_BYTE = 2;
+ public static final long MERGE_SEPARATOR_PRIMARY = 0x02000000; // U+FFFE
+ static final int MERGE_SEPARATOR_WEIGHT16 = 0x0200; // U+FFFE
+ public static final int MERGE_SEPARATOR_LOWER32 = 0x02000200; // U+FFFE
+ static final int MERGE_SEPARATOR_CE32 = 0x02000202; // U+FFFE
+
+ /**
+ * Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE.
+ * Reserved value in primary second byte if the lead byte is compressible.
+ * Otherwise usable in all CE weight bytes.
+ */
+ public static final int PRIMARY_COMPRESSION_LOW_BYTE = 3;
+ /**
+ * Primary compression high terminator.
+ * Reserved value in primary second byte if the lead byte is compressible.
+ * Otherwise usable in all CE weight bytes.
+ */
+ public static final int PRIMARY_COMPRESSION_HIGH_BYTE = 0xff;
+
+ /** Default secondary/tertiary weight lead byte. */
+ static final int COMMON_BYTE = 5;
+ public static final int COMMON_WEIGHT16 = 0x0500;
+ /** Middle 16 bits of a CE with a common secondary weight. */
+ static final int COMMON_SECONDARY_CE = 0x05000000;
+ /** Lower 16 bits of a CE with a common tertiary weight. */
+ static final int COMMON_TERTIARY_CE = 0x0500;
+ /** Lower 32 bits of a CE with common secondary and tertiary weights. */
+ public static final int COMMON_SEC_AND_TER_CE = 0x05000500;
+
+ static final int SECONDARY_MASK = 0xffff0000;
+ public static final int CASE_MASK = 0xc000;
+ static final int SECONDARY_AND_CASE_MASK = SECONDARY_MASK | CASE_MASK;
+ /** Only the 2*6 bits for the pure tertiary weight. */
+ public static final int ONLY_TERTIARY_MASK = 0x3f3f;
+ /** Only the secondary & tertiary bits; no case, no quaternary. */
+ static final int ONLY_SEC_TER_MASK = SECONDARY_MASK | ONLY_TERTIARY_MASK;
+ /** Case bits and tertiary bits. */
+ static final int CASE_AND_TERTIARY_MASK = CASE_MASK | ONLY_TERTIARY_MASK;
+ public static final int QUATERNARY_MASK = 0xc0;
+ /** Case bits and quaternary bits. */
+ public static final int CASE_AND_QUATERNARY_MASK = CASE_MASK | QUATERNARY_MASK;
+
+ static final int UNASSIGNED_IMPLICIT_BYTE = 0xfe; // compressible
+ /**
+ * First unassigned: AlphabeticIndex overflow boundary.
+ * We want a 3-byte primary so that it fits into the root elements table.
+ *
+ * This 3-byte primary will not collide with
+ * any unassigned-implicit 4-byte primaries because
+ * the first few hundred Unicode code points all have real mappings.
+ */
+ static final long FIRST_UNASSIGNED_PRIMARY = 0xfe040200L;
+
+ static final int TRAIL_WEIGHT_BYTE = 0xff; // not compressible
+ static final long FIRST_TRAILING_PRIMARY = 0xff020200L; // [first trailing]
+ public static final long MAX_PRIMARY = 0xffff0000L; // U+FFFF
+ static final int MAX_REGULAR_CE32 = 0xffff0505; // U+FFFF
+
+ // CE32 value for U+FFFD as well as illegal UTF-8 byte sequences (which behave like U+FFFD).
+ // We use the third-highest primary weight for U+FFFD (as in UCA 6.3+).
+ public static final long FFFD_PRIMARY = MAX_PRIMARY - 0x20000;
+ static final int FFFD_CE32 = MAX_REGULAR_CE32 - 0x20000;
+
+ /**
+ * A CE32 is special if its low byte is this or greater.
+ * Impossible case bits 11 mark special CE32s.
+ * This value itself is used to indicate a fallback to the base collator.
+ */
+ static final int SPECIAL_CE32_LOW_BYTE = 0xc0;
+ static final int FALLBACK_CE32 = SPECIAL_CE32_LOW_BYTE;
+ /**
+ * Low byte of a long-primary special CE32.
+ */
+ static final int LONG_PRIMARY_CE32_LOW_BYTE = 0xc1; // SPECIAL_CE32_LOW_BYTE | LONG_PRIMARY_TAG
+
+ static final int UNASSIGNED_CE32 = 0xffffffff; // Compute an unassigned-implicit CE.
+
+ static final int NO_CE32 = 1;
+
+ /** No CE: End of input. Only used in runtime code, not stored in data. */
+ static final long NO_CE_PRIMARY = 1; // not a left-adjusted weight
+ static final int NO_CE_WEIGHT16 = 0x0100; // weight of LEVEL_SEPARATOR_BYTE
+ public static final long NO_CE = 0x101000100L; // NO_CE_PRIMARY, NO_CE_WEIGHT16, NO_CE_WEIGHT16
+
+ /** Sort key levels. */
+
+ /** Unspecified level. */
+ public static final int NO_LEVEL = 0;
+ public static final int PRIMARY_LEVEL = 1;
+ public static final int SECONDARY_LEVEL = 2;
+ public static final int CASE_LEVEL = 3;
+ public static final int TERTIARY_LEVEL = 4;
+ public static final int QUATERNARY_LEVEL = 5;
+ public static final int IDENTICAL_LEVEL = 6;
+ /** Beyond sort key bytes. */
+ public static final int ZERO_LEVEL = 7;
+
+ /**
+ * Sort key level flags: xx_FLAG = 1 << xx_LEVEL.
+ * In Java, use enum Level with flag() getters, or use EnumSet rather than hand-made bit sets.
+ */
+ static final int NO_LEVEL_FLAG = 1;
+ static final int PRIMARY_LEVEL_FLAG = 2;
+ static final int SECONDARY_LEVEL_FLAG = 4;
+ static final int CASE_LEVEL_FLAG = 8;
+ static final int TERTIARY_LEVEL_FLAG = 0x10;
+ static final int QUATERNARY_LEVEL_FLAG = 0x20;
+ static final int IDENTICAL_LEVEL_FLAG = 0x40;
+ static final int ZERO_LEVEL_FLAG = 0x80;
+
+ /**
+ * Special-CE32 tags, from bits 3..0 of a special 32-bit CE.
+ * Bits 31..8 are available for tag-specific data.
+ * Bits 5..4: Reserved. May be used in the future to indicate lccc!=0 and tccc!=0.
+ */
+
+ /**
+ * Fall back to the base collator.
+ * This is the tag value in SPECIAL_CE32_LOW_BYTE and FALLBACK_CE32.
+ * Bits 31..8: Unused, 0.
+ */
+ static final int FALLBACK_TAG = 0;
+ /**
+ * Long-primary CE with COMMON_SEC_AND_TER_CE.
+ * Bits 31..8: Three-byte primary.
+ */
+ static final int LONG_PRIMARY_TAG = 1;
+ /**
+ * Long-secondary CE with zero primary.
+ * Bits 31..16: Secondary weight.
+ * Bits 15.. 8: Tertiary weight.
+ */
+ static final int LONG_SECONDARY_TAG = 2;
+ /**
+ * Unused.
+ * May be used in the future for single-byte secondary CEs (SHORT_SECONDARY_TAG),
+ * storing the secondary in bits 31..24, the ccc in bits 23..16,
+ * and the tertiary in bits 15..8.
+ */
+ static final int RESERVED_TAG_3 = 3;
+ /**
+ * Latin mini expansions of two simple CEs [pp, 05, tt] [00, ss, 05].
+ * Bits 31..24: Single-byte primary weight pp of the first CE.
+ * Bits 23..16: Tertiary weight tt of the first CE.
+ * Bits 15.. 8: Secondary weight ss of the second CE.
+ */
+ static final int LATIN_EXPANSION_TAG = 4;
+ /**
+ * Points to one or more simple/long-primary/long-secondary 32-bit CE32s.
+ * Bits 31..13: Index into int table.
+ * Bits 12.. 8: Length=1..31.
+ */
+ static final int EXPANSION32_TAG = 5;
+ /**
+ * Points to one or more 64-bit CEs.
+ * Bits 31..13: Index into CE table.
+ * Bits 12.. 8: Length=1..31.
+ */
+ static final int EXPANSION_TAG = 6;
+ /**
+ * Builder data, used only in the CollationDataBuilder, not in runtime data.
+ *
+ * If bit 8 is 0: Builder context, points to a list of context-sensitive mappings.
+ * Bits 31..13: Index to the builder's list of ConditionalCE32 for this character.
+ * Bits 12.. 9: Unused, 0.
+ *
+ * If bit 8 is 1 (IS_BUILDER_JAMO_CE32): Builder-only jamoCE32 value.
+ * The builder fetches the Jamo CE32 from the trie.
+ * Bits 31..13: Jamo code point.
+ * Bits 12.. 9: Unused, 0.
+ */
+ static final int BUILDER_DATA_TAG = 7;
+ /**
+ * Points to prefix trie.
+ * Bits 31..13: Index into prefix/contraction data.
+ * Bits 12.. 8: Unused, 0.
+ */
+ static final int PREFIX_TAG = 8;
+ /**
+ * Points to contraction data.
+ * Bits 31..13: Index into prefix/contraction data.
+ * Bits 12..11: Unused, 0.
+ * Bit 10: CONTRACT_TRAILING_CCC flag.
+ * Bit 9: CONTRACT_NEXT_CCC flag.
+ * Bit 8: CONTRACT_SINGLE_CP_NO_MATCH flag.
+ */
+ static final int CONTRACTION_TAG = 9;
+ /**
+ * Decimal digit.
+ * Bits 31..13: Index into int table for non-numeric-collation CE32.
+ * Bit 12: Unused, 0.
+ * Bits 11.. 8: Digit value 0..9.
+ */
+ static final int DIGIT_TAG = 10;
+ /**
+ * Tag for U+0000, for moving the NUL-termination handling
+ * from the regular fastpath into specials-handling code.
+ * Bits 31..8: Unused, 0.
+ */
+ static final int U0000_TAG = 11;
+ /**
+ * Tag for a Hangul syllable.
+ * Bits 31..9: Unused, 0.
+ * Bit 8: HANGUL_NO_SPECIAL_JAMO flag.
+ */
+ static final int HANGUL_TAG = 12;
+ /**
+ * Tag for a lead surrogate code unit.
+ * Optional optimization for UTF-16 string processing.
+ * Bits 31..10: Unused, 0.
+     *              9.. 8: =0: All associated supplementary code points are unassigned-implicit.
+ * =1: All associated supplementary code points fall back to the base data.
+ * else: (Normally 2) Look up the data for the supplementary code point.
+ */
+ static final int LEAD_SURROGATE_TAG = 13;
+ /**
+ * Tag for CEs with primary weights in code point order.
+ * Bits 31..13: Index into CE table, for one data "CE".
+ * Bits 12.. 8: Unused, 0.
+ *
+ * This data "CE" has the following bit fields:
+ * Bits 63..32: Three-byte primary pppppp00.
+ * 31.. 8: Start/base code point of the in-order range.
+ * 7: Flag isCompressible primary.
+ * 6.. 0: Per-code point primary-weight increment.
+ */
+ static final int OFFSET_TAG = 14;
+ /**
+ * Implicit CE tag. Compute an unassigned-implicit CE.
+ * All bits are set (UNASSIGNED_CE32=0xffffffff).
+ */
+ static final int IMPLICIT_TAG = 15;
+
+ static boolean isAssignedCE32(int ce32) {
+ return ce32 != FALLBACK_CE32 && ce32 != UNASSIGNED_CE32;
+ }
+
+ /**
+ * We limit the number of CEs in an expansion
+ * so that we can use a small number of length bits in the data structure,
+ * and so that an implementation can copy CEs at runtime without growing a destination buffer.
+ */
+ static final int MAX_EXPANSION_LENGTH = 31;
+ static final int MAX_INDEX = 0x7ffff;
+
    /**
     * Set if there is no match for the single (no-suffix) character itself.
     * This is only possible if there is a prefix.
     * In this case, discontiguous contraction matching cannot add combining marks
     * starting from an empty suffix.
     * The default CE32 is used anyway if there is no suffix match.
     */
    static final int CONTRACT_SINGLE_CP_NO_MATCH = 0x100;
    /** Set if the first character of every contraction suffix has lccc!=0. */
    static final int CONTRACT_NEXT_CCC = 0x200;
    /** Set if any contraction suffix ends with lccc!=0. */
    static final int CONTRACT_TRAILING_CCC = 0x400;

    /** For HANGUL_TAG: None of its Jamo CE32s isSpecialCE32(). */
    static final int HANGUL_NO_SPECIAL_JAMO = 0x100;

    // Values for bits 9..8 of a LEAD_SURROGATE_TAG CE32 (see that tag's documentation).
    static final int LEAD_ALL_UNASSIGNED = 0;
    static final int LEAD_ALL_FALLBACK = 0x100;
    static final int LEAD_MIXED = 0x200;
    static final int LEAD_TYPE_MASK = 0x300;
+
+ static int makeLongPrimaryCE32(long p) { return (int)(p | LONG_PRIMARY_CE32_LOW_BYTE); }
+
+ /** Turns the long-primary CE32 into a primary weight pppppp00. */
+ static long primaryFromLongPrimaryCE32(int ce32) {
+ return (long)ce32 & 0xffffff00L;
+ }
    /** Expands a long-primary CE32 into a 64-bit CE: primary pppppp00 plus COMMON_SEC_AND_TER_CE. */
    static long ceFromLongPrimaryCE32(int ce32) {
        return ((long)(ce32 & 0xffffff00) << 32) | COMMON_SEC_AND_TER_CE;
    }
+
    /** Makes a long-secondary CE32 from the lower 32 CE bits (secondary+tertiary weights). */
    static int makeLongSecondaryCE32(int lower32) {
        return lower32 | SPECIAL_CE32_LOW_BYTE | LONG_SECONDARY_TAG;
    }
+ static long ceFromLongSecondaryCE32(int ce32) {
+ return (long)ce32 & 0xffffff00L;
+ }
+
    /**
     * Makes a special CE32 with tag, index and length.
     * Index goes into bits 31..13, length into bits 12..8 (see indexFromCE32()/lengthFromCE32()).
     */
    static int makeCE32FromTagIndexAndLength(int tag, int index, int length) {
        return (index << 13) | (length << 8) | SPECIAL_CE32_LOW_BYTE | tag;
    }
    /** Makes a special CE32 with only tag and index (bits 31..13); length bits stay 0. */
    static int makeCE32FromTagAndIndex(int tag, int index) {
        return (index << 13) | SPECIAL_CE32_LOW_BYTE | tag;
    }
+
    /** Returns true if ce32 is special: its low byte is at least SPECIAL_CE32_LOW_BYTE. */
    static boolean isSpecialCE32(int ce32) {
        return (ce32 & 0xff) >= SPECIAL_CE32_LOW_BYTE;
    }
+
+ static int tagFromCE32(int ce32) {
+ return ce32 & 0xf;
+ }
+
    /** Returns true if ce32 is special and carries the given tag. */
    static boolean hasCE32Tag(int ce32, int tag) {
        return isSpecialCE32(ce32) && tagFromCE32(ce32) == tag;
    }
+
    /** Returns true if ce32 is a long-primary special. */
    static boolean isLongPrimaryCE32(int ce32) {
        return hasCE32Tag(ce32, LONG_PRIMARY_TAG);
    }
+
+ static boolean isSimpleOrLongCE32(int ce32) {
+ return !isSpecialCE32(ce32) ||
+ tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
+ tagFromCE32(ce32) == LONG_SECONDARY_TAG;
+ }
+
+ /**
+ * @return true if the ce32 yields one or more CEs without further data lookups
+ */
+ static boolean isSelfContainedCE32(int ce32) {
+ return !isSpecialCE32(ce32) ||
+ tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
+ tagFromCE32(ce32) == LONG_SECONDARY_TAG ||
+ tagFromCE32(ce32) == LATIN_EXPANSION_TAG;
+ }
+
    /** Returns true if ce32 is a prefix (pre-context) special. */
    static boolean isPrefixCE32(int ce32) {
        return hasCE32Tag(ce32, PREFIX_TAG);
    }
+
    /** Returns true if ce32 is a contraction special. */
    static boolean isContractionCE32(int ce32) {
        return hasCE32Tag(ce32, CONTRACTION_TAG);
    }
+
+ static boolean ce32HasContext(int ce32) {
+ return isSpecialCE32(ce32) &&
+ (tagFromCE32(ce32) == PREFIX_TAG ||
+ tagFromCE32(ce32) == CONTRACTION_TAG);
+ }
+
    /**
     * Get the first of the two Latin-expansion CEs encoded in ce32.
     * Primary pp000000 from ce32 bits 31..24, common secondary,
     * tertiary tt00 from ce32 bits 23..16.
     * @see LATIN_EXPANSION_TAG
     */
    static long latinCE0FromCE32(int ce32) {
        return ((long)(ce32 & 0xff000000) << 32) | COMMON_SECONDARY_CE | ((ce32 & 0xff0000) >> 8);
    }
+
    /**
     * Get the second of the two Latin-expansion CEs encoded in ce32.
     * Zero primary, secondary ss00 from ce32 bits 15..8, common tertiary.
     * @see LATIN_EXPANSION_TAG
     */
    static long latinCE1FromCE32(int ce32) {
        return (((long)ce32 & 0xff00) << 16) | COMMON_TERTIARY_CE;
    }
+
+ /**
+ * Returns the data index from a special CE32.
+ */
+ static int indexFromCE32(int ce32) {
+ return ce32 >>> 13;
+ }
+
+ /**
+ * Returns the data length from a ce32.
+ */
+ static int lengthFromCE32(int ce32) {
+ return (ce32 >> 8) & 31;
+ }
+
+ /**
+ * Returns the digit value from a DIGIT_TAG ce32.
+ */
+ static char digitFromCE32(int ce32) {
+ return (char)((ce32 >> 8) & 0xf);
+ }
+
    /** Returns a 64-bit CE from a simple CE32 (not special). */
    static long ceFromSimpleCE32(int ce32) {
        // normal form ppppsstt -> pppp0000ss00tt00
        assert (ce32 & 0xff) < SPECIAL_CE32_LOW_BYTE;
        return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | ((ce32 & 0xff) << 8);
    }
+
    /** Returns a 64-bit CE from a simple/long-primary/long-secondary CE32. */
    static long ceFromCE32(int ce32) {
        int tertiary = ce32 & 0xff;
        if(tertiary < SPECIAL_CE32_LOW_BYTE) {
            // normal form ppppsstt -> pppp0000ss00tt00
            return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (tertiary << 8);
        } else {
            // Remove the special low byte; only the tag bits matter below.
            ce32 -= tertiary;
            if((tertiary & 0xf) == LONG_PRIMARY_TAG) {
                // long-primary form ppppppC1 -> primary pppppp00 + COMMON_SEC_AND_TER_CE
                return ((long)ce32 << 32) | COMMON_SEC_AND_TER_CE;
            } else {
                // long-secondary form ssssttC2 -> 00000000sssstt00
                assert (tertiary & 0xf) == LONG_SECONDARY_TAG;
                return ce32 & 0xffffffffL;
            }
        }
    }
+
    /** Creates a CE from a primary weight, with common secondary/tertiary weights. */
    public static long makeCE(long p) {
        return (p << 32) | COMMON_SEC_AND_TER_CE;
    }
+ /**
+ * Creates a CE from a primary weight,
+ * 16-bit secondary/tertiary weights, and a 2-bit quaternary.
+ */
+ static long makeCE(long p, int s, int t, int q) {
+ return (p << 32) | ((long)s << 16) | t | (q << 6);
+ }
+
+ /**
+ * Increments a 2-byte primary by a code point offset.
+ */
+ public static long incTwoBytePrimaryByOffset(long basePrimary, boolean isCompressible,
+ int offset) {
+ // Extract the second byte, minus the minimum byte value,
+ // plus the offset, modulo the number of usable byte values, plus the minimum.
+ // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
+ long primary;
+ if(isCompressible) {
+ offset += ((int)(basePrimary >> 16) & 0xff) - 4;
+ primary = ((offset % 251) + 4) << 16;
+ offset /= 251;
+ } else {
+ offset += ((int)(basePrimary >> 16) & 0xff) - 2;
+ primary = ((offset % 254) + 2) << 16;
+ offset /= 254;
+ }
+ // First byte, assume no further overflow.
+ return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24));
+ }
+
+ /**
+ * Increments a 3-byte primary by a code point offset.
+ */
+ public static long incThreeBytePrimaryByOffset(long basePrimary, boolean isCompressible,
+ int offset) {
+ // Extract the third byte, minus the minimum byte value,
+ // plus the offset, modulo the number of usable byte values, plus the minimum.
+ offset += ((int)(basePrimary >> 8) & 0xff) - 2;
+ long primary = ((offset % 254) + 2) << 8;
+ offset /= 254;
+ // Same with the second byte,
+ // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
+ if(isCompressible) {
+ offset += ((int)(basePrimary >> 16) & 0xff) - 4;
+ primary |= ((offset % 251) + 4) << 16;
+ offset /= 251;
+ } else {
+ offset += ((int)(basePrimary >> 16) & 0xff) - 2;
+ primary |= ((offset % 254) + 2) << 16;
+ offset /= 254;
+ }
+ // First byte, assume no further overflow.
+ return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24));
+ }
+
+ /**
+ * Decrements a 2-byte primary by one range step (1..0x7f).
+ */
+ static long decTwoBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) {
+ // Extract the second byte, minus the minimum byte value,
+ // minus the step, modulo the number of usable byte values, plus the minimum.
+ // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
+ // Assume no further underflow for the first byte.
+ assert(0 < step && step <= 0x7f);
+ int byte2 = ((int)(basePrimary >> 16) & 0xff) - step;
+ if(isCompressible) {
+ if(byte2 < 4) {
+ byte2 += 251;
+ basePrimary -= 0x1000000;
+ }
+ } else {
+ if(byte2 < 2) {
+ byte2 += 254;
+ basePrimary -= 0x1000000;
+ }
+ }
+ return (basePrimary & 0xff000000L) | (byte2 << 16);
+ }
+
+ /**
+ * Decrements a 3-byte primary by one range step (1..0x7f).
+ */
+ static long decThreeBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) {
+ // Extract the third byte, minus the minimum byte value,
+ // minus the step, modulo the number of usable byte values, plus the minimum.
+ assert(0 < step && step <= 0x7f);
+ int byte3 = ((int)(basePrimary >> 8) & 0xff) - step;
+ if(byte3 >= 2) {
+ return (basePrimary & 0xffff0000L) | (byte3 << 8);
+ }
+ byte3 += 254;
+ // Same with the second byte,
+ // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
+ int byte2 = ((int)(basePrimary >> 16) & 0xff) - 1;
+ if(isCompressible) {
+ if(byte2 < 4) {
+ byte2 = 0xfe;
+ basePrimary -= 0x1000000;
+ }
+ } else {
+ if(byte2 < 2) {
+ byte2 = 0xff;
+ basePrimary -= 0x1000000;
+ }
+ }
+ // First byte, assume no further underflow.
+ return (basePrimary & 0xff000000L) | (byte2 << 16) | (byte3 << 8);
+ }
+
    /**
     * Computes a 3-byte primary for c's OFFSET_TAG data "CE".
     * See OFFSET_TAG for the dataCE bit layout.
     */
    static long getThreeBytePrimaryForOffsetData(int c, long dataCE) {
        long p = dataCE >>> 32;  // three-byte primary pppppp00
        int lower32 = (int)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
        int offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
        boolean isCompressible = (lower32 & 0x80) != 0;
        return Collation.incThreeBytePrimaryByOffset(p, isCompressible, offset);
    }
+
    /**
     * Returns the unassigned-character implicit primary weight for any valid code point c.
     * The result is a four-byte primary under the single UNASSIGNED_IMPLICIT_BYTE lead byte,
     * strictly increasing with c.
     */
    static long unassignedPrimaryFromCodePoint(int c) {
        // Create a gap before U+0000. Use c=-1 for [first unassigned].
        ++c;
        // Fourth byte: 18 values, every 14th byte value (gap of 13).
        long primary = 2 + (c % 18) * 14;
        c /= 18;
        // Third byte: 254 values.
        primary |= (2 + (c % 254)) << 8;
        c /= 254;
        // Second byte: 251 values 04..FE excluding the primary compression bytes.
        primary |= (4 + (c % 251)) << 16;
        // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
        return primary | ((long)UNASSIGNED_IMPLICIT_BYTE << 24);
    }
+
    /** Returns the unassigned-implicit CE for c: implicit primary plus common secondary/tertiary. */
    static long unassignedCEFromCodePoint(int c) {
        return makeCE(unassignedPrimaryFromCodePoint(c));
    }
+
+ static long reorder(byte[] reorderTable, long primary) {
+ return ((reorderTable[(int)primary >>> 24] & 0xffL) << 24) | (primary & 0xffffff);
+ }
+
+ // private Collation() // No instantiation.
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationBuilder.java, ported from collationbuilder.h/.cpp
+*
+* C++ version created on: 2013may06
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.text.ParseException;
+
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.Normalizer2Impl;
+import com.ibm.icu.impl.Normalizer2Impl.Hangul;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.CanonicalIterator;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.Normalizer2;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.VersionInfo;
+
+public final class CollationBuilder extends CollationRuleParser.Sink {
+ private static final boolean DEBUG = false;
    /**
     * Resolves [import] statements in rule strings
     * by loading the referenced rules from collation resource bundles.
     */
    private static final class BundleImporter implements CollationRuleParser.Importer {
        BundleImporter() {}
        public String getRules(String localeID, String collationType) {
            return CollationLoader.loadRules(new ULocale(localeID), collationType);
        }
    }
+
    /**
     * Creates a builder that tailors on top of the given base tailoring
     * (whose data must include root elements; see parseAndBuild()).
     */
    public CollationBuilder(CollationTailoring b) {
        nfd = Normalizer2.getNFDInstance();
        fcd = Norm2AllModes.getFCDNormalizer2();
        nfcImpl = Norm2AllModes.getNFCInstance().impl;
        base = b;
        baseData = b.data;
        rootElements = new CollationRootElements(b.data.rootElements);
        variableTop = 0;
        dataBuilder = new CollationDataBuilder();
        fastLatinEnabled = true;
        cesLength = 0;
        rootPrimaryIndexes = new UVector32();
        nodes = new UVector64();
        // Ensure the NFC canonical-iterator data is loaded before closure operations.
        nfcImpl.ensureCanonIterData();
        dataBuilder.initForTailoring(baseData);
    }
+
    /**
     * Parses the rule string and builds the resulting tailoring.
     *
     * @param ruleString collation rules in LDML/ICU syntax
     * @return the built CollationTailoring (shares the base data if the rules add no mappings)
     * @throws ParseException on rule syntax errors
     * @throws UnsupportedOperationException if the base data has no root elements
     */
    public CollationTailoring parseAndBuild(String ruleString) throws ParseException {
        if(baseData.rootElements == null) {
            // C++ U_MISSING_RESOURCE_ERROR
            throw new UnsupportedOperationException(
                    "missing root elements data, tailoring not supported");
        }
        CollationTailoring tailoring = new CollationTailoring(base.settings);
        CollationRuleParser parser = new CollationRuleParser(baseData);
        // Note: This always bases &[last variable] and &[first regular]
        // on the root collator's maxVariable/variableTop.
        // If we wanted this to change after [maxVariable x], then we would keep
        // the tailoring.settings pointer here and read its variableTop when we need it.
        // See http://unicode.org/cldr/trac/ticket/6070
        variableTop = base.settings.readOnly().variableTop;
        parser.setSink(this);
        // In Java, there is only one Importer implementation.
        // In C++, the importer is a parameter for this method.
        parser.setImporter(new BundleImporter());
        parser.parse(ruleString, tailoring.settings.copyOnWrite());
        if(dataBuilder.hasMappings()) {
            makeTailoredCEs();
            closeOverComposites();
            finalizeCEs();
            // Copy all of ASCII, and Latin-1 letters, into each tailoring.
            optimizeSet.add(0, 0x7f);
            optimizeSet.add(0xc0, 0xff);
            // Hangul is decomposed on the fly during collation,
            // and the tailoring data is always built with HANGUL_TAG specials.
            optimizeSet.remove(Hangul.HANGUL_BASE, Hangul.HANGUL_END);
            dataBuilder.optimize(optimizeSet);
            tailoring.ensureOwnedData();
            if(fastLatinEnabled) { dataBuilder.enableFastLatin(); }
            dataBuilder.build(tailoring.ownedData);
            // C++ tailoring.builder = dataBuilder;
            dataBuilder = null;
        } else {
            // No tailored mappings: reuse the base data as-is.
            tailoring.data = baseData;
        }
        tailoring.rules = ruleString;
        // In Java, we do not have a rules version.
        // In C++, the genrb build tool reads and supplies one,
        // and the rulesVersion is a parameter for this method.
        VersionInfo rulesVersion = VersionInfo.getInstance(0, 0, 0, 0);
        tailoring.setVersion(base.version, rulesVersion);
        return tailoring;
    }
+
    /**
     * Implements CollationRuleParser.Sink.
     * Handles a reset (&amp;position or &amp;[before strength]position):
     * sets ces/cesLength to the reset position's CEs and, for a before-reset,
     * replaces the last CE with a temporary CE pointing into the node graph.
     */
    @Override
    void addReset(int strength, CharSequence str) {
        assert(str.length() != 0);
        if(str.charAt(0) == CollationRuleParser.POS_LEAD) {
            // Special position like [first regular], encoded by the parser.
            ces[0] = getSpecialResetPosition(str);
            cesLength = 1;
            assert((ces[0] & Collation.CASE_AND_QUATERNARY_MASK) == 0);
        } else {
            // normal reset to a character or string
            String nfdString = nfd.normalize(str);
            cesLength = dataBuilder.getCEs(nfdString, ces, 0);
            if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
                throw new IllegalArgumentException(
                        "reset position maps to too many collation elements (more than 31)");
            }
        }
        if(strength == Collator.IDENTICAL) { return; }  // simple reset-at-position

        // &[before strength]position
        assert(Collator.PRIMARY <= strength && strength <= Collator.TERTIARY);
        int index = findOrInsertNodeForCEs(strength);

        long node = nodes.elementAti(index);
        // If the index is for a "weaker" tailored node,
        // then skip backwards over this and further "weaker" nodes.
        while(strengthFromNode(node) > strength) {
            index = previousIndexFromNode(node);
            node = nodes.elementAti(index);
        }

        // Find or insert a node whose index we will put into a temporary CE.
        if(strengthFromNode(node) == strength && isTailoredNode(node)) {
            // Reset to just before this same-strength tailored node.
            index = previousIndexFromNode(node);
        } else if(strength == Collator.PRIMARY) {
            // root primary node (has no previous index)
            long p = weight32FromNode(node);
            if(p == 0) {
                throw new UnsupportedOperationException(
                        "reset primary-before ignorable not possible");
            }
            if(p <= rootElements.getFirstPrimary()) {
                // There is no primary gap between ignorables and the space-first-primary.
                throw new UnsupportedOperationException(
                        "reset primary-before first non-ignorable not supported");
            }
            if(p == Collation.FIRST_TRAILING_PRIMARY) {
                // We do not support tailoring to an unassigned-implicit CE.
                throw new UnsupportedOperationException(
                        "reset primary-before [first trailing] not supported");
            }
            p = rootElements.getPrimaryBefore(p, baseData.isCompressiblePrimary(p));
            index = findOrInsertNodeForPrimary(p);
            // Go to the last node in this list:
            // Tailor after the last node between adjacent root nodes.
            for(;;) {
                node = nodes.elementAti(index);
                int nextIndex = nextIndexFromNode(node);
                if(nextIndex == 0) { break; }
                index = nextIndex;
            }
        } else {
            // &[before 2] or &[before 3]
            index = findCommonNode(index, Collator.SECONDARY);
            if(strength >= Collator.TERTIARY) {
                index = findCommonNode(index, Collator.TERTIARY);
            }
            node = nodes.elementAti(index);
            if(strengthFromNode(node) == strength) {
                // Found a same-strength node with an explicit weight.
                int weight16 = weight16FromNode(node);
                if(weight16 == 0) {
                    throw new UnsupportedOperationException(
                            (strength == Collator.SECONDARY) ?
                                    "reset secondary-before secondary ignorable not possible" :
                                    "reset tertiary-before completely ignorable not possible");
                }
                assert(weight16 >= Collation.COMMON_WEIGHT16);
                int previousIndex = previousIndexFromNode(node);
                if(weight16 == Collation.COMMON_WEIGHT16) {
                    // Reset to just before this same-strength common-weight node.
                    index = previousIndex;
                } else {
                    // A non-common weight is only possible from a root CE.
                    // Find the higher-level weights, which must all be explicit,
                    // and then find the preceding weight for this level.
                    long previousWeight16 = 0;
                    int previousWeightIndex = -1;
                    int i = index;
                    if(strength == Collator.SECONDARY) {
                        long p;
                        // Walk back to the primary node, remembering the nearest
                        // preceding root secondary weight on the way.
                        do {
                            i = previousIndexFromNode(node);
                            node = nodes.elementAti(i);
                            if(strengthFromNode(node) == Collator.SECONDARY && !isTailoredNode(node) &&
                                    previousWeightIndex < 0) {
                                previousWeightIndex = i;
                                previousWeight16 = weight16FromNode(node);
                            }
                        } while(strengthFromNode(node) > Collator.PRIMARY);
                        assert(!isTailoredNode(node));
                        p = weight32FromNode(node);
                        weight16 = rootElements.getSecondaryBefore(p, weight16);
                    } else {
                        long p;
                        int s;
                        // Walk back to the secondary (or primary) node, remembering
                        // the nearest preceding root tertiary weight on the way.
                        do {
                            i = previousIndexFromNode(node);
                            node = nodes.elementAti(i);
                            if(strengthFromNode(node) == Collator.TERTIARY && !isTailoredNode(node) &&
                                    previousWeightIndex < 0) {
                                previousWeightIndex = i;
                                previousWeight16 = weight16FromNode(node);
                            }
                        } while(strengthFromNode(node) > Collator.SECONDARY);
                        assert(!isTailoredNode(node));
                        if(strengthFromNode(node) == Collator.SECONDARY) {
                            s = weight16FromNode(node);
                            do {
                                i = previousIndexFromNode(node);
                                node = nodes.elementAti(i);
                            } while(strengthFromNode(node) > Collator.PRIMARY);
                            assert(!isTailoredNode(node));
                        } else {
                            assert(!nodeHasBefore2(node));
                            s = Collation.COMMON_WEIGHT16;
                        }
                        p = weight32FromNode(node);
                        weight16 = rootElements.getTertiaryBefore(p, s, weight16);
                        assert((weight16 & ~Collation.ONLY_TERTIARY_MASK) == 0);
                    }
                    // Find or insert the new explicit weight before the current one.
                    if(previousWeightIndex >= 0 && weight16 == previousWeight16) {
                        // Tailor after the last node between adjacent root nodes.
                        index = previousIndex;
                    } else {
                        node = nodeFromWeight16(weight16) | nodeFromStrength(strength);
                        index = insertNodeBetween(previousIndex, index, node);
                    }
                }
            } else {
                // Found a stronger node with implied strength-common weight.
                long hasBefore3 = 0;
                if(strength == Collator.SECONDARY) {
                    assert(!nodeHasBefore2(node));
                    // Move the HAS_BEFORE3 flag from the parent node
                    // to the new secondary common node.
                    hasBefore3 = node & HAS_BEFORE3;
                    node = (node & ~(long)HAS_BEFORE3) | HAS_BEFORE2;
                } else {
                    assert(!nodeHasBefore3(node));
                    node |= HAS_BEFORE3;
                }
                nodes.setElementAt(node, index);
                int nextIndex = nextIndexFromNode(node);
                // Insert default nodes with weights 02 and 05, reset to the 02 node.
                node = nodeFromWeight16(BEFORE_WEIGHT16) | nodeFromStrength(strength);
                index = insertNodeBetween(index, nextIndex, node);
                node = nodeFromWeight16(Collation.COMMON_WEIGHT16) | hasBefore3 |
                        nodeFromStrength(strength);
                insertNodeBetween(index, nextIndex, node);
            }
            // Strength of the temporary CE = strength of its reset position.
            // Code above raises an error if the before-strength is stronger.
            strength = ceStrength(ces[cesLength - 1]);
        }
        ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength);
    }
+
    /**
     * Returns the CE for a special reset position such as [first regular],
     * taking into account nodes already tailored around that position.
     * str is the parser's two-char encoding: POS_LEAD + position index.
     */
    private long getSpecialResetPosition(CharSequence str) {
        assert(str.length() == 2);
        long ce;
        int strength = Collator.PRIMARY;
        boolean isBoundary = false;
        CollationRuleParser.Position pos =
                CollationRuleParser.POSITION_VALUES[str.charAt(1) - CollationRuleParser.POS_BASE];
        switch(pos) {
        case FIRST_TERTIARY_IGNORABLE:
            // Quaternary CEs are not supported.
            // Non-zero quaternary weights are possible only on tertiary or stronger CEs.
            return 0;
        case LAST_TERTIARY_IGNORABLE:
            return 0;
        case FIRST_SECONDARY_IGNORABLE: {
            // Look for a tailored tertiary node after [0, 0, 0].
            int index = findOrInsertNodeForRootCE(0, Collator.TERTIARY);
            long node = nodes.elementAti(index);
            if((index = nextIndexFromNode(node)) != 0) {
                node = nodes.elementAti(index);
                assert(strengthFromNode(node) <= Collator.TERTIARY);
                if(isTailoredNode(node) && strengthFromNode(node) == Collator.TERTIARY) {
                    return tempCEFromIndexAndStrength(index, Collator.TERTIARY);
                }
            }
            return rootElements.getFirstTertiaryCE();
            // No need to look for nodeHasAnyBefore() on a tertiary node.
        }
        case LAST_SECONDARY_IGNORABLE:
            ce = rootElements.getLastTertiaryCE();
            strength = Collator.TERTIARY;
            break;
        case FIRST_PRIMARY_IGNORABLE: {
            // Look for a tailored secondary node after [0, 0, *].
            int index = findOrInsertNodeForRootCE(0, Collator.SECONDARY);
            long node = nodes.elementAti(index);
            while((index = nextIndexFromNode(node)) != 0) {
                node = nodes.elementAti(index);
                strength = strengthFromNode(node);
                if(strength < Collator.SECONDARY) { break; }
                if(strength == Collator.SECONDARY) {
                    if(isTailoredNode(node)) {
                        if(nodeHasBefore3(node)) {
                            // Skip the before-tertiary pair to the first real tailored node.
                            index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
                            assert(isTailoredNode(nodes.elementAti(index)));
                        }
                        return tempCEFromIndexAndStrength(index, Collator.SECONDARY);
                    } else {
                        break;
                    }
                }
            }
            ce = rootElements.getFirstSecondaryCE();
            strength = Collator.SECONDARY;
            break;
        }
        case LAST_PRIMARY_IGNORABLE:
            ce = rootElements.getLastSecondaryCE();
            strength = Collator.SECONDARY;
            break;
        case FIRST_VARIABLE:
            ce = rootElements.getFirstPrimaryCE();
            isBoundary = true;  // FractionalUCA.txt: FDD1 00A0, SPACE first primary
            break;
        case LAST_VARIABLE:
            ce = rootElements.lastCEWithPrimaryBefore(variableTop + 1);
            break;
        case FIRST_REGULAR:
            ce = rootElements.firstCEWithPrimaryAtLeast(variableTop + 1);
            isBoundary = true;  // FractionalUCA.txt: FDD1 263A, SYMBOL first primary
            break;
        case LAST_REGULAR:
            // Use the Hani-first-primary rather than the actual last "regular" CE before it,
            // for backward compatibility with behavior before the introduction of
            // script-first-primary CEs in the root collator.
            ce = rootElements.firstCEWithPrimaryAtLeast(
                    baseData.getFirstPrimaryForGroup(UScript.HAN));
            break;
        case FIRST_IMPLICIT: {
            int ce32 = baseData.getCE32(0x4e00);
            assert(Collation.hasCE32Tag(ce32, Collation.OFFSET_TAG));
            ce = baseData.getCEFromOffsetCE32(0x4e00, ce32);
            break;
        }
        case LAST_IMPLICIT:
            // We do not support tailoring to an unassigned-implicit CE.
            throw new UnsupportedOperationException(
                    "reset to [last implicit] not supported");
        case FIRST_TRAILING:
            ce = Collation.makeCE(Collation.FIRST_TRAILING_PRIMARY);
            isBoundary = true;  // trailing first primary (there is no mapping for it)
            break;
        case LAST_TRAILING:
            throw new IllegalArgumentException("LDML forbids tailoring to U+FFFF");
        default:
            assert(false);
            return 0;
        }

        int index = findOrInsertNodeForRootCE(ce, strength);
        long node = nodes.elementAti(index);
        if((pos.ordinal() & 1) == 0) {
            // even pos = [first xyz]
            if(!nodeHasAnyBefore(node) && isBoundary) {
                // A <group> first primary boundary is artificially added to FractionalUCA.txt.
                // It is reachable via its special contraction, but is not normally used.
                // Find the first character tailored after the boundary CE,
                // or the first real root CE after it.
                if((index = nextIndexFromNode(node)) != 0) {
                    // If there is a following node, then it must be tailored
                    // because there are no root CEs with a boundary primary
                    // and non-common secondary/tertiary weights.
                    node = nodes.elementAti(index);
                    assert(isTailoredNode(node));
                    ce = tempCEFromIndexAndStrength(index, strength);
                } else {
                    assert(strength == Collator.PRIMARY);
                    long p = ce >>> 32;
                    int pIndex = rootElements.findPrimary(p);
                    boolean isCompressible = baseData.isCompressiblePrimary(p);
                    p = rootElements.getPrimaryAfter(p, pIndex, isCompressible);
                    ce = Collation.makeCE(p);
                    index = findOrInsertNodeForRootCE(ce, Collator.PRIMARY);
                    node = nodes.elementAti(index);
                }
            }
            if(nodeHasAnyBefore(node)) {
                // Get the first node that was tailored before this one at a weaker strength.
                if(nodeHasBefore2(node)) {
                    index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
                    node = nodes.elementAti(index);
                }
                if(nodeHasBefore3(node)) {
                    index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
                }
                assert(isTailoredNode(nodes.elementAti(index)));
                ce = tempCEFromIndexAndStrength(index, strength);
            }
        } else {
            // odd pos = [last xyz]
            // Find the last node that was tailored after the [last xyz]
            // at a strength no greater than the position's strength.
            for(;;) {
                int nextIndex = nextIndexFromNode(node);
                if(nextIndex == 0) { break; }
                long nextNode = nodes.elementAti(nextIndex);
                if(strengthFromNode(nextNode) < strength) { break; }
                index = nextIndex;
                node = nextNode;
            }
            // Do not make a temporary CE for a root node.
            // This last node might be the node for the root CE itself,
            // or a node with a common secondary or tertiary weight.
            if(isTailoredNode(node)) {
                ce = tempCEFromIndexAndStrength(index, strength);
            }
        }
        return ce;
    }
+
    /**
     * Implements CollationRuleParser.Sink.
     * Adds a relation (&lt;, &lt;&lt;, &lt;&lt;&lt;, =) from the current reset/relation position
     * to prefix|str / extension, inserting a tailored node and new mappings.
     */
    // Java 6: @Override
    // TODO(review): addReset() above carries @Override; consider adding it here too for consistency.
    void addRelation(int strength, CharSequence prefix, CharSequence str, CharSequence extension) {
        String nfdPrefix;
        if(prefix.length() == 0) {
            nfdPrefix = "";
        } else {
            nfdPrefix = nfd.normalize(prefix);
        }
        String nfdString = nfd.normalize(str);

        // The runtime code decomposes Hangul syllables on the fly,
        // with recursive processing but without making the Jamo pieces visible for matching.
        // It does not work with certain types of contextual mappings.
        int nfdLength = nfdString.length();
        if(nfdLength >= 2) {
            char c = nfdString.charAt(0);
            if(Hangul.isJamoL(c) || Hangul.isJamoV(c)) {
                // While handling a Hangul syllable, contractions starting with Jamo L or V
                // would not see the following Jamo of that syllable.
                throw new UnsupportedOperationException(
                        "contractions starting with conjoining Jamo L or V not supported");
            }
            c = nfdString.charAt(nfdLength - 1);
            if(Hangul.isJamoL(c) ||
                    (Hangul.isJamoV(c) && Hangul.isJamoL(nfdString.charAt(nfdLength - 2)))) {
                // A contraction ending with Jamo L or L+V would require
                // generating Hangul syllables in addTailComposites() (588 for a Jamo L),
                // or decomposing a following Hangul syllable on the fly, during contraction matching.
                throw new UnsupportedOperationException(
                        "contractions ending with conjoining Jamo L or L+V not supported");
            }
            // A Hangul syllable completely inside a contraction is ok.
        }
        // Note: If there is a prefix, then the parser checked that
        // both the prefix and the string begin with NFC boundaries (not Jamo V or T).
        // Therefore: prefix.isEmpty() || !isJamoVOrT(nfdString.charAt(0))
        // (While handling a Hangul syllable, prefixes on Jamo V or T
        // would not see the previous Jamo of that syllable.)

        if(strength != Collator.IDENTICAL) {
            // Find the node index after which we insert the new tailored node.
            int index = findOrInsertNodeForCEs(strength);
            assert(cesLength > 0);
            long ce = ces[cesLength - 1];
            if(strength == Collator.PRIMARY && !isTempCE(ce) && (ce >>> 32) == 0) {
                // There is no primary gap between ignorables and the space-first-primary.
                throw new UnsupportedOperationException(
                        "tailoring primary after ignorables not supported");
            }
            if(strength == Collator.QUATERNARY && ce == 0) {
                // The CE data structure does not support non-zero quaternary weights
                // on tertiary ignorables.
                throw new UnsupportedOperationException(
                        "tailoring quaternary after tertiary ignorables not supported");
            }
            // Insert the new tailored node.
            index = insertTailoredNodeAfter(index, strength);
            // Strength of the temporary CE:
            // The new relation may yield a stronger CE but not a weaker one.
            int tempStrength = ceStrength(ce);
            if(strength < tempStrength) { tempStrength = strength; }
            ces[cesLength - 1] = tempCEFromIndexAndStrength(index, tempStrength);
        }

        setCaseBits(nfdString);

        int cesLengthBeforeExtension = cesLength;
        if(extension.length() != 0) {
            String nfdExtension = nfd.normalize(extension);
            cesLength = dataBuilder.getCEs(nfdExtension, ces, cesLength);
            if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
                throw new IllegalArgumentException(
                        "extension string adds too many collation elements (more than 31 total)");
            }
        }
        int ce32 = Collation.UNASSIGNED_CE32;
        if((!nfdPrefix.contentEquals(prefix) || !nfdString.contentEquals(str)) &&
                !ignorePrefix(prefix) && !ignoreString(str)) {
            // Map from the original input to the CEs.
            // We do this in case the canonical closure is incomplete,
            // so that it is possible to explicitly provide the missing mappings.
            ce32 = addIfDifferent(prefix, str, ces, cesLength, ce32);
        }
        addWithClosure(nfdPrefix, nfdString, ces, cesLength, ce32);
        cesLength = cesLengthBeforeExtension;
    }
+
    /**
     * Picks one of the current CEs and finds or inserts a node in the graph
     * for the CE + strength.
     * May shrink cesLength to drop trailing CEs weaker than the requested strength.
     */
    private int findOrInsertNodeForCEs(int strength) {
        assert(Collator.PRIMARY <= strength && strength <= Collator.QUATERNARY);

        // Find the last CE that is at least as "strong" as the requested difference.
        // Note: Stronger is smaller (Collator.PRIMARY=0).
        long ce;
        for(;; --cesLength) {
            if(cesLength == 0) {
                // All CEs were too weak; tailor relative to the completely-ignorable CE.
                ce = ces[0] = 0;
                cesLength = 1;
                break;
            } else {
                ce = ces[cesLength - 1];
            }
            if(ceStrength(ce) <= strength) { break; }
        }

        if(isTempCE(ce)) {
            // No need to findCommonNode() here for lower levels
            // because insertTailoredNodeAfter() will do that anyway.
            return indexFromTempCE(ce);
        }

        // root CE
        if((int)(ce >>> 56) == Collation.UNASSIGNED_IMPLICIT_BYTE) {
            throw new UnsupportedOperationException(
                    "tailoring relative to an unassigned code point not supported");
        }
        return findOrInsertNodeForRootCE(ce, strength);
    }
+
    /**
     * Finds or inserts the node chain for a root CE,
     * down to the requested level/strength, and returns the deepest node's index.
     */
    private int findOrInsertNodeForRootCE(long ce, int strength) {
        assert((int)(ce >>> 56) != Collation.UNASSIGNED_IMPLICIT_BYTE);

        // Find or insert the node for each of the root CE's weights,
        // down to the requested level/strength.
        // Root CEs must have common=zero quaternary weights (for which we never insert any nodes).
        assert((ce & 0xc0) == 0);
        int index = findOrInsertNodeForPrimary(ce >>> 32);
        if(strength >= Collator.SECONDARY) {
            int lower32 = (int)ce;
            index = findOrInsertWeakNode(index, lower32 >>> 16, Collator.SECONDARY);
            if(strength >= Collator.TERTIARY) {
                index = findOrInsertWeakNode(index, lower32 & Collation.ONLY_TERTIARY_MASK,
                        Collator.TERTIARY);
            }
        }
        return index;
    }
+
+ /**
+ * Like Java Collections.binarySearch(List, key, Comparator).
+ *
+ * @return the index>=0 where the item was found,
+ * or the index<0 for inserting the string at ~index in sorted order
+ * (index into rootPrimaryIndexes)
+ */
+ private static final int binarySearchForRootPrimaryNode(
+ int[] rootPrimaryIndexes, int length, long[] nodes, long p) {
+ if(length == 0) { return ~0; }
+ int start = 0;
+ int limit = length;
+ for (;;) {
+ int i = (start + limit) / 2;
+ long node = nodes[rootPrimaryIndexes[i]];
+ long nodePrimary = node >>> 32; // weight32FromNode(node)
+ if (p == nodePrimary) {
+ return i;
+ } else if (p < nodePrimary) {
+ if (i == start) {
+ return ~start; // insert s before i
+ }
+ limit = i;
+ } else {
+ if (i == start) {
+ return ~(start + 1); // insert s after i
+ }
+ start = i;
+ }
+ }
+ }
+
    /** Finds or inserts the node for a root CE's primary weight. */
    private int findOrInsertNodeForPrimary(long p) {
        int rootIndex = binarySearchForRootPrimaryNode(
                rootPrimaryIndexes.getBuffer(), rootPrimaryIndexes.size(), nodes.getBuffer(), p);
        if(rootIndex >= 0) {
            return rootPrimaryIndexes.elementAti(rootIndex);
        } else {
            // Start a new list of nodes with this primary.
            int index = nodes.size();
            nodes.addElement(nodeFromWeight32(p));
            // ~rootIndex is the sorted insertion point returned by the binary search.
            rootPrimaryIndexes.insertElementAt(index, ~rootIndex);
            return index;
        }
    }
+
    /**
     * Finds or inserts the node for a secondary or tertiary weight of a root CE.
     *
     * @param index index of the node for the next-stronger level
     * @param weight16 the 16-bit secondary or tertiary weight to find or insert
     * @param level Collator.SECONDARY or Collator.TERTIARY
     * @return the index of the found or newly inserted node
     */
    private int findOrInsertWeakNode(int index, int weight16, int level) {
        assert(0 <= index && index < nodes.size());

        // Root CE weak weights are either zero or at least the common weight.
        assert(weight16 == 0 || weight16 >= Collation.COMMON_WEIGHT16);
        // Only reset-before inserts common weights.
        if(weight16 == Collation.COMMON_WEIGHT16) {
            return findCommonNode(index, level);
        }
        // Find the root CE's weight for this level.
        // Postpone insertion if not found:
        // Insert the new root node before the next stronger node,
        // or before the next root node with the same strength and a larger weight.
        long node = nodes.elementAti(index);
        int nextIndex;
        while((nextIndex = nextIndexFromNode(node)) != 0) {
            node = nodes.elementAti(nextIndex);
            int nextStrength = strengthFromNode(node);
            if(nextStrength <= level) {
                // Insert before a stronger node.
                if(nextStrength < level) { break; }
                // nextStrength == level
                if(!isTailoredNode(node)) {
                    int nextWeight16 = weight16FromNode(node);
                    if(nextWeight16 == weight16) {
                        // Found the node for the root CE up to this level.
                        return nextIndex;
                    }
                    // Insert before a node with a larger same-strength weight.
                    if(nextWeight16 > weight16) { break; }
                }
            }
            // Skip the next node.
            index = nextIndex;
        }
        // Not found: insert a new node with an explicit weight at this level,
        // between index and nextIndex.
        node = nodeFromWeight16(weight16) | nodeFromStrength(level);
        return insertNodeBetween(index, nextIndex, node);
    }
+
+ /**
+ * Makes and inserts a new tailored node into the list, after the one at index.
+ * Skips over nodes of weaker strength to maintain collation order
+ * ("postpone insertion").
+ * @return the new node's index
+ */
+ private int insertTailoredNodeAfter(int index, int strength) {
+ assert(0 <= index && index < nodes.size());
+ if(strength >= Collator.SECONDARY) {
+ index = findCommonNode(index, Collator.SECONDARY);
+ if(strength >= Collator.TERTIARY) {
+ index = findCommonNode(index, Collator.TERTIARY);
+ }
+ }
+ // Postpone insertion:
+ // Insert the new node before the next one with a strength at least as strong.
+ long node = nodes.elementAti(index);
+ int nextIndex;
+ while((nextIndex = nextIndexFromNode(node)) != 0) {
+ node = nodes.elementAti(nextIndex);
+ if(strengthFromNode(node) <= strength) { break; }
+ // Skip the next node which has a weaker (larger) strength than the new one.
+ index = nextIndex;
+ }
+ node = IS_TAILORED | nodeFromStrength(strength);
+ return insertNodeBetween(index, nextIndex, node);
+ }
+
+ /**
+ * Inserts a new node into the list, between list-adjacent items.
+ * The node's previous and next indexes must not be set yet.
+ * @return the new node's index
+ */
+ private int insertNodeBetween(int index, int nextIndex, long node) {
+ assert(previousIndexFromNode(node) == 0);
+ assert(nextIndexFromNode(node) == 0);
+ assert(nextIndexFromNode(nodes.elementAti(index)) == nextIndex);
+ // Append the new node and link it to the existing nodes.
+ int newIndex = nodes.size();
+ node |= nodeFromPreviousIndex(index) | nodeFromNextIndex(nextIndex);
+ nodes.addElement(node);
+ // nodes[index].nextIndex = newIndex
+ node = nodes.elementAti(index);
+ nodes.setElementAt(changeNodeNextIndex(node, newIndex), index);
+ // nodes[nextIndex].previousIndex = newIndex
+ if(nextIndex != 0) {
+ node = nodes.elementAti(nextIndex);
+ nodes.setElementAt(changeNodePreviousIndex(node, newIndex), nextIndex);
+ }
+ return newIndex;
+ }
+
    /**
     * Finds the node which implies or contains a common=05 weight of the given strength
     * (secondary or tertiary).
     * Skips weaker nodes and tailored nodes if the current node is stronger
     * and is followed by an explicit-common-weight node.
     * Always returns the input index if that node is no stronger than the given strength.
     */
    private int findCommonNode(int index, int strength) {
        assert(Collator.SECONDARY <= strength && strength <= Collator.TERTIARY);
        long node = nodes.elementAti(index);
        if(strengthFromNode(node) >= strength) {
            // The current node is no stronger.
            return index;
        }
        if(strength == Collator.SECONDARY ? !nodeHasBefore2(node) : !nodeHasBefore3(node)) {
            // The current node implies the strength-common weight.
            return index;
        }
        // The current node has a reset-before at this strength,
        // so it must be followed by a BEFORE_WEIGHT16 node of exactly this strength
        // (see the HAS_BEFORE2/HAS_BEFORE3 constants).
        index = nextIndexFromNode(node);
        node = nodes.elementAti(index);
        assert(!isTailoredNode(node) && strengthFromNode(node) == strength &&
                weight16FromNode(node) == BEFORE_WEIGHT16);
        // Skip to the explicit common node, passing over the tailored
        // lower-than-common nodes in between.
        do {
            index = nextIndexFromNode(node);
            node = nodes.elementAti(index);
            assert(strengthFromNode(node) >= strength);
        } while(isTailoredNode(node) || strengthFromNode(node) > strength);
        assert(weight16FromNode(node) == Collation.COMMON_WEIGHT16);
        return index;
    }
+
    /**
     * Sets the case bits (CE bits 15..14) of the tailored CEs in ces[0..cesLength[:
     * primary CEs copy the case bits of the corresponding primary CEs
     * that the base collator produces for nfdString,
     * tertiary CEs get uppercase bits, and all others get zero case bits.
     */
    private void setCaseBits(CharSequence nfdString) {
        int numTailoredPrimaries = 0;
        for(int i = 0; i < cesLength; ++i) {
            if(ceStrength(ces[i]) == Collator.PRIMARY) { ++numTailoredPrimaries; }
        }
        // We should not be able to get too many case bits because
        // cesLength<=31==MAX_EXPANSION_LENGTH.
        // 31 pairs of case bits fit into a long without setting its sign bit.
        assert(numTailoredPrimaries <= 31);

        // Pairs of case bits, least significant pair first.
        long cases = 0;
        if(numTailoredPrimaries > 0) {
            CharSequence s = nfdString;
            UTF16CollationIterator baseCEs = new UTF16CollationIterator(baseData, false, s, 0);
            int baseCEsLength = baseCEs.fetchCEs() - 1;
            assert(baseCEsLength >= 0 && baseCEs.getCE(baseCEsLength) == Collation.NO_CE);

            int lastCase = 0;
            int numBasePrimaries = 0;
            for(int i = 0; i < baseCEsLength; ++i) {
                long ce = baseCEs.getCE(i);
                if((ce >>> 32) != 0) {  // primary CE (non-zero primary weight)
                    ++numBasePrimaries;
                    int c = ((int)ce >> 14) & 3;  // this base CE's case bits
                    assert(c == 0 || c == 2); // lowercase or uppercase, no mixed case in any base CE
                    if(numBasePrimaries < numTailoredPrimaries) {
                        cases |= (long)c << ((numBasePrimaries - 1) * 2);
                    } else if(numBasePrimaries == numTailoredPrimaries) {
                        lastCase = c;
                    } else if(c != lastCase) {
                        // There are more base primary CEs than tailored primaries.
                        // Set mixed case if the case bits of the remainder differ.
                        lastCase = 1;
                        // Nothing more can change.
                        break;
                    }
                }
            }
            if(numBasePrimaries >= numTailoredPrimaries) {
                cases |= (long)lastCase << ((numTailoredPrimaries - 1) * 2);
            }
        }

        for(int i = 0; i < cesLength; ++i) {
            long ce = ces[i] & 0xffffffffffff3fffL; // clear old case bits
            int strength = ceStrength(ce);
            if(strength == Collator.PRIMARY) {
                // Consume the next pair of computed case bits.
                ce |= (cases & 3) << 14;
                cases >>>= 2;
            } else if(strength == Collator.TERTIARY) {
                // Tertiary CEs must have uppercase bits.
                // See the LDML spec, and comments in class CollationCompare.
                ce |= 0x8000;
            }
            // Tertiary ignorable CEs must have 0 case bits.
            // We set 0 case bits for secondary CEs too
            // since currently only U+0345 is cased and maps to a secondary CE,
            // and it is lowercase. Other secondaries are uncased.
            // See [[:Cased:]&[:uca1=:]] where uca1 queries the root primary weight.
            ces[i] = ce;
        }
    }
+
    /** Implements CollationRuleParser.Sink. Forwards the set to the data builder. */
    @Override
    void suppressContractions(UnicodeSet set) {
        dataBuilder.suppressContractions(set);
    }

    /** Implements CollationRuleParser.Sink. Accumulates code points to optimize for. */
    @Override
    void optimize(UnicodeSet set) {
        optimizeSet.addAll(set);
    }
+
    /**
     * Adds the mapping and its canonical closure.
     * Takes ce32=dataBuilder.encodeCEs(...) so that the data builder
     * need not re-encode the CEs multiple times.
     *
     * @return the ce32 used for the mappings
     *         (encoded on demand by addIfDifferent() if it was UNASSIGNED_CE32)
     */
    private int addWithClosure(CharSequence nfdPrefix, CharSequence nfdString,
            long[] newCEs, int newCEsLength, int ce32) {
        // Map from the NFD input to the CEs.
        ce32 = addIfDifferent(nfdPrefix, nfdString, newCEs, newCEsLength, ce32);
        // Also map from canonically equivalent input.
        ce32 = addOnlyClosure(nfdPrefix, nfdString, newCEs, newCEsLength, ce32);
        // And from strings that merge composites into the string's last starter.
        addTailComposites(nfdPrefix, nfdString);
        return ce32;
    }
+
+ private int addOnlyClosure(CharSequence nfdPrefix, CharSequence nfdString,
+ long[] newCEs, int newCEsLength, int ce32) {
+ // Map from canonically equivalent input to the CEs. (But not from the all-NFD input.)
+ // TODO: make CanonicalIterator work with CharSequence, or maybe change arguments here to String
+ if(nfdPrefix.length() == 0) {
+ CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString());
+ String prefix = "";
+ for(;;) {
+ String str = stringIter.next();
+ if(str == null) { break; }
+ if(ignoreString(str) || str == nfdString) { continue; }
+ ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
+ }
+ } else {
+ CanonicalIterator prefixIter = new CanonicalIterator(nfdPrefix.toString());
+ CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString());
+ for(;;) {
+ String prefix = prefixIter.next();
+ if(prefix == null) { break; }
+ if(ignorePrefix(prefix)) { continue; }
+ boolean samePrefix = prefix == nfdPrefix;
+ for(;;) {
+ String str = stringIter.next();
+ if(str == null) { break; }
+ if(ignoreString(str) || (samePrefix && str == nfdString)) { continue; }
+ ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
+ }
+ stringIter.reset();
+ }
+ }
+ return ce32;
+ }
+
    /**
     * For each composite character whose decomposition starts with the nfdString's
     * last starter, tries to merge the composite into the string
     * (see mergeCompositeIntoString()) and, if that yields a new FCD string,
     * adds a mapping for it with the CEs it should collate with, plus its closure.
     */
    private void addTailComposites(CharSequence nfdPrefix, CharSequence nfdString) {
        // Look for the last starter in the NFD string.
        int lastStarter;
        int indexAfterLastStarter = nfdString.length();
        for(;;) {
            if(indexAfterLastStarter == 0) { return; } // no starter at all
            lastStarter = Character.codePointBefore(nfdString, indexAfterLastStarter);
            if(nfd.getCombiningClass(lastStarter) == 0) { break; }
            indexAfterLastStarter -= Character.charCount(lastStarter);
        }
        // No closure to Hangul syllables since we decompose them on the fly.
        if(Hangul.isJamoL(lastStarter)) { return; }

        // Are there any composites whose decomposition starts with the lastStarter?
        // Note: Normalizer2Impl does not currently return start sets for NFC_QC=Maybe characters.
        // We might find some more equivalent mappings here if it did.
        UnicodeSet composites = new UnicodeSet();
        if(!nfcImpl.getCanonStartSet(lastStarter, composites)) { return; }

        StringBuilder newNFDString = new StringBuilder(), newString = new StringBuilder();
        long[] newCEs = new long[Collation.MAX_EXPANSION_LENGTH];
        UnicodeSetIterator iter = new UnicodeSetIterator(composites);
        while(iter.next()) {
            assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
            int composite = iter.codepoint;
            String decomp = nfd.getDecomposition(composite);
            if(!mergeCompositeIntoString(nfdString, indexAfterLastStarter, composite, decomp,
                    newNFDString, newString)) {
                continue;
            }
            int newCEsLength = dataBuilder.getCEs(nfdPrefix, newNFDString, newCEs, 0);
            if(newCEsLength > Collation.MAX_EXPANSION_LENGTH) {
                // Ignore mappings that we cannot store.
                continue;
            }
            // Note: It is possible that the newCEs do not make use of the mapping
            // for which we are adding the tail composites, in which case we might be adding
            // unnecessary mappings.
            // For example, when we add tail composites for ae^ (^=combining circumflex),
            // UCA discontiguous-contraction matching does not find any matches
            // for ae_^ (_=any combining diacritic below) *unless* there is also
            // a contraction mapping for ae.
            // Thus, if there is no ae contraction, then the ae^ mapping is ignored
            // while fetching the newCEs for ae_^.
            // TODO: Try to detect this effectively.
            // (Alternatively, print a warning when prefix contractions are missing.)

            // We do not need an explicit mapping for the NFD strings.
            // It is fine if the NFD input collates like this via a sequence of mappings.
            // It also saves a little bit of space, and may reduce the set of characters with contractions.
            int ce32 = addIfDifferent(nfdPrefix, newString,
                    newCEs, newCEsLength, Collation.UNASSIGNED_CE32);
            if(ce32 != Collation.UNASSIGNED_CE32) {
                // was different, was added
                addOnlyClosure(nfdPrefix, newNFDString, newCEs, newCEsLength, ce32);
            }
        }
    }
+
    /**
     * Merges the composite character (with NFD decomposition decomp) into nfdString,
     * whose last starter ends at indexAfterLastStarter and must equal
     * the first code point of decomp.
     * On success, fills newNFDString with the merged NFD string and
     * newString with the equivalent string that contains the composite itself.
     *
     * @return true if the merge succeeded and both outputs are FCD and
     *         canonically equivalent to each other
     */
    private boolean mergeCompositeIntoString(CharSequence nfdString, int indexAfterLastStarter,
            int composite, CharSequence decomp,
            StringBuilder newNFDString, StringBuilder newString) {
        assert(Character.codePointBefore(nfdString, indexAfterLastStarter) ==
                Character.codePointAt(decomp, 0));
        int lastStarterLength = Character.offsetByCodePoints(decomp, 0, 1);
        if(lastStarterLength == decomp.length()) {
            // Singleton decompositions should be found by addWithClosure()
            // and the CanonicalIterator, so we can ignore them here.
            return false;
        }
        if(equalSubSequences(nfdString, indexAfterLastStarter, decomp, lastStarterLength)) {
            // same strings, nothing new to be found here
            return false;
        }

        // Make new FCD strings that combine a composite, or its decomposition,
        // into the nfdString's last starter and the combining marks following it.
        // Make an NFD version, and a version with the composite.
        newNFDString.setLength(0);
        newNFDString.append(nfdString, 0, indexAfterLastStarter);
        newString.setLength(0);
        newString.append(nfdString, 0, indexAfterLastStarter - lastStarterLength)
                .appendCodePoint(composite);

        // The following is related to discontiguous contraction matching,
        // but builds only FCD strings (or else returns false).
        // Merge the tails of nfdString and decomp in combining-class order.
        int sourceIndex = indexAfterLastStarter;
        int decompIndex = lastStarterLength;
        // Small optimization: We keep the source character across loop iterations
        // because we do not always consume it,
        // and then need not fetch it again nor look up its combining class again.
        int sourceChar = Collation.SENTINEL_CP;
        // The cc variables need to be declared before the loop so that at the end
        // they are set to the last combining classes seen.
        int sourceCC = 0;
        int decompCC = 0;
        for(;;) {
            if(sourceChar < 0) {
                if(sourceIndex >= nfdString.length()) { break; }
                sourceChar = Character.codePointAt(nfdString, sourceIndex);
                sourceCC = nfd.getCombiningClass(sourceChar);
                assert(sourceCC != 0);
            }
            // We consume a decomposition character in each iteration.
            if(decompIndex >= decomp.length()) { break; }
            int decompChar = Character.codePointAt(decomp, decompIndex);
            decompCC = nfd.getCombiningClass(decompChar);
            // Compare the two characters and their combining classes.
            if(decompCC == 0) {
                // Unable to merge because the source contains a non-zero combining mark
                // but the composite's decomposition contains another starter.
                // The strings would not be equivalent.
                return false;
            } else if(sourceCC < decompCC) {
                // Composite + sourceChar would not be FCD.
                return false;
            } else if(decompCC < sourceCC) {
                newNFDString.appendCodePoint(decompChar);
                decompIndex += Character.charCount(decompChar);
            } else if(decompChar != sourceChar) {
                // Blocked because same combining class.
                return false;
            } else { // match: decompChar == sourceChar
                newNFDString.appendCodePoint(decompChar);
                decompIndex += Character.charCount(decompChar);
                sourceIndex += Character.charCount(decompChar);
                sourceChar = Collation.SENTINEL_CP;
            }
        }
        // We are at the end of at least one of the two inputs.
        if(sourceChar >= 0) { // more characters from nfdString but not from decomp
            if(sourceCC < decompCC) {
                // Appending the next source character to the composite would not be FCD.
                return false;
            }
            newNFDString.append(nfdString, sourceIndex, nfdString.length());
            newString.append(nfdString, sourceIndex, nfdString.length());
        } else if(decompIndex < decomp.length()) { // more characters from decomp, not from nfdString
            newNFDString.append(decomp, decompIndex, decomp.length());
        }
        assert(nfd.isNormalized(newNFDString));
        assert(fcd.isNormalized(newString));
        assert(nfd.normalize(newString).equals(newNFDString.toString())); // canonically equivalent
        return true;
    }
+
+ private boolean equalSubSequences(CharSequence left, int leftStart, CharSequence right, int rightStart) {
+ // C++ UnicodeString::compare(leftStart, 0x7fffffff, right, rightStart, 0x7fffffff) == 0
+ int leftLength = left.length();
+ if((leftLength - leftStart) != (right.length() - rightStart)) { return false; }
+ while(leftStart < leftLength) {
+ if(left.charAt(leftStart++) != right.charAt(rightStart++)) {
+ return false;
+ }
+ }
+ return true;
+ }
    /** Returns true if the prefix must not be mapped: we do not map non-FCD prefixes. */
    private boolean ignorePrefix(CharSequence s) {
        // Do not map non-FCD prefixes.
        return !isFCD(s);
    }
    /** Returns true if the string must not be mapped. */
    private boolean ignoreString(CharSequence s) {
        // Do not map non-FCD strings.
        // Do not map strings that start with Hangul syllables: We decompose those on the fly.
        return !isFCD(s) || Hangul.isHangul(s.charAt(0));
    }
    /** Returns true if s is in FCD form. */
    private boolean isFCD(CharSequence s) {
        return fcd.isNormalized(s);
    }
+
    /**
     * Characters with a non-trivial NFD decomposition (NFD_QC=No),
     * minus Hangul syllables; used by closeOverComposites().
     */
    private static final UnicodeSet COMPOSITES = new UnicodeSet("[:NFD_QC=N:]");
    static {
        // Hangul is decomposed on the fly during collation.
        COMPOSITES.remove(Hangul.HANGUL_BASE, Hangul.HANGUL_END);
    }
+
+ private void closeOverComposites() {
+ String prefix = ""; // empty
+ UnicodeSetIterator iter = new UnicodeSetIterator(COMPOSITES);
+ while(iter.next()) {
+ assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
+ String nfdString = nfd.getDecomposition(iter.codepoint);
+ cesLength = dataBuilder.getCEs(nfdString, ces, 0);
+ if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
+ // Too many CEs from the decomposition (unusual), ignore this composite.
+ // We could add a capacity parameter to getCEs() and reallocate if necessary.
+ // However, this can only really happen in contrived cases.
+ continue;
+ }
+ String composite = iter.getString();
+ addIfDifferent(prefix, composite, ces, cesLength, Collation.UNASSIGNED_CE32);
+ }
+ }
+
+ private int addIfDifferent(CharSequence prefix, CharSequence str,
+ long[] newCEs, int newCEsLength, int ce32) {
+ long[] oldCEs = new long[Collation.MAX_EXPANSION_LENGTH];
+ int oldCEsLength = dataBuilder.getCEs(prefix, str, oldCEs, 0);
+ if(!sameCEs(newCEs, newCEsLength, oldCEs, oldCEsLength)) {
+ if(ce32 == Collation.UNASSIGNED_CE32) {
+ ce32 = dataBuilder.encodeCEs(newCEs, newCEsLength);
+ }
+ dataBuilder.addCE32(prefix, str, ce32);
+ }
+ return ce32;
+ }
+
+ private static boolean sameCEs(long[] ces1, int ces1Length,
+ long[] ces2, int ces2Length) {
+ if(ces1Length != ces2Length) {
+ return false;
+ }
+ assert(ces1Length <= Collation.MAX_EXPANSION_LENGTH);
+ for(int i = 0; i < ces1Length; ++i) {
+ if(ces1[i] != ces2[i]) { return false; }
+ }
+ return true;
+ }
+
+ private static final int alignWeightRight(int w) {
+ if(w != 0) {
+ while((w & 0xff) == 0) { w >>>= 8; }
+ }
+ return w;
+ }
+
    /**
     * Walks the tailoring graph and overwrites tailored nodes with new CEs.
     * After this, the graph is destroyed.
     * The nodes array can then be used only as a source of tailored CEs.
     *
     * For each list (one per root primary), this tracks the current
     * primary/secondary/tertiary/quaternary weights (p, s, t, q) and,
     * at the first tailored node of each level, allocates the needed number of
     * weights from the gap up to the next root weight (CollationWeights).
     */
    private void makeTailoredCEs() {
        CollationWeights primaries = new CollationWeights();
        CollationWeights secondaries = new CollationWeights();
        CollationWeights tertiaries = new CollationWeights();
        long[] nodesArray = nodes.getBuffer();

        for(int rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) {
            int i = rootPrimaryIndexes.elementAti(rpi);
            long node = nodesArray[i];
            long p = weight32FromNode(node);
            // p==0 is the list for completely-ignorable CEs; then s and t start at 0 too.
            int s = p == 0 ? 0 : Collation.COMMON_WEIGHT16;
            int t = s;
            int q = 0;
            boolean pIsTailored = false;
            boolean sIsTailored = false;
            boolean tIsTailored = false;
            if(DEBUG) {
                System.out.printf("\nprimary %x\n", alignWeightRight((int)p));
            }
            int pIndex = p == 0 ? 0 : rootElements.findPrimary(p);
            int nextIndex = nextIndexFromNode(node);
            // Walk the list and assign weights/CEs in collation order.
            while(nextIndex != 0) {
                i = nextIndex;
                node = nodesArray[i];
                nextIndex = nextIndexFromNode(node);
                int strength = strengthFromNode(node);
                if(strength == Collator.QUATERNARY) {
                    assert(isTailoredNode(node));
                    if(DEBUG) {
                        System.out.print(" quat+ ");
                    }
                    // Only two bits are available for the quaternary weight.
                    if(q == 3) {
                        // C++ U_BUFFER_OVERFLOW_ERROR
                        throw new UnsupportedOperationException("quaternary tailoring gap too small");
                    }
                    ++q;
                } else {
                    if(strength == Collator.TERTIARY) {
                        if(isTailoredNode(node)) {
                            if(DEBUG) {
                                System.out.print(" ter+ ");
                            }
                            if(!tIsTailored) {
                                // First tailored tertiary node for [p, s].
                                int tCount = countTailoredNodes(nodesArray, nextIndex,
                                        Collator.TERTIARY) + 1;
                                int tLimit;
                                if(t == 0) {
                                    // Gap at the beginning of the tertiary CE range.
                                    t = rootElements.getTertiaryBoundary() - 0x100;
                                    tLimit = (int)rootElements.getFirstTertiaryCE() & Collation.ONLY_TERTIARY_MASK;
                                } else if(t == BEFORE_WEIGHT16) {
                                    tLimit = Collation.COMMON_WEIGHT16;
                                } else if(!pIsTailored && !sIsTailored) {
                                    // p and s are root weights.
                                    tLimit = rootElements.getTertiaryAfter(pIndex, s, t);
                                } else {
                                    // [p, s] is tailored.
                                    assert(t == Collation.COMMON_WEIGHT16);
                                    tLimit = rootElements.getTertiaryBoundary();
                                }
                                assert(tLimit == 0x4000 || (tLimit & ~Collation.ONLY_TERTIARY_MASK) == 0);
                                tertiaries.initForTertiary();
                                if(!tertiaries.allocWeights(t, tLimit, tCount)) {
                                    // C++ U_BUFFER_OVERFLOW_ERROR
                                    throw new UnsupportedOperationException("tertiary tailoring gap too small");
                                }
                                tIsTailored = true;
                            }
                            t = (int)tertiaries.nextWeight();
                            assert(t != 0xffffffff);
                        } else {
                            // Explicit root/default tertiary weight.
                            t = weight16FromNode(node);
                            tIsTailored = false;
                            if(DEBUG) {
                                System.out.printf(" ter %x\n", alignWeightRight(t));
                            }
                        }
                    } else {
                        if(strength == Collator.SECONDARY) {
                            if(isTailoredNode(node)) {
                                if(DEBUG) {
                                    System.out.print(" sec+ ");
                                }
                                if(!sIsTailored) {
                                    // First tailored secondary node for p.
                                    int sCount = countTailoredNodes(nodesArray, nextIndex,
                                            Collator.SECONDARY) + 1;
                                    int sLimit;
                                    if(s == 0) {
                                        // Gap at the beginning of the secondary CE range.
                                        s = rootElements.getSecondaryBoundary() - 0x100;
                                        sLimit = (int)(rootElements.getFirstSecondaryCE() >> 16);
                                    } else if(s == BEFORE_WEIGHT16) {
                                        sLimit = Collation.COMMON_WEIGHT16;
                                    } else if(!pIsTailored) {
                                        // p is a root primary.
                                        sLimit = rootElements.getSecondaryAfter(pIndex, s);
                                    } else {
                                        // p is a tailored primary.
                                        assert(s == Collation.COMMON_WEIGHT16);
                                        sLimit = rootElements.getSecondaryBoundary();
                                    }
                                    if(s == Collation.COMMON_WEIGHT16) {
                                        // Do not tailor into the getSortKey() range of
                                        // compressed common secondaries.
                                        s = rootElements.getLastCommonSecondary();
                                    }
                                    secondaries.initForSecondary();
                                    if(!secondaries.allocWeights(s, sLimit, sCount)) {
                                        // C++ U_BUFFER_OVERFLOW_ERROR
                                        throw new UnsupportedOperationException("secondary tailoring gap too small");
                                    }
                                    sIsTailored = true;
                                }
                                s = (int)secondaries.nextWeight();
                                assert(s != 0xffffffff);
                            } else {
                                // Explicit root/default secondary weight.
                                s = weight16FromNode(node);
                                sIsTailored = false;
                                if(DEBUG) {
                                    System.out.printf(" sec %x\n", alignWeightRight(s));
                                }
                            }
                        } else /* Collator.PRIMARY */ {
                            assert(isTailoredNode(node));
                            if(DEBUG) {
                                System.out.print("pri+ ");
                            }
                            if(!pIsTailored) {
                                // First tailored primary node in this list.
                                int pCount = countTailoredNodes(nodesArray, nextIndex,
                                        Collator.PRIMARY) + 1;
                                boolean isCompressible = baseData.isCompressiblePrimary(p);
                                long pLimit =
                                        rootElements.getPrimaryAfter(p, pIndex, isCompressible);
                                primaries.initForPrimary(isCompressible);
                                if(!primaries.allocWeights(p, pLimit, pCount)) {
                                    // C++ U_BUFFER_OVERFLOW_ERROR // TODO: introduce a more specific UErrorCode?
                                    throw new UnsupportedOperationException("primary tailoring gap too small");
                                }
                                pIsTailored = true;
                            }
                            p = primaries.nextWeight();
                            assert(p != 0xffffffffL);
                            // A new primary implies common secondary weight.
                            s = Collation.COMMON_WEIGHT16;
                            sIsTailored = false;
                        }
                        // A new primary or secondary implies common tertiary weight
                        // (or zero when the secondary is zero).
                        t = s == 0 ? 0 : Collation.COMMON_WEIGHT16;
                        tIsTailored = false;
                    }
                    // Any non-quaternary node resets the quaternary counter.
                    q = 0;
                }
                if(isTailoredNode(node)) {
                    // Overwrite the tailored node with its final CE.
                    nodesArray[i] = Collation.makeCE(p, s, t, q);
                    if(DEBUG) {
                        System.out.printf("%016x\n", nodesArray[i]);
                    }
                }
            }
        }
    }
+
+ /**
+ * Counts the tailored nodes of the given strength up to the next node
+ * which is either stronger or has an explicit weight of this strength.
+ */
+ private static int countTailoredNodes(long[] nodesArray, int i, int strength) {
+ int count = 0;
+ for(;;) {
+ if(i == 0) { break; }
+ long node = nodesArray[i];
+ if(strengthFromNode(node) < strength) { break; }
+ if(strengthFromNode(node) == strength) {
+ if(isTailoredNode(node)) {
+ ++count;
+ } else {
+ break;
+ }
+ }
+ i = nextIndexFromNode(node);
+ }
+ return count;
+ }
+
+ private static final class CEFinalizer implements CollationDataBuilder.CEModifier {
+ CEFinalizer(long[] ces) {
+ finalCEs = ces;
+ }
+ public long modifyCE32(int ce32) {
+ assert(!Collation.isSpecialCE32(ce32));
+ if(CollationBuilder.isTempCE32(ce32)) {
+ // retain case bits
+ return finalCEs[CollationBuilder.indexFromTempCE32(ce32)] | ((ce32 & 0xc0) << 8);
+ } else {
+ return Collation.NO_CE;
+ }
+ }
+ public long modifyCE(long ce) {
+ if(CollationBuilder.isTempCE(ce)) {
+ // retain case bits
+ return finalCEs[CollationBuilder.indexFromTempCE(ce)] | (ce & 0xc000);
+ } else {
+ return Collation.NO_CE;
+ }
+ }
+
+ private long[] finalCEs;
+ };
+
+ /** Replaces temporary CEs with the final CEs they point to. */
+ private void finalizeCEs() {
+ CollationDataBuilder newBuilder = new CollationDataBuilder();
+ newBuilder.initForTailoring(baseData);
+ CEFinalizer finalizer = new CEFinalizer(nodes.getBuffer());
+ newBuilder.copyFrom(dataBuilder, finalizer);
+ dataBuilder = newBuilder;
+ }
+
+ /**
+ * Encodes "temporary CE" data into a CE that fits into the CE32 data structure,
+ * with 2-byte primary, 1-byte secondary and 6-bit tertiary,
+ * with valid CE byte values.
+ *
+ * The index must not exceed 20 bits (0xfffff).
+ * The strength must fit into 2 bits (Collator.PRIMARY..Collator.QUATERNARY).
+ *
+ * Temporary CEs are distinguished from real CEs by their use of
+ * secondary weights 06..45 which are otherwise reserved for compressed sort keys.
+ *
+ * The case bits are unused and available.
+ */
+ private static long tempCEFromIndexAndStrength(int index, int strength) {
+ return
+ // CE byte offsets, to ensure valid CE bytes, and case bits 11
+ 0x4040000006002000L +
+ // index bits 19..13 -> primary byte 1 = CE bits 63..56 (byte values 40..BF)
+ ((long)(index & 0xfe000) << 43) +
+ // index bits 12..6 -> primary byte 2 = CE bits 55..48 (byte values 40..BF)
+ ((long)(index & 0x1fc0) << 42) +
+ // index bits 5..0 -> secondary byte 1 = CE bits 31..24 (byte values 06..45)
+ ((index & 0x3f) << 24) +
+ // strength bits 1..0 -> tertiary byte 1 = CE bits 13..8 (byte values 20..23)
+ (strength << 8);
+ }
+ private static int indexFromTempCE(long tempCE) {
+ tempCE -= 0x4040000006002000L;
+ return
+ ((int)(tempCE >> 43) & 0xfe000) |
+ ((int)(tempCE >> 42) & 0x1fc0) |
+ ((int)(tempCE >> 24) & 0x3f);
+ }
+ private static int strengthFromTempCE(long tempCE) {
+ return ((int)tempCE >> 8) & 3;
+ }
+ private static boolean isTempCE(long ce) {
+ int sec = (int)ce >>> 24;
+ return 6 <= sec && sec <= 0x45;
+ }
+
+ private static int indexFromTempCE32(int tempCE32) {
+ tempCE32 -= 0x40400620;
+ return
+ ((int)(tempCE32 >> 11) & 0xfe000) |
+ ((int)(tempCE32 >> 10) & 0x1fc0) |
+ ((int)(tempCE32 >> 8) & 0x3f);
+ }
+ private static boolean isTempCE32(int ce32) {
+ return
+ (ce32 & 0xff) >= 2 && // not a long-primary/long-secondary CE32
+ 6 <= ((ce32 >> 8) & 0xff) && ((ce32 >> 8) & 0xff) <= 0x45;
+ }
+
+ private static int ceStrength(long ce) {
+ return
+ isTempCE(ce) ? strengthFromTempCE(ce) :
+ (ce & 0xff00000000000000L) != 0 ? Collator.PRIMARY :
+ ((int)ce & 0xff000000) != 0 ? Collator.SECONDARY :
+ ce != 0 ? Collator.TERTIARY :
+ Collator.IDENTICAL;
+ }
+
    /**
     * The secondary/tertiary lower limit for tailoring before the common weight.
     * Nodes with this weight are inserted for &[before 2]/&[before 3] resets.
     */
    private static final int BEFORE_WEIGHT16 = Collation.MERGE_SEPARATOR_WEIGHT16;

    /** At most 1M nodes, limited by the 20 bits in node bit fields. */
    private static final int MAX_INDEX = 0xfffff;
    /**
     * Node bit 6 is set on a primary node if there are tailored nodes
     * with secondary values below the common secondary weight (05),
     * from a reset-secondary-before (&[before 2]).
     */
    private static final int HAS_BEFORE2 = 0x40;
    /**
     * Node bit 5 is set on a primary or secondary node if there are tailored nodes
     * with tertiary values below the common tertiary weight (05),
     * from a reset-tertiary-before (&[before 3]).
     */
    private static final int HAS_BEFORE3 = 0x20;
    /**
     * Node bit 3 distinguishes a tailored node, which has no weight value,
     * from a node with an explicit (root or default) weight.
     */
    private static final int IS_TAILORED = 8;
+
    // Node bit-field encoders. See the documentation of the nodes list for the layout.

    private static long nodeFromWeight32(long weight32) {
        // 32-bit primary weight in bits 63..32 (root primary list-head nodes only).
        return weight32 << 32;
    }
    private static long nodeFromWeight16(int weight16) {
        // 16-bit weight in bits 63..48 (secondary/tertiary nodes).
        return (long)weight16 << 48;
    }
    private static long nodeFromPreviousIndex(int previous) {
        // 20-bit previous-node index in bits 47..28.
        return (long)previous << 28;
    }
    private static long nodeFromNextIndex(int next) {
        // 20-bit next-node index in bits 27..8.
        // The int shift is safe: next <= MAX_INDEX (20 bits), so next<<8 stays non-negative.
        return next << 8;
    }
    private static long nodeFromStrength(int strength) {
        // 2-bit strength in bits 1..0.
        return strength;
    }

    // Node bit-field decoders, inverse of the encoders above.

    private static long weight32FromNode(long node) {
        return node >>> 32;
    }
    private static int weight16FromNode(long node) {
        return (int)(node >> 48) & 0xffff;
    }
    private static int previousIndexFromNode(long node) {
        return (int)(node >> 28) & MAX_INDEX;
    }
    private static int nextIndexFromNode(long node) {
        // The mask removes the arithmetic shift's sign-extension bits.
        return ((int)node >> 8) & MAX_INDEX;
    }
    private static int strengthFromNode(long node) {
        return (int)node & 3;
    }

    // Node flag tests (see the HAS_BEFORE2/HAS_BEFORE3/IS_TAILORED constants).

    private static boolean nodeHasBefore2(long node) {
        return (node & HAS_BEFORE2) != 0;
    }
    private static boolean nodeHasBefore3(long node) {
        return (node & HAS_BEFORE3) != 0;
    }
    private static boolean nodeHasAnyBefore(long node) {
        return (node & (HAS_BEFORE2 | HAS_BEFORE3)) != 0;
    }
    private static boolean isTailoredNode(long node) {
        return (node & IS_TAILORED) != 0;
    }

    // Link-field updaters: clear the old index field, then set the new one.

    private static long changeNodePreviousIndex(long node, int previous) {
        return (node & 0xffff00000fffffffL) | nodeFromPreviousIndex(previous);
    }
    private static long changeNodeNextIndex(long node, int next) {
        return (node & 0xfffffffff00000ffL) | nodeFromNextIndex(next);
    }
+
    // Normalizers used for decompositions and FCD checks during closure.
    private Normalizer2 nfd, fcd;
    private Normalizer2Impl nfcImpl;

    // The base tailoring and its data; CEs are fetched from baseData
    // and weight gaps are looked up in rootElements.
    private CollationTailoring base;
    private CollationData baseData;
    private CollationRootElements rootElements;
    // NOTE(review): presumably the variable-top weight for this tailoring — confirm against callers.
    private long variableTop;

    // Accumulates the tailoring's mappings; replaced with a finalized copy in finalizeCEs().
    private CollationDataBuilder dataBuilder;
    private boolean fastLatinEnabled;
    // Code points collected via the optimize() rule-parser sink.
    private UnicodeSet optimizeSet = new UnicodeSet();

    // CEs of the current mapping being worked on
    // (written by closeOverComposites(), read/updated by setCaseBits()).
    private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH];
    private int cesLength;

    /**
     * Indexes of nodes with root primary weights, sorted by primary.
     * Compact form of a TreeMap from root primary to node index.
     *
     * This is a performance optimization for finding reset positions.
     * Without this, we would have to search through the entire nodes list.
     * It also allows storing root primary weights in list head nodes,
     * without previous index, leaving room in root primary nodes for 32-bit primary weights.
     */
    private UVector32 rootPrimaryIndexes;
+ /**
+ * Data structure for assigning tailored weights and CEs.
+ * Doubly-linked lists of nodes in mostly collation order.
+ * Each list starts with a root primary node and ends with a nextIndex of 0.
+ *
+ * When there are any nodes in the list, then there is always a root primary node at index 0.
+ * This allows some code not to have to check explicitly for nextIndex==0.
+ *
+ * Root primary nodes have 32-bit weights but do not have previous indexes.
+ * All other nodes have at most 16-bit weights and do have previous indexes.
+ *
+ * Nodes with explicit weights store root collator weights,
+ * or default weak weights (e.g., secondary 05) for stronger nodes.
+ * "Tailored" nodes, with the IS_TAILORED bit set,
+ * do not store explicit weights but rather
+ * create a difference of a certain strength from the preceding node.
+ *
+ * A root node is followed by either
+ * - a root/default node of the same strength, or
+ * - a root/default node of the next-weaker strength, or
+ * - a tailored node of the same strength.
+ *
+ * A node of a given strength normally implies "common" weights on weaker levels.
+ *
+ * A node with HAS_BEFORE2 must be immediately followed by
+ * a secondary node with BEFORE_WEIGHT16, then a secondary tailored node,
+ * and later an explicit common-secondary node.
+ * (&[before 2] resets to the BEFORE_WEIGHT16 node so that
+ * the following addRelation(secondary) tailors right after that.
+ * If we did not have this node and instead were to reset on the primary node,
+ * then addRelation(secondary) would skip forward to the the COMMON_WEIGHT16 node.)
+ *
+ * All secondary tailored nodes between these two explicit ones
+ * will be assigned lower-than-common secondary weights.
+ * If the flag is not set, then there are no explicit secondary nodes
+ * with the common or lower weights.
+ *
+ * Same for HAS_BEFORE3 for tertiary nodes and weights.
+ * A node must not have both flags set.
+ *
+ * Tailored CEs are initially represented in a CollationDataBuilder as temporary CEs
+ * which point to stable indexes in this list,
+ * and temporary CEs stored in a CollationDataBuilder only point to tailored nodes.
+ *
+ * A temporary CE in the ces[] array may point to a non-tailored reset-before-position node,
+ * until the next relation is added.
+ *
+ * At the end, the tailored weights are allocated as necessary,
+ * then the tailored nodes are replaced with final CEs,
+ * and the CollationData is rewritten by replacing temporary CEs with final ones.
+ *
+ * We cannot simply insert new nodes in the middle of the array
+ * because that would invalidate the indexes stored in existing temporary CEs.
+ * We need to use a linked graph with stable indexes to existing nodes.
+ * A doubly-linked list seems easiest to maintain.
+ *
+ * Each node is stored as an long, with its fields stored as bit fields.
+ *
+ * Root primary node:
+ * - primary weight: 32 bits 63..32
+ * - reserved/unused/zero: 4 bits 31..28
+ *
+ * Weaker root nodes & tailored nodes:
+ * - a weight: 16 bits 63..48
+ * + a root or default weight for a non-tailored node
+ * + unused/zero for a tailored node
+ * - index to the previous node: 20 bits 47..28
+ *
+ * All types of nodes:
+ * - index to the next node: 20 bits 27..8
+ * + nextIndex=0 in last node per root-primary list
+ * - reserved/unused/zero bits: bits 7, 4, 2
+ * - HAS_BEFORE2: bit 6
+ * - HAS_BEFORE3: bit 5
+ * - IS_TAILORED: bit 3
+ * - the difference strength (primary/secondary/tertiary/quaternary): 2 bits 1..0
+ *
+ * We could allocate structs with pointers, but we would have to store them
+ * in a pointer list so that they can be indexed from temporary CEs,
+ * and they would require more memory allocations.
+ */
+ private UVector64 nodes;
+}
--- /dev/null
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *******************************************************************************
+ * CollationCompare.java, ported from collationcompare.h/.cpp
+ *
+ * C++ version created on: 2012feb14 with new and old collation code
+ * created by: Markus W. Scherer
+ */
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.text.Collator;
+
+/**
+ * Compares two streams of collation elements (CEs) level by level:
+ * primary, secondary, case, tertiary, quaternary.
+ * All methods are static; there is no instance state.
+ */
+public final class CollationCompare /* all static */ {
+ /**
+ * Fetches CEs from both iterators and compares them up to the quaternary level,
+ * honoring the strength, alternate-handling (shifted), backwards-secondary,
+ * case-level and case-first options in the settings.
+ *
+ * @param left CE iterator for the left-hand string
+ * @param right CE iterator for the right-hand string
+ * @param settings collation settings (options bits, variableTop, reorderTable)
+ * @return Collation.LESS, Collation.EQUAL or Collation.GREATER
+ */
+ public static int compareUpToQuaternary(CollationIterator left, CollationIterator right,
+ CollationSettings settings) {
+ int options = settings.options;
+ long variableTop;
+ if ((options & CollationSettings.ALTERNATE_MASK) == 0) {
+ variableTop = 0;
+ } else {
+ // +1 so that we can use "<" and primary ignorables test out early.
+ variableTop = settings.variableTop + 1;
+ }
+ boolean anyVariable = false;
+
+ // Fetch CEs, compare primaries, store secondary & tertiary weights.
+ for (;;) {
+ // We fetch CEs until we get a non-ignorable primary or reach the end.
+ long leftPrimary;
+ do {
+ long ce = left.nextCE();
+ leftPrimary = ce >>> 32;
+ if (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY) {
+ // Variable CE, shift it to quaternary level.
+ // Ignore all following primary ignorables, and shift further variable CEs.
+ anyVariable = true;
+ do {
+ // Store only the primary of the variable CE.
+ left.setCurrentCE(ce & 0xffffffff00000000L);
+ for (;;) {
+ ce = left.nextCE();
+ leftPrimary = ce >>> 32;
+ if (leftPrimary == 0) {
+ left.setCurrentCE(0);
+ } else {
+ break;
+ }
+ }
+ } while (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
+ }
+ } while (leftPrimary == 0);
+
+ long rightPrimary;
+ do {
+ long ce = right.nextCE();
+ rightPrimary = ce >>> 32;
+ if (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY) {
+ // Variable CE, shift it to quaternary level.
+ // Ignore all following primary ignorables, and shift further variable CEs.
+ anyVariable = true;
+ do {
+ // Store only the primary of the variable CE.
+ right.setCurrentCE(ce & 0xffffffff00000000L);
+ for (;;) {
+ ce = right.nextCE();
+ rightPrimary = ce >>> 32;
+ if (rightPrimary == 0) {
+ right.setCurrentCE(0);
+ } else {
+ break;
+ }
+ }
+ } while (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
+ }
+ } while (rightPrimary == 0);
+
+ if (leftPrimary != rightPrimary) {
+ // Return the primary difference, with script reordering.
+ byte[] reorderTable = settings.reorderTable;
+ if (reorderTable != null) {
+ leftPrimary = Collation.reorder(reorderTable, leftPrimary);
+ rightPrimary = Collation.reorder(reorderTable, rightPrimary);
+ }
+ return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER;
+ }
+ if (leftPrimary == Collation.NO_CE_PRIMARY) {
+ break;
+ }
+ }
+
+ // Compare the buffered secondary & tertiary weights.
+ // We might skip the secondary level but continue with the case level
+ // which is turned on separately.
+ if (CollationSettings.getStrength(options) >= Collator.SECONDARY) {
+ if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) {
+ int leftIndex = 0;
+ int rightIndex = 0;
+ for (;;) {
+ int leftSecondary;
+ do {
+ leftSecondary = ((int) left.getCE(leftIndex++)) >>> 16;
+ } while (leftSecondary == 0);
+
+ int rightSecondary;
+ do {
+ rightSecondary = ((int) right.getCE(rightIndex++)) >>> 16;
+ } while (rightSecondary == 0);
+
+ if (leftSecondary != rightSecondary) {
+ return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
+ }
+ if (leftSecondary == Collation.NO_CE_WEIGHT16) {
+ break;
+ }
+ }
+ } else {
+ // The backwards secondary level compares secondary weights backwards
+ // within segments separated by the merge separator (U+FFFE, weight 02).
+ int leftStart = 0;
+ int rightStart = 0;
+ for (;;) {
+ // Find the merge separator or the NO_CE terminator.
+ int leftLimit = leftStart;
+ long leftLower32;
+ while ((leftLower32 = left.getCE(leftLimit) & 0xffffffffL) > Collation.MERGE_SEPARATOR_LOWER32
+ || leftLower32 == 0) {
+ ++leftLimit;
+ }
+ int rightLimit = rightStart;
+ long rightLower32;
+ while ((rightLower32 = right.getCE(rightLimit) & 0xffffffffL) > Collation.MERGE_SEPARATOR_LOWER32
+ || rightLower32 == 0) {
+ ++rightLimit;
+ }
+
+ // Compare the segments.
+ int leftIndex = leftLimit;
+ int rightIndex = rightLimit;
+ for (;;) {
+ int leftSecondary = 0;
+ while (leftSecondary == 0 && leftIndex > leftStart) {
+ leftSecondary = ((int) left.getCE(--leftIndex)) >>> 16;
+ }
+
+ int rightSecondary = 0;
+ while (rightSecondary == 0 && rightIndex > rightStart) {
+ rightSecondary = ((int) right.getCE(--rightIndex)) >>> 16;
+ }
+
+ if (leftSecondary != rightSecondary) {
+ return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
+ }
+ if (leftSecondary == 0) {
+ break;
+ }
+ }
+
+ // Did we reach the end of either string?
+ // Both strings have the same number of merge separators,
+ // or else there would have been a primary-level difference.
+ assert (left.getCE(leftLimit) == right.getCE(rightLimit));
+ if (left.getCE(leftLimit) == Collation.NO_CE) {
+ break;
+ }
+ // Skip both merge separators and continue.
+ leftStart = leftLimit + 1;
+ rightStart = rightLimit + 1;
+ }
+ }
+ }
+
+ if ((options & CollationSettings.CASE_LEVEL) != 0) {
+ int strength = CollationSettings.getStrength(options);
+ int leftIndex = 0;
+ int rightIndex = 0;
+ for (;;) {
+ int leftCase, leftLower32, rightCase;
+ if (strength == Collator.PRIMARY) {
+ // Primary+caseLevel: Ignore case level weights of primary ignorables.
+ // Otherwise we would get a-umlaut > a
+ // which is not desirable for accent-insensitive sorting.
+ // Check for (lower 32 bits) == 0 as well because variable CEs are stored
+ // with only primary weights.
+ long ce;
+ do {
+ ce = left.getCE(leftIndex++);
+ leftCase = (int) ce;
+ } while ((ce >>> 32) == 0 || leftCase == 0);
+ leftLower32 = leftCase;
+ leftCase &= 0xc000;
+
+ do {
+ ce = right.getCE(rightIndex++);
+ rightCase = (int) ce;
+ } while ((ce >>> 32) == 0 || rightCase == 0);
+ rightCase &= 0xc000;
+ } else {
+ // Secondary+caseLevel: By analogy with the above,
+ // ignore case level weights of secondary ignorables.
+ //
+ // Note: A tertiary CE has uppercase case bits (0.0.ut)
+ // to keep tertiary+caseFirst well-formed.
+ //
+ // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
+ // Otherwise a tertiary CE's uppercase would be no greater than
+ // a primary/secondary CE's uppercase.
+ // (See UCA well-formedness condition 2.)
+ // We could construct a special case weight higher than uppercase,
+ // but it's simpler to always ignore case weights of secondary ignorables,
+ // turning 0.0.ut into 0.0.0.t.
+ // (See LDML Collation, Case Parameters.)
+ do {
+ leftCase = (int) left.getCE(leftIndex++);
+ } while ((leftCase & 0xffff0000) == 0);
+ leftLower32 = leftCase;
+ leftCase &= 0xc000;
+
+ do {
+ rightCase = (int) right.getCE(rightIndex++);
+ } while ((rightCase & 0xffff0000) == 0);
+ rightCase &= 0xc000;
+ }
+
+ // No need to handle NO_CE and MERGE_SEPARATOR specially:
+ // There is one case weight for each previous-level weight,
+ // so level length differences were handled there.
+ if (leftCase != rightCase) {
+ if ((options & CollationSettings.UPPER_FIRST) == 0) {
+ return (leftCase < rightCase) ? Collation.LESS : Collation.GREATER;
+ } else {
+ // upperFirst: reverse the case-bit comparison.
+ return (leftCase < rightCase) ? Collation.GREATER : Collation.LESS;
+ }
+ }
+ if ((leftLower32 >>> 16) == Collation.NO_CE_WEIGHT16) {
+ break;
+ }
+ }
+ }
+ if (CollationSettings.getStrength(options) <= Collator.SECONDARY) {
+ return Collation.EQUAL;
+ }
+
+ // Tertiary level: tertiaryMask folds the caseFirst setting into the weight comparison.
+ int tertiaryMask = CollationSettings.getTertiaryMask(options);
+
+ int leftIndex = 0;
+ int rightIndex = 0;
+ int anyQuaternaries = 0;
+ for (;;) {
+ int leftLower32, leftTertiary;
+ do {
+ leftLower32 = (int) left.getCE(leftIndex++);
+ anyQuaternaries |= leftLower32;
+ assert ((leftLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (leftLower32 & 0xc0c0) == 0);
+ leftTertiary = leftLower32 & tertiaryMask;
+ } while (leftTertiary == 0);
+
+ int rightLower32, rightTertiary;
+ do {
+ rightLower32 = (int) right.getCE(rightIndex++);
+ anyQuaternaries |= rightLower32;
+ assert ((rightLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (rightLower32 & 0xc0c0) == 0);
+ rightTertiary = rightLower32 & tertiaryMask;
+ } while (rightTertiary == 0);
+
+ if (leftTertiary != rightTertiary) {
+ if (CollationSettings.sortsTertiaryUpperCaseFirst(options)) {
+ // Pass through NO_CE and MERGE_SEPARATOR
+ // and keep real tertiary weights larger than the MERGE_SEPARATOR.
+ // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
+ // to keep tertiary CEs well-formed.
+ // Their case+tertiary weights must be greater than those of
+ // primary and secondary CEs.
+ if (leftTertiary > Collation.MERGE_SEPARATOR_WEIGHT16) {
+ if ((leftLower32 & 0xffff0000) != 0) {
+ leftTertiary ^= 0xc000;
+ } else {
+ leftTertiary += 0x4000;
+ }
+ }
+ if (rightTertiary > Collation.MERGE_SEPARATOR_WEIGHT16) {
+ if ((rightLower32 & 0xffff0000) != 0) {
+ rightTertiary ^= 0xc000;
+ } else {
+ rightTertiary += 0x4000;
+ }
+ }
+ }
+ return (leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER;
+ }
+ if (leftTertiary == Collation.NO_CE_WEIGHT16) {
+ break;
+ }
+ }
+ if (CollationSettings.getStrength(options) <= Collator.TERTIARY) {
+ return Collation.EQUAL;
+ }
+
+ if (!anyVariable && (anyQuaternaries & 0xc0) == 0) {
+ // If there are no "variable" CEs and no non-zero quaternary weights,
+ // then there are no quaternary differences.
+ return Collation.EQUAL;
+ }
+
+ // Quaternary level: compare shifted variable primaries against quaternary weights.
+ leftIndex = 0;
+ rightIndex = 0;
+ for (;;) {
+ long leftQuaternary;
+ do {
+ long ce = left.getCE(leftIndex++);
+ leftQuaternary = ce & 0xffff;
+ if (leftQuaternary == 0) {
+ // Variable primary or completely ignorable.
+ leftQuaternary = ce >>> 32;
+ } else if (leftQuaternary <= Collation.MERGE_SEPARATOR_WEIGHT16) {
+ // Leave NO_CE or MERGE_SEPARATOR as is.
+ } else {
+ // Regular CE, not tertiary ignorable.
+ // Preserve the quaternary weight in bits 7..6.
+ leftQuaternary |= 0xffffff3fL;
+ }
+ } while (leftQuaternary == 0);
+
+ long rightQuaternary;
+ do {
+ long ce = right.getCE(rightIndex++);
+ rightQuaternary = ce & 0xffff;
+ if (rightQuaternary == 0) {
+ // Variable primary or completely ignorable.
+ rightQuaternary = ce >>> 32;
+ } else if (rightQuaternary <= Collation.MERGE_SEPARATOR_WEIGHT16) {
+ // Leave NO_CE or MERGE_SEPARATOR as is.
+ } else {
+ // Regular CE, not tertiary ignorable.
+ // Preserve the quaternary weight in bits 7..6.
+ rightQuaternary |= 0xffffff3fL;
+ }
+ } while (rightQuaternary == 0);
+
+ if (leftQuaternary != rightQuaternary) {
+ // Return the difference, with script reordering.
+ byte[] reorderTable = settings.reorderTable;
+ if (reorderTable != null) {
+ leftQuaternary = Collation.reorder(reorderTable, leftQuaternary);
+ rightQuaternary = Collation.reorder(reorderTable, rightQuaternary);
+ }
+ return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER;
+ }
+ if (leftQuaternary == Collation.NO_CE_WEIGHT16) {
+ break;
+ }
+ }
+ return Collation.EQUAL;
+ }
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2010-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationData.java, ported from collationdata.h/.cpp
+*
+* C++ version created on: 2010oct27
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.impl.Normalizer2Impl;
+import com.ibm.icu.impl.Trie2_32;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Collation data container.
+ * Immutable data created by a CollationDataBuilder, or loaded from a file,
+ * or deserialized from API-provided binary data.
+ *
+ * Includes data for the collation base (root/default), aliased if this is not the base.
+ */
+public final class CollationData {
+ // Package-private: instances are populated by a builder or a data reader.
+ CollationData(Normalizer2Impl nfc) {
+ nfcImpl = nfc;
+ }
+
+ /** Returns the CE32 mapped to code point c via the main trie. */
+ public int getCE32(int c) {
+ return trie.get(c);
+ }
+
+ /** Trie lookup for a supplementary code point (currently same as getCE32). */
+ int getCE32FromSupplementary(int c) {
+ return trie.get(c); // TODO: port UTRIE2_GET32_FROM_SUPP(trie, c) to Java?
+ }
+
+ /**
+ * Returns true if c is a digit for numeric collation.
+ * Fast path below U+0660: only ASCII 0-9 qualify; otherwise check DIGIT_TAG.
+ */
+ boolean isDigit(int c) {
+ return c < 0x660 ? c <= 0x39 && 0x30 <= c :
+ Collation.hasCE32Tag(getCE32(c), Collation.DIGIT_TAG);
+ }
+
+ /**
+ * Returns true if c is unsafe for starting backward/resumed comparison;
+ * when numeric collation is on, all digits are also unsafe.
+ */
+ public boolean isUnsafeBackward(int c, boolean numeric) {
+ return unsafeBackwardSet.contains(c) || (numeric && isDigit(c));
+ }
+
+ /** Returns true if primary lead byte b allows sort-key compression. */
+ public boolean isCompressibleLeadByte(int b) {
+ return compressibleBytes[b];
+ }
+
+ /** Returns true if primary weight p (32 bits in the low half) has a compressible lead byte. */
+ public boolean isCompressiblePrimary(long p) {
+ // (int)p takes the low 32 bits; >>> 24 extracts the lead byte.
+ return isCompressibleLeadByte((int)p >>> 24);
+ }
+
+ /**
+ * Returns the CE32 from two contexts words.
+ * Access to the defaultCE32 for contraction and prefix matching.
+ */
+ int getCE32FromContexts(int index) {
+ return ((int)contexts.charAt(index) << 16) | contexts.charAt(index + 1);
+ }
+
+ /**
+ * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG).
+ * Requires that ce32 is special.
+ */
+ int getIndirectCE32(int ce32) {
+ assert(Collation.isSpecialCE32(ce32));
+ int tag = Collation.tagFromCE32(ce32);
+ if(tag == Collation.DIGIT_TAG) {
+ // Fetch the non-numeric-collation CE32.
+ ce32 = ce32s[Collation.indexFromCE32(ce32)];
+ } else if(tag == Collation.LEAD_SURROGATE_TAG) {
+ // A lead surrogate by itself maps to the unassigned-character CE32.
+ ce32 = Collation.UNASSIGNED_CE32;
+ } else if(tag == Collation.U0000_TAG) {
+ // Fetch the normal ce32 for U+0000.
+ ce32 = ce32s[0];
+ }
+ return ce32;
+ }
+
+ /**
+ * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG),
+ * if ce32 is special.
+ */
+ int getFinalCE32(int ce32) {
+ if(Collation.isSpecialCE32(ce32)) {
+ ce32 = getIndirectCE32(ce32);
+ }
+ return ce32;
+ }
+
+ /**
+ * Computes a CE from c's ce32 which has the OFFSET_TAG.
+ */
+ long getCEFromOffsetCE32(int c, int ce32) {
+ long dataCE = ces[Collation.indexFromCE32(ce32)];
+ return Collation.makeCE(Collation.getThreeBytePrimaryForOffsetData(c, dataCE));
+ }
+
+ /**
+ * Returns the FCD16 value for code point c. c must be >= 0.
+ */
+ int getFCD16(int c) {
+ return nfcImpl.getFCD16(c);
+ }
+
+ /**
+ * Returns the first primary for the script's reordering group.
+ * @return the primary with only the first primary lead byte of the group
+ * (not necessarily an actual root collator primary weight),
+ * or 0 if the script is unknown
+ */
+ long getFirstPrimaryForGroup(int script) {
+ int index = findScript(script);
+ if(index < 0) {
+ return 0;
+ }
+ // Bits 15..8 of the group head word hold the first lead byte; shift into primary position.
+ long head = scripts[index];
+ return (head & 0xff00) << 16;
+ }
+
+ /**
+ * Returns the last primary for the script's reordering group.
+ * @return the last primary of the group
+ * (not an actual root collator primary weight),
+ * or 0 if the script is unknown
+ */
+ public long getLastPrimaryForGroup(int script) {
+ int index = findScript(script);
+ if(index < 0) {
+ return 0;
+ }
+ // Bits 7..0 of the group head word hold the last lead byte;
+ // return the largest primary with that lead byte.
+ int head = scripts[index];
+ long lastByte = head & 0xff;
+ return ((lastByte + 1) << 24) - 1;
+ }
+
+ /**
+ * Finds the reordering group which contains the primary weight.
+ * @return the first script of the group, or -1 if the weight is beyond the last group
+ */
+ public int getGroupForPrimary(long p) {
+ p >>= 24; // Reordering groups are distinguished by primary lead bytes.
+ for(int i = 0; i < scripts.length; i = i + 2 + scripts[i + 1]) {
+ int lastByte = scripts[i] & 0xff;
+ if(p <= lastByte) {
+ return scripts[i + 2];
+ }
+ }
+ return -1;
+ }
+
+ // Returns the index of the group head word for script, or -1 if not found.
+ // See the scripts field doc for the group encoding.
+ private int findScript(int script) {
+ if(script < 0 || 0xffff < script) { return -1; }
+ for(int i = 0; i < scripts.length;) {
+ int limit = i + 2 + scripts[i + 1];
+ for(int j = i + 2; j < limit; ++j) {
+ if(script == scripts[j]) { return i; }
+ }
+ i = limit;
+ }
+ return -1;
+ }
+
+ /** Returns the sorted list of scripts/reorder codes equivalent to script (same group). */
+ public int[] getEquivalentScripts(int script) {
+ int i = findScript(script);
+ if(i < 0) { return EMPTY_INT_ARRAY; }
+ int length = scripts[i + 1];
+ assert(length != 0);
+ int dest[] = new int[length];
+ i += 2;
+ dest[0] = scripts[i++];
+ for(int j = 1; j < length; ++j) {
+ script = scripts[i++];
+ // Sorted insertion.
+ for(int k = j;; --k) {
+ // Invariant: dest[k] is free to receive either script or dest[k - 1].
+ if(k > 0 && script < dest[k - 1]) {
+ dest[k] = dest[k - 1];
+ } else {
+ dest[k] = script;
+ break;
+ }
+ }
+ }
+ return dest;
+ }
+
+ /**
+ * Writes the permutation table for the given reordering of scripts and groups,
+ * mapping from default-order primary-weight lead bytes to reordered lead bytes.
+ * The caller checks for illegal arguments and
+ * takes care of [DEFAULT] and memory allocation.
+ */
+ public void makeReorderTable(int[] reorder, byte[] table) {
+ int length = reorder.length;
+ // Initialize the table.
+ // Never reorder special low and high primary lead bytes.
+ int lowByte;
+ for(lowByte = 0; lowByte <= Collation.MERGE_SEPARATOR_BYTE; ++lowByte) {
+ table[lowByte] = (byte)lowByte;
+ }
+ // lowByte == 03
+
+ int highByte;
+ for(highByte = 0xff; highByte >= Collation.TRAIL_WEIGHT_BYTE; --highByte) {
+ table[highByte] = (byte)highByte;
+ }
+ // highByte == FE
+
+ // Set intermediate bytes to 0 to indicate that they have not been set yet.
+ for(int i = lowByte; i <= highByte; ++i) {
+ table[i] = 0;
+ }
+
+ // Get the set of special reorder codes in the input list.
+ // This supports up to 32 special reorder codes;
+ // it works for data with codes beyond Collator.ReorderCodes.LIMIT.
+ int specials = 0;
+ for(int i = 0; i < length; ++i) {
+ int reorderCode = reorder[i] - Collator.ReorderCodes.FIRST;
+ if(0 <= reorderCode && reorderCode <= 31) {
+ specials |= 1 << reorderCode;
+ }
+ }
+
+ // Start the reordering with the special low reorder codes that do not occur in the input.
+ for(int i = 0;; i += 3) {
+ if(scripts[i + 1] != 1) { break; } // Went beyond special single-code reorder codes.
+ int reorderCode = scripts[i + 2] - Collator.ReorderCodes.FIRST;
+ if(reorderCode < 0) { break; } // Went beyond special reorder codes.
+ if((specials & (1 << reorderCode)) == 0) {
+ int head = scripts[i];
+ int firstByte = head >> 8;
+ int lastByte = head & 0xff;
+ do { table[firstByte++] = (byte)lowByte++; } while(firstByte <= lastByte);
+ }
+ }
+
+ // Reorder according to the input scripts, continuing from the bottom of the bytes range.
+ for(int i = 0; i < length;) {
+ int script = reorder[i++];
+ if(script == UScript.UNKNOWN) {
+ // Put the remaining scripts at the top.
+ while(i < length) {
+ script = reorder[--length];
+ if(script == UScript.UNKNOWN) { // Must occur at most once.
+ throw new IllegalArgumentException(
+ "setReorderCodes(): duplicate UScript.UNKNOWN");
+ }
+ if(script == Collator.ReorderCodes.DEFAULT) {
+ throw new IllegalArgumentException(
+ "setReorderCodes(): UScript.DEFAULT together with other scripts");
+ }
+ int index = findScript(script);
+ if(index < 0) { continue; }
+ int head = scripts[index];
+ int firstByte = head >> 8;
+ int lastByte = head & 0xff;
+ if(table[firstByte] != 0) { // Duplicate or equivalent script.
+ throw new IllegalArgumentException(
+ "setReorderCodes(): duplicate or equivalent script " +
+ scriptCodeString(script));
+ }
+ // Fill this group's bytes downward from the top of the range.
+ do { table[lastByte--] = (byte)highByte--; } while(firstByte <= lastByte);
+ }
+ break;
+ }
+ if(script == Collator.ReorderCodes.DEFAULT) {
+ // The default code must be the only one in the list, and that is handled by the caller.
+ // Otherwise it must not be used.
+ throw new IllegalArgumentException(
+ "setReorderCodes(): UScript.DEFAULT together with other scripts");
+ }
+ int index = findScript(script);
+ if(index < 0) { continue; }
+ int head = scripts[index];
+ int firstByte = head >> 8;
+ int lastByte = head & 0xff;
+ if(table[firstByte] != 0) { // Duplicate or equivalent script.
+ throw new IllegalArgumentException(
+ "setReorderCodes(): duplicate or equivalent script " +
+ scriptCodeString(script));
+ }
+ do { table[firstByte++] = (byte)lowByte++; } while(firstByte <= lastByte);
+ }
+
+ // Put all remaining scripts into the middle.
+ // Avoid table[0] which must remain 0.
+ for(int i = 1; i <= 0xff; ++i) {
+ if(table[i] == 0) { table[i] = (byte)lowByte++; }
+ }
+ assert(lowByte == highByte + 1);
+ }
+
+ // Formats a script/reorder code for exception messages.
+ private static String scriptCodeString(int script) {
+ // Do not use the script name here: We do not want to depend on that data.
+ return (script < Collator.ReorderCodes.FIRST) ?
+ Integer.toString(script) : "0x" + Integer.toHexString(script);
+ }
+
+ private static final int[] EMPTY_INT_ARRAY = new int[0];
+
+ /** @see jamoCE32s */
+ static final int JAMO_CE32S_LENGTH = 19 + 21 + 27;
+
+ /** Main lookup trie. */
+ Trie2_32 trie;
+ /**
+ * Array of CE32 values.
+ * At index 0 there must be CE32(U+0000)
+ * to support U+0000's special-tag for NUL-termination handling.
+ */
+ int[] ce32s;
+ /** Array of CE values for expansions and OFFSET_TAG. */
+ long[] ces;
+ /** Array of prefix and contraction-suffix matching data. */
+ String contexts;
+ /** Base collation data, or null if this data itself is a base. */
+ public CollationData base;
+ /**
+ * Simple array of JAMO_CE32S_LENGTH=19+21+27 CE32s, one per canonical Jamo L/V/T.
+ * They are normally simple CE32s, rarely expansions.
+ * For fast handling of HANGUL_TAG.
+ */
+ int[] jamoCE32s = new int[JAMO_CE32S_LENGTH];
+ public Normalizer2Impl nfcImpl;
+ /** The single-byte primary weight (xx000000) for numeric collation. */
+ long numericPrimary = 0x12000000;
+
+ /** 256 flags for which primary-weight lead bytes are compressible. */
+ public boolean[] compressibleBytes;
+ /**
+ * Set of code points that are unsafe for starting string comparison after an identical prefix,
+ * or in backwards CE iteration.
+ */
+ UnicodeSet unsafeBackwardSet;
+
+ /**
+ * Fast Latin table for common-Latin-text string comparisons.
+ * Data structure see class CollationFastLatin.
+ */
+ public char[] fastLatinTable;
+ /**
+ * Header portion of the fastLatinTable.
+ * In C++, these are one array, and the header is skipped for mapping characters.
+ * In Java, two arrays work better.
+ */
+ char[] fastLatinTableHeader;
+
+ /**
+ * Data for scripts and reordering groups.
+ * Uses include building a reordering permutation table and
+ * providing script boundaries to AlphabeticIndex.
+ *
+ * This data is a sorted list of primary-weight lead byte ranges (reordering groups),
+ * each with a list of pairs sorted in base collation order;
+ * each pair contains a script/reorder code and the lowest primary weight for that script.
+ *
+ * Data structure:
+ * - Each reordering group is encoded in n+2 16-bit integers.
+ * - First integer:
+ * Bits 15..8: First byte of the reordering group's range.
+ * Bits 7..0: Last byte of the reordering group's range.
+ * - Second integer:
+ * Length n of the list of script/reordering codes.
+ * - Each further integer is a script or reordering code.
+ */
+ char[] scripts;
+
+ /**
+ * Collation elements in the root collator.
+ * Used by the CollationRootElements class. The data structure is described there.
+ * null in a tailoring.
+ */
+ public long[] rootElements;
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2012-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationDataBuilder.java, ported from collationdatabuilder.h/.cpp
+*
+* C++ version created on: 2012apr01
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.Normalizer2Impl;
+import com.ibm.icu.impl.Trie2;
+import com.ibm.icu.impl.Normalizer2Impl.Hangul;
+import com.ibm.icu.impl.Trie2Writable;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+import com.ibm.icu.util.CharsTrie;
+import com.ibm.icu.util.CharsTrieBuilder;
+import com.ibm.icu.util.StringTrieBuilder;
+
+/**
+ * Low-level CollationData builder.
+ * Takes (character, CE) pairs and builds them into runtime data structures.
+ * Supports characters with context prefixes and contraction suffixes.
+ */
+final class CollationDataBuilder { // not final in C++
+ /**
+ * Collation element modifier. Interface class for a modifier
+ * that changes a tailoring builder's temporary CEs to final CEs.
+ * Called for every non-special CE32 and every expansion CE.
+ * Both methods return Collation.NO_CE to indicate "no replacement".
+ */
+ interface CEModifier {
+ /** Returns a new CE to replace the non-special input CE32, or else Collation.NO_CE. */
+ long modifyCE32(int ce32);
+ /** Returns a new CE to replace the input CE, or else Collation.NO_CE. */
+ long modifyCE(long ce);
+ }
+
+ /**
+ * Creates an empty builder. The trie is created lazily;
+ * initForTailoring() must be called before adding mappings.
+ */
+ CollationDataBuilder() {
+ nfcImpl = Norm2AllModes.getNFCInstance().impl;
+ base = null;
+ baseSettings = null;
+ trie = null;
+ ce32s = new UVector32();
+ ce64s = new UVector64();
+ conditionalCE32s = new ArrayList<ConditionalCE32>();
+ modified = false;
+ fastLatinEnabled = false;
+ fastLatinBuilder = null;
+ collIter = null;
+ // Reserve the first CE32 for U+0000.
+ ce32s.addElement(0);
+ }
+
+ /**
+ * Initializes this builder for tailoring on top of base data b.
+ * Creates the writable trie with fallback-to-base defaults,
+ * pre-allocates the Latin-1 letters block, tags Hangul syllables,
+ * and copies the base's unsafe-backward set.
+ *
+ * @param b the base (root) collation data; must not be null
+ * @throws IllegalStateException if this builder was already initialized
+ * @throws IllegalArgumentException if b is null
+ */
+ void initForTailoring(CollationData b) {
+ if(trie != null) {
+ throw new IllegalStateException("attempt to reuse a CollationDataBuilder");
+ }
+ if(b == null) {
+ throw new IllegalArgumentException("null CollationData");
+ }
+ base = b;
+
+ // For a tailoring, the default is to fall back to the base.
+ trie = new Trie2Writable(Collation.FALLBACK_CE32, Collation.FFFD_CE32);
+
+ // Set the Latin-1 letters block so that it is allocated first in the data array,
+ // to try to improve locality of reference when sorting Latin-1 text.
+ // Do not use utrie2_setRange32() since that will not actually allocate blocks
+ // that are filled with the default value.
+ // ASCII (0..7F) is already preallocated anyway.
+ for(int c = 0xc0; c <= 0xff; ++c) {
+ trie.set(c, Collation.FALLBACK_CE32);
+ }
+
+ // Hangul syllables are not tailorable (except via tailoring Jamos).
+ // Always set the Hangul tag to help performance.
+ // Do this here, rather than in buildMappings(),
+ // so that we see the HANGUL_TAG in various assertions.
+ int hangulCE32 = Collation.makeCE32FromTagAndIndex(Collation.HANGUL_TAG, 0);
+ trie.setRange(Hangul.HANGUL_BASE, Hangul.HANGUL_END, hangulCE32, true);
+
+ // Copy the set contents but don't copy/clone the set as a whole because
+ // that would copy the isFrozen state too.
+ unsafeBackwardSet.addAll(b.unsafeBackwardSet);
+ }
+
+ /** Returns true if the base data marks primary lead byte b as compressible. */
+ boolean isCompressibleLeadByte(int b) {
+ return base.isCompressibleLeadByte(b);
+ }
+
+ /** Returns true if the lead byte of primary weight p (low 32 bits) is compressible. */
+ boolean isCompressiblePrimary(long p) {
+ // (int)p keeps the low 32 bits; the unsigned shift extracts the lead byte.
+ int leadByte = (int)p >>> 24;
+ return isCompressibleLeadByte(leadByte);
+ }
+
+ /**
+ * @return true if this builder has mappings (e.g., add() has been called);
+ * reflects the {@code modified} flag set when mappings are stored
+ */
+ boolean hasMappings() { return modified; }
+
+ /**
+ * @return true if code point c has CEs (an assigned CE32) in this builder
+ */
+ boolean isAssigned(int c) {
+ int ce32 = trie.get(c);
+ return Collation.isAssignedCE32(ce32);
+ }
+
+ /**
+ * Adds a mapping from (prefix, s) to the given CEs:
+ * encodes the CEs as a CE32 (possibly via an expansion) and stores it.
+ */
+ void add(CharSequence prefix, CharSequence s, long ces[], int cesLength) {
+ addCE32(prefix, s, encodeCEs(ces, cesLength));
+ }
+
+ /**
+ * Encodes the ces as either the returned ce32 by itself,
+ * or by storing an expansion, with the returned ce32 referring to that.
+ *
+ * <p>add(p, s, ces, cesLength) = addCE32(p, s, encodeCEs(ces, cesLength))
+ *
+ * @throws IllegalArgumentException if cesLength is out of range
+ * @throws IllegalStateException if called after build()
+ */
+ int encodeCEs(long ces[], int cesLength) {
+ if(cesLength < 0 || cesLength > Collation.MAX_EXPANSION_LENGTH) {
+ throw new IllegalArgumentException("mapping to too many CEs");
+ }
+ if(!isMutable()) {
+ throw new IllegalStateException("attempt to add mappings after build()");
+ }
+ if(cesLength == 0) {
+ // Convenience: We cannot map to nothing, but we can map to a completely ignorable CE.
+ // Do this here so that callers need not do it.
+ return encodeOneCEAsCE32(0);
+ } else if(cesLength == 1) {
+ return encodeOneCE(ces[0]);
+ } else if(cesLength == 2) {
+ // Try to encode two CEs as one CE32.
+ long ce0 = ces[0];
+ long ce1 = ces[1];
+ long p0 = ce0 >>> 32;
+ // Check that ce0 is primary+common-secondary and ce1 is common-tertiary-only,
+ // the shape a LATIN_EXPANSION_TAG CE32 can represent.
+ if((ce0 & 0xffffffffff00ffL) == Collation.COMMON_SECONDARY_CE &&
+ (ce1 & 0xffffffff00ffffffL) == Collation.COMMON_TERTIARY_CE &&
+ p0 != 0) {
+ // Latin mini expansion
+ return
+ (int)p0 |
+ (((int)ce0 & 0xff00) << 8) |
+ (((int)ce1 >> 16) & 0xff00) |
+ Collation.SPECIAL_CE32_LOW_BYTE |
+ Collation.LATIN_EXPANSION_TAG;
+ }
+ }
+ // Try to encode two or more CEs as CE32s.
+ int[] newCE32s = new int[Collation.MAX_EXPANSION_LENGTH]; // TODO: instance field?
+ for(int i = 0;; ++i) {
+ if(i == cesLength) {
+ return encodeExpansion32(newCE32s, 0, cesLength);
+ }
+ int ce32 = encodeOneCEAsCE32(ces[i]);
+ // Any CE that does not fit in a CE32 forces a full 64-bit CE expansion.
+ if(ce32 == Collation.NO_CE32) { break; }
+ newCE32s[i] = ce32;
+ }
+ return encodeExpansion(ces, 0, cesLength);
+ }
+
+ /**
+ * Adds a mapping from prefix|s to the single (possibly special) ce32.
+ * s must not be empty. An empty prefix with a single-code-point s becomes
+ * a plain trie mapping; otherwise the mapping is inserted into the
+ * sorted ConditionalCE32 list for the first code point of s.
+ *
+ * @throws IllegalArgumentException if s is empty
+ * @throws IllegalStateException if called after build()
+ */
+ void addCE32(CharSequence prefix, CharSequence s, int ce32) {
+ if(s.length() == 0) {
+ throw new IllegalArgumentException("mapping from empty string");
+ }
+ if(!isMutable()) {
+ throw new IllegalStateException("attempt to add mappings after build()");
+ }
+ int c = Character.codePointAt(s, 0);
+ int cLength = Character.charCount(c);
+ int oldCE32 = trie.get(c);
+ // Context = non-empty prefix and/or contraction suffix beyond the first code point.
+ boolean hasContext = prefix.length() != 0|| s.length() > cLength;
+ if(oldCE32 == Collation.FALLBACK_CE32) {
+ // First tailoring for c.
+ // If c has contextual base mappings or if we add a contextual mapping,
+ // then copy the base mappings.
+ // Otherwise we just override the base mapping.
+ int baseCE32 = base.getFinalCE32(base.getCE32(c));
+ if(hasContext || Collation.ce32HasContext(baseCE32)) {
+ oldCE32 = copyFromBaseCE32(c, baseCE32, true);
+ trie.set(c, oldCE32);
+ }
+ }
+ if(!hasContext) {
+ // No prefix, no contraction.
+ if(!isBuilderContextCE32(oldCE32)) {
+ trie.set(c, ce32);
+ } else {
+ // Overwrite the default (no-context) mapping in the existing list head.
+ ConditionalCE32 cond = getConditionalCE32ForCE32(oldCE32);
+ cond.builtCE32 = Collation.NO_CE32;
+ cond.ce32 = ce32;
+ }
+ } else {
+ ConditionalCE32 cond;
+ if(!isBuilderContextCE32(oldCE32)) {
+ // Replace the simple oldCE32 with a builder context CE32
+ // pointing to a new ConditionalCE32 list head.
+ int index = addConditionalCE32("\0", oldCE32);
+ int contextCE32 = makeBuilderContextCE32(index);
+ trie.set(c, contextCE32);
+ contextChars.add(c);
+ cond = getConditionalCE32(index);
+ } else {
+ cond = getConditionalCE32ForCE32(oldCE32);
+ cond.builtCE32 = Collation.NO_CE32;
+ }
+ CharSequence suffix = s.subSequence(cLength, s.length());
+ // Context string encoding: one unit with the prefix length,
+ // then the prefix, then the contraction suffix (see ConditionalCE32).
+ String context = new StringBuilder().append((char)prefix.length()).
+ append(prefix).append(suffix).toString();
+ unsafeBackwardSet.addAll(suffix);
+ // Insert into the singly-linked list, keeping it sorted by context string.
+ for(;;) {
+ // invariant: context > cond.context
+ int next = cond.next;
+ if(next < 0) {
+ // Append a new ConditionalCE32 after cond.
+ int index = addConditionalCE32(context, ce32);
+ cond.next = index;
+ break;
+ }
+ ConditionalCE32 nextCond = getConditionalCE32(next);
+ int cmp = context.compareTo(nextCond.context);
+ if(cmp < 0) {
+ // Insert a new ConditionalCE32 between cond and nextCond.
+ int index = addConditionalCE32(context, ce32);
+ cond.next = index;
+ getConditionalCE32(index).next = next;
+ break;
+ } else if(cmp == 0) {
+ // Same context as before, overwrite its ce32.
+ nextCond.ce32 = ce32;
+ break;
+ }
+ cond = nextCond;
+ }
+ }
+ modified = true;
+ }
+
+ /**
+ * Copies all mappings from the src builder, with modifications.
+ * This builder here must not be built yet, and should be empty.
+ *
+ * @throws IllegalStateException if called after build()
+ */
+ void copyFrom(CollationDataBuilder src, CEModifier modifier) {
+ if(!isMutable()) {
+ throw new IllegalStateException("attempt to copyFrom() after build()");
+ }
+ CopyHelper helper = new CopyHelper(src, this, modifier);
+ Iterator<Trie2.Range> trieIterator = src.trie.iterator();
+ Trie2.Range range;
+ // Copy only regular code point ranges; stop at the first lead-surrogate range
+ // (presumably those index values are rebuilt separately -- see setLeadSurrogates()).
+ while(trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
+ enumRangeForCopy(range.startCodePoint, range.endCodePoint, range.value, helper);
+ }
+ // Update the contextChars and the unsafeBackwardSet while copying,
+ // in case a character had conditional mappings in the source builder
+ // and they were removed later.
+ modified |= src.modified;
+ }
+
+ /**
+ * Copies the base mappings for each code point in set into this tailoring,
+ * so that runtime lookups for those characters need not fall back to the base.
+ */
+ void optimize(UnicodeSet set) {
+ if(set.isEmpty()) { return; }
+ UnicodeSetIterator iter = new UnicodeSetIterator(set);
+ while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) {
+ int c = iter.codepoint;
+ int ce32 = trie.get(c);
+ if(ce32 == Collation.FALLBACK_CE32) {
+ // Not tailored yet: materialize the base mapping (with context).
+ ce32 = base.getFinalCE32(base.getCE32(c));
+ ce32 = copyFromBaseCE32(c, ce32, true);
+ trie.set(c, ce32);
+ }
+ }
+ modified = true;
+ }
+
+ /**
+ * Removes the contextual (prefix/contraction) mappings for each code point
+ * in set, keeping only its default (no-context) mapping.
+ */
+ void suppressContractions(UnicodeSet set) {
+ if(set.isEmpty()) { return; }
+ UnicodeSetIterator iter = new UnicodeSetIterator(set);
+ while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) {
+ int c = iter.codepoint;
+ int ce32 = trie.get(c);
+ if(ce32 == Collation.FALLBACK_CE32) {
+ // Not tailored: copy the base mapping with its context stripped.
+ ce32 = base.getFinalCE32(base.getCE32(c));
+ if(Collation.ce32HasContext(ce32)) {
+ ce32 = copyFromBaseCE32(c, ce32, false /* without context */);
+ trie.set(c, ce32);
+ }
+ } else if(isBuilderContextCE32(ce32)) {
+ // Tailored with context: keep only the list head's default CE32.
+ ce32 = getConditionalCE32ForCE32(ce32).ce32;
+ // Simply abandon the list of ConditionalCE32.
+ // The caller will copy this builder in the end,
+ // eliminating unreachable data.
+ trie.set(c, ce32);
+ contextChars.remove(c);
+ }
+ }
+ modified = true;
+ }
+
+ /** Requests that build() also attempt to create a fast-Latin table. */
+ void enableFastLatin() { fastLatinEnabled = true; }
+ /**
+ * Builds the runtime CollationData from this builder's structures,
+ * inheriting numeric-primary, compressible-bytes and script data from the base.
+ */
+ void build(CollationData data) {
+ buildMappings(data);
+ if(base != null) {
+ data.numericPrimary = base.numericPrimary;
+ data.compressibleBytes = base.compressibleBytes;
+ data.scripts = base.scripts;
+ }
+ buildFastLatinTable(data);
+ }
+
+ /**
+ * Looks up CEs for s and appends them to the ces array.
+ * Does not handle normalization: s should be in FCD form.
+ *
+ * Does not write completely ignorable CEs.
+ * Does not write beyond Collation.MAX_EXPANSION_LENGTH.
+ *
+ * @return incremented cesLength
+ */
+ int getCEs(CharSequence s, long ces[], int cesLength) {
+ // No prefix context: start fetching CEs at offset 0.
+ return getCEs(s, 0, ces, cesLength);
+ }
+
+ /**
+ * Looks up CEs for s in the context of prefix, appending them to ces.
+ * Implemented by concatenating prefix+s and starting the fetch after the prefix.
+ *
+ * @return incremented cesLength
+ */
+ int getCEs(CharSequence prefix, CharSequence s, long ces[], int cesLength) {
+ int prefixLength = prefix.length();
+ if(prefixLength == 0) {
+ return getCEs(s, 0, ces, cesLength);
+ } else {
+ return getCEs(new StringBuilder(prefix).append(s), prefixLength, ces, cesLength);
+ }
+ }
+
+ /**
+ * Build-time context and CE32 for a code point.
+ * If a code point has contextual mappings, then the default (no-context) mapping
+ * and all conditional mappings are stored in a singly-linked list
+ * of ConditionalCE32, sorted by context strings.
+ *
+ * Context strings sort by prefix length, then by prefix, then by contraction suffix.
+ * Context strings must be unique and in ascending order.
+ */
+ private static final class ConditionalCE32 {
+ ConditionalCE32(String ct, int ce) {
+ context = ct;
+ ce32 = ce;
+ defaultCE32 = Collation.NO_CE32;
+ builtCE32 = Collation.NO_CE32;
+ next = -1;
+ }
+
+ // A length-1 context holds only the prefix-length unit, i.e. no real context.
+ boolean hasContext() { return context.length() > 1; }
+ // The first unit of the context string stores the prefix length.
+ int prefixLength() { return context.charAt(0); }
+
+ /**
+ * "\0" for the first entry for any code point, with its default CE32.
+ *
+ * Otherwise one unit with the length of the prefix string,
+ * then the prefix string, then the contraction suffix.
+ */
+ String context;
+ /**
+ * CE32 for the code point and its context.
+ * Can be special (e.g., for an expansion) but not contextual (prefix or contraction tag).
+ */
+ int ce32;
+ /**
+ * Default CE32 for all contexts with this same prefix.
+ * Initially NO_CE32. Set only while building runtime data structures,
+ * and only on one of the nodes of a sub-list with the same prefix.
+ */
+ int defaultCE32;
+ /**
+ * CE32 for the built contexts.
+ * When fetching CEs from the builder, the contexts are built into their runtime form
+ * so that the normal collation implementation can process them.
+ * The result is cached in the list head. It is reset when the contexts are modified.
+ */
+ int builtCE32;
+ /**
+ * Index of the next ConditionalCE32.
+ * Negative for the end of the list.
+ */
+ int next;
+ }
+
+ /**
+ * Resolves an OFFSET_TAG CE32 for code point c into a long-primary CE32,
+ * reading the offset data CE from the base (fromBase) or from this builder's ce64s.
+ */
+ protected int getCE32FromOffsetCE32(boolean fromBase, int c, int ce32) {
+ int i = Collation.indexFromCE32(ce32);
+ long dataCE = fromBase ? base.ces[i] : ce64s.elementAti(i);
+ long p = Collation.getThreeBytePrimaryForOffsetData(c, dataCE);
+ return Collation.makeLongPrimaryCE32(p);
+ }
+
+ /**
+ * Returns the index of ce in ce64s, appending it if not yet present.
+ * Linear search; acceptable at build time.
+ */
+ protected int addCE(long ce) {
+ int length = ce64s.size();
+ for(int i = 0; i < length; ++i) {
+ if(ce == ce64s.elementAti(i)) { return i; }
+ }
+ ce64s.addElement(ce);
+ return length;
+ }
+
+ /**
+ * Returns the index of ce32 in ce32s, appending it if not yet present.
+ * Linear search; acceptable at build time.
+ */
+ protected int addCE32(int ce32) {
+ int length = ce32s.size();
+ for(int i = 0; i < length; ++i) {
+ if(ce32 == ce32s.elementAti(i)) { return i; }
+ }
+ ce32s.addElement(ce32);
+ return length;
+ }
+
+ /**
+ * Appends a new ConditionalCE32 for the given context string and ce32,
+ * returning its index in conditionalCE32s.
+ *
+ * @throws IndexOutOfBoundsException if the index would exceed Collation.MAX_INDEX
+ */
+ protected int addConditionalCE32(String context, int ce32) {
+ assert(context.length() != 0);
+ int index = conditionalCE32s.size();
+ if(index > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many context-sensitive mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ ConditionalCE32 cond = new ConditionalCE32(context, ce32);
+ conditionalCE32s.add(cond);
+ return index;
+ }
+
+ /** Returns the ConditionalCE32 at the given list index. */
+ protected ConditionalCE32 getConditionalCE32(int index) {
+ return conditionalCE32s.get(index);
+ }
+ /** Returns the ConditionalCE32 list head referenced by a builder-context CE32. */
+ protected ConditionalCE32 getConditionalCE32ForCE32(int ce32) {
+ return getConditionalCE32(Collation.indexFromCE32(ce32));
+ }
+
+ /** Makes a build-time-only CE32 that points into the ConditionalCE32 list. */
+ protected static int makeBuilderContextCE32(int index) {
+ return Collation.makeCE32FromTagAndIndex(Collation.BUILDER_DATA_TAG, index);
+ }
+ /** Tests whether ce32 is a build-time ConditionalCE32-list reference. */
+ protected static boolean isBuilderContextCE32(int ce32) {
+ return Collation.hasCE32Tag(ce32, Collation.BUILDER_DATA_TAG);
+ }
+
+ /**
+ * Encodes a single 64-bit CE as a CE32 if it fits one of the compact forms
+ * (normal, long-primary, long-secondary); returns Collation.NO_CE32 otherwise.
+ */
+ protected static int encodeOneCEAsCE32(long ce) {
+ long p = ce >>> 32;
+ int lower32 = (int)ce;
+ int t = lower32 & 0xffff;
+ assert((t & 0xc000) != 0xc000); // Impossible case bits 11 mark special CE32s.
+ if((ce & 0xffff00ff00ffL) == 0) {
+ // normal form ppppsstt
+ return (int)p | (lower32 >>> 16) | (t >> 8);
+ } else if((ce & 0xffffffffffL) == Collation.COMMON_SEC_AND_TER_CE) {
+ // long-primary form ppppppC1
+ return Collation.makeLongPrimaryCE32(p);
+ } else if(p == 0 && (t & 0xff) == 0) {
+ // long-secondary form ssssttC2
+ return Collation.makeLongSecondaryCE32(lower32);
+ }
+ return Collation.NO_CE32;
+ }
+
+ /**
+ * Encodes one CE as a CE32, storing it as a length-1 expansion
+ * when it does not fit a compact CE32 form.
+ *
+ * @throws IndexOutOfBoundsException if the expansion index would exceed MAX_INDEX
+ */
+ protected int encodeOneCE(long ce) {
+ // Try to encode one CE as one CE32.
+ int ce32 = encodeOneCEAsCE32(ce);
+ if(ce32 != Collation.NO_CE32) { return ce32; }
+ int index = addCE(ce);
+ if(index > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ return Collation.makeCE32FromTagIndexAndLength(Collation.EXPANSION_TAG, index, 1);
+ }
+
+ /**
+ * Returns an EXPANSION_TAG CE32 for ces[start..start+length),
+ * reusing an identical subsequence already in ce64s when possible.
+ *
+ * @throws IndexOutOfBoundsException if the expansion index would exceed MAX_INDEX
+ */
+ protected int encodeExpansion(long ces[], int start, int length) {
+ // See if this sequence of CEs has already been stored.
+ long first = ces[start];
+ int ce64sMax = ce64s.size() - length;
+ for(int i = 0; i <= ce64sMax; ++i) {
+ if(first == ce64s.elementAti(i)) {
+ if(i > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ for(int j = 1;; ++j) {
+ if(j == length) {
+ return Collation.makeCE32FromTagIndexAndLength(
+ Collation.EXPANSION_TAG, i, length);
+ }
+ if(ce64s.elementAti(i + j) != ces[start + j]) { break; }
+ }
+ }
+ }
+ // Store the new sequence.
+ int i = ce64s.size();
+ if(i > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ for(int j = 0; j < length; ++j) {
+ ce64s.addElement(ces[start + j]);
+ }
+ return Collation.makeCE32FromTagIndexAndLength(Collation.EXPANSION_TAG, i, length);
+ }
+
+ /**
+ * Returns an EXPANSION32_TAG CE32 for newCE32s[start..start+length),
+ * reusing an identical subsequence already in ce32s when possible.
+ *
+ * @throws IndexOutOfBoundsException if the expansion index would exceed MAX_INDEX
+ */
+ protected int encodeExpansion32(int newCE32s[], int start, int length) {
+ // See if this sequence of CE32s has already been stored.
+ int first = newCE32s[start];
+ int ce32sMax = ce32s.size() - length;
+ for(int i = 0; i <= ce32sMax; ++i) {
+ if(first == ce32s.elementAti(i)) {
+ if(i > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ for(int j = 1;; ++j) {
+ if(j == length) {
+ return Collation.makeCE32FromTagIndexAndLength(
+ Collation.EXPANSION32_TAG, i, length);
+ }
+ if(ce32s.elementAti(i + j) != newCE32s[start + j]) { break; }
+ }
+ }
+ }
+ // Store the new sequence.
+ int i = ce32s.size();
+ if(i > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ for(int j = 0; j < length; ++j) {
+ ce32s.addElement(newCE32s[start + j]);
+ }
+ return Collation.makeCE32FromTagIndexAndLength(Collation.EXPANSION32_TAG, i, length);
+ }
+
+ /**
+ * Copies a final base CE32 for code point c into this builder's storage,
+ * returning the equivalent tailoring CE32.
+ * If withContext, prefix/contraction data is flattened into a ConditionalCE32 list;
+ * otherwise only the no-context default mapping is copied.
+ *
+ * @throws UnsupportedOperationException for Hangul syllables (not tailorable)
+ */
+ protected int copyFromBaseCE32(int c, int ce32, boolean withContext) {
+ if(!Collation.isSpecialCE32(ce32)) { return ce32; }
+ switch(Collation.tagFromCE32(ce32)) {
+ case Collation.LONG_PRIMARY_TAG:
+ case Collation.LONG_SECONDARY_TAG:
+ case Collation.LATIN_EXPANSION_TAG:
+ // copy as is
+ break;
+ case Collation.EXPANSION32_TAG: {
+ int index = Collation.indexFromCE32(ce32);
+ int length = Collation.lengthFromCE32(ce32);
+ ce32 = encodeExpansion32(base.ce32s, index, length);
+ break;
+ }
+ case Collation.EXPANSION_TAG: {
+ int index = Collation.indexFromCE32(ce32);
+ int length = Collation.lengthFromCE32(ce32);
+ ce32 = encodeExpansion(base.ces, index, length);
+ break;
+ }
+ case Collation.PREFIX_TAG: {
+ // Flatten prefixes and nested suffixes (contractions)
+ // into a linear list of ConditionalCE32.
+ int trieIndex = Collation.indexFromCE32(ce32);
+ ce32 = base.getCE32FromContexts(trieIndex); // Default if no prefix match.
+ if(!withContext) {
+ return copyFromBaseCE32(c, ce32, false);
+ }
+ ConditionalCE32 head = new ConditionalCE32("", 0);
+ StringBuilder context = new StringBuilder("\0");
+ int index;
+ if(Collation.isContractionCE32(ce32)) {
+ index = copyContractionsFromBaseCE32(context, c, ce32, head);
+ } else {
+ ce32 = copyFromBaseCE32(c, ce32, true);
+ head.next = index = addConditionalCE32(context.toString(), ce32);
+ }
+ ConditionalCE32 cond = getConditionalCE32(index); // the last ConditionalCE32 so far
+ // Iterate over the base's prefix trie (stored after the 2-unit default CE32).
+ CharsTrie.Iterator prefixes = CharsTrie.iterator(base.contexts, trieIndex + 2, 0);
+ while(prefixes.hasNext()) {
+ CharsTrie.Entry entry = prefixes.next();
+ context.setLength(0);
+ // Prefixes are stored reversed in the trie; rebuild the context string
+ // as length-unit + forward prefix.
+ context.append(entry.chars).reverse().insert(0, (char)entry.chars.length());
+ ce32 = entry.value;
+ if(Collation.isContractionCE32(ce32)) {
+ index = copyContractionsFromBaseCE32(context, c, ce32, cond);
+ } else {
+ ce32 = copyFromBaseCE32(c, ce32, true);
+ cond.next = index = addConditionalCE32(context.toString(), ce32);
+ }
+ cond = getConditionalCE32(index);
+ }
+ ce32 = makeBuilderContextCE32(head.next);
+ contextChars.add(c);
+ break;
+ }
+ case Collation.CONTRACTION_TAG: {
+ if(!withContext) {
+ int index = Collation.indexFromCE32(ce32);
+ ce32 = base.getCE32FromContexts(index); // Default if no suffix match.
+ return copyFromBaseCE32(c, ce32, false);
+ }
+ ConditionalCE32 head = new ConditionalCE32("", 0);
+ StringBuilder context = new StringBuilder("\0");
+ copyContractionsFromBaseCE32(context, c, ce32, head);
+ ce32 = makeBuilderContextCE32(head.next);
+ contextChars.add(c);
+ break;
+ }
+ case Collation.HANGUL_TAG:
+ throw new UnsupportedOperationException("We forbid tailoring of Hangul syllables.");
+ case Collation.OFFSET_TAG:
+ ce32 = getCE32FromOffsetCE32(true, c, ce32);
+ break;
+ case Collation.IMPLICIT_TAG:
+ ce32 = encodeOneCE(Collation.unassignedCEFromCodePoint(c));
+ break;
+ default:
+ throw new AssertionError("copyFromBaseCE32(c, ce32, withContext) " +
+ "requires ce32 == base.getFinalCE32(ce32)");
+ }
+ return ce32;
+ }
+
+ /**
+ * Copies base contractions to a list of ConditionalCE32.
+ * Sets cond.next to the index of the first new item
+ * and returns the index of the last new item.
+ */
+ protected int copyContractionsFromBaseCE32(StringBuilder context, int c, int ce32,
+ ConditionalCE32 cond) {
+ int trieIndex = Collation.indexFromCE32(ce32);
+ int index;
+ if((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
+ // No match on the single code point.
+ // We are underneath a prefix, and the default mapping is just
+ // a fallback to the mappings for a shorter prefix.
+ assert(context.length() > 1);
+ index = -1;
+ } else {
+ ce32 = base.getCE32FromContexts(trieIndex); // Default if no suffix match.
+ assert(!Collation.isContractionCE32(ce32));
+ ce32 = copyFromBaseCE32(c, ce32, true);
+ cond.next = index = addConditionalCE32(context.toString(), ce32);
+ cond = getConditionalCE32(index);
+ }
+
+ int suffixStart = context.length();
+ // The suffix trie is stored after the 2-unit default CE32 (see addContextTrie()).
+ CharsTrie.Iterator suffixes = CharsTrie.iterator(base.contexts, trieIndex + 2, 0);
+ while(suffixes.hasNext()) {
+ CharsTrie.Entry entry = suffixes.next();
+ context.append(entry.chars);
+ ce32 = copyFromBaseCE32(c, entry.value, true);
+ cond.next = index = addConditionalCE32(context.toString(), ce32);
+ // No need to update the unsafeBackwardSet because the tailoring set
+ // is already a copy of the base set.
+ cond = getConditionalCE32(index);
+ // Truncate back to prefix-only context for the next suffix.
+ context.setLength(suffixStart);
+ }
+ assert(index >= 0);
+ return index;
+ }
+
+ /**
+ * Helper for copyFrom(): copies trie ranges from a source builder into a
+ * destination builder, re-encoding each CE32 and applying the CEModifier
+ * to modifiable CEs along the way.
+ */
+ private static final class CopyHelper {
+ CopyHelper(CollationDataBuilder s, CollationDataBuilder d,
+ CollationDataBuilder.CEModifier m) {
+ src = s;
+ dest = d;
+ modifier = m;
+ }
+
+ // Copies one trie value for the code point range [start, end].
+ void copyRangeCE32(int start, int end, int ce32) {
+ ce32 = copyCE32(ce32);
+ dest.trie.setRange(start, end, ce32, true);
+ if(CollationDataBuilder.isBuilderContextCE32(ce32)) {
+ dest.contextChars.add(start, end);
+ }
+ }
+
+ /**
+ * Re-encodes one source CE32 into the destination builder's storage,
+ * applying the modifier where the CE(s) can be modified.
+ */
+ int copyCE32(int ce32) {
+ if(!Collation.isSpecialCE32(ce32)) {
+ // Simple CE32: let the modifier replace the whole CE if it wants to.
+ long ce = modifier.modifyCE32(ce32);
+ if(ce != Collation.NO_CE) {
+ ce32 = dest.encodeOneCE(ce);
+ }
+ } else {
+ int tag = Collation.tagFromCE32(ce32);
+ if(tag == Collation.EXPANSION32_TAG) {
+ int[] srcCE32s = src.ce32s.getBuffer();
+ int srcIndex = Collation.indexFromCE32(ce32);
+ int length = Collation.lengthFromCE32(ce32);
+ // Inspect the source CE32s. Just copy them if none are modified.
+ // Otherwise copy to modifiedCEs, with modifications.
+ boolean isModified = false;
+ for(int i = 0; i < length; ++i) {
+ ce32 = srcCE32s[srcIndex + i];
+ long ce;
+ if(Collation.isSpecialCE32(ce32) ||
+ (ce = modifier.modifyCE32(ce32)) == Collation.NO_CE) {
+ if(isModified) {
+ modifiedCEs[i] = Collation.ceFromCE32(ce32);
+ }
+ } else {
+ if(!isModified) {
+ // First modification: back-fill the unmodified CEs so far.
+ for(int j = 0; j < i; ++j) {
+ modifiedCEs[j] = Collation.ceFromCE32(srcCE32s[srcIndex + j]);
+ }
+ isModified = true;
+ }
+ modifiedCEs[i] = ce;
+ }
+ }
+ if(isModified) {
+ ce32 = dest.encodeCEs(modifiedCEs, length);
+ } else {
+ ce32 = dest.encodeExpansion32(srcCE32s, srcIndex, length);
+ }
+ } else if(tag == Collation.EXPANSION_TAG) {
+ long[] srcCEs = src.ce64s.getBuffer();
+ int srcIndex = Collation.indexFromCE32(ce32);
+ int length = Collation.lengthFromCE32(ce32);
+ // Inspect the source CEs. Just copy them if none are modified.
+ // Otherwise copy to modifiedCEs, with modifications.
+ boolean isModified = false;
+ for(int i = 0; i < length; ++i) {
+ long srcCE = srcCEs[srcIndex + i];
+ long ce = modifier.modifyCE(srcCE);
+ if(ce == Collation.NO_CE) {
+ if(isModified) {
+ modifiedCEs[i] = srcCE;
+ }
+ } else {
+ if(!isModified) {
+ // First modification: back-fill the unmodified CEs so far.
+ for(int j = 0; j < i; ++j) {
+ modifiedCEs[j] = srcCEs[srcIndex + j];
+ }
+ isModified = true;
+ }
+ modifiedCEs[i] = ce;
+ }
+ }
+ if(isModified) {
+ ce32 = dest.encodeCEs(modifiedCEs, length);
+ } else {
+ ce32 = dest.encodeExpansion(srcCEs, srcIndex, length);
+ }
+ } else if(tag == Collation.BUILDER_DATA_TAG) {
+ // Copy the list of ConditionalCE32.
+ ConditionalCE32 cond = src.getConditionalCE32ForCE32(ce32);
+ assert(!cond.hasContext());
+ int destIndex = dest.addConditionalCE32(
+ cond.context, copyCE32(cond.ce32));
+ ce32 = CollationDataBuilder.makeBuilderContextCE32(destIndex);
+ while(cond.next >= 0) {
+ cond = src.getConditionalCE32(cond.next);
+ // Re-fetch the previous dest node: copyCE32() below may grow the list.
+ ConditionalCE32 prevDestCond = dest.getConditionalCE32(destIndex);
+ destIndex = dest.addConditionalCE32(
+ cond.context, copyCE32(cond.ce32));
+ int suffixStart = cond.prefixLength() + 1;
+ dest.unsafeBackwardSet.addAll(cond.context.substring(suffixStart));
+ prevDestCond.next = destIndex;
+ }
+ } else {
+ // Just copy long CEs and Latin mini expansions (and other expected values) as is,
+ // assuming that the modifier would not modify them.
+ assert(tag == Collation.LONG_PRIMARY_TAG ||
+ tag == Collation.LONG_SECONDARY_TAG ||
+ tag == Collation.LATIN_EXPANSION_TAG ||
+ tag == Collation.HANGUL_TAG);
+ }
+ }
+ return ce32;
+ }
+
+ CollationDataBuilder src;
+ CollationDataBuilder dest;
+ CollationDataBuilder.CEModifier modifier;
+ // Scratch buffer for expansions that contain at least one modified CE.
+ long[] modifiedCEs = new long[Collation.MAX_EXPANSION_LENGTH];
+ }
+
+ /** Range callback for copyFrom(): skips unassigned/fallback ranges, copies the rest. */
+ private static void
+ enumRangeForCopy(int start, int end, int value, CopyHelper helper) {
+ if(value != Collation.UNASSIGNED_CE32 && value != Collation.FALLBACK_CE32) {
+ helper.copyRangeCE32(start, end, value);
+ }
+ }
+
+ /**
+ * Fills jamoCE32s with one CE32 per conjoining Jamo (L, V, T),
+ * resolving offset CE32s and copying context/expansion data from the base
+ * when any Jamo is tailored.
+ *
+ * @return true if any Jamo has an assigned (tailored) CE32, or if there is no base
+ */
+ protected boolean getJamoCE32s(int jamoCE32s[]) {
+ boolean anyJamoAssigned = base == null; // always set jamoCE32s in the base data
+ boolean needToCopyFromBase = false;
+ for(int j = 0; j < CollationData.JAMO_CE32S_LENGTH; ++j) { // Count across Jamo types.
+ int jamo = jamoCpFromIndex(j);
+ boolean fromBase = false;
+ int ce32 = trie.get(jamo);
+ anyJamoAssigned |= Collation.isAssignedCE32(ce32);
+ // TODO: Try to prevent [optimize [Jamo]] from counting as anyJamoAssigned.
+ // (As of CLDR 24 [2013] the Korean tailoring does not optimize conjoining Jamo.)
+ if(ce32 == Collation.FALLBACK_CE32) {
+ fromBase = true;
+ ce32 = base.getCE32(jamo);
+ }
+ if(Collation.isSpecialCE32(ce32)) {
+ switch(Collation.tagFromCE32(ce32)) {
+ case Collation.LONG_PRIMARY_TAG:
+ case Collation.LONG_SECONDARY_TAG:
+ case Collation.LATIN_EXPANSION_TAG:
+ // Copy the ce32 as-is.
+ break;
+ case Collation.EXPANSION32_TAG:
+ case Collation.EXPANSION_TAG:
+ case Collation.PREFIX_TAG:
+ case Collation.CONTRACTION_TAG:
+ if(fromBase) {
+ // Defer copying until we know if anyJamoAssigned.
+ ce32 = Collation.FALLBACK_CE32;
+ needToCopyFromBase = true;
+ }
+ break;
+ case Collation.IMPLICIT_TAG:
+ // An unassigned Jamo should only occur in tests with incomplete bases.
+ assert(fromBase);
+ ce32 = Collation.FALLBACK_CE32;
+ needToCopyFromBase = true;
+ break;
+ case Collation.OFFSET_TAG:
+ ce32 = getCE32FromOffsetCE32(fromBase, jamo, ce32);
+ break;
+ case Collation.FALLBACK_TAG:
+ case Collation.RESERVED_TAG_3:
+ case Collation.BUILDER_DATA_TAG:
+ case Collation.DIGIT_TAG:
+ case Collation.U0000_TAG:
+ case Collation.HANGUL_TAG:
+ case Collation.LEAD_SURROGATE_TAG:
+ throw new AssertionError(String.format("unexpected special tag in ce32=0x%08x", ce32));
+ }
+ }
+ jamoCE32s[j] = ce32;
+ }
+ if(anyJamoAssigned && needToCopyFromBase) {
+ // Second pass: now copy the deferred base context/expansion data.
+ for(int j = 0; j < CollationData.JAMO_CE32S_LENGTH; ++j) {
+ if(jamoCE32s[j] == Collation.FALLBACK_CE32) {
+ int jamo = jamoCpFromIndex(j);
+ jamoCE32s[j] = copyFromBaseCE32(jamo, base.getCE32(jamo),
+ /*withContext=*/ true);
+ }
+ }
+ }
+ return anyJamoAssigned;
+ }
+
+ /**
+ * Wraps each tailored decimal-digit mapping in a DIGIT_TAG CE32 that also
+ * records the digit value, for use by numeric (CODAN) collation.
+ */
+ protected void setDigitTags() {
+ UnicodeSet digits = new UnicodeSet("[:Nd:]");
+ UnicodeSetIterator iter = new UnicodeSetIterator(digits);
+ while(iter.next()) {
+ assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
+ int c = iter.codepoint;
+ int ce32 = trie.get(c);
+ if(ce32 != Collation.FALLBACK_CE32 && ce32 != Collation.UNASSIGNED_CE32) {
+ // Store the real CE32 in ce32s and point to it from the digit CE32.
+ int index = addCE32(ce32);
+ if(index > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ ce32 = Collation.makeCE32FromTagIndexAndLength(
+ Collation.DIGIT_TAG, index, UCharacter.digit(c)); // u_charDigitValue(c)
+ trie.set(c, ce32);
+ }
+ }
+ }
+
+ /**
+ * Sets a LEAD_SURROGATE_TAG CE32 for each lead surrogate code unit,
+ * summarizing its 1024 supplementary code points as all-unassigned,
+ * all-fallback, or mixed.
+ */
+ protected void setLeadSurrogates() {
+ for(char lead = 0xd800; lead < 0xdc00; ++lead) {
+ int leadValue = -1;
+ // utrie2_enumForLeadSurrogate(trie, lead, null, , &value);
+ Iterator<Trie2.Range> trieIterator = trie.iteratorForLeadSurrogate(lead);
+ while(trieIterator.hasNext()) {
+ Trie2.Range range = trieIterator.next();
+ // The rest of this loop is equivalent to C++ enumRangeLeadValue().
+ int value = range.value;
+ if(value == Collation.UNASSIGNED_CE32) {
+ value = Collation.LEAD_ALL_UNASSIGNED;
+ } else if(value == Collation.FALLBACK_CE32) {
+ value = Collation.LEAD_ALL_FALLBACK;
+ } else {
+ // A real mapping: this lead surrogate has mixed contents.
+ leadValue = Collation.LEAD_MIXED;
+ break;
+ }
+ if(leadValue < 0) {
+ leadValue = value;
+ } else if(leadValue != value) {
+ leadValue = Collation.LEAD_MIXED;
+ break;
+ }
+ }
+ trie.setForLeadSurrogateCodeUnit(lead,
+ Collation.makeCE32FromTagAndIndex(Collation.LEAD_SURROGATE_TAG, 0) | leadValue);
+ }
+ }
+
+ /**
+ * Builds the runtime mapping structures (trie, ce32s, ces, contexts,
+ * Jamo table, unsafe-backward set) into data.
+ *
+ * @throws IllegalStateException if called after a previous build()
+ */
+ protected void buildMappings(CollationData data) {
+ if(!isMutable()) {
+ throw new IllegalStateException("attempt to build() after build()");
+ }
+
+ buildContexts();
+
+ int[] jamoCE32s = new int[CollationData.JAMO_CE32S_LENGTH];
+ int jamoIndex = -1;
+ if(getJamoCE32s(jamoCE32s)) {
+ jamoIndex = ce32s.size();
+ for(int i = 0; i < CollationData.JAMO_CE32S_LENGTH; ++i) {
+ ce32s.addElement(jamoCE32s[i]);
+ }
+ // Small optimization: Use a bit in the Hangul ce32
+ // to indicate that none of the Jamo CE32s are isSpecialCE32()
+ // (as it should be in the root collator).
+ // It allows CollationIterator to avoid recursive function calls and per-Jamo tests.
+ // In order to still have good trie compression and keep this code simple,
+ // we only set this flag if a whole block of 588 Hangul syllables starting with
+ // a common leading consonant (Jamo L) has this property.
+ boolean isAnyJamoVTSpecial = false;
+ for(int i = Hangul.JAMO_L_COUNT; i < CollationData.JAMO_CE32S_LENGTH; ++i) {
+ if(Collation.isSpecialCE32(jamoCE32s[i])) {
+ isAnyJamoVTSpecial = true;
+ break;
+ }
+ }
+ int hangulCE32 = Collation.makeCE32FromTagAndIndex(Collation.HANGUL_TAG, 0);
+ int c = Hangul.HANGUL_BASE;
+ for(int i = 0; i < Hangul.JAMO_L_COUNT; ++i) { // iterate over the Jamo L
+ int ce32 = hangulCE32;
+ if(!isAnyJamoVTSpecial && !Collation.isSpecialCE32(jamoCE32s[i])) {
+ ce32 |= Collation.HANGUL_NO_SPECIAL_JAMO;
+ }
+ int limit = c + Hangul.JAMO_VT_COUNT;
+ trie.setRange(c, limit - 1, ce32, true);
+ c = limit;
+ }
+ } else {
+ // Copy the Hangul CE32s from the base in blocks per Jamo L,
+ // assuming that HANGUL_NO_SPECIAL_JAMO is set or not set for whole blocks.
+ for(int c = Hangul.HANGUL_BASE; c < Hangul.HANGUL_LIMIT;) {
+ int ce32 = base.getCE32(c);
+ assert(Collation.hasCE32Tag(ce32, Collation.HANGUL_TAG));
+ int limit = c + Hangul.JAMO_VT_COUNT;
+ trie.setRange(c, limit - 1, ce32, true);
+ c = limit;
+ }
+ }
+
+ setDigitTags();
+ setLeadSurrogates();
+
+ // For U+0000, move its normal ce32 into CE32s[0] and set U0000_TAG.
+ ce32s.setElementAt(trie.get(0), 0);
+ trie.set(0, Collation.makeCE32FromTagAndIndex(Collation.U0000_TAG, 0));
+
+ data.trie = trie.toTrie2_32();
+
+ // Mark each lead surrogate as "unsafe"
+ // if any of its 1024 associated supplementary code points is "unsafe".
+ int c = 0x10000;
+ for(char lead = 0xd800; lead < 0xdc00; ++lead, c += 0x400) {
+ if(unsafeBackwardSet.containsSome(c, c + 0x3ff)) {
+ unsafeBackwardSet.add(lead);
+ }
+ }
+ unsafeBackwardSet.freeze();
+
+ data.ce32s = ce32s.getBuffer();
+ data.ces = ce64s.getBuffer();
+ data.contexts = contexts.toString();
+
+ data.base = base;
+ if(jamoIndex >= 0) {
+ data.jamoCE32s = jamoCE32s; // C++: data.ce32s + jamoIndex
+ } else {
+ data.jamoCE32s = base.jamoCE32s;
+ }
+ data.unsafeBackwardSet = unsafeBackwardSet;
+ }
+
+ /**
+ * Discards all built context data and invalidates the cached builtCE32
+ * of every ConditionalCE32 list head, so contexts get rebuilt on demand.
+ */
+ protected void clearContexts() {
+ contexts.setLength(0);
+ UnicodeSetIterator iter = new UnicodeSetIterator(contextChars);
+ while(iter.next()) {
+ assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
+ int ce32 = trie.get(iter.codepoint);
+ assert(isBuilderContextCE32(ce32));
+ getConditionalCE32ForCE32(ce32).builtCE32 = Collation.NO_CE32;
+ }
+ }
+
+ /**
+ * Builds the runtime context data for every code point in contextChars,
+ * replacing its builder-context CE32 with a PREFIX/CONTRACTION CE32.
+ */
+ protected void buildContexts() {
+ // Ignore abandoned lists and the cached builtCE32,
+ // and build all contexts from scratch.
+ contexts.setLength(0);
+ UnicodeSetIterator iter = new UnicodeSetIterator(contextChars);
+ while(iter.next()) {
+ assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
+ int c = iter.codepoint;
+ int ce32 = trie.get(c);
+ if(!isBuilderContextCE32(ce32)) {
+ throw new AssertionError("Impossible: No context data for c in contextChars.");
+ }
+ ConditionalCE32 cond = getConditionalCE32ForCE32(ce32);
+ ce32 = buildContext(cond);
+ trie.set(c, ce32);
+ }
+ }
+
+ /**
+ * Builds the runtime form (prefix trie and per-prefix contraction tries)
+ * for one code point's ConditionalCE32 list and returns its CE32.
+ */
+ protected int buildContext(ConditionalCE32 head) {
+ // The list head must have no context.
+ assert(!head.hasContext());
+ // The list head must be followed by one or more nodes that all do have context.
+ assert(head.next >= 0);
+ CharsTrieBuilder prefixBuilder = new CharsTrieBuilder();
+ CharsTrieBuilder contractionBuilder = new CharsTrieBuilder();
+ // Outer loop: one iteration per distinct prefix.
+ for(ConditionalCE32 cond = head;; cond = getConditionalCE32(cond.next)) {
+ // After the list head, the prefix or suffix can be empty, but not both.
+ assert(cond == head || cond.hasContext());
+ int prefixLength = cond.prefixLength();
+ StringBuilder prefix = new StringBuilder().append(cond.context, 0, prefixLength + 1);
+ String prefixString = prefix.toString();
+ // Collect all contraction suffixes for one prefix.
+ ConditionalCE32 firstCond = cond;
+ ConditionalCE32 lastCond = cond;
+ while(cond.next >= 0 &&
+ (cond = getConditionalCE32(cond.next)).context.startsWith(prefixString)) {
+ lastCond = cond;
+ }
+ int ce32;
+ int suffixStart = prefixLength + 1; // == prefix.length()
+ if(lastCond.context.length() == suffixStart) {
+ // One prefix without contraction suffix.
+ assert(firstCond == lastCond);
+ ce32 = lastCond.ce32;
+ cond = lastCond;
+ } else {
+ // Build the contractions trie.
+ contractionBuilder.clear();
+ // Entry for an empty suffix, to be stored before the trie.
+ int emptySuffixCE32 = Collation.NO_CE32; // Will always be set to a real value.
+ int flags = 0;
+ if(firstCond.context.length() == suffixStart) {
+ // There is a mapping for the prefix and the single character c. (p|c)
+ // If no other suffix matches, then we return this value.
+ emptySuffixCE32 = firstCond.ce32;
+ cond = getConditionalCE32(firstCond.next);
+ } else {
+ // There is no mapping for the prefix and just the single character.
+ // (There is no p|c, only p|cd, p|ce etc.)
+ flags |= Collation.CONTRACT_SINGLE_CP_NO_MATCH;
+ // When the prefix matches but none of the prefix-specific suffixes,
+ // then we fall back to the mappings with the next-longest prefix,
+ // and ultimately to mappings with no prefix.
+ // Each fallback might be another set of contractions.
+ // For example, if there are mappings for ch, p|cd, p|ce, but not for p|c,
+ // then in text "pch" we find the ch contraction.
+ for(cond = head;; cond = getConditionalCE32(cond.next)) {
+ int length = cond.prefixLength();
+ if(length == prefixLength) { break; }
+ if(cond.defaultCE32 != Collation.NO_CE32 &&
+ (length==0 || prefixString.regionMatches(
+ prefix.length() - length, cond.context, 1, length)
+ /* C++: prefix.endsWith(cond.context, 1, length) */)) {
+ emptySuffixCE32 = cond.defaultCE32;
+ }
+ }
+ cond = firstCond;
+ }
+ // Optimization: Set a flag when
+ // the first character of every contraction suffix has lccc!=0.
+ // Short-circuits contraction matching when a normal letter follows.
+ flags |= Collation.CONTRACT_NEXT_CCC;
+ // Add all of the non-empty suffixes into the contraction trie.
+ for(;;) {
+ String suffix = cond.context.substring(suffixStart);
+ int fcd16 = nfcImpl.getFCD16(suffix.codePointAt(0));
+ if(fcd16 <= 0xff) {
+ // Some suffix starts with lccc==0: cannot use the NEXT_CCC shortcut.
+ flags &= ~Collation.CONTRACT_NEXT_CCC;
+ }
+ fcd16 = nfcImpl.getFCD16(suffix.codePointBefore(suffix.length()));
+ if(fcd16 > 0xff) {
+ // The last suffix character has lccc!=0, allowing for discontiguous contractions.
+ flags |= Collation.CONTRACT_TRAILING_CCC;
+ }
+ contractionBuilder.add(suffix, cond.ce32);
+ if(cond == lastCond) { break; }
+ cond = getConditionalCE32(cond.next);
+ }
+ int index = addContextTrie(emptySuffixCE32, contractionBuilder);
+ if(index > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many context-sensitive mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ ce32 = Collation.makeCE32FromTagAndIndex(Collation.CONTRACTION_TAG, index) | flags;
+ }
+ assert(cond == lastCond);
+ firstCond.defaultCE32 = ce32;
+ if(prefixLength == 0) {
+ if(cond.next < 0) {
+ // No non-empty prefixes, only contractions.
+ return ce32;
+ }
+ } else {
+ prefix.delete(0, 1); // Remove the length unit.
+ prefix.reverse();
+ prefixBuilder.add(prefix, ce32);
+ if(cond.next < 0) { break; }
+ }
+ }
+ assert(head.defaultCE32 != Collation.NO_CE32);
+ int index = addContextTrie(head.defaultCE32, prefixBuilder);
+ if(index > Collation.MAX_INDEX) {
+ throw new IndexOutOfBoundsException("too many context-sensitive mappings");
+ // BufferOverflowException is a better fit
+ // but cannot be constructed with a message string.
+ }
+ return Collation.makeCE32FromTagAndIndex(Collation.PREFIX_TAG, index);
+ }
+
+ protected int addContextTrie(int defaultCE32, CharsTrieBuilder trieBuilder) {
+ StringBuilder context = new StringBuilder();
+ context.append((char)(defaultCE32 >> 16)).append((char)defaultCE32);
+ context.append(trieBuilder.buildCharSequence(StringTrieBuilder.Option.SMALL));
+ int index = contexts.indexOf(context.toString());
+ if(index < 0) {
+ index = contexts.length();
+ contexts.append(context);
+ }
+ return index;
+ }
+
+ protected void buildFastLatinTable(CollationData data) {
+ if(!fastLatinEnabled) { return; }
+
+ fastLatinBuilder = new CollationFastLatinBuilder();
+ if(fastLatinBuilder.forData(data)) {
+ char[] header = fastLatinBuilder.getHeader();
+ char[] table = fastLatinBuilder.getTable();
+ if(base != null &&
+ Arrays.equals(header, base.fastLatinTableHeader) &&
+ Arrays.equals(table, base.fastLatinTable)) {
+ // Same fast Latin table as in the base, use that one instead.
+ fastLatinBuilder = null;
+ header = base.fastLatinTableHeader;
+ table = base.fastLatinTable;
+ }
+ data.fastLatinTableHeader = header;
+ data.fastLatinTable = table;
+ } else {
+ fastLatinBuilder = null;
+ }
+ }
+
+ protected int getCEs(CharSequence s, int start, long ces[], int cesLength) {
+ if(collIter == null) {
+ collIter = new DataBuilderCollationIterator(this, new CollationData(nfcImpl));
+ if(collIter == null) { return 0; }
+ }
+ return collIter.fetchCEs(s, start, ces, cesLength);
+ }
+
+ protected static int jamoCpFromIndex(int i) {
+ // 0 <= i < CollationData.JAMO_CE32S_LENGTH = 19 + 21 + 27
+ if(i < Hangul.JAMO_L_COUNT) { return Hangul.JAMO_L_BASE + i; }
+ i -= Hangul.JAMO_L_COUNT;
+ if(i < Hangul.JAMO_V_COUNT) { return Hangul.JAMO_V_BASE + i; }
+ i -= Hangul.JAMO_V_COUNT;
+ // i < 27
+ return Hangul.JAMO_T_BASE + 1 + i;
+ }
+
    /**
     * Build-time collation element and character iterator.
     * Uses the runtime CollationIterator for fetching CEs for a string
     * but reads from the builder's unfinished data structures.
     * In particular, this class reads from the unfinished trie
     * and has to avoid CollationIterator.nextCE() and redirect other
     * calls to data.getCE32() and data.getCE32FromSupplementary().
     *
     * We do this so that we need not implement the collation algorithm
     * again for the builder and make it behave exactly like the runtime code.
     * That would be more difficult to test and maintain than this indirection.
     *
     * Some CE32 tags (for example, the DIGIT_TAG) do not occur in the builder data,
     * so the data accesses from those code paths need not be modified.
     *
     * This class iterates directly over whole code points
     * so that the CollationIterator does not need the finished trie
     * for handling the LEAD_SURROGATE_TAG.
     */
    private static final class DataBuilderCollationIterator extends CollationIterator {
        DataBuilderCollationIterator(CollationDataBuilder b, CollationData newData) {
            super(newData, /*numeric=*/ false);
            builder = b;
            builderData = newData;
            builderData.base = builder.base;
            // Set all of the jamoCE32s[] to indirection CE32s.
            for(int j = 0; j < CollationData.JAMO_CE32S_LENGTH; ++j) {  // Count across Jamo types.
                int jamo = CollationDataBuilder.jamoCpFromIndex(j);
                // Each Jamo CE32 redirects back into the builder (BUILDER_DATA_TAG)
                // and carries the Jamo code point itself in the index field.
                jamoCE32s[j] = Collation.makeCE32FromTagAndIndex(Collation.BUILDER_DATA_TAG, jamo) |
                        CollationDataBuilder.IS_BUILDER_JAMO_CE32;
            }
            builderData.jamoCE32s = jamoCE32s;
        }

        /**
         * Appends the CEs for str[start..] to ces[] and returns the total CE count.
         * Only the first Collation.MAX_EXPANSION_LENGTH CEs are stored;
         * the returned count keeps growing past that so the caller can detect overflow.
         */
        int fetchCEs(CharSequence str, int start, long ces[], int cesLength) {
            // Set the pointers each time, in case they changed due to reallocation.
            builderData.ce32s = builder.ce32s.getBuffer();
            builderData.ces = builder.ce64s.getBuffer();
            builderData.contexts = builder.contexts.toString();
            // Modified copy of CollationIterator.nextCE() and CollationIterator.nextCEFromCE32().
            reset();
            s = str;
            pos = start;
            while(pos < s.length()) {
                // No need to keep all CEs in the iterator buffer.
                clearCEs();
                int c = Character.codePointAt(s, pos);
                pos += Character.charCount(c);
                int ce32 = builder.trie.get(c);
                CollationData d;
                if(ce32 == Collation.FALLBACK_CE32) {
                    // c is not tailored by this builder; read from the base data.
                    d = builder.base;
                    ce32 = builder.base.getCE32(c);
                } else {
                    d = builderData;
                }
                appendCEsFromCE32(d, c, ce32, /*forward=*/ true);
                for(int i = 0; i < getCEsLength(); ++i) {
                    long ce = getCE(i);
                    if(ce != 0) {
                        if(cesLength < Collation.MAX_EXPANSION_LENGTH) {
                            ces[cesLength] = ce;
                        }
                        // Count every CE even once the output array is full.
                        ++cesLength;
                    }
                }
            }
            return cesLength;
        }

        @Override
        public void resetToOffset(int newOffset) {
            reset();
            pos = newOffset;
        }

        @Override
        public int getOffset() {
            return pos;
        }

        @Override
        public int nextCodePoint() {
            if(pos == s.length()) {
                return Collation.SENTINEL_CP;
            }
            int c = Character.codePointAt(s, pos);
            pos += Character.charCount(c);
            return c;
        }

        @Override
        public int previousCodePoint() {
            if(pos == 0) {
                return Collation.SENTINEL_CP;
            }
            int c = Character.codePointBefore(s, pos);
            pos -= Character.charCount(c);
            return c;
        }

        @Override
        protected void forwardNumCodePoints(int num) {
            pos = Character.offsetByCodePoints(s, pos, num);
        }

        @Override
        protected void backwardNumCodePoints(int num) {
            pos = Character.offsetByCodePoints(s, pos, -num);
        }

        @Override
        protected int getDataCE32(int c) {
            // Read from the builder's unfinished trie rather than finished data.
            return builder.trie.get(c);
        }

        @Override
        protected int getCE32FromBuilderData(int ce32) {
            assert(Collation.hasCE32Tag(ce32, Collation.BUILDER_DATA_TAG));
            if((ce32 & CollationDataBuilder.IS_BUILDER_JAMO_CE32) != 0) {
                // Jamo indirection: the CE32 index field is the Jamo code point.
                int jamo = Collation.indexFromCE32(ce32);
                return builder.trie.get(jamo);
            } else {
                ConditionalCE32 cond = builder.getConditionalCE32ForCE32(ce32);
                if(cond.builtCE32 == Collation.NO_CE32) {
                    // Build the context-sensitive mappings into their runtime form and cache the result.
                    try {
                        cond.builtCE32 = builder.buildContext(cond);
                    } catch(IndexOutOfBoundsException e) {
                        // The contexts string overflowed its index range:
                        // discard all cached contexts and rebuild this one.
                        builder.clearContexts();
                        cond.builtCE32 = builder.buildContext(cond);
                    }
                    builderData.contexts = builder.contexts.toString();
                }
                return cond.builtCE32;
            }
        }

        protected final CollationDataBuilder builder;
        protected final CollationData builderData;
        protected final int[] jamoCE32s = new int[CollationData.JAMO_CE32S_LENGTH];
        protected CharSequence s;   // text currently being iterated
        protected int pos;          // index within s, in char (UTF-16 unit) terms
    }
+
+ protected final boolean isMutable() {
+ // C++ tests !(trie == NULL || utrie2_isFrozen(trie))
+ // but Java Trie2Writable does not have an observable isFrozen() state.
+ return trie != null && unsafeBackwardSet != null && !unsafeBackwardSet.isFrozen();
+ }
+
    /**
     * Flag bit OR-ed into BUILDER_DATA_TAG CE32s whose index field holds a
     * Jamo code point (see DataBuilderCollationIterator.getCE32FromBuilderData()).
     * @see Collation.BUILDER_DATA_TAG
     */
    private static final int IS_BUILDER_JAMO_CE32 = 0x100;

    protected Normalizer2Impl nfcImpl;
    // Base (root) collation data; null when building the root itself.
    protected CollationData base;
    protected CollationSettings baseSettings;
    // Mutable code point -> CE32 mappings under construction.
    protected Trie2Writable trie;
    protected UVector32 ce32s;
    protected UVector64 ce64s;
    protected ArrayList<ConditionalCE32> conditionalCE32s;  // vector of ConditionalCE32
    // Characters that have context (prefixes or contraction suffixes).
    protected UnicodeSet contextChars = new UnicodeSet();
    // Serialized UCharsTrie structures for finalized contexts.
    protected StringBuilder contexts = new StringBuilder();
    protected UnicodeSet unsafeBackwardSet = new UnicodeSet();
    // True once any mapping has been added/changed by this builder.
    protected boolean modified;

    protected boolean fastLatinEnabled;
    protected CollationFastLatinBuilder fastLatinBuilder;

    // Lazily created by getCEs(); reads the unfinished builder data.
    protected DataBuilderCollationIterator collIter;
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationDataReader.java, ported from collationdatareader.h/.cpp
+*
+* C++ version created on: 2013feb07
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import com.ibm.icu.impl.ICUBinary;
+import com.ibm.icu.impl.Trie2_32;
+import com.ibm.icu.impl.USerializedSet;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.UnicodeSet;
+
/**
 * Collation binary data reader.
 * Reads the ICU "UCol" binary format sequentially from an InputStream:
 * first the int indexes[], then each data part in ascending byte-offset order.
 */
final class CollationDataReader /* all static */ {
    // The following constants are also copied into source/common/ucol_swp.cpp.
    // Keep them in sync!
    /**
     * Number of int indexes.
     *
     * Can be 2 if there are only options.
     * Can be 7 or 8 if there are only options and a script reordering.
     * The loader treats any index>=indexes[IX_INDEXES_LENGTH] as 0.
     */
    static final int IX_INDEXES_LENGTH = 0;
    /**
     * Bits 31..24: numericPrimary, for numeric collation
     *      23..16: fast Latin format version (0 = no fast Latin table)
     *      15.. 0: options bit set
     */
    static final int IX_OPTIONS = 1;
    static final int IX_RESERVED2 = 2;
    static final int IX_RESERVED3 = 3;

    /** Array offset to Jamo CE32s in ce32s[], or <0 if none. */
    static final int IX_JAMO_CE32S_START = 4;

    // Byte offsets from the start of the data, after the generic header.
    // The indexes[] are at byte offset 0, other data follows.
    // Each data item is aligned properly.
    // The data items should be in descending order of unit size,
    // to minimize the need for padding.
    // Each item's byte length is given by the difference between its offset and
    // the next index/offset value.
    /** Byte offset to int reorderCodes[]. */
    static final int IX_REORDER_CODES_OFFSET = 5;
    /**
     * Byte offset to uint8_t reorderTable[].
     * Empty table if <256 bytes (padding only).
     * Otherwise 256 bytes or more (with padding).
     */
    static final int IX_REORDER_TABLE_OFFSET = 6;
    /** Byte offset to the collation trie. Its length is a multiple of 8 bytes. */
    static final int IX_TRIE_OFFSET = 7;

    static final int IX_RESERVED8_OFFSET = 8;
    /** Byte offset to long ces[]. */
    static final int IX_CES_OFFSET = 9;
    static final int IX_RESERVED10_OFFSET = 10;
    /** Byte offset to int ce32s[]. */
    static final int IX_CE32S_OFFSET = 11;

    /** Byte offset to uint32_t rootElements[]. */
    static final int IX_ROOT_ELEMENTS_OFFSET = 12;
    /** Byte offset to UChar *contexts[]. */
    static final int IX_CONTEXTS_OFFSET = 13;
    /** Byte offset to char [] with serialized unsafeBackwardSet. */
    static final int IX_UNSAFE_BWD_OFFSET = 14;
    /** Byte offset to char fastLatinTable[]. */
    static final int IX_FAST_LATIN_TABLE_OFFSET = 15;

    /** Byte offset to char scripts[]. */
    static final int IX_SCRIPTS_OFFSET = 16;
    /**
     * Byte offset to boolean compressibleBytes[].
     * Empty table if <256 bytes (padding only).
     * Otherwise 256 bytes or more (with padding).
     */
    static final int IX_COMPRESSIBLE_BYTES_OFFSET = 17;
    static final int IX_RESERVED18_OFFSET = 18;
    static final int IX_TOTAL_SIZE = 19;

    /**
     * Reads one collation binary from inBytes into the tailoring object.
     *
     * @param base      the base (root) tailoring, or null when loading the root itself
     * @param inBytes   stream positioned at the start of the ICU data header
     * @param tailoring output object, assumed to be in initial (empty) state
     * @throws IOException if the stream cannot be read
     * @throws RuntimeException if the data is malformed or inconsistent with base
     */
    static void read(CollationTailoring base, InputStream inBytes,
                     CollationTailoring tailoring) throws IOException {
        BufferedInputStream bis = new BufferedInputStream(inBytes);
        tailoring.version = ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE);
        if(base != null && base.getUCAVersion() != tailoring.getUCAVersion()) {
            throw new RuntimeException("Tailoring UCA version differs from base data UCA version");
        }

        DataInputStream ds = new DataInputStream(bis);
        int indexesLength = ds.readInt();  // inIndexes[IX_INDEXES_LENGTH]
        if(indexesLength < 2) {
            throw new RuntimeException("not enough indexes");
        }
        // One extra slot so that inIndexes[index + 1] is always valid below.
        int[] inIndexes = new int[IX_TOTAL_SIZE + 1];
        inIndexes[0] = indexesLength;
        for(int i = 1; i < indexesLength && i < inIndexes.length; ++i) {
            inIndexes[i] = ds.readInt();
        }
        // Missing trailing indexes read as -1 (part absent).
        for(int i = indexesLength; i < inIndexes.length; ++i) {
            inIndexes[i] = -1;
        }
        if(indexesLength > inIndexes.length) {
            // Skip unknown extra indexes from a newer format version.
            ds.skipBytes((indexesLength - inIndexes.length) * 4);
        }

        // Assume that the tailoring data is in initial state,
        // with null pointers and 0 lengths.

        // Set pointers to non-empty data parts.
        // Do this in order of their byte offsets. (Should help porting to Java.)

        int index;  // one of the indexes[] slots
        int offset;  // byte offset for the index part
        int length;  // number of bytes in the index part

        CollationData baseData = base == null ? null : base.data;
        int[] reorderCodes;
        index = IX_REORDER_CODES_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 4) {
            if(baseData == null) {
                // We assume for collation settings that
                // the base data does not have a reordering.
                throw new RuntimeException("Collation base data must not reorder scripts");
            }
            reorderCodes = new int[length / 4];
            for(int i = 0; i < length / 4; ++i) {
                reorderCodes[i] = ds.readInt();
            }
            length &= 3;  // remaining padding bytes
        } else {
            reorderCodes = new int[0];
        }
        ds.skipBytes(length);

        // There should be a reorder table only if there are reorder codes.
        // However, when there are reorder codes the reorder table may be omitted to reduce
        // the data size.
        byte[] reorderTable = null;
        index = IX_REORDER_TABLE_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 256) {
            if(reorderCodes.length == 0) {
                throw new RuntimeException("Reordering table without reordering codes");
            }
            reorderTable = new byte[256];
            ds.readFully(reorderTable);
            length -= 256;
        } else {
            // If we have reorder codes, then build the reorderTable at the end,
            // when the CollationData is otherwise complete.
        }
        ds.skipBytes(length);

        if(baseData != null && baseData.numericPrimary != (inIndexes[IX_OPTIONS] & 0xff000000L)) {
            throw new RuntimeException("Tailoring numeric primary weight differs from base data");
        }
        CollationData data = null;  // Remains null if there are no mappings.

        index = IX_TRIE_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 8) {
            tailoring.ensureOwnedData();
            data = tailoring.ownedData;
            data.base = baseData;
            data.numericPrimary = inIndexes[IX_OPTIONS] & 0xff000000L;
            data.trie = tailoring.trie = Trie2_32.createFromSerialized(ds);
            int trieLength = data.trie.getSerializedLength();
            if(trieLength > length) {
                throw new RuntimeException("Not enough bytes for the mappings trie");  // No mappings.
            }
            length -= trieLength;
        } else if(baseData != null) {
            // Use the base data. Only the settings are tailored.
            tailoring.data = baseData;
        } else {
            throw new RuntimeException("Missing collation data mappings");  // No mappings.
        }
        ds.skipBytes(length);

        index = IX_RESERVED8_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        ds.skipBytes(length);

        index = IX_CES_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 8) {
            if(data == null) {
                throw new RuntimeException("Tailored ces without tailored trie");
            }
            data.ces = new long[length / 8];
            for(int i = 0; i < length / 8; ++i) {
                data.ces[i] = ds.readLong();
            }
            length &= 7;  // remaining padding bytes
        }
        ds.skipBytes(length);

        index = IX_RESERVED10_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        ds.skipBytes(length);

        index = IX_CE32S_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 4) {
            if(data == null) {
                throw new RuntimeException("Tailored ce32s without tailored trie");
            }
            data.ce32s = new int[length / 4];
            for(int i = 0; i < length / 4; ++i) {
                data.ce32s[i] = ds.readInt();
            }
            length &= 3;
        }
        ds.skipBytes(length);

        int jamoCE32sStart = inIndexes[IX_JAMO_CE32S_START];
        if(jamoCE32sStart >= 0) {
            if(data == null || data.ce32s == null) {
                throw new RuntimeException("JamoCE32sStart index into non-existent ce32s[]");
            }
            data.jamoCE32s = new int[CollationData.JAMO_CE32S_LENGTH];
            System.arraycopy(data.ce32s, jamoCE32sStart, data.jamoCE32s, 0, CollationData.JAMO_CE32S_LENGTH);
        } else if(data == null) {
            // Nothing to do.
        } else if(baseData != null) {
            // Inherit the Jamo CE32s from the base data.
            data.jamoCE32s = baseData.jamoCE32s;
        } else {
            throw new RuntimeException("Missing Jamo CE32s for Hangul processing");
        }

        index = IX_ROOT_ELEMENTS_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 4) {
            int rootElementsLength = length / 4;
            if(data == null) {
                throw new RuntimeException("Root elements but no mappings");
            }
            if(rootElementsLength <= CollationRootElements.IX_SEC_TER_BOUNDARIES) {
                throw new RuntimeException("Root elements array too short");
            }
            data.rootElements = new long[rootElementsLength];
            for(int i = 0; i < rootElementsLength; ++i) {
                data.rootElements[i] = ds.readInt() & 0xffffffffL;  // unsigned int -> long
            }
            long commonSecTer = data.rootElements[CollationRootElements.IX_COMMON_SEC_AND_TER_CE];
            if(commonSecTer != Collation.COMMON_SEC_AND_TER_CE) {
                throw new RuntimeException("Common sec/ter weights in base data differ from the hardcoded value");
            }
            long secTerBoundaries = data.rootElements[CollationRootElements.IX_SEC_TER_BOUNDARIES];
            if((secTerBoundaries >>> 24) < CollationKeys.SEC_COMMON_HIGH) {
                // [fixed last secondary common byte] is too low,
                // and secondary weights would collide with compressed common secondaries.
                throw new RuntimeException("[fixed last secondary common byte] is too low");
            }
            length &= 3;
        }
        ds.skipBytes(length);

        index = IX_CONTEXTS_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 2) {
            if(data == null) {
                throw new RuntimeException("Tailored contexts without tailored trie");
            }
            StringBuilder sb = new StringBuilder(length / 2);
            for(int i = 0; i < length / 2; ++i) {
                sb.append(ds.readChar());
            }
            data.contexts = sb.toString();
            length &= 1;
        }
        ds.skipBytes(length);

        index = IX_UNSAFE_BWD_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 2) {
            if(data == null) {
                throw new RuntimeException("Unsafe-backward-set but no mappings");
            }
            if(baseData == null) {
                // Create the unsafe-backward set for the root collator.
                // Include all non-zero combining marks and trail surrogates.
                // We do this at load time, rather than at build time,
                // to simplify Unicode version bootstrapping:
                // The root data builder only needs the new FractionalUCA.txt data,
                // but it need not be built with a version of ICU already updated to
                // the corresponding new Unicode Character Database.
                //
                // The following is an optimized version of
                // new UnicodeSet("[[:^lccc=0:][\\udc00-\\udfff]]").
                // It is faster and requires fewer code dependencies.
                tailoring.unsafeBackwardSet = new UnicodeSet(0xdc00, 0xdfff);  // trail surrogates
                data.nfcImpl.addLcccChars(tailoring.unsafeBackwardSet);
            } else {
                // Clone the root collator's set contents.
                tailoring.unsafeBackwardSet = baseData.unsafeBackwardSet.cloneAsThawed();
            }
            // Add the ranges from the data file to the unsafe-backward set.
            USerializedSet sset = new USerializedSet();
            char[] unsafeData = new char[length / 2];
            for(int i = 0; i < length / 2; ++i) {
                unsafeData[i] = ds.readChar();
            }
            length &= 1;
            sset.getSet(unsafeData, 0);
            int count = sset.countRanges();
            int[] range = new int[2];
            for(int i = 0; i < count; ++i) {
                sset.getRange(i, range);
                tailoring.unsafeBackwardSet.add(range[0], range[1]);
            }
            // Mark each lead surrogate as "unsafe"
            // if any of its 1024 associated supplementary code points is "unsafe".
            int c = 0x10000;
            for(int lead = 0xd800; lead < 0xdc00; ++lead, c += 0x400) {
                if(!tailoring.unsafeBackwardSet.containsNone(c, c + 0x3ff)) {
                    tailoring.unsafeBackwardSet.add(lead);
                }
            }
            tailoring.unsafeBackwardSet.freeze();
            data.unsafeBackwardSet = tailoring.unsafeBackwardSet;
        } else if(data == null) {
            // Nothing to do.
        } else if(baseData != null) {
            // No tailoring-specific data: Alias the root collator's set.
            data.unsafeBackwardSet = baseData.unsafeBackwardSet;
        } else {
            throw new RuntimeException("Missing unsafe-backward-set");
        }
        ds.skipBytes(length);

        // If the fast Latin format version is different,
        // or the version is set to 0 for "no fast Latin table",
        // then just always use the normal string comparison path.
        index = IX_FAST_LATIN_TABLE_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(data != null) {
            data.fastLatinTable = null;
            data.fastLatinTableHeader = null;
            if(((inIndexes[IX_OPTIONS] >> 16) & 0xff) == CollationFastLatin.VERSION) {
                if(length >= 2) {
                    // The first char carries the table version (high byte)
                    // and the header length (low byte).
                    char header0 = ds.readChar();
                    int headerLength = header0 & 0xff;
                    data.fastLatinTableHeader = new char[headerLength];
                    data.fastLatinTableHeader[0] = header0;
                    for(int i = 1; i < headerLength; ++i) {
                        data.fastLatinTableHeader[i] = ds.readChar();
                    }
                    int tableLength = length / 2 - headerLength;
                    data.fastLatinTable = new char[tableLength];
                    for(int i = 0; i < tableLength; ++i) {
                        data.fastLatinTable[i] = ds.readChar();
                    }
                    length &= 1;
                    if((header0 >> 8) != CollationFastLatin.VERSION) {
                        throw new RuntimeException("Fast-Latin table version differs from version in data header");
                    }
                } else if(baseData != null) {
                    data.fastLatinTable = baseData.fastLatinTable;
                    data.fastLatinTableHeader = baseData.fastLatinTableHeader;
                }
            }
        }
        ds.skipBytes(length);

        index = IX_SCRIPTS_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 2) {
            if(data == null) {
                throw new RuntimeException("Script order data but no mappings");
            }
            data.scripts = new char[length / 2];
            for(int i = 0; i < length / 2; ++i) {
                data.scripts[i] = ds.readChar();
            }
            length &= 1;
        } else if(data == null) {
            // Nothing to do.
        } else if(baseData != null) {
            data.scripts = baseData.scripts;
        }
        ds.skipBytes(length);

        index = IX_COMPRESSIBLE_BYTES_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 256) {
            if(data == null) {
                throw new RuntimeException("Data for compressible primary lead bytes but no mappings");
            }
            data.compressibleBytes = new boolean[256];
            for(int i = 0; i < 256; ++i) {
                data.compressibleBytes[i] = ds.readBoolean();
            }
            length -= 256;
        } else if(data == null) {
            // Nothing to do.
        } else if(baseData != null) {
            data.compressibleBytes = baseData.compressibleBytes;
        } else {
            throw new RuntimeException("Missing data for compressible primary lead bytes");
        }
        ds.skipBytes(length);

        index = IX_RESERVED18_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        ds.skipBytes(length);

        ds.close();

        CollationSettings ts = tailoring.settings.readOnly();
        int options = inIndexes[IX_OPTIONS] & 0xffff;
        char[] fastLatinPrimaries = new char[CollationFastLatin.LATIN_LIMIT];
        int fastLatinOptions = CollationFastLatin.getOptions(
                tailoring.data, ts, fastLatinPrimaries);
        // If the loaded settings match the current (shared) settings,
        // keep the shared object and avoid a copy-on-write.
        if(options == ts.options && ts.variableTop != 0 &&
                Arrays.equals(reorderCodes, ts.reorderCodes) &&
                fastLatinOptions == ts.fastLatinOptions &&
                (fastLatinOptions < 0 ||
                    Arrays.equals(fastLatinPrimaries, ts.fastLatinPrimaries))) {
            return;
        }

        CollationSettings settings = tailoring.settings.copyOnWrite();
        settings.options = options;
        // Set variableTop from options and scripts data.
        settings.variableTop = tailoring.data.getLastPrimaryForGroup(
                Collator.ReorderCodes.FIRST + settings.getMaxVariable());
        if(settings.variableTop == 0) {
            throw new RuntimeException("The maxVariable could not be mapped to a variableTop");
        }

        if(reorderCodes.length == 0 || reorderTable != null) {
            settings.setReordering(reorderCodes, reorderTable);
        } else {
            // The table was omitted from the data; rebuild it from the codes.
            byte[] table = new byte[256];
            baseData.makeReorderTable(reorderCodes, table);
            settings.setReordering(reorderCodes, table);
        }

        settings.fastLatinOptions = CollationFastLatin.getOptions(
                tailoring.data, settings,
                settings.fastLatinPrimaries);
    }

    /** Accepts only binary data format version 4. */
    private static final class IsAcceptable implements ICUBinary.Authenticate {
        // @Override when we switch to Java 6
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == 4;
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
    private static final byte DATA_FORMAT[] = { 0x55, 0x43, 0x6f, 0x6c };  // "UCol"

    private CollationDataReader() {}  // no constructor
}
+
+/*
+ * Format of collation data (ucadata.icu, binary data in coll/ *.res files):
+ * See ICU4C source/common/collationdatareader.h.
+ */
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2012-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationFCD.java, ported from collationfcd.h/.cpp
+*
+* C++ version created on: 2012aug18
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.text.UTF16;
+
+/**
+ * Data and functions for the FCD check fast path.
+ *
+ * The fast path looks at a pair of 16-bit code units and checks
+ * whether there is an FCD boundary between them;
+ * there is if the first unit has a trailing ccc=0 (!hasTccc(first))
+ * or the second unit has a leading ccc=0 (!hasLccc(second)),
+ * or both.
+ * When the fast path finds a possible non-boundary,
+ * then the FCD check slow path looks at the actual sequence of FCD values.
+ *
+ * This is a pure optimization.
+ * The fast path must at least find all possible non-boundaries.
+ * If the fast path is too pessimistic, it costs performance.
+ *
+ * For a pair of BMP characters, the fast path tests are precise (1 bit per character).
+ *
+ * For a supplementary code point, the two units are its lead and trail surrogates.
+ * We set hasTccc(lead)=true if any of its 1024 associated supplementary code points
+ * has lccc!=0 or tccc!=0.
+ * We set hasLccc(trail)=true for all trail surrogates.
+ * As a result, we leave the fast path if the lead surrogate might start a
+ * supplementary code point that is not FCD-inert.
+ * (So the fast path need not detect that there is a surrogate pair,
+ * nor look ahead to the next full code point.)
+ *
+ * hasLccc(lead)=true if any of its 1024 associated supplementary code points
+ * has lccc!=0, for fast boundary checking between BMP & supplementary.
+ *
+ * hasTccc(trail)=false:
+ * It should only be tested for unpaired trail surrogates which are FCD-inert.
+ */
+public final class CollationFCD {
+ public static boolean hasLccc(int c) {
+ assert c <= 0xffff;
+ // c can be negative, e.g., Collation.SENTINEL_CP from UCharIterator;
+ // that is handled in the first test.
+ int i;
+ return
+ // U+0300 is the first character with lccc!=0.
+ c >= 0x300 &&
+ (i = lcccIndex[c >> 5]) != 0 &&
+ (lcccBits[i] & (1 << (c & 0x1f))) != 0;
+ }
+
+ public static boolean hasTccc(int c) {
+ assert c <= 0xffff;
+ // c can be negative, e.g., Collation.SENTINEL_CP from UCharIterator;
+ // that is handled in the first test.
+ int i;
+ return
+ // U+00C0 is the first character with tccc!=0.
+ c >= 0xc0 &&
+ (i = tcccIndex[c >> 5]) != 0 &&
+ (tcccBits[i] & (1 << (c & 0x1f))) != 0;
+ }
+
+ static boolean mayHaveLccc(int c) {
+ // Handles all of Unicode 0..10FFFF.
+ // c can be negative, e.g., Collation.SENTINEL_CP.
+ // U+0300 is the first character with lccc!=0.
+ if(c < 0x300) { return false; }
+ if(c > 0xffff) { c = UTF16.getLeadSurrogate(c); }
+ int i;
+ return
+ (i = lcccIndex[c >> 5]) != 0 &&
+ (lcccBits[i] & (1 << (c & 0x1f))) != 0;
+ }
+
+ /**
+ * Tibetan composite vowel signs (U+0F73, U+0F75, U+0F81)
+ * must be decomposed before reaching the core collation code,
+ * or else some sequences including them, even ones passing the FCD check,
+ * do not yield canonically equivalent results.
+ *
+ * This is a fast and imprecise test.
+ *
+ * @param c a code point
+ * @return true if c is U+0F73, U+0F75 or U+0F81 or one of several other Tibetan characters
+ */
+ static boolean maybeTibetanCompositeVowel(int c) {
+ return (c & 0x1fff01) == 0xf01;
+ }
+
+ /**
+ * Tibetan composite vowel signs (U+0F73, U+0F75, U+0F81)
+ * must be decomposed before reaching the core collation code,
+ * or else some sequences including them, even ones passing the FCD check,
+ * do not yield canonically equivalent results.
+ *
+ * They have distinct lccc/tccc combinations: 129/130 or 129/132.
+ *
+ * @param fcd16 the FCD value (lccc/tccc combination) of a code point
+ * @return true if fcd16 is from U+0F73, U+0F75 or U+0F81
+ */
+ static boolean isFCD16OfTibetanCompositeVowel(int fcd16) {
+ return fcd16 == 0x8182 || fcd16 == 0x8184;
+ }
+
+ // CollationFCD(); // No instantiation.
+
+ // TODO: machine-generate by: icu/tools/unicode/c/genuca/genuca.cpp
+
+ private static final byte[] lcccIndex={
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,1,2,3,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,0,0,5,6,7,0,
+8,0,9,0xa,0,0,0xb,0xc,0xd,0xe,0xf,0,0,0,0,0x10,
+0x11,0x12,0x13,0,0,0,0,0x14,0,0x15,0x16,0,0,0x15,0x17,0,
+0,0x15,0x17,0,0,0x15,0x17,0,0,0x15,0x17,0,0,0,0x17,0,
+0,0,0x18,0,0,0x15,0x17,0,0,0,0x17,0,0,0,0x19,0,
+0,0x1a,0x1b,0,0,0x1c,0x1b,0,0x1c,0x1d,0,0x1e,0x1f,0,0x20,0,
+0,0x21,0,0,0x17,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0x22,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x23,0x23,0,0,0,0,0x24,0,
+0,0,0,0,0,0x25,0,0,0,0x13,0,0,0,0,0,0,
+0x26,0,0,0x27,0,0,0,0,0,0x23,0x28,0x10,0,0x29,0,0x2a,
+0,0x2b,0,0,0,0,0x2c,0x2d,0,0,0,0,0,0,1,0x2e,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0x2f,0x30,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0x31,0,0,0,0x32,0,0,0,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0x33,0,0,0x34,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0x35,0x32,0,0,0x36,0,0,0,0,0,0,0,0,
+0x20,0,0,0,0,0,0x28,0x37,0,0x38,0x39,0,0,0x39,0x3a,0,
+0,0,0,0,0,0x3b,0x3c,0x3d,0,0,0,0,0,0,0,0x17,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x3e,0x23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x3f,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0x40,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
    // NOTE(review): generated two-stage lookup index (values index into tcccBits
    // below); block granularity is not evident from this chunk — do not hand-edit.
    private static final byte[] tcccIndex={
0,0,0,0,0,0,2,3,4,5,6,7,0,8,9,0xa,
0xb,0xc,0,0,0,0,0,0,1,1,0xd,0xe,0xf,0x10,0x11,0,
0x12,0x13,0x14,0x15,0x16,0,0x17,0x18,0,0,0,0,0x19,0x1a,0x1b,0,
0x1c,0x1d,0x1e,0x1f,0,0,0x20,0x21,0x22,0x23,0x24,0,0,0,0,0x25,
0x26,0x27,0x28,0,0,0,0,0x29,0,0x2a,0x2b,0,0,0x2c,0x2d,0,
0,0x2e,0x2f,0,0,0x2c,0x30,0,0,0x2c,0x31,0,0,0,0x30,0,
0,0,0x32,0,0,0x2c,0x30,0,0,0,0x30,0,0,0,0x33,0,
0,0x34,0x35,0,0,0x36,0x35,0,0x36,0x37,0,0x38,0x39,0,0x3a,0,
0,0x3b,0,0,0x30,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x3c,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3d,0x3d,0,0,0,0,0x3e,0,
0,0,0,0,0,0x3f,0,0,0,0x28,0,0,0,0,0,0,
0x40,0,0,0x41,0,0,0,0,0,0x3d,0x42,0x25,0,0x43,0,0x44,
0,0x45,0,0,0,0,0x46,0x47,0,0,0,0,0,0,1,0x48,
1,1,1,1,0x49,1,1,0x4a,0x4b,1,0x4c,0x4d,1,0x4e,0x4f,0x50,
0,0,0,0,0,0,0x51,0x52,0,0x53,0,0,0x54,0x55,0x56,0,
0x57,0x58,0x59,0x5a,0x5b,0x5c,0,0x5d,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x2c,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x5e,0,0,0,0x5f,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x60,0x61,0x62,0x63,0x61,0x62,0x64,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x65,0x5f,0,0,0x66,0,0,0,0,0,0,0,0,
0x3a,0,0,0,0,0,0x42,0x67,0,0x68,0x69,0,0,0x69,0x6a,0,
0,0,0,0,0,0x6b,0x6c,0x6d,0,0,0,0,0,0,0,0x30,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x6e,0x3d,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3c,0x6f,0x70,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x71,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    };
+
    // NOTE(review): generated bit-set data referenced via the corresponding index
    // table (each int is a 32-bit mask word) — do not hand-edit.
    private static final int[] lcccBits={
0,0xffffffff,0xffff7fff,0xffff,0xf8,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0xfffff800,0x10000,0x9fc00000,0x3d9f,0x20000,0xffff0000,0x7ff,
0xff800,0xfbc00000,0x3eef,0xe000000,0x7ffffff0,0x10000000,0x1e2000,0x2000,0x602000,0x400,0x7000000,0xf00,0x3000000,0x2a00000,0x3c3e0000,0xdf,
0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x1021fd,0xf000007f,0x1fff0000,
0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,0x35,0x40000000,
0x7f
    };
    // NOTE(review): generated bit-set data addressed through tcccIndex above
    // (each int is a 32-bit mask word) — do not hand-edit.
    private static final int[] tcccBits={
0,0xffffffff,0x3e7effbf,0xbe7effbf,0xfffcffff,0x7ef1ff3f,0xfff3f1f8,0x7fffff3f,0x18003,0xdfffe000,0xff31ffcf,0xcfffffff,0xfffc0,0xffff7fff,0xffff,0x1d760,
0x1fc00,0x187c00,0x200708b,0x2000000,0x708b0000,0xc00000,0xf8,0xfccf0006,0x33ffcfc,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0x7c,0xfffff800,0x10000,
0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0xff800,0xfbc00000,0x3eef,0xe000000,0x7ffffff0,0x10120200,0xff1e2000,0x10000000,0xb0002000,0x10480000,0x4e002000,
0x2000,0x30002000,0x602100,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,
0x1800000,0x9fe00001,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x1021fd,0xf000007f,0xbffffff,0x3ffffff,0x3f3fffff,0xaaff3f3f,0x3fffffff,0x1fdfffff,0xefcfffde,
0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50,0x292,0x333e005,0x333,0xf000,0x3c0f,0x38000,0x80000000,
0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,0x35,0x5f7ffc00,
0x7fdb,0x7f
    };
+
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationFastLatin.java, ported from collationfastlatin.h/.cpp
+*
+* C++ version created on: 2013aug09
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Collator;
+
+public final class CollationFastLatin /* all static */ {
    /**
     * Fast Latin format version (one byte 1..FF).
     * Must be incremented for any runtime-incompatible changes,
     * in particular, for changes to any of the following constants.
     *
     * When the major version number of the main data format changes,
     * we can reset this fast Latin version to 1.
     */
    public static final int VERSION = 1;

    // Supported fast-path character ranges: U+0000..LATIN_MAX ...
    public static final int LATIN_MAX = 0x17f;
    public static final int LATIN_LIMIT = LATIN_MAX + 1;

    static final int LATIN_MAX_UTF8_LEAD = 0xc5;  // UTF-8 lead byte of LATIN_MAX

    // ... plus general punctuation U+2000..U+203F.
    static final int PUNCT_START = 0x2000;
    static final int PUNCT_LIMIT = 0x2040;

    // excludes U+FFFE & U+FFFF
    static final int NUM_FAST_CHARS = LATIN_LIMIT + (PUNCT_LIMIT - PUNCT_START);

    // Note on the supported weight ranges:
    // Analysis of UCA 6.3 and CLDR 23 non-search tailorings shows that
    // the CEs for characters in the above ranges, excluding expansions with length >2,
    // excluding contractions of >2 characters, and other restrictions
    // (see the builder's getCEsFromCE32()),
    // use at most about 150 primary weights,
    // where about 94 primary weights are possibly-variable (space/punct/symbol/currency),
    // at most 4 secondary before-common weights,
    // at most 4 secondary after-common weights,
    // at most 16 secondary high weights (in secondary CEs), and
    // at most 4 tertiary after-common weights.
    // The following ranges are designed to support slightly more weights than that.
    // (en_US_POSIX is unusual: It creates about 64 variable + 116 Latin primaries.)

    // Digits may use long primaries (preserving more short ones)
    // or short primaries (faster) without changing this data structure.
    // (If we supported numeric collation, then digits would have to have long primaries
    // so that special handling does not affect the fast path.)

    // Bit layout of a 16-bit mini collation element:
    static final int SHORT_PRIMARY_MASK = 0xfc00;  // bits 15..10
    static final int INDEX_MASK = 0x3ff;  // bits 9..0 for expansions & contractions
    static final int SECONDARY_MASK = 0x3e0;  // bits 9..5
    static final int CASE_MASK = 0x18;  // bits 4..3
    static final int LONG_PRIMARY_MASK = 0xfff8;  // bits 15..3
    static final int TERTIARY_MASK = 7;  // bits 2..0
    static final int CASE_AND_TERTIARY_MASK = CASE_MASK | TERTIARY_MASK;

    // Doubled masks for processing a pair of mini CEs packed into one int.
    static final int TWO_SHORT_PRIMARIES_MASK =
            (SHORT_PRIMARY_MASK << 16) | SHORT_PRIMARY_MASK;  // 0xfc00fc00
    static final int TWO_LONG_PRIMARIES_MASK =
            (LONG_PRIMARY_MASK << 16) | LONG_PRIMARY_MASK;  // 0xfff8fff8
    static final int TWO_SECONDARIES_MASK =
            (SECONDARY_MASK << 16) | SECONDARY_MASK;  // 0x3e003e0
    static final int TWO_CASES_MASK =
            (CASE_MASK << 16) | CASE_MASK;  // 0x180018
    static final int TWO_TERTIARIES_MASK =
            (TERTIARY_MASK << 16) | TERTIARY_MASK;  // 0x70007

    /**
     * Contraction with one fast Latin character.
     * Use INDEX_MASK to find the start of the contraction list after the fixed table.
     * The first entry contains the default mapping.
     * Otherwise use CONTR_CHAR_MASK for the contraction character index
     * (in ascending order).
     * Use CONTR_LENGTH_SHIFT for the length of the entry
     * (1=BAIL_OUT, 2=one CE, 3=two CEs).
     *
     * Also, U+0000 maps to a contraction entry, so that the fast path need not
     * check for NUL termination.
     * It usually maps to a contraction list with only the completely ignorable default value.
     */
    static final int CONTRACTION = 0x400;
    /**
     * An expansion encodes two CEs.
     * Use INDEX_MASK to find the pair of CEs after the fixed table.
     *
     * The higher a mini CE value, the easier it is to process.
     * For expansions and higher, no context needs to be considered.
     */
    static final int EXPANSION = 0x800;
    /**
     * Encodes one CE with a long/low mini primary (there are 128).
     * All potentially-variable primaries must be in this range,
     * to make the short-primary path as fast as possible.
     */
    static final int MIN_LONG = 0xc00;
    static final int LONG_INC = 8;
    static final int MAX_LONG = 0xff8;
    /**
     * Encodes one CE with a short/high primary (there are 60),
     * plus a secondary CE if the secondary weight is high.
     * Fast handling: At least all letter primaries should be in this range.
     */
    static final int MIN_SHORT = 0x1000;
    static final int SHORT_INC = 0x400;
    /** The highest primary weight is reserved for U+FFFF. */
    static final int MAX_SHORT = SHORT_PRIMARY_MASK;

    // Secondary weight ranges within SECONDARY_MASK.
    static final int MIN_SEC_BEFORE = 0;  // must add SEC_OFFSET
    static final int SEC_INC = 0x20;
    static final int MAX_SEC_BEFORE = MIN_SEC_BEFORE + 4 * SEC_INC;  // 5 before common
    static final int COMMON_SEC = MAX_SEC_BEFORE + SEC_INC;
    static final int MIN_SEC_AFTER = COMMON_SEC + SEC_INC;
    static final int MAX_SEC_AFTER = MIN_SEC_AFTER + 5 * SEC_INC;  // 6 after common
    static final int MIN_SEC_HIGH = MAX_SEC_AFTER + SEC_INC;  // 20 high secondaries
    static final int MAX_SEC_HIGH = SECONDARY_MASK;

    /**
     * Lookup: Add this offset to secondary weights, except for completely ignorable CEs.
     * Must be greater than any special value, e.g., MERGE_WEIGHT.
     * The exact value is not relevant for the format version.
     */
    static final int SEC_OFFSET = SEC_INC;
    static final int COMMON_SEC_PLUS_OFFSET = COMMON_SEC + SEC_OFFSET;

    static final int TWO_SEC_OFFSETS =
            (SEC_OFFSET << 16) | SEC_OFFSET;  // 0x200020
    static final int TWO_COMMON_SEC_PLUS_OFFSET =
            (COMMON_SEC_PLUS_OFFSET << 16) | COMMON_SEC_PLUS_OFFSET;

    static final int LOWER_CASE = 8;  // case bits include this offset
    static final int TWO_LOWER_CASES = (LOWER_CASE << 16) | LOWER_CASE;  // 0x80008

    static final int COMMON_TER = 0;  // must add TER_OFFSET
    static final int MAX_TER_AFTER = 7;  // 7 after common

    /**
     * Lookup: Add this offset to tertiary weights, except for completely ignorable CEs.
     * Must be greater than any special value, e.g., MERGE_WEIGHT.
     * Must be greater than case bits as well, so that with combined case+tertiary weights
     * plus the offset the tertiary bits does not spill over into the case bits.
     * The exact value is not relevant for the format version.
     */
    static final int TER_OFFSET = SEC_OFFSET;
    static final int COMMON_TER_PLUS_OFFSET = COMMON_TER + TER_OFFSET;

    static final int TWO_TER_OFFSETS = (TER_OFFSET << 16) | TER_OFFSET;
    static final int TWO_COMMON_TER_PLUS_OFFSET =
            (COMMON_TER_PLUS_OFFSET << 16) | COMMON_TER_PLUS_OFFSET;

    // Special mini CE values, all below MIN_LONG and below the weight offsets.
    static final int MERGE_WEIGHT = 3;  // returned by lookup() for U+FFFE
    static final int EOS = 2;  // end of string
    static final int BAIL_OUT = 1;

    /**
     * Contraction result first word bits 8..0 contain the
     * second contraction character, as a char index 0..NUM_FAST_CHARS-1.
     * Each contraction list is terminated with a word containing CONTR_CHAR_MASK.
     */
    static final int CONTR_CHAR_MASK = 0x1ff;
    /**
     * Contraction result first word bits 10..9 contain the result length:
     * 1=bail out, 2=one mini CE, 3=two mini CEs
     */
    static final int CONTR_LENGTH_SHIFT = 9;

    /**
     * Comparison return value when the regular comparison must be used.
     * The exact value is not relevant for the format version.
     */
    public static final int BAIL_OUT_RESULT = -2;
+
+ static int getCharIndex(char c) {
+ if(c <= LATIN_MAX) {
+ return c;
+ } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
+ return c - (PUNCT_START - LATIN_LIMIT);
+ } else {
+ // Not a fast Latin character.
+ // Note: U+FFFE & U+FFFF are forbidden in tailorings
+ // and thus do not occur in any contractions.
+ return -1;
+ }
+ }
+
    /**
     * Computes the options value for the compare functions
     * and writes the precomputed primary weights.
     * Returns -1 if the Latin fastpath is not supported for the data and settings.
     * The capacity must be LATIN_LIMIT.
     */
    public static int getOptions(CollationData data, CollationSettings settings,
            char[] primaries) {
        char[] header = data.fastLatinTableHeader;
        if(header == null) { return -1; }  // no fast Latin table for this tailoring
        // header[0]: high byte = format version, low byte = number of header entries.
        assert((header[0] >> 8) == VERSION);
        assert(primaries.length == LATIN_LIMIT);
        if(primaries.length != LATIN_LIMIT) { return -1; }

        int miniVarTop;
        if((settings.options & CollationSettings.ALTERNATE_MASK) == 0) {
            // No mini primaries are variable, set a variableTop just below the
            // lowest long mini primary.
            miniVarTop = MIN_LONG - 1;
        } else {
            // Find the header entry matching the settings' variableTop
            // (compared via the top bits of the real variableTop primary).
            int v1 = (int)(settings.variableTop >> 24);
            int headerLength = header[0] & 0xff;
            int i = headerLength - 1;
            if(i <= 0 || v1 > (header[i] & 0x7f)) {
                return -1;  // variableTop >= digits, should not occur
            }
            while(i > 1 && v1 <= (header[i - 1] & 0x7f)) { --i; }
            // In the table header, the miniVarTop is in bits 15..7, with 4 zero bits 19..16 implied.
            // Shift right to make it comparable with long mini primaries in bits 15..3.
            miniVarTop = (header[i] & 0xff80) >> 4;
        }

        byte[] reorderTable = settings.reorderTable;
        if(reorderTable != null) {
            // Verify that script reordering does not permute any group up to Latin;
            // otherwise the precomputed mini primaries would compare in the wrong order.
            char[] scripts = data.scripts;
            int length = data.scripts.length;
            int prevLastByte = 0;
            for(int i = 0; i < length;) {
                // reordered last byte of the group
                // NOTE(review): assumes scripts[] entries are
                // [group info (low byte = lead byte), script count, script codes...]
                // — confirm against CollationData.scripts.
                int lastByte = reorderTable[scripts[i] & 0xff] & 0xff;
                if(lastByte < prevLastByte) {
                    // The permutation affects the groups up to Latin.
                    return -1;
                }
                if(scripts[i + 2] == UScript.LATIN) { break; }
                i = i + 2 + scripts[i + 1];
                prevLastByte = lastByte;
            }
        }

        char[] table = data.fastLatinTable;  // skip the header
        // Precompute the primary-only mini CE for each fast Latin character.
        for(int c = 0; c < LATIN_LIMIT; ++c) {
            int p = table[c];
            if(p >= MIN_SHORT) {
                p &= SHORT_PRIMARY_MASK;
            } else if(p > miniVarTop) {
                p &= LONG_PRIMARY_MASK;
            } else {
                p = 0;  // variable or not a plain mini CE; resolved in the compare loop
            }
            primaries[c] = (char)p;
        }
        if((settings.options & CollationSettings.NUMERIC) != 0) {
            // Bail out for digits.
            for(int c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; }
        }

        // Shift the miniVarTop above other options.
        return (miniVarTop << 16) | settings.options;
    }
+
    /**
     * Compares left & right from startIndex using the fast Latin mini-CE table,
     * level by level (primary, secondary, case, tertiary, quaternary) up to the
     * strength selected in options.
     *
     * @param table the fast Latin table (without header), from CollationData
     * @param primaries precomputed primary mini CEs, written by getOptions()
     * @param options (miniVarTop << 16) | settings.options, from getOptions()
     * @return Collation.LESS/EQUAL/GREATER, or BAIL_OUT_RESULT if either string
     *         contains characters or mappings not supported by the fast path
     */
    public static int compareUTF16(char[] table, char[] primaries, int options,
            CharSequence left, CharSequence right, int startIndex) {
        // This is a modified copy of CollationCompare.compareUpToQuaternary(),
        // optimized for common Latin text.
        // Keep them in sync!

        int variableTop = options >> 16;  // see getOptions()
        options &= 0xffff;  // needed for CollationSettings.getStrength() to work

        // Check for supported characters, fetch mini CEs, and compare primaries.
        int leftIndex = startIndex, rightIndex = startIndex;
        /**
         * Single mini CE or a pair.
         * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
         * If there is only one, then it is in the lower bits, and the upper bits are 0.
         */
        int leftPair = 0, rightPair = 0;
        for(;;) {
            // We fetch CEs until we get a non-ignorable primary or reach the end.
            while(leftPair == 0) {
                if(leftIndex == left.length()) {
                    leftPair = EOS;
                    break;
                }
                int c = left.charAt(leftIndex++);
                if(c <= LATIN_MAX) {
                    leftPair = primaries[c];
                    if(leftPair != 0) { break; }  // common case: precomputed primary
                    if(c <= 0x39 && c >= 0x30 && (options & CollationSettings.NUMERIC) != 0) {
                        return BAIL_OUT_RESULT;
                    }
                    leftPair = table[c];
                } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
                    leftPair = table[c - PUNCT_START + LATIN_LIMIT];
                } else {
                    leftPair = lookup(table, c);
                }
                if(leftPair >= MIN_SHORT) {
                    leftPair &= SHORT_PRIMARY_MASK;
                    break;
                } else if(leftPair > variableTop) {
                    leftPair &= LONG_PRIMARY_MASK;
                    break;
                } else {
                    // Expansion, contraction, variable, or special mini CE.
                    long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
                    if(pairAndInc < 0) {
                        // Negative result: the contraction consumed the next character.
                        ++leftIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    leftPair = (int)pairAndInc;
                    if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
                    leftPair = getPrimaries(variableTop, leftPair);
                }
            }

            while(rightPair == 0) {
                if(rightIndex == right.length()) {
                    rightPair = EOS;
                    break;
                }
                int c = right.charAt(rightIndex++);
                if(c <= LATIN_MAX) {
                    rightPair = primaries[c];
                    if(rightPair != 0) { break; }
                    if(c <= 0x39 && c >= 0x30 && (options & CollationSettings.NUMERIC) != 0) {
                        return BAIL_OUT_RESULT;
                    }
                    rightPair = table[c];
                } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
                    rightPair = table[c - PUNCT_START + LATIN_LIMIT];
                } else {
                    rightPair = lookup(table, c);
                }
                if(rightPair >= MIN_SHORT) {
                    rightPair &= SHORT_PRIMARY_MASK;
                    break;
                } else if(rightPair > variableTop) {
                    rightPair &= LONG_PRIMARY_MASK;
                    break;
                } else {
                    long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
                    if(pairAndInc < 0) {
                        ++rightIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    rightPair = (int)pairAndInc;
                    if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
                    rightPair = getPrimaries(variableTop, rightPair);
                }
            }

            if(leftPair == rightPair) {
                if(leftPair == EOS) { break; }
                leftPair = rightPair = 0;
                continue;
            }
            int leftPrimary = leftPair & 0xffff;
            int rightPrimary = rightPair & 0xffff;
            if(leftPrimary != rightPrimary) {
                // Return the primary difference.
                return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER;
            }
            if(leftPair == EOS) { break; }
            leftPair >>>= 16;
            rightPair >>>= 16;
        }
        // In the following, we need to re-fetch each character because we did not buffer the CEs,
        // but we know that the string is well-formed and
        // only contains supported characters and mappings.

        // We might skip the secondary level but continue with the case level
        // which is turned on separately.
        if(CollationSettings.getStrength(options) >= Collator.SECONDARY) {
            // Secondary level.
            leftIndex = rightIndex = startIndex;
            leftPair = rightPair = 0;
            for(;;) {
                while(leftPair == 0) {
                    if(leftIndex == left.length()) {
                        leftPair = EOS;
                        break;
                    }
                    int c = left.charAt(leftIndex++);
                    if(c <= LATIN_MAX) {
                        leftPair = table[c];
                    } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
                        leftPair = table[c - PUNCT_START + LATIN_LIMIT];
                    } else {
                        leftPair = lookup(table, c);
                    }
                    if(leftPair >= MIN_SHORT) {
                        leftPair = getSecondariesFromOneShortCE(leftPair);
                        break;
                    } else if(leftPair > variableTop) {
                        leftPair = COMMON_SEC_PLUS_OFFSET;
                        break;
                    } else {
                        long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
                        if(pairAndInc < 0) {
                            ++leftIndex;
                            pairAndInc = ~pairAndInc;
                        }
                        leftPair = getSecondaries(variableTop, (int)pairAndInc);
                    }
                }

                while(rightPair == 0) {
                    if(rightIndex == right.length()) {
                        rightPair = EOS;
                        break;
                    }
                    int c = right.charAt(rightIndex++);
                    if(c <= LATIN_MAX) {
                        rightPair = table[c];
                    } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
                        rightPair = table[c - PUNCT_START + LATIN_LIMIT];
                    } else {
                        rightPair = lookup(table, c);
                    }
                    if(rightPair >= MIN_SHORT) {
                        rightPair = getSecondariesFromOneShortCE(rightPair);
                        break;
                    } else if(rightPair > variableTop) {
                        rightPair = COMMON_SEC_PLUS_OFFSET;
                        break;
                    } else {
                        long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
                        if(pairAndInc < 0) {
                            ++rightIndex;
                            pairAndInc = ~pairAndInc;
                        }
                        rightPair = getSecondaries(variableTop, (int)pairAndInc);
                    }
                }

                if(leftPair == rightPair) {
                    if(leftPair == EOS) { break; }
                    leftPair = rightPair = 0;
                    continue;
                }
                int leftSecondary = leftPair & 0xffff;
                int rightSecondary = rightPair & 0xffff;
                if(leftSecondary != rightSecondary) {
                    if((options & CollationSettings.BACKWARD_SECONDARY) != 0) {
                        // Full support for backwards secondary requires backwards contraction matching
                        // and moving backwards between merge separators.
                        return BAIL_OUT_RESULT;
                    }
                    return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
                }
                if(leftPair == EOS) { break; }
                leftPair >>>= 16;
                rightPair >>>= 16;
            }
        }

        if((options & CollationSettings.CASE_LEVEL) != 0) {
            // Case level (separately switchable).
            boolean strengthIsPrimary = CollationSettings.getStrength(options) == Collator.PRIMARY;
            leftIndex = rightIndex = startIndex;
            leftPair = rightPair = 0;
            for(;;) {
                while(leftPair == 0) {
                    if(leftIndex == left.length()) {
                        leftPair = EOS;
                        break;
                    }
                    int c = left.charAt(leftIndex++);
                    leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
                    if(leftPair < MIN_LONG) {
                        long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
                        if(pairAndInc < 0) {
                            ++leftIndex;
                            pairAndInc = ~pairAndInc;
                        }
                        leftPair = (int)pairAndInc;
                    }
                    leftPair = getCases(variableTop, strengthIsPrimary, leftPair);
                }

                while(rightPair == 0) {
                    if(rightIndex == right.length()) {
                        rightPair = EOS;
                        break;
                    }
                    int c = right.charAt(rightIndex++);
                    rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
                    if(rightPair < MIN_LONG) {
                        long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
                        if(pairAndInc < 0) {
                            ++rightIndex;
                            pairAndInc = ~pairAndInc;
                        }
                        rightPair = (int)pairAndInc;
                    }
                    rightPair = getCases(variableTop, strengthIsPrimary, rightPair);
                }

                if(leftPair == rightPair) {
                    if(leftPair == EOS) { break; }
                    leftPair = rightPair = 0;
                    continue;
                }
                int leftCase = leftPair & 0xffff;
                int rightCase = rightPair & 0xffff;
                if(leftCase != rightCase) {
                    if((options & CollationSettings.UPPER_FIRST) == 0) {
                        return (leftCase < rightCase) ? Collation.LESS : Collation.GREATER;
                    } else {
                        // upperFirst: invert the case comparison.
                        return (leftCase < rightCase) ? Collation.GREATER : Collation.LESS;
                    }
                }
                if(leftPair == EOS) { break; }
                leftPair >>>= 16;
                rightPair >>>= 16;
            }
        }
        if(CollationSettings.getStrength(options) <= Collator.SECONDARY) { return Collation.EQUAL; }

        // Tertiary level.
        // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
        boolean withCaseBits = CollationSettings.isTertiaryWithCaseBits(options);

        leftIndex = rightIndex = startIndex;
        leftPair = rightPair = 0;
        for(;;) {
            while(leftPair == 0) {
                if(leftIndex == left.length()) {
                    leftPair = EOS;
                    break;
                }
                int c = left.charAt(leftIndex++);
                leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
                if(leftPair < MIN_LONG) {
                    long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
                    if(pairAndInc < 0) {
                        ++leftIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    leftPair = (int)pairAndInc;
                }
                leftPair = getTertiaries(variableTop, withCaseBits, leftPair);
            }

            while(rightPair == 0) {
                if(rightIndex == right.length()) {
                    rightPair = EOS;
                    break;
                }
                int c = right.charAt(rightIndex++);
                rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
                if(rightPair < MIN_LONG) {
                    long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
                    if(pairAndInc < 0) {
                        ++rightIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    rightPair = (int)pairAndInc;
                }
                rightPair = getTertiaries(variableTop, withCaseBits, rightPair);
            }

            if(leftPair == rightPair) {
                if(leftPair == EOS) { break; }
                leftPair = rightPair = 0;
                continue;
            }
            int leftTertiary = leftPair & 0xffff;
            int rightTertiary = rightPair & 0xffff;
            if(leftTertiary != rightTertiary) {
                if(CollationSettings.sortsTertiaryUpperCaseFirst(options)) {
                    // Pass through EOS and MERGE_WEIGHT
                    // and keep real tertiary weights larger than the MERGE_WEIGHT.
                    // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
                    if(leftTertiary > MERGE_WEIGHT) {
                        leftTertiary ^= CASE_MASK;
                    }
                    if(rightTertiary > MERGE_WEIGHT) {
                        rightTertiary ^= CASE_MASK;
                    }
                }
                return (leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER;
            }
            if(leftPair == EOS) { break; }
            leftPair >>>= 16;
            rightPair >>>= 16;
        }
        if(CollationSettings.getStrength(options) <= Collator.TERTIARY) { return Collation.EQUAL; }

        // Quaternary level.
        leftIndex = rightIndex = startIndex;
        leftPair = rightPair = 0;
        for(;;) {
            while(leftPair == 0) {
                if(leftIndex == left.length()) {
                    leftPair = EOS;
                    break;
                }
                int c = left.charAt(leftIndex++);
                leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
                if(leftPair < MIN_LONG) {
                    long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
                    if(pairAndInc < 0) {
                        ++leftIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    leftPair = (int)pairAndInc;
                }
                leftPair = getQuaternaries(variableTop, leftPair);
            }

            while(rightPair == 0) {
                if(rightIndex == right.length()) {
                    rightPair = EOS;
                    break;
                }
                int c = right.charAt(rightIndex++);
                rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
                if(rightPair < MIN_LONG) {
                    long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
                    if(pairAndInc < 0) {
                        ++rightIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    rightPair = (int)pairAndInc;
                }
                rightPair = getQuaternaries(variableTop, rightPair);
            }

            if(leftPair == rightPair) {
                if(leftPair == EOS) { break; }
                leftPair = rightPair = 0;
                continue;
            }
            int leftQuaternary = leftPair & 0xffff;
            int rightQuaternary = rightPair & 0xffff;
            if(leftQuaternary != rightQuaternary) {
                return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER;
            }
            if(leftPair == EOS) { break; }
            leftPair >>>= 16;
            rightPair >>>= 16;
        }
        return Collation.EQUAL;
    }
+
+ private static int lookup(char[] table, int c) {
+ assert(c > LATIN_MAX);
+ if(PUNCT_START <= c && c < PUNCT_LIMIT) {
+ return table[c - PUNCT_START + LATIN_LIMIT];
+ } else if(c == 0xfffe) {
+ return MERGE_WEIGHT;
+ } else if(c == 0xffff) {
+ return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER;
+ } else {
+ return BAIL_OUT;
+ }
+ }
+
    /**
     * Resolves a table value into one or two 16-bit mini CEs:
     * passes simple/special mini CEs through, reads the CE pair for an expansion,
     * and matches the contraction suffix list for a contraction.
     *
     * Java returns a negative result (use the '~' operator) if sIndex is to be incremented.
     * C++ modifies sIndex.
     */
    private static long nextPair(char[] table, int c, int ce, CharSequence s16, int sIndex) {
        if(ce >= MIN_LONG || ce < CONTRACTION) {
            return ce;  // simple or special mini CE
        } else if(ce >= EXPANSION) {
            // Expansion: the two CEs are stored after the fixed table;
            // pack them as (second << 16) | first.
            int index = NUM_FAST_CHARS + (ce & INDEX_MASK);
            return ((long)table[index + 1] << 16) | table[index];
        } else /* ce >= CONTRACTION */ {
            // Contraction list: Default mapping followed by
            // 0 or more single-character contraction suffix mappings.
            int index = NUM_FAST_CHARS + (ce & INDEX_MASK);
            boolean inc = false;  // true if the next char is consumed.
            if(sIndex != s16.length()) {
                // Read the next character.
                int c2;
                int nextIndex = sIndex;
                c2 = s16.charAt(nextIndex++);
                if(c2 > LATIN_MAX) {
                    if(PUNCT_START <= c2 && c2 < PUNCT_LIMIT) {
                        c2 = c2 - PUNCT_START + LATIN_LIMIT;  // 2000..203F -> 0180..01BF
                    } else if(c2 == 0xfffe || c2 == 0xffff) {
                        c2 = -1;  // U+FFFE & U+FFFF cannot occur in contractions.
                    } else {
                        return BAIL_OUT;
                    }
                }
                // Look for the next character in the contraction suffix list,
                // which is in ascending order of single suffix characters.
                int i = index;
                int head = table[i];  // first skip the default mapping
                int x;
                do {
                    i += head >> CONTR_LENGTH_SHIFT;
                    head = table[i];
                    x = head & CONTR_CHAR_MASK;
                } while(x < c2);  // terminates: the list ends with CONTR_CHAR_MASK >= any c2
                if(x == c2) {
                    index = i;
                    inc = true;
                }
            }
            // Return the CE or CEs for the default or contraction mapping.
            int length = table[index] >> CONTR_LENGTH_SHIFT;
            if(length == 1) {
                return BAIL_OUT;
            }
            ce = table[index + 1];
            long result;
            if(length == 2) {
                result = ce;
            } else {
                result = ((long)table[index + 2] << 16) | ce;
            }
            return inc ? ~result : result;
        }
    }
+
+ private static int getPrimaries(int variableTop, int pair) {
+ int ce = pair & 0xffff;
+ if(ce >= MIN_SHORT) { return pair & TWO_SHORT_PRIMARIES_MASK; }
+ if(ce > variableTop) { return pair & TWO_LONG_PRIMARIES_MASK; }
+ if(ce >= MIN_LONG) { return 0; } // variable
+ return pair; // special mini CE
+ }
+
+    /**
+     * Returns the secondary weight(s) of one short mini CE.
+     * A high secondary weight means the value really encodes two CEs
+     * (a primary CE plus a secondary CE), so the result then carries
+     * two 16-bit secondary values.
+     */
+    private static int getSecondariesFromOneShortCE(int ce) {
+        int sec = ce & SECONDARY_MASK;
+        if(sec >= MIN_SEC_HIGH) {
+            return ((sec + SEC_OFFSET) << 16) | COMMON_SEC_PLUS_OFFSET;
+        }
+        return sec + SEC_OFFSET;
+    }
+
+    /**
+     * Extracts the secondary weights from a pair of mini CEs,
+     * with SEC_OFFSET added so that results compare correctly.
+     * Returns 0 for variable CEs; special mini CEs pass through unchanged.
+     */
+    private static int getSecondaries(int variableTop, int pair) {
+        if(pair <= 0xffff) {
+            // one mini CE
+            if(pair >= MIN_SHORT) {
+                // A short CE with a high secondary expands into
+                // a primary CE plus a secondary CE.
+                pair = getSecondariesFromOneShortCE(pair);
+            } else if(pair > variableTop) {
+                pair = COMMON_SEC_PLUS_OFFSET;
+            } else if(pair >= MIN_LONG) {
+                pair = 0; // variable
+            }
+            // else special mini CE
+        } else {
+            // Two mini CEs: classify both by the first one (low 16 bits);
+            // they are encoded so that one test applies to both.
+            int ce = pair & 0xffff;
+            if(ce >= MIN_SHORT) {
+                pair = (pair & TWO_SECONDARIES_MASK) + TWO_SEC_OFFSETS;
+            } else if(ce > variableTop) {
+                pair = TWO_COMMON_SEC_PLUS_OFFSET;
+            } else {
+                assert(ce >= MIN_LONG);
+                pair = 0; // variable
+            }
+        }
+        return pair;
+    }
+
+    /**
+     * Extracts the case-level weights from a pair of mini CEs.
+     * Primary+caseLevel: Ignore case level weights of primary ignorables.
+     * Otherwise: Ignore case level weights of secondary ignorables.
+     * For details see the comments in the CollationCompare class.
+     * Tertiary CEs (secondary ignorables) are not supported in fast Latin.
+     */
+    private static int getCases(int variableTop, boolean strengthIsPrimary, int pair) {
+        if(pair <= 0xffff) {
+            // one mini CE
+            if(pair >= MIN_SHORT) {
+                // A high secondary weight means we really have two CEs,
+                // a primary CE and a secondary CE.
+                int ce = pair;
+                pair &= CASE_MASK; // explicit weight of primary CE
+                if(!strengthIsPrimary && (ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
+                    pair |= LOWER_CASE << 16; // implied weight of secondary CE
+                }
+            } else if(pair > variableTop) {
+                pair = LOWER_CASE;
+            } else if(pair >= MIN_LONG) {
+                pair = 0; // variable
+            }
+            // else special mini CE
+        } else {
+            // two mini CEs, same primary groups, neither expands like above
+            int ce = pair & 0xffff;
+            if(ce >= MIN_SHORT) {
+                if(strengthIsPrimary && (pair & (SHORT_PRIMARY_MASK << 16)) == 0) {
+                    // Second CE is primary-ignorable: drop its case weight too.
+                    pair &= CASE_MASK;
+                } else {
+                    pair &= TWO_CASES_MASK;
+                }
+            } else if(ce > variableTop) {
+                pair = TWO_LOWER_CASES;
+            } else {
+                assert(ce >= MIN_LONG);
+                pair = 0; // variable
+            }
+        }
+        return pair;
+    }
+
+    /**
+     * Extracts the tertiary weights (optionally together with case bits)
+     * from a pair of mini CEs, with TER_OFFSET added so that results
+     * compare correctly. Returns 0 for variable CEs; special mini CEs
+     * pass through unchanged.
+     */
+    private static int getTertiaries(int variableTop, boolean withCaseBits, int pair) {
+        if(pair <= 0xffff) {
+            // one mini CE
+            if(pair >= MIN_SHORT) {
+                // A high secondary weight means we really have two CEs,
+                // a primary CE and a secondary CE.
+                int ce = pair;
+                if(withCaseBits) {
+                    pair = (pair & CASE_AND_TERTIARY_MASK) + TER_OFFSET;
+                    if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
+                        pair |= (LOWER_CASE | COMMON_TER_PLUS_OFFSET) << 16;
+                    }
+                } else {
+                    pair = (pair & TERTIARY_MASK) + TER_OFFSET;
+                    if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
+                        pair |= COMMON_TER_PLUS_OFFSET << 16;
+                    }
+                }
+            } else if(pair > variableTop) {
+                pair = (pair & TERTIARY_MASK) + TER_OFFSET;
+                if(withCaseBits) {
+                    pair |= LOWER_CASE;
+                }
+            } else if(pair >= MIN_LONG) {
+                pair = 0; // variable
+            }
+            // else special mini CE
+        } else {
+            // two mini CEs, same primary groups, neither expands like above
+            int ce = pair & 0xffff;
+            if(ce >= MIN_SHORT) {
+                if(withCaseBits) {
+                    pair &= TWO_CASES_MASK | TWO_TERTIARIES_MASK;
+                } else {
+                    pair &= TWO_TERTIARIES_MASK;
+                }
+                pair += TWO_TER_OFFSETS;
+            } else if(ce > variableTop) {
+                pair = (pair & TWO_TERTIARIES_MASK) + TWO_TER_OFFSETS;
+                if(withCaseBits) {
+                    pair |= TWO_LOWER_CASES;
+                }
+            } else {
+                assert(ce >= MIN_LONG);
+                pair = 0; // variable
+            }
+        }
+        return pair;
+    }
+
+    /**
+     * Extracts quaternary-level weights from a pair of mini CEs:
+     * the primary weight of a variable CE,
+     * or the maximum primary weight for a non-variable,
+     * not-completely-ignorable CE.
+     */
+    private static int getQuaternaries(int variableTop, int pair) {
+        if(pair <= 0xffff) {
+            // one mini CE
+            if(pair >= MIN_SHORT) {
+                // A high secondary weight means we really have two CEs,
+                // a primary CE and a secondary CE.
+                if((pair & SECONDARY_MASK) >= MIN_SEC_HIGH) {
+                    pair = TWO_SHORT_PRIMARIES_MASK;
+                } else {
+                    pair = SHORT_PRIMARY_MASK;
+                }
+            } else if(pair > variableTop) {
+                pair = SHORT_PRIMARY_MASK;
+            } else if(pair >= MIN_LONG) {
+                pair &= LONG_PRIMARY_MASK; // variable
+            }
+            // else special mini CE
+        } else {
+            // two mini CEs, same primary groups, neither expands like above
+            int ce = pair & 0xffff;
+            if(ce > variableTop) {
+                pair = TWO_SHORT_PRIMARIES_MASK;
+            } else {
+                assert(ce >= MIN_LONG);
+                pair &= TWO_LONG_PRIMARIES_MASK; // variable
+            }
+        }
+        return pair;
+    }
+
+    // Static-methods-only class: prevent instantiation.
+    private CollationFastLatin() {} // no constructor
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationFastLatinBuilder.java, ported from collationfastlatinbuilder.h/.cpp
+*
+* C++ version created on: 2013aug09
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.util.CharsTrie;
+
+final class CollationFastLatinBuilder {
+ // #define DEBUG_COLLATION_FAST_LATIN_BUILDER 0 // 0 or 1 or 2
+
+ /**
+ * Compare two signed long values as if they were unsigned.
+ */
+ private static final int compareInt64AsUnsigned(long a, long b) {
+ a += 0x8000000000000000L;
+ b += 0x8000000000000000L;
+ if(a < b) {
+ return -1;
+ } else if(a > b) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+    /**
+     * Binary search in the first {@code limit} elements of a list of CEs
+     * that is sorted in unsigned 64-bit order
+     * (like Java Collections.binarySearch(List, T, Comparator)).
+     *
+     * @return the index&gt;=0 where the value was found,
+     *         or the index&lt;0 for inserting the value at ~index in sorted order
+     */
+    private static final int binarySearch(long[] list, int limit, long ce) {
+        if (limit == 0) { return ~0; }
+        int start = 0;
+        for (;;) {
+            // Unsigned shift avoids int overflow of (start + limit) for large ranges.
+            int i = (start + limit) >>> 1;
+            int cmp = compareInt64AsUnsigned(ce, list[i]);
+            if (cmp == 0) {
+                return i;
+            } else if (cmp < 0) {
+                if (i == start) {
+                    return ~start; // insert ce before i
+                }
+                limit = i;
+            } else {
+                if (i == start) {
+                    return ~(start + 1); // insert ce after i
+                }
+                start = i;
+            }
+        }
+    }
+
+    /** Creates an empty builder; the real work happens in forData(). */
+    CollationFastLatinBuilder() {
+        ce0 = 0;
+        ce1 = 0;
+        contractionCEs = new UVector64();
+        uniqueCEs = new UVector64();
+        miniCEs = null; // allocated in encodeUniqueCEs()
+        firstDigitPrimary = 0;
+        firstLatinPrimary = 0;
+        lastLatinPrimary = 0;
+        firstShortPrimary = 0;
+        shortPrimaryOverflow = false;
+        headerLength = 0;
+    }
+
+    /**
+     * Builds the fast-Latin tables for the given collation data.
+     * On success the results are available via getHeader() and getTable().
+     *
+     * @return true if the data could be encoded (no short-primary overflow)
+     * @throws IllegalStateException if this builder was already used
+     */
+    boolean forData(CollationData data) {
+        if(result.length() != 0) { // This builder is not reusable.
+            throw new IllegalStateException("attempt to reuse a CollationFastLatinBuilder");
+        }
+        if(!loadGroups(data)) { return false; }
+
+        // Fast handling of digits.
+        firstShortPrimary = firstDigitPrimary;
+        getCEs(data);
+        encodeUniqueCEs();
+        if(shortPrimaryOverflow) {
+            // Give digits long mini primaries,
+            // so that there are more short primaries for letters.
+            firstShortPrimary = firstLatinPrimary;
+            resetCEs();
+            getCEs(data);
+            encodeUniqueCEs();
+        }
+        // Note: If we still have a short-primary overflow but not a long-primary overflow,
+        // then we could calculate how many more long primaries would fit,
+        // and set the firstShortPrimary to that many after the current firstShortPrimary,
+        // and try again.
+        // However, this might only benefit the en_US_POSIX tailoring,
+        // and it is simpler to suppress building fast Latin data for it in genrb,
+        // or by returning false here if shortPrimaryOverflow.
+
+        boolean ok = !shortPrimaryOverflow;
+        if(ok) {
+            encodeCharCEs();
+            encodeContractions();
+        }
+        contractionCEs.removeAllElements(); // might reduce heap memory usage
+        uniqueCEs.removeAllElements();
+        return ok;
+    }
+
+    // C++ returns one combined array with the contents of the result buffer.
+    // Java returns two arrays (header & table) because we cannot use pointer arithmetic,
+    // and we do not want to index into the table with an offset.
+    /** Returns a copy of the header portion of the result buffer. */
+    char[] getHeader() {
+        return result.substring(0, headerLength).toCharArray();
+    }
+
+    /** Returns a copy of the table portion (everything after the header). */
+    char[] getTable() {
+        return result.substring(headerLength).toCharArray();
+    }
+
+    /**
+     * Reads the reordering groups up to and including Latin, and writes the
+     * header: slot 0 holds version &amp; headerLength, followed by one entry
+     * per below-digits group with that group's last primary byte.
+     *
+     * @return false if a below-digits group's last primary byte does not fit
+     *         into 7 bits, in which case no fast Latin table is built
+     */
+    private boolean loadGroups(CollationData data) {
+        // Placeholder for slot 0. Note: append(0) appends the decimal string "0"
+        // (one char), not U+0000; it is overwritten via setCharAt(0, ...) below.
+        result.append(0); // reserved for version & headerLength
+        // The first few reordering groups should be special groups
+        // (space, punct, ..., digit) followed by Latn, then Grek and other scripts.
+        for(int i = 0;;) {
+            if(i >= data.scripts.length) {
+                throw new AssertionError("no Latn script");
+            }
+            int head = data.scripts[i];
+            int lastByte = head & 0xff; // last primary byte in the group
+            int group = data.scripts[i + 2];
+            if(group == Collator.ReorderCodes.DIGIT) {
+                firstDigitPrimary = (long)(head & 0xff00) << 16;
+                headerLength = result.length();
+                int r0 = (CollationFastLatin.VERSION << 8) | headerLength;
+                result.setCharAt(0, (char)r0);
+            } else if(group == UScript.LATIN) {
+                if(firstDigitPrimary == 0) {
+                    throw new AssertionError("no digit group");
+                }
+                firstLatinPrimary = (long)(head & 0xff00) << 16;
+                lastLatinPrimary = ((long)lastByte << 24) | 0xffffff;
+                break;
+            } else if(firstDigitPrimary == 0) {
+                // a group below digits
+                if(lastByte > 0x7f) {
+                    // We only use 7 bits for the last byte of a below-digits group.
+                    // This does not warrant an errorCode, but we do not build a fast Latin table.
+                    return false;
+                }
+                result.append((char)lastByte);
+            }
+            i = i + 2 + data.scripts[i + 1]; // advance to the next script entry
+        }
+        return true;
+    }
+
+    /**
+     * Checks that two primaries will be encoded compatibly:
+     * both (or neither) as short mini primaries, both (or neither)
+     * potentially-variable, and for long mini primaries both in the
+     * same below-digits reordering group.
+     */
+    private boolean inSameGroup(long p, long q) {
+        // Both or neither need to be encoded as short primaries,
+        // so that we can test only one and use the same bit mask.
+        if(p >= firstShortPrimary) {
+            return q >= firstShortPrimary;
+        } else if(q >= firstShortPrimary) {
+            return false;
+        }
+        // Both or neither must be potentially-variable,
+        // so that we can test only one and determine if both are variable.
+        if(p >= firstDigitPrimary) {
+            return q >= firstDigitPrimary;
+        } else if(q >= firstDigitPrimary) {
+            return false;
+        }
+        // Both will be encoded with long mini primaries.
+        // They must be in the same special reordering group,
+        // so that we can test only one and determine if both are variable.
+        p >>= 24; // first primary byte
+        q >>= 24;
+        assert(p != 0 && q != 0);
+        assert(p <= result.charAt(headerLength - 1)); // the loop will terminate
+        for(int i = 1;; ++i) {
+            long lastByte = result.charAt(i); // last primary byte of group i
+            if(p <= lastByte) {
+                return q <= lastByte;
+            } else if(q <= lastByte) {
+                return false;
+            }
+        }
+    }
+
+    /**
+     * Discards the CE data collected by getCEs()/encodeUniqueCEs() so that
+     * they can run again with a different firstShortPrimary.
+     */
+    private void resetCEs() {
+        contractionCEs.removeAllElements();
+        uniqueCEs.removeAllElements();
+        shortPrimaryOverflow = false;
+        result.setLength(headerLength); // keep the header, drop any table data
+    }
+
+    /**
+     * Collects the CEs for all fast-Latin characters (the Latin range and the
+     * general-punctuation range) into charCEs, plus contraction data into
+     * contractionCEs.
+     */
+    private void getCEs(CollationData data) {
+        int i = 0; // charCEs index; runs parallel to c across both ranges
+        for(char c = 0;; ++i, ++c) {
+            if(c == CollationFastLatin.LATIN_LIMIT) {
+                c = CollationFastLatin.PUNCT_START; // jump to the punctuation range
+            } else if(c == CollationFastLatin.PUNCT_LIMIT) {
+                break;
+            }
+            CollationData d;
+            int ce32 = data.getCE32(c);
+            if(ce32 == Collation.FALLBACK_CE32) {
+                d = data.base; // the tailoring falls back to the base data for c
+                ce32 = d.getCE32(c);
+            } else {
+                d = data;
+            }
+            if(getCEsFromCE32(d, c, ce32)) {
+                charCEs[i][0] = ce0;
+                charCEs[i][1] = ce1;
+                addUniqueCE(ce0);
+                addUniqueCE(ce1);
+            } else {
+                // bail out for c
+                charCEs[i][0] = ce0 = Collation.NO_CE;
+                charCEs[i][1] = ce1 = 0;
+            }
+            if(c == 0 && !isContractionCharCE(ce0)) {
+                // Always map U+0000 to a contraction.
+                // Write a contraction list with only a default value if there is no real contraction.
+                assert(contractionCEs.isEmpty());
+                addContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, ce0, ce1);
+                // NOTE(review): assumes Collation.NO_CE_PRIMARY is declared as a long
+                // so that << 32 is a 64-bit shift; confirm its declared type.
+                charCEs[0][0] = (Collation.NO_CE_PRIMARY << 32) | CONTRACTION_FLAG;
+                charCEs[0][1] = 0;
+            }
+        }
+        // Terminate the last contraction list.
+        contractionCEs.addElement(CollationFastLatin.CONTR_CHAR_MASK);
+    }
+
+    /**
+     * Fetches the (up to two) CEs for one mapping from the given data,
+     * storing them in the ce0 &amp; ce1 fields, and checks that they can be
+     * encoded as fast-Latin mini CEs.
+     *
+     * @param c the character, or a negative sentinel when called for a
+     *        contraction default/suffix mapping
+     * @return true if the CEs are encodable
+     */
+    private boolean getCEsFromCE32(CollationData data, int c, int ce32) {
+        ce32 = data.getFinalCE32(ce32);
+        ce1 = 0;
+        if(Collation.isSimpleOrLongCE32(ce32)) {
+            ce0 = Collation.ceFromCE32(ce32);
+        } else {
+            switch(Collation.tagFromCE32(ce32)) {
+            case Collation.LATIN_EXPANSION_TAG:
+                ce0 = Collation.latinCE0FromCE32(ce32);
+                ce1 = Collation.latinCE1FromCE32(ce32);
+                break;
+            case Collation.EXPANSION32_TAG: {
+                int index = Collation.indexFromCE32(ce32);
+                int length = Collation.lengthFromCE32(ce32);
+                if(length <= 2) {
+                    ce0 = Collation.ceFromCE32(data.ce32s[index]);
+                    if(length == 2) {
+                        ce1 = Collation.ceFromCE32(data.ce32s[index + 1]);
+                    }
+                    break;
+                } else {
+                    return false; // only expansions of at most two CEs are supported
+                }
+            }
+            case Collation.EXPANSION_TAG: {
+                int index = Collation.indexFromCE32(ce32);
+                int length = Collation.lengthFromCE32(ce32);
+                if(length <= 2) {
+                    ce0 = data.ces[index];
+                    if(length == 2) {
+                        ce1 = data.ces[index + 1];
+                    }
+                    break;
+                } else {
+                    return false;
+                }
+            }
+            // Note: We could support PREFIX_TAG (assert c>=0)
+            // by recursing on its default CE32 and checking that none of the prefixes starts
+            // with a fast Latin character.
+            // However, currently (2013) there are only the L-before-middle-dot
+            // prefix mappings in the Latin range, and those would be rejected anyway.
+            case Collation.CONTRACTION_TAG:
+                assert(c >= 0);
+                return getCEsFromContractionCE32(data, ce32);
+            case Collation.OFFSET_TAG:
+                assert(c >= 0);
+                ce0 = data.getCEFromOffsetCE32(c, ce32);
+                break;
+            default:
+                return false;
+            }
+        }
+        // A mapping can be completely ignorable.
+        if(ce0 == 0) { return ce1 == 0; }
+        // We do not support an ignorable ce0 unless it is completely ignorable.
+        long p0 = ce0 >>> 32;
+        if(p0 == 0) { return false; }
+        // We only support primaries up to the Latin script.
+        if(p0 > lastLatinPrimary) { return false; }
+        // We support non-common secondary and case weights only together with short primaries.
+        int lower32_0 = (int)ce0;
+        if(p0 < firstShortPrimary) {
+            int sc0 = lower32_0 & Collation.SECONDARY_AND_CASE_MASK;
+            if(sc0 != Collation.COMMON_SECONDARY_CE) { return false; }
+        }
+        // No below-common tertiary weights.
+        if((lower32_0 & Collation.ONLY_TERTIARY_MASK) < Collation.COMMON_WEIGHT16) { return false; }
+        if(ce1 != 0) {
+            // Both primaries must be in the same group,
+            // or both must get short mini primaries,
+            // or a short-primary CE is followed by a secondary CE.
+            // This is so that we can test the first primary and use the same mask for both,
+            // and determine for both whether they are variable.
+            long p1 = ce1 >>> 32;
+            if(p1 == 0 ? p0 < firstShortPrimary : !inSameGroup(p0, p1)) { return false; }
+            int lower32_1 = (int)ce1;
+            // No tertiary CEs.
+            if((lower32_1 >>> 16) == 0) { return false; }
+            // We support non-common secondary and case weights
+            // only for secondary CEs or together with short primaries.
+            if(p1 != 0 && p1 < firstShortPrimary) {
+                int sc1 = lower32_1 & Collation.SECONDARY_AND_CASE_MASK;
+                if(sc1 != Collation.COMMON_SECONDARY_CE) { return false; }
+            }
+            // No below-common tertiary weights.
+            // Fix: check the SECOND CE's lower 32 bits here (lower32_1).
+            // The original tested lower32_0 again, which was already validated
+            // above, leaving ce1's tertiary weight unchecked.
+            if((lower32_1 & Collation.ONLY_TERTIARY_MASK) < Collation.COMMON_WEIGHT16) { return false; }
+        }
+        // No quaternary weights.
+        if(((ce0 | ce1) & Collation.QUATERNARY_MASK) != 0) { return false; }
+        return true;
+    }
+
+    /**
+     * Collects the CEs for a contraction: the default (no-suffix) mapping
+     * followed by one entry per encodable single-character suffix.
+     * Sets ce0 to a marker value whose low bits index into contractionCEs.
+     *
+     * @return true (a contraction character always gets a contraction list)
+     */
+    private boolean getCEsFromContractionCE32(CollationData data, int ce32) {
+        int trieIndex = Collation.indexFromCE32(ce32);
+        ce32 = data.getCE32FromContexts(trieIndex); // Default if no suffix match.
+        // Since the original ce32 is not a prefix mapping,
+        // the default ce32 must not be another contraction.
+        assert(!Collation.isContractionCE32(ce32));
+        int contractionIndex = contractionCEs.size();
+        if(getCEsFromCE32(data, Collation.SENTINEL_CP, ce32)) {
+            addContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, ce0, ce1);
+        } else {
+            // Bail out for c-without-contraction.
+            addContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, Collation.NO_CE, 0);
+        }
+        // Handle an encodable contraction unless the next contraction is too long
+        // and starts with the same character.
+        int prevX = -1;
+        boolean addContraction = false;
+        CharsTrie.Iterator suffixes = CharsTrie.iterator(data.contexts, trieIndex + 2, 0);
+        while(suffixes.hasNext()) {
+            CharsTrie.Entry entry = suffixes.next();
+            CharSequence suffix = entry.chars;
+            int x = CollationFastLatin.getCharIndex(suffix.charAt(0));
+            if(x < 0) { continue; } // ignore anything but fast Latin text
+            if(x == prevX) {
+                if(addContraction) {
+                    // Bail out for all contractions starting with this character.
+                    addContractionEntry(x, Collation.NO_CE, 0);
+                    addContraction = false;
+                }
+                continue;
+            }
+            if(addContraction) {
+                // Flush the previous character's pending entry.
+                addContractionEntry(prevX, ce0, ce1);
+            }
+            ce32 = entry.value;
+            if(suffix.length() == 1 && getCEsFromCE32(data, Collation.SENTINEL_CP, ce32)) {
+                addContraction = true;
+            } else {
+                addContractionEntry(x, Collation.NO_CE, 0);
+                addContraction = false;
+            }
+            prevX = x;
+        }
+        if(addContraction) {
+            addContractionEntry(prevX, ce0, ce1);
+        }
+        // Note: There might not be any fast Latin contractions, but
+        // we need to enter contraction handling anyway so that we can bail out
+        // when there is a non-fast-Latin character following.
+        // For example: Danish &Y<<u+umlaut, when we compare Y vs. u\u0308 we need to see the
+        // following umlaut and bail out, rather than return the difference of Y vs. u.
+        // NOTE(review): assumes Collation.NO_CE_PRIMARY is declared as a long
+        // so that << 32 is a 64-bit shift; confirm its declared type.
+        ce0 = (Collation.NO_CE_PRIMARY << 32) | CONTRACTION_FLAG | contractionIndex;
+        ce1 = 0;
+        return true;
+    }
+
+    /**
+     * Appends one (suffix char, ce0, ce1) triple to the contraction list
+     * and registers both CEs as unique CEs.
+     */
+    private void addContractionEntry(int x, long cce0, long cce1) {
+        contractionCEs.addElement(x);
+        contractionCEs.addElement(cce0);
+        addUniqueCE(cce0);
+        contractionCEs.addElement(cce1);
+        addUniqueCE(cce1);
+    }
+
+    /**
+     * Inserts ce (with its case bits blanked out) into the sorted uniqueCEs
+     * list, unless it is ignorable, a bail-out marker, or already present.
+     */
+    private void addUniqueCE(long ce) {
+        if(ce == 0) { return; }
+        if((ce >>> 32) == Collation.NO_CE_PRIMARY) { return; }
+        long key = ce & ~(long)Collation.CASE_MASK; // blank out case bits
+        int pos = binarySearch(uniqueCEs.getBuffer(), uniqueCEs.size(), key);
+        if(pos < 0) {
+            uniqueCEs.insertElementAt(key, ~pos);
+        }
+    }
+
+    /** Looks up the mini CE previously assigned to the given CE. */
+    private int getMiniCE(long ce) {
+        long key = ce & ~(long)Collation.CASE_MASK; // uniqueCEs stores no case bits
+        int pos = binarySearch(uniqueCEs.getBuffer(), uniqueCEs.size(), key);
+        assert(pos >= 0);
+        return miniCEs[pos];
+    }
+
+    /**
+     * Assigns a 16-bit mini CE to each unique CE, walking them in sorted
+     * order, and records the last long mini primary per reordering group in
+     * the header entries. Sets shortPrimaryOverflow (and BAIL_OUT mini CEs)
+     * when the short mini primaries run out.
+     */
+    private void encodeUniqueCEs() {
+        miniCEs = new char[uniqueCEs.size()];
+        int group = 1; // header index of the first below-digits group entry
+        long lastGroupByte = result.charAt(group);
+        // The lowest unique CE must be at least a secondary CE.
+        assert(((int)uniqueCEs.elementAti(0) >>> 16) != 0);
+        long prevPrimary = 0;
+        int prevSecondary = 0;
+        int pri = 0;
+        int sec = 0;
+        int ter = CollationFastLatin.COMMON_TER;
+        for(int i = 0; i < uniqueCEs.size(); ++i) {
+            long ce = uniqueCEs.elementAti(i);
+            // Note: At least one of the p/s/t weights changes from one unique CE to the next.
+            // (uniqueCEs does not store case bits.)
+            long p = ce >>> 32;
+            if(p != prevPrimary) {
+                int p1 = (int)(p >> 24);
+                while(p1 > lastGroupByte) {
+                    assert(pri <= CollationFastLatin.MAX_LONG);
+                    // Add the last "long primary" in or before the group
+                    // into the upper 9 bits of the group entry.
+                    result.setCharAt(group, (char)((pri << 4) | lastGroupByte));
+                    if(++group < headerLength) { // group is 1-based
+                        lastGroupByte = result.charAt(group);
+                    } else {
+                        lastGroupByte = 0xff; // past the last group: no more boundaries
+                        break;
+                    }
+                }
+                if(p < firstShortPrimary) {
+                    // Assign the next long mini primary.
+                    if(pri == 0) {
+                        pri = CollationFastLatin.MIN_LONG;
+                    } else if(pri < CollationFastLatin.MAX_LONG) {
+                        pri += CollationFastLatin.LONG_INC;
+                    } else {
+                        /* #if DEBUG_COLLATION_FAST_LATIN_BUILDER
+                        printf("long-primary overflow for %08x\n", p);
+                        #endif */
+                        miniCEs[i] = CollationFastLatin.BAIL_OUT;
+                        continue;
+                    }
+                } else {
+                    // Assign the next short mini primary.
+                    if(pri < CollationFastLatin.MIN_SHORT) {
+                        pri = CollationFastLatin.MIN_SHORT;
+                    } else if(pri < (CollationFastLatin.MAX_SHORT - CollationFastLatin.SHORT_INC)) {
+                        // Reserve the highest primary weight for U+FFFF.
+                        pri += CollationFastLatin.SHORT_INC;
+                    } else {
+                        /* #if DEBUG_COLLATION_FAST_LATIN_BUILDER
+                        printf("short-primary overflow for %08x\n", p);
+                        #endif */
+                        shortPrimaryOverflow = true;
+                        miniCEs[i] = CollationFastLatin.BAIL_OUT;
+                        continue;
+                    }
+                }
+                prevPrimary = p;
+                prevSecondary = Collation.COMMON_WEIGHT16;
+                sec = CollationFastLatin.COMMON_SEC;
+                ter = CollationFastLatin.COMMON_TER;
+            }
+            int lower32 = (int)ce;
+            int s = lower32 >>> 16;
+            if(s != prevSecondary) {
+                if(pri == 0) {
+                    // Secondary CE (no primary): assign a "high" mini secondary.
+                    if(sec == 0) {
+                        sec = CollationFastLatin.MIN_SEC_HIGH;
+                    } else if(sec < CollationFastLatin.MAX_SEC_HIGH) {
+                        sec += CollationFastLatin.SEC_INC;
+                    } else {
+                        miniCEs[i] = CollationFastLatin.BAIL_OUT;
+                        continue;
+                    }
+                    prevSecondary = s;
+                    ter = CollationFastLatin.COMMON_TER;
+                } else if(s < Collation.COMMON_WEIGHT16) {
+                    // Below-common secondary weight.
+                    if(sec == CollationFastLatin.COMMON_SEC) {
+                        sec = CollationFastLatin.MIN_SEC_BEFORE;
+                    } else if(sec < CollationFastLatin.MAX_SEC_BEFORE) {
+                        sec += CollationFastLatin.SEC_INC;
+                    } else {
+                        miniCEs[i] = CollationFastLatin.BAIL_OUT;
+                        continue;
+                    }
+                } else if(s == Collation.COMMON_WEIGHT16) {
+                    sec = CollationFastLatin.COMMON_SEC;
+                } else {
+                    // Above-common secondary weight.
+                    if(sec < CollationFastLatin.MIN_SEC_AFTER) {
+                        sec = CollationFastLatin.MIN_SEC_AFTER;
+                    } else if(sec < CollationFastLatin.MAX_SEC_AFTER) {
+                        sec += CollationFastLatin.SEC_INC;
+                    } else {
+                        miniCEs[i] = CollationFastLatin.BAIL_OUT;
+                        continue;
+                    }
+                }
+                prevSecondary = s;
+                ter = CollationFastLatin.COMMON_TER;
+            }
+            assert((lower32 & Collation.CASE_MASK) == 0); // blanked out in uniqueCEs
+            int t = lower32 & Collation.ONLY_TERTIARY_MASK;
+            if(t > Collation.COMMON_WEIGHT16) {
+                if(ter < CollationFastLatin.MAX_TER_AFTER) {
+                    ++ter;
+                } else {
+                    miniCEs[i] = CollationFastLatin.BAIL_OUT;
+                    continue;
+                }
+            }
+            if(CollationFastLatin.MIN_LONG <= pri && pri <= CollationFastLatin.MAX_LONG) {
+                // Long mini primaries have no room for an explicit secondary weight.
+                assert(sec == CollationFastLatin.COMMON_SEC);
+                miniCEs[i] = (char)(pri | ter);
+            } else {
+                miniCEs[i] = (char)(pri | sec | ter);
+            }
+        }
+        /* #if DEBUG_COLLATION_FAST_LATIN_BUILDER
+        printf("last mini primary: %04x\n", pri);
+        #endif */
+        /* #if DEBUG_COLLATION_FAST_LATIN_BUILDER >= 2
+        for(int i = 0; i < uniqueCEs.size(); ++i) {
+            long ce = uniqueCEs.elementAti(i);
+            printf("unique CE 0x%016lx -> 0x%04x\n", ce, miniCEs[i]);
+        }
+        #endif */
+    }
+
+    /**
+     * Writes the per-character table of mini CEs (one unit per fast-Latin
+     * character), appending two-unit expansions after the table.
+     * Contraction characters are handled later by encodeContractions().
+     */
+    private void encodeCharCEs() {
+        int miniCEsStart = result.length();
+        for(int i = 0; i < CollationFastLatin.NUM_FAST_CHARS; ++i) {
+            // Note: append(0) appends the decimal string "0" (one char '0'),
+            // not U+0000. Only the length matters here: every slot is
+            // overwritten below or in encodeContractions().
+            result.append(0); // initialize to completely ignorable
+        }
+        int indexBase = result.length();
+        for(int i = 0; i < CollationFastLatin.NUM_FAST_CHARS; ++i) {
+            long ce = charCEs[i][0];
+            if(isContractionCharCE(ce)) { continue; } // defer contraction
+            int miniCE = encodeTwoCEs(ce, charCEs[i][1]);
+            if((miniCE >>> 16) > 0) { // if ((unsigned)miniCE > 0xffff)
+                // Did not fit into one unit: encode as a two-unit expansion.
+                // Note: There is a chance that this new expansion is the same as a previous one,
+                // and if so, then we could reuse the other expansion.
+                // However, that seems unlikely.
+                int expansionIndex = result.length() - indexBase;
+                if(expansionIndex > CollationFastLatin.INDEX_MASK) {
+                    miniCE = CollationFastLatin.BAIL_OUT;
+                } else {
+                    result.append((char)(miniCE >> 16)).append((char)miniCE);
+                    miniCE = CollationFastLatin.EXPANSION | expansionIndex;
+                }
+            }
+            result.setCharAt(miniCEsStart + i, (char)miniCE);
+        }
+    }
+
+    /**
+     * Writes the contraction lists and patches each contraction character's
+     * table entry to point at its list (or to BAIL_OUT on index overflow).
+     */
+    private void encodeContractions() {
+        // We encode all contraction lists so that the first word of a list
+        // terminates the previous list, and we only need one additional terminator at the end.
+        int indexBase = headerLength + CollationFastLatin.NUM_FAST_CHARS;
+        int firstContractionIndex = result.length();
+        for(int i = 0; i < CollationFastLatin.NUM_FAST_CHARS; ++i) {
+            long ce = charCEs[i][0];
+            if(!isContractionCharCE(ce)) { continue; }
+            int contractionIndex = result.length() - indexBase;
+            if(contractionIndex > CollationFastLatin.INDEX_MASK) {
+                result.setCharAt(headerLength + i, (char) CollationFastLatin.BAIL_OUT);
+                continue;
+            }
+            boolean firstTriple = true;
+            // The low bits of ce (below CONTRACTION_FLAG) index into contractionCEs.
+            for(int index = (int)ce & 0x7fffffff;; index += 3) {
+                long x = contractionCEs.elementAti(index);
+                if(x == CollationFastLatin.CONTR_CHAR_MASK && !firstTriple) { break; }
+                long cce0 = contractionCEs.elementAti(index + 1);
+                long cce1 = contractionCEs.elementAti(index + 2);
+                int miniCE = encodeTwoCEs(cce0, cce1);
+                if(miniCE == CollationFastLatin.BAIL_OUT) {
+                    // length 1: suffix char only, no CEs -> bail out at runtime
+                    result.append((char)(x | (1 << CollationFastLatin.CONTR_LENGTH_SHIFT)));
+                } else if(miniCE <= 0xffff) {
+                    // length 2: suffix char + one mini CE
+                    result.append((char)(x | (2 << CollationFastLatin.CONTR_LENGTH_SHIFT)));
+                    result.append((char)miniCE);
+                } else {
+                    // length 3: suffix char + a two-unit expansion
+                    result.append((char)(x | (3 << CollationFastLatin.CONTR_LENGTH_SHIFT)));
+                    result.append((char)(miniCE >> 16)).append((char)miniCE);
+                }
+                firstTriple = false;
+            }
+            // Note: There is a chance that this new contraction list is the same as a previous one,
+            // and if so, then we could truncate the result and reuse the other list.
+            // However, that seems unlikely.
+            result.setCharAt(headerLength + i,
+                    (char)(CollationFastLatin.CONTRACTION | contractionIndex));
+        }
+        if(result.length() > firstContractionIndex) {
+            // Terminate the last contraction list.
+            result.append((char)CollationFastLatin.CONTR_CHAR_MASK);
+        }
+        /* #if DEBUG_COLLATION_FAST_LATIN_BUILDER
+        printf("** fast Latin %d * 2 = %d bytes\n", result.length(), result.length() * 2);
+        puts("   header & below-digit groups map");
+        int i = 0;
+        for(; i < headerLength; ++i) {
+            printf(" %04x", result[i]);
+        }
+        printf("\n   char mini CEs");
+        assert(CollationFastLatin.NUM_FAST_CHARS % 16 == 0);
+        for(; i < indexBase; i += 16) {
+            int c = i - headerLength;
+            if(c >= CollationFastLatin.LATIN_LIMIT) {
+                c = CollationFastLatin.PUNCT_START + c - CollationFastLatin.LATIN_LIMIT;
+            }
+            printf("\n %04x:", c);
+            for(int j = 0; j < 16; ++j) {
+                printf(" %04x", result[i + j]);
+            }
+        }
+        printf("\n   expansions & contractions");
+        for(; i < result.length(); ++i) {
+            if((i - indexBase) % 16 == 0) { puts(""); }
+            printf(" %04x", result[i]);
+        }
+        puts("");
+        #endif */
+    }
+
+    /**
+     * Combines up to two CEs into mini-CE form.
+     *
+     * @return 0 if completely ignorable, BAIL_OUT if not encodable,
+     *         a single mini CE in the low 16 bits when the pair fits into one,
+     *         otherwise (first mini CE) &lt;&lt; 16 | (second mini CE)
+     */
+    private int encodeTwoCEs(long first, long second) {
+        if(first == 0) {
+            return 0; // completely ignorable
+        }
+        if(first == Collation.NO_CE) {
+            return CollationFastLatin.BAIL_OUT;
+        }
+        assert((first >>> 32) != Collation.NO_CE_PRIMARY);
+
+        int miniCE = getMiniCE(first);
+        if(miniCE == CollationFastLatin.BAIL_OUT) { return miniCE; }
+        if(miniCE >= CollationFastLatin.MIN_SHORT) {
+            // Extract & copy the case bits.
+            // Shift them from normal CE bits 15..14 to mini CE bits 4..3.
+            int c = (((int)first & Collation.CASE_MASK) >> (14 - 3));
+            // Only in mini CEs: Ignorable case bits = 0, lowercase = 1.
+            c += CollationFastLatin.LOWER_CASE;
+            miniCE |= c;
+        }
+        if(second == 0) { return miniCE; }
+
+        int miniCE1 = getMiniCE(second);
+        if(miniCE1 == CollationFastLatin.BAIL_OUT) { return miniCE1; }
+
+        int case1 = (int)second & Collation.CASE_MASK;
+        if(miniCE >= CollationFastLatin.MIN_SHORT &&
+                (miniCE & CollationFastLatin.SECONDARY_MASK) == CollationFastLatin.COMMON_SEC) {
+            // Try to combine the two mini CEs into one.
+            int sec1 = miniCE1 & CollationFastLatin.SECONDARY_MASK;
+            int ter1 = miniCE1 & CollationFastLatin.TERTIARY_MASK;
+            if(sec1 >= CollationFastLatin.MIN_SEC_HIGH && case1 == 0 &&
+                    ter1 == CollationFastLatin.COMMON_TER) {
+                // sec1>=sec_high implies pri1==0.
+                return (miniCE & ~CollationFastLatin.SECONDARY_MASK) | sec1;
+            }
+        }
+
+        if(miniCE1 <= CollationFastLatin.SECONDARY_MASK || CollationFastLatin.MIN_SHORT <= miniCE1) {
+            // Secondary CE, or a CE with a short primary, copy the case bits.
+            case1 = (case1 >> (14 - 3)) + CollationFastLatin.LOWER_CASE;
+            miniCE1 |= case1;
+        }
+        return (miniCE << 16) | miniCE1;
+    }
+
+    /**
+     * True if ce is the special marker value whose low bits index into the
+     * contraction list (NO_CE itself also carries NO_CE_PRIMARY, so it is
+     * excluded explicitly).
+     */
+    private static boolean isContractionCharCE(long ce) {
+        if(ce == Collation.NO_CE) { return false; }
+        return (ce >>> 32) == Collation.NO_CE_PRIMARY;
+    }
+
+    /** Flag in a charCEs[i][0] value: the low bits hold a contractionCEs index. */
+    private static final long CONTRACTION_FLAG = 0x80000000L;
+
+    // temporary "buffer": the current mapping's CEs, set by getCEsFromCE32()
+    private long ce0, ce1;
+
+    // Up to two CEs per fast-Latin character; see getCEs().
+    private long[][] charCEs = new long[CollationFastLatin.NUM_FAST_CHARS][2];
+
+    // Triples of (suffix char, ce0, ce1) per contraction list; see addContractionEntry().
+    private UVector64 contractionCEs;
+    // Sorted list of unique, case-blanked CEs; parallel to miniCEs.
+    private UVector64 uniqueCEs;
+
+    /** One 16-bit mini CE per unique CE. */
+    private char[] miniCEs;
+
+    // These are constant for a given list of CollationData.scripts.
+    private long firstDigitPrimary;
+    private long firstLatinPrimary;
+    private long lastLatinPrimary;
+    // This determines the first normal primary weight which is mapped to
+    // a short mini primary. It must be >=firstDigitPrimary.
+    private long firstShortPrimary;
+
+    // Set by encodeUniqueCEs() when the short mini primaries run out.
+    private boolean shortPrimaryOverflow;
+
+    // Header followed by the table; exposed via getHeader()/getTable().
+    private StringBuilder result = new StringBuilder();
+    private int headerLength;
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2010-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationIterator.java, ported from collationiterator.h/.cpp
+*
+* C++ version created on: 2010oct27
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.impl.Normalizer2Impl.Hangul;
+import com.ibm.icu.impl.Trie2_32;
+import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.CharsTrie;
+
+/**
+ * Collation element iterator and abstract character iterator.
+ *
+ * When a method returns a code point value, it must be in 0..10FFFF,
+ * except it can be negative as a sentinel value.
+ */
+public abstract class CollationIterator {
+    /** Growable buffer of 64-bit collation elements. */
+    private static final class CEBuffer {
+        /** Large enough for CEs of most short strings. */
+        private static final int INITIAL_CAPACITY = 40;
+
+        CEBuffer() {}
+
+        // Appends ce, growing the buffer if necessary.
+        void append(long ce) {
+            if(length >= INITIAL_CAPACITY) {
+                ensureAppendCapacity(1);
+            }
+            buffer[length++] = ce;
+        }
+
+        // Appends ce without a capacity check; caller must have ensured capacity.
+        void appendUnsafe(long ce) {
+            buffer[length++] = ce;
+        }
+
+        // Grows the buffer (quadrupling while below 1000 units, doubling after)
+        // until appCap more elements fit.
+        void ensureAppendCapacity(int appCap) {
+            int capacity = buffer.length;
+            if((length + appCap) <= capacity) { return; }
+            do {
+                if(capacity < 1000) {
+                    capacity *= 4;
+                } else {
+                    capacity *= 2;
+                }
+            } while(capacity < (length + appCap));
+            long[] newBuffer = new long[capacity];
+            System.arraycopy(buffer, 0, newBuffer, 0, length);
+            buffer = newBuffer;
+        }
+
+        void incLength() {
+            // Use INITIAL_CAPACITY for a very simple fastpath.
+            // (Rather than buffer.getCapacity().)
+            if(length >= INITIAL_CAPACITY) {
+                ensureAppendCapacity(1);
+            }
+            ++length;
+        }
+
+        // Stores ce at index i; does not grow the buffer or adjust length.
+        long set(int i, long ce) {
+            return buffer[i] = ce;
+        }
+        long get(int i) { return buffer[i]; }
+
+        // Direct access to the backing array; valid entries are 0..length-1.
+        long[] getCEs() { return buffer; }
+
+        int length = 0;
+
+        private long[] buffer = new long[INITIAL_CAPACITY];
+    }
+
+    // State of combining marks skipped in discontiguous contraction.
+    // We create a state object on first use and keep it around deactivated between uses.
+    private static final class SkippedState {
+        // Born active but empty.
+        SkippedState() {}
+
+        // Deactivates this state: discards previously skipped marks.
+        void clear() {
+            oldBuffer.setLength(0);
+            pos = 0;
+            // The newBuffer is reset by setFirstSkipped().
+        }
+
+        boolean isEmpty() { return oldBuffer.length() == 0; }
+
+        boolean hasNext() { return pos < oldBuffer.length(); }
+
+        // Returns the next skipped code point. Requires hasNext().
+        int next() {
+            int c = oldBuffer.codePointAt(pos);
+            pos += Character.charCount(c);
+            return c;
+        }
+
+        // Accounts for one more input code point read beyond the end of the marks buffer.
+        void incBeyond() {
+            assert(!hasNext());
+            ++pos;
+        }
+
+        // Goes backward through the skipped-marks buffer.
+        // Returns the number of code points read beyond the skipped marks
+        // that need to be backtracked through normal input.
+        int backwardNumCodePoints(int n) {
+            int length = oldBuffer.length();
+            int beyond = pos - length;
+            if(beyond > 0) {
+                if(beyond >= n) {
+                    // Not back far enough to re-enter the oldBuffer.
+                    pos -= n;
+                    return n;
+                } else {
+                    // Back out all beyond-oldBuffer code points and re-enter the buffer.
+                    pos = oldBuffer.offsetByCodePoints(length, beyond - n);
+                    return beyond;
+                }
+            } else {
+                // Go backwards from inside the oldBuffer.
+                pos = oldBuffer.offsetByCodePoints(pos, -n);
+                return 0;
+            }
+        }
+
+        // Starts a fresh list of newly skipped marks with c as its first element.
+        void setFirstSkipped(int c) {
+            skipLengthAtMatch = 0;
+            newBuffer.setLength(0);
+            newBuffer.appendCodePoint(c);
+        }
+
+        // Adds c to the list of newly skipped marks.
+        void skip(int c) {
+            newBuffer.appendCodePoint(c);
+        }
+
+        // Remembers how much of newBuffer belongs to the current match.
+        void recordMatch() { skipLengthAtMatch = newBuffer.length(); }
+
+        // Replaces the characters we consumed with the newly skipped ones.
+        void replaceMatch() {
+            // Note: UnicodeString.replace() pins pos to at most length().
+            int oldLength = oldBuffer.length();
+            if(pos > oldLength) { pos = oldLength; }
+            oldBuffer.delete(0, pos).insert(0, newBuffer, 0, skipLengthAtMatch);
+            pos = 0;
+        }
+
+        void saveTrieState(CharsTrie trie) { trie.saveState(state); }
+        void resetToTrieState(CharsTrie trie) { trie.resetToState(state); }
+
+        // Combining marks skipped in previous discontiguous-contraction matching.
+        // After that discontiguous contraction was completed, we start reading them from here.
+        private final StringBuilder oldBuffer = new StringBuilder();
+        // Combining marks newly skipped in current discontiguous-contraction matching.
+        // These might have been read from the normal text or from the oldBuffer.
+        private final StringBuilder newBuffer = new StringBuilder();
+        // Reading index in oldBuffer,
+        // or counter for how many code points have been read beyond oldBuffer (pos-oldBuffer.length()).
+        private int pos;
+        // newBuffer.length() at the time of the last matching character.
+        // When a partial match fails, we back out skipped and partial-matching input characters.
+        private int skipLengthAtMatch;
+        // We save the trie state before we attempt to match a character,
+        // so that we can skip it and try the next one.
+        private CharsTrie.State state = new CharsTrie.State();
+    };
+
+ /**
+ * Partially constructs the iterator.
+ * In Java, we cache partially constructed iterators
+ * and finish their setup when starting to work on text
+ * (via reset(boolean) and the setText(numeric, ...) methods of subclasses).
+ * This avoids memory allocations for iterators that remain unused.
+ *
+ * <p>In C++, there is only one constructor, and iterators are
+ * stack-allocated as needed.
+ */
+ public CollationIterator(CollationData d) {
+ trie = d.trie;
+ data = d;
+ numCpFwd = -1;
+ isNumeric = false;
+ ceBuffer = null;
+ }
+
+ public CollationIterator(CollationData d, boolean numeric) {
+ trie = d.trie;
+ data = d;
+ numCpFwd = -1;
+ isNumeric = numeric;
+ ceBuffer = new CEBuffer();
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ // Subclasses: Call this method and then add more specific checks.
+ // Compare the iterator state but not the collation data (trie & data fields):
+ // Assume that the caller compares the data.
+ // Ignore skipped since that should be unused between calls to nextCE().
+ // (It only stays around to avoid another memory allocation.)
+ if(!this.getClass().equals(other.getClass())) { return false; }
+ CollationIterator o = (CollationIterator)other;
+ if(!(ceBuffer.length == o.ceBuffer.length &&
+ cesIndex == o.cesIndex &&
+ numCpFwd == o.numCpFwd &&
+ isNumeric == o.isNumeric)) {
+ return false;
+ }
+ for(int i = 0; i < ceBuffer.length; ++i) {
+ if(ceBuffer.get(i) != o.ceBuffer.get(i)) { return false; }
+ }
+ return true;
+ }
+
    /**
     * Resets the iterator state and sets the position to the specified offset.
     * Subclasses must implement, and must call the parent class method,
     * or CollationIterator.reset().
     */
    public abstract void resetToOffset(int newOffset);

    /** Returns the offset into the text corresponding to the current iteration position. */
    public abstract int getOffset();
+
    /**
     * Returns the next collation element.
     * Fast paths for simple and long-primary CE32s are inlined here;
     * all other specials are delegated to nextCEFromCE32().
     */
    public final long nextCE() {
        if(cesIndex < ceBuffer.length) {
            // Return the next buffered CE.
            return ceBuffer.get(cesIndex++);
        }
        assert cesIndex == ceBuffer.length;
        // Reserve a slot optimistically; nextCEFromCE32() undoes this if it appends itself.
        ceBuffer.incLength();
        long cAndCE32 = handleNextCE32();
        int c = (int)(cAndCE32 >> 32);
        int ce32 = (int)cAndCE32;
        int t = ce32 & 0xff;
        if(t < Collation.SPECIAL_CE32_LOW_BYTE) {  // Forced-inline of isSpecialCE32(ce32).
            // Normal CE from the main data.
            // Forced-inline of ceFromSimpleCE32(ce32).
            return ceBuffer.set(cesIndex++,
                    ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (t << 8));
        }
        CollationData d;
        // The compiler should be able to optimize the previous and the following
        // comparisons of t with the same constant.
        if(t == Collation.SPECIAL_CE32_LOW_BYTE) {
            if(c < 0) {
                // End of text: deliver the terminating NO_CE.
                return ceBuffer.set(cesIndex++, Collation.NO_CE);
            }
            d = data.base;
            ce32 = d.getCE32(c);
            t = ce32 & 0xff;
            if(t < Collation.SPECIAL_CE32_LOW_BYTE) {
                // Normal CE from the base data.
                return ceBuffer.set(cesIndex++,
                        ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (t << 8));
            }
        } else {
            d = data;
        }
        if(t == Collation.LONG_PRIMARY_CE32_LOW_BYTE) {
            // Forced-inline of ceFromLongPrimaryCE32(ce32).
            return ceBuffer.set(cesIndex++,
                    ((long)(ce32 - t) << 32) | Collation.COMMON_SEC_AND_TER_CE);
        }
        return nextCEFromCE32(d, c, ce32);
    }
+
+ /**
+ * Fetches all CEs.
+ * @return getCEsLength()
+ */
+ public final int fetchCEs() {
+ while(nextCE() != Collation.NO_CE) {
+ // No need to loop for each expansion CE.
+ cesIndex = ceBuffer.length;
+ }
+ return ceBuffer.length;
+ }
+
+ /**
+ * Overwrites the current CE (the last one returned by nextCE()).
+ */
+ final void setCurrentCE(long ce) {
+ assert cesIndex > 0;
+ ceBuffer.set(cesIndex - 1, ce);
+ }
+
+ /**
+ * Returns the previous collation element.
+ */
+ public final long previousCE(UVector32 offsets) {
+ if(ceBuffer.length > 0) {
+ // Return the previous buffered CE.
+ return ceBuffer.get(--ceBuffer.length);
+ }
+ offsets.removeAllElements();
+ int limitOffset = getOffset();
+ int c = previousCodePoint();
+ if(c < 0) { return Collation.NO_CE; }
+ if(data.isUnsafeBackward(c, isNumeric)) {
+ return previousCEUnsafe(c, offsets);
+ }
+ // Simple, safe-backwards iteration:
+ // Get a CE going backwards, handle prefixes but no contractions.
+ int ce32 = data.getCE32(c);
+ CollationData d;
+ if(ce32 == Collation.FALLBACK_CE32) {
+ d = data.base;
+ ce32 = d.getCE32(c);
+ } else {
+ d = data;
+ }
+ if(Collation.isSimpleOrLongCE32(ce32)) {
+ return Collation.ceFromCE32(ce32);
+ }
+ appendCEsFromCE32(d, c, ce32, false);
+ if(ceBuffer.length > 1) {
+ offsets.addElement(getOffset());
+ // For an expansion, the offset of each non-initial CE is the limit offset,
+ // consistent with forward iteration.
+ while(offsets.size() <= ceBuffer.length) {
+ offsets.addElement(limitOffset);
+ };
+ }
+ return ceBuffer.get(--ceBuffer.length);
+ }
+
    /** Returns the number of CEs in the buffer. */
    public final int getCEsLength() {
        return ceBuffer.length;
    }

    /** Returns the i-th buffered CE. */
    public final long getCE(int i) {
        return ceBuffer.get(i);
    }

    /** Returns the buffered CEs as an array (see CEBuffer.getCEs()). */
    public final long[] getCEs() {
        return ceBuffer.getCEs();
    }

    /** Discards all buffered CEs and resets the read index. */
    final void clearCEs() {
        cesIndex = ceBuffer.length = 0;
    }

    /** Clears the CE buffer if every buffered CE has already been read. */
    public final void clearCEsIfNoneRemaining() {
        if(cesIndex == ceBuffer.length) { clearCEs(); }
    }
+
    /**
     * Returns the next code point (with post-increment),
     * or a negative value at the end of the text.
     * Public for identical-level comparison and for testing.
     */
    public abstract int nextCodePoint();

    /**
     * Returns the previous code point (with pre-decrement),
     * or a negative value at the start of the text.
     * Public for identical-level comparison and for testing.
     */
    public abstract int previousCodePoint();
+
+ protected final void reset() {
+ cesIndex = ceBuffer.length = 0;
+ if(skipped != null) { skipped.clear(); }
+ }
+ /**
+ * Resets the state as well as the numeric setting,
+ * and completes the initialization.
+ * Only exists in Java where we reset cached CollationIterator instances
+ * rather than stack-allocating temporary ones.
+ * (See also the constructor comments.)
+ */
+ protected final void reset(boolean numeric) {
+ if(ceBuffer == null) {
+ ceBuffer = new CEBuffer();
+ }
+ reset();
+ isNumeric = numeric;
+ }
+
    /**
     * Returns the next code point and its local CE32 value.
     * Returns Collation.FALLBACK_CE32 at the end of the text (c&lt;0)
     * or when c's CE32 value is to be looked up in the base data (fallback).
     *
     * The code point is used for fallbacks, context and implicit weights.
     * It is ignored when the returned CE32 is not special (e.g., FFFD_CE32).
     *
     * Returns the code point in bits 63..32 (signed) and the CE32 in bits 31..0.
     */
    protected long handleNextCE32() {
        int c = nextCodePoint();
        if(c < 0) { return NO_CP_AND_CE32; }
        return makeCodePointAndCE32Pair(c, data.getCE32(c));
    }
    // Packs c into the upper 32 bits and the (unsigned) ce32 into the lower 32 bits.
    protected long makeCodePointAndCE32Pair(int c, int ce32) {
        return ((long)c << 32) | (ce32 & 0xffffffffL);
    }
    // End-of-text sentinel pair: c=-1 combined with FALLBACK_CE32.
    protected static final long NO_CP_AND_CE32 = (-1L << 32) | (Collation.FALLBACK_CE32 & 0xffffffffL);
+
    /**
     * Called when handleNextCE32() returns a LEAD_SURROGATE_TAG for a lead surrogate code unit.
     * Returns the trail surrogate in that case and advances past it,
     * if a trail surrogate follows the lead surrogate.
     * Otherwise returns any other code unit and does not advance.
     * This base implementation returns 0 (no trail surrogate); subclasses override.
     */
    protected char handleGetTrailSurrogate() {
        return 0;
    }

    /**
     * Called when handleNextCE32() returns with c==0, to see whether it is a NUL terminator.
     * (Not needed in Java.)
     */
    /*protected boolean foundNULTerminator() {
        return false;
    }*/

    /**
     * @return false if surrogate code points U+D800..U+DFFF
     *         map to their own implicit primary weights (for UTF-16),
     *         or true if they map to CE(U+FFFD) (for UTF-8)
     */
    protected boolean forbidSurrogateCodePoints() {
        return false;
    }
+
    /** Advances the text position by num code points (toward the end). */
    protected abstract void forwardNumCodePoints(int num);

    /** Moves the text position back by num code points (toward the start). */
    protected abstract void backwardNumCodePoints(int num);

    /**
     * Returns the CE32 from the data trie.
     * Normally the same as data.getCE32(), but overridden in the builder.
     * Call this only when the faster data.getCE32() cannot be used.
     */
    protected int getDataCE32(int c) {
        return data.getCE32(c);
    }

    /**
     * Resolves a BUILDER_DATA_TAG CE32.
     * Only meaningful when overridden (by the builder); unreachable here.
     */
    protected int getCE32FromBuilderData(int ce32) {
        throw new RuntimeException("internal program error: should be unreachable");
    }
+
    /**
     * Appends the CE(s) for code point c with (possibly special) ce32 to the ceBuffer.
     * Resolves special CE32 tags iteratively (expansions, contractions, prefixes,
     * Hangul, digits, surrogates, offsets, implicits); the final non-special CE32
     * falls out of the loop and is appended as a simple CE.
     */
    protected final void appendCEsFromCE32(CollationData d, int c, int ce32,
            boolean forward) {
        while(Collation.isSpecialCE32(ce32)) {
            switch(Collation.tagFromCE32(ce32)) {
            case Collation.FALLBACK_TAG:
            case Collation.RESERVED_TAG_3:
                throw new RuntimeException("internal program error: should be unreachable");
            case Collation.LONG_PRIMARY_TAG:
                ceBuffer.append(Collation.ceFromLongPrimaryCE32(ce32));
                return;
            case Collation.LONG_SECONDARY_TAG:
                ceBuffer.append(Collation.ceFromLongSecondaryCE32(ce32));
                return;
            case Collation.LATIN_EXPANSION_TAG:
                // Two CEs encoded directly in the ce32.
                ceBuffer.ensureAppendCapacity(2);
                ceBuffer.set(ceBuffer.length, Collation.latinCE0FromCE32(ce32));
                ceBuffer.set(ceBuffer.length + 1, Collation.latinCE1FromCE32(ce32));
                ceBuffer.length += 2;
                return;
            case Collation.EXPANSION32_TAG: {
                int index = Collation.indexFromCE32(ce32);
                int length = Collation.lengthFromCE32(ce32);
                ceBuffer.ensureAppendCapacity(length);
                do {
                    ceBuffer.appendUnsafe(Collation.ceFromCE32(d.ce32s[index++]));
                } while(--length > 0);
                return;
            }
            case Collation.EXPANSION_TAG: {
                int index = Collation.indexFromCE32(ce32);
                int length = Collation.lengthFromCE32(ce32);
                ceBuffer.ensureAppendCapacity(length);
                do {
                    ceBuffer.appendUnsafe(d.ces[index++]);
                } while(--length > 0);
                return;
            }
            case Collation.BUILDER_DATA_TAG:
                ce32 = getCE32FromBuilderData(ce32);
                if(ce32 == Collation.FALLBACK_CE32) {
                    d = data.base;
                    ce32 = d.getCE32(c);
                }
                break;
            case Collation.PREFIX_TAG:
                // Temporarily step back over c so that prefix matching starts before it.
                if(forward) { backwardNumCodePoints(1); }
                ce32 = getCE32FromPrefix(d, ce32);
                if(forward) { forwardNumCodePoints(1); }
                break;
            case Collation.CONTRACTION_TAG: {
                int index = Collation.indexFromCE32(ce32);
                int defaultCE32 = d.getCE32FromContexts(index);  // Default if no suffix match.
                if(!forward) {
                    // Backward contractions are handled by previousCEUnsafe().
                    // c has contractions but they were not found.
                    ce32 = defaultCE32;
                    break;
                }
                int nextCp;
                if(skipped == null && numCpFwd < 0) {
                    // Some portion of nextCE32FromContraction() pulled out here as an ASCII fast path,
                    // avoiding the function call and the nextSkippedCodePoint() overhead.
                    nextCp = nextCodePoint();
                    if(nextCp < 0) {
                        // No more text.
                        ce32 = defaultCE32;
                        break;
                    } else if((ce32 & Collation.CONTRACT_NEXT_CCC) != 0 &&
                            !CollationFCD.mayHaveLccc(nextCp)) {
                        // All contraction suffixes start with characters with lccc!=0
                        // but the next code point has lccc==0.
                        backwardNumCodePoints(1);
                        ce32 = defaultCE32;
                        break;
                    }
                } else {
                    nextCp = nextSkippedCodePoint();
                    if(nextCp < 0) {
                        // No more text.
                        ce32 = defaultCE32;
                        break;
                    } else if((ce32 & Collation.CONTRACT_NEXT_CCC) != 0 &&
                            !CollationFCD.mayHaveLccc(nextCp)) {
                        // All contraction suffixes start with characters with lccc!=0
                        // but the next code point has lccc==0.
                        backwardNumSkipped(1);
                        ce32 = defaultCE32;
                        break;
                    }
                }
                ce32 = nextCE32FromContraction(d, ce32, d.contexts, index + 2, defaultCE32, nextCp);
                if(ce32 == Collation.NO_CE32) {
                    // CEs from a discontiguous contraction plus the skipped combining marks
                    // have been appended already.
                    return;
                }
                break;
            }
            case Collation.DIGIT_TAG:
                if(isNumeric) {
                    appendNumericCEs(ce32, forward);
                    return;
                } else {
                    // Fetch the non-numeric-collation CE32 and continue.
                    ce32 = d.ce32s[Collation.indexFromCE32(ce32)];
                    break;
                }
            case Collation.U0000_TAG:
                assert(c == 0);
                // NUL-terminated input not supported in Java.
                // Fetch the normal ce32 for U+0000 and continue.
                ce32 = d.ce32s[0];
                break;
            case Collation.HANGUL_TAG: {
                int[] jamoCE32s = d.jamoCE32s;
                // Decompose the Hangul syllable into L/V/T Jamo indexes.
                c -= Hangul.HANGUL_BASE;
                int t = c % Hangul.JAMO_T_COUNT;
                c /= Hangul.JAMO_T_COUNT;
                int v = c % Hangul.JAMO_V_COUNT;
                c /= Hangul.JAMO_V_COUNT;
                if((ce32 & Collation.HANGUL_NO_SPECIAL_JAMO) != 0) {
                    // None of the Jamo CE32s are isSpecialCE32().
                    // Avoid recursive function calls and per-Jamo tests.
                    ceBuffer.ensureAppendCapacity(t == 0 ? 2 : 3);
                    ceBuffer.set(ceBuffer.length, Collation.ceFromCE32(jamoCE32s[c]));
                    ceBuffer.set(ceBuffer.length + 1, Collation.ceFromCE32(jamoCE32s[19 + v]));
                    ceBuffer.length += 2;
                    if(t != 0) {
                        ceBuffer.appendUnsafe(Collation.ceFromCE32(jamoCE32s[39 + t]));
                    }
                    return;
                } else {
                    // We should not need to compute each Jamo code point.
                    // In particular, there should be no offset or implicit ce32.
                    appendCEsFromCE32(d, Collation.SENTINEL_CP, jamoCE32s[c], forward);
                    appendCEsFromCE32(d, Collation.SENTINEL_CP, jamoCE32s[19 + v], forward);
                    if(t == 0) { return; }
                    // offset 39 = 19 + 21 - 1:
                    // 19 = JAMO_L_COUNT
                    // 21 = JAMO_T_COUNT
                    // -1 = omit t==0
                    ce32 = jamoCE32s[39 + t];
                    c = Collation.SENTINEL_CP;
                    break;
                }
            }
            case Collation.LEAD_SURROGATE_TAG: {
                assert(forward);  // Backward iteration should never see lead surrogate code _unit_ data.
                assert(isLeadSurrogate(c));
                char trail;
                if(Character.isLowSurrogate(trail = handleGetTrailSurrogate())) {
                    c = Character.toCodePoint((char)c, trail);
                    ce32 &= Collation.LEAD_TYPE_MASK;
                    if(ce32 == Collation.LEAD_ALL_UNASSIGNED) {
                        ce32 = Collation.UNASSIGNED_CE32;  // unassigned-implicit
                    } else if(ce32 == Collation.LEAD_ALL_FALLBACK ||
                            (ce32 = d.getCE32FromSupplementary(c)) == Collation.FALLBACK_CE32) {
                        // fall back to the base data
                        d = d.base;
                        ce32 = d.getCE32FromSupplementary(c);
                    }
                } else {
                    // c is an unpaired surrogate.
                    ce32 = Collation.UNASSIGNED_CE32;
                }
                break;
            }
            case Collation.OFFSET_TAG:
                assert(c >= 0);
                ceBuffer.append(d.getCEFromOffsetCE32(c, ce32));
                return;
            case Collation.IMPLICIT_TAG:
                assert(c >= 0);
                if(isSurrogate(c) && forbidSurrogateCodePoints()) {
                    ce32 = Collation.FFFD_CE32;
                    break;
                } else {
                    ceBuffer.append(Collation.unassignedCEFromCodePoint(c));
                    return;
                }
            }
        }
        ceBuffer.append(Collation.ceFromSimpleCE32(ce32));
    }
+
+ // TODO: Propose widening the UTF16 method.
+ private static final boolean isSurrogate(int c) {
+ return (c & 0xfffff800) == 0xd800;
+ }
+
+ // TODO: Propose widening the UTF16 method.
+ protected static final boolean isLeadSurrogate(int c) {
+ return (c & 0xfffffc00) == 0xd800;
+ }
+
+ // TODO: Propose widening the UTF16 method.
+ protected static final boolean isTrailSurrogate(int c) {
+ return (c & 0xfffffc00) == 0xdc00;
+ }
+
    // Main lookup trie of the data object.
    protected final Trie2_32 trie;
    // Tailoring (or root) data; lookups fall back to data.base where needed.
    protected final CollationData data;
+
    /**
     * Appends the CEs for a special ce32 and returns the first newly appended CE.
     * Undoes the optimistic ceBuffer.incLength() performed by nextCE().
     */
    private final long nextCEFromCE32(CollationData d, int c, int ce32) {
        --ceBuffer.length;  // Undo ceBuffer.incLength().
        appendCEsFromCE32(d, c, ce32, true);
        return ceBuffer.get(cesIndex++);
    }
+
    /**
     * Matches the longest prefix (looking backward through the text)
     * and returns its CE32, or the no-prefix default.
     * Restores the text position before returning.
     */
    private final int getCE32FromPrefix(CollationData d, int ce32) {
        int index = Collation.indexFromCE32(ce32);
        ce32 = d.getCE32FromContexts(index);  // Default if no prefix match.
        index += 2;
        // Number of code points read before the original code point.
        int lookBehind = 0;
        CharsTrie prefixes = new CharsTrie(d.contexts, index);
        for(;;) {
            int c = previousCodePoint();
            if(c < 0) { break; }
            ++lookBehind;
            BytesTrie.Result match = prefixes.nextForCodePoint(c);
            if(match.hasValue()) {
                // Remember the longest match so far; keep looking for a longer one.
                ce32 = prefixes.getValue();
            }
            if(!match.hasNext()) { break; }
        }
        // Return to the original position.
        forwardNumCodePoints(lookBehind);
        return ce32;
    }
+
    /**
     * Returns the next code point, reading first from the skipped-marks buffer
     * (if active) and then from the normal text; honors the numCpFwd limit.
     */
    private final int nextSkippedCodePoint() {
        if(skipped != null && skipped.hasNext()) { return skipped.next(); }
        if(numCpFwd == 0) { return Collation.SENTINEL_CP; }
        int c = nextCodePoint();
        if(skipped != null && !skipped.isEmpty() && c >= 0) { skipped.incBeyond(); }
        if(numCpFwd > 0 && c >= 0) { --numCpFwd; }
        return c;
    }
+
    /**
     * Moves back n code points, going through the skipped-marks buffer first
     * (if active) and then through the normal text; restores the numCpFwd budget
     * for the code points backed out of the normal text.
     */
    private final void backwardNumSkipped(int n) {
        if(skipped != null && !skipped.isEmpty()) {
            // n becomes the number of code points to back out of the normal text.
            n = skipped.backwardNumCodePoints(n);
        }
        backwardNumCodePoints(n);
        if(numCpFwd >= 0) { numCpFwd += n; }
    }
+
    /**
     * Matches the longest contraction suffix starting with c.
     * Returns the CE32 for the longest match (or the no-suffix default ce32),
     * or Collation.NO_CE32 when a discontiguous contraction has already
     * appended its CEs to the ceBuffer.
     */
    private final int nextCE32FromContraction(
            CollationData d, int contractionCE32,
            CharSequence trieChars, int trieOffset, int ce32, int c) {
        // c: next code point after the original one

        // Number of code points read beyond the original code point.
        // Needed for discontiguous contraction matching.
        int lookAhead = 1;
        // Number of code points read since the last match (initially only c).
        int sinceMatch = 1;
        // Normally we only need a contiguous match,
        // and therefore need not remember the suffixes state from before a mismatch for retrying.
        // If we are already processing skipped combining marks, then we do track the state.
        CharsTrie suffixes = new CharsTrie(trieChars, trieOffset);
        if(skipped != null && !skipped.isEmpty()) { skipped.saveTrieState(suffixes); }
        BytesTrie.Result match = suffixes.firstForCodePoint(c);
        for(;;) {
            int nextCp;
            if(match.hasValue()) {
                ce32 = suffixes.getValue();
                if(!match.hasNext() || (c = nextSkippedCodePoint()) < 0) {
                    return ce32;
                }
                if(skipped != null && !skipped.isEmpty()) { skipped.saveTrieState(suffixes); }
                sinceMatch = 1;
            } else if(match == BytesTrie.Result.NO_MATCH || (nextCp = nextSkippedCodePoint()) < 0) {
                // No match for c, or partial match (BytesTrie.Result.NO_VALUE) and no further text.
                // Back up if necessary, and try a discontiguous contraction.
                if((contractionCE32 & Collation.CONTRACT_TRAILING_CCC) != 0 &&
                        // Discontiguous contraction matching extends an existing match.
                        // If there is no match yet, then there is nothing to do.
                        ((contractionCE32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) == 0 ||
                            sinceMatch < lookAhead)) {
                    // The last character of at least one suffix has lccc!=0,
                    // allowing for discontiguous contractions.
                    // UCA S2.1.1 only processes non-starters immediately following
                    // "a match in the table" (sinceMatch=1).
                    if(sinceMatch > 1) {
                        // Return to the state after the last match.
                        // (Return to sinceMatch=0 and re-fetch the first partially-matched character.)
                        backwardNumSkipped(sinceMatch);
                        c = nextSkippedCodePoint();
                        lookAhead -= sinceMatch - 1;
                        sinceMatch = 1;
                    }
                    if(d.getFCD16(c) > 0xff) {
                        return nextCE32FromDiscontiguousContraction(
                            d, suffixes, ce32, lookAhead, c);
                    }
                }
                break;
            } else {
                // Continue after partial match (BytesTrie.Result.NO_VALUE) for c.
                // It does not have a result value, therefore it is not itself "a match in the table".
                // If a partially-matched c has ccc!=0 then
                // it might be skipped in discontiguous contraction.
                c = nextCp;
                ++sinceMatch;
            }
            ++lookAhead;
            match = suffixes.nextForCodePoint(c);
        }
        // Return to the state after the last match, then deliver that match's ce32.
        backwardNumSkipped(sinceMatch);
        return ce32;
    }
+
    /**
     * Extends an existing contraction match by skipping blocked/non-matching
     * combining marks, per UCA S2.1.1-S2.1.3.
     * May append CEs (the contraction's plus those of the skipped marks) directly
     * and return Collation.NO_CE32; otherwise returns the (possibly extended) ce32.
     */
    private final int nextCE32FromDiscontiguousContraction(
            CollationData d, CharsTrie suffixes, int ce32,
            int lookAhead, int c) {
        // UCA section 3.3.2 Contractions:
        // Contractions that end with non-starter characters
        // are known as discontiguous contractions.
        // ... discontiguous contractions must be detected in input text
        // whenever the final sequence of non-starter characters could be rearranged
        // so as to make a contiguous matching sequence that is canonically equivalent.

        // UCA: http://www.unicode.org/reports/tr10/#S2.1
        // S2.1 Find the longest initial substring S at each point that has a match in the table.
        // S2.1.1 If there are any non-starters following S, process each non-starter C.
        // S2.1.2 If C is not blocked from S, find if S + C has a match in the table.
        // Note: A non-starter in a string is called blocked
        // if there is another non-starter of the same canonical combining class or zero
        // between it and the last character of canonical combining class 0.
        // S2.1.3 If there is a match, replace S by S + C, and remove C.

        // First: Is a discontiguous contraction even possible?
        int fcd16 = d.getFCD16(c);
        assert(fcd16 > 0xff);  // The caller checked this already, as a shortcut.
        int nextCp = nextSkippedCodePoint();
        if(nextCp < 0) {
            // No further text.
            backwardNumSkipped(1);
            return ce32;
        }
        ++lookAhead;
        int prevCC = fcd16 & 0xff;
        fcd16 = d.getFCD16(nextCp);
        if(fcd16 <= 0xff) {
            // The next code point after c is a starter (S2.1.1 "process each non-starter").
            backwardNumSkipped(2);
            return ce32;
        }

        // We have read and matched (lookAhead-2) code points,
        // read non-matching c and peeked ahead at nextCp.
        // Return to the state before the mismatch and continue matching with nextCp.
        if(skipped == null || skipped.isEmpty()) {
            if(skipped == null) {
                skipped = new SkippedState();
            }
            suffixes.reset();
            if(lookAhead > 2) {
                // Replay the partial match so far.
                backwardNumCodePoints(lookAhead);
                suffixes.firstForCodePoint(nextCodePoint());
                for(int i = 3; i < lookAhead; ++i) {
                    suffixes.nextForCodePoint(nextCodePoint());
                }
                // Skip c (which did not match) and nextCp (which we will try now).
                forwardNumCodePoints(2);
            }
            skipped.saveTrieState(suffixes);
        } else {
            // Reset to the trie state before the failed match of c.
            skipped.resetToTrieState(suffixes);
        }

        skipped.setFirstSkipped(c);
        // Number of code points read since the last match (at this point: c and nextCp).
        int sinceMatch = 2;
        c = nextCp;
        for(;;) {
            BytesTrie.Result match;
            // "If C is not blocked from S, find if S + C has a match in the table." (S2.1.2)
            if(prevCC < (fcd16 >> 8) && (match = suffixes.nextForCodePoint(c)).hasValue()) {
                // "If there is a match, replace S by S + C, and remove C." (S2.1.3)
                // Keep prevCC unchanged.
                ce32 = suffixes.getValue();
                sinceMatch = 0;
                skipped.recordMatch();
                if(!match.hasNext()) { break; }
                skipped.saveTrieState(suffixes);
            } else {
                // No match for "S + C", skip C.
                skipped.skip(c);
                skipped.resetToTrieState(suffixes);
                prevCC = fcd16 & 0xff;
            }
            if((c = nextSkippedCodePoint()) < 0) { break; }
            ++sinceMatch;
            fcd16 = d.getFCD16(c);
            if(fcd16 <= 0xff) {
                // The next code point after c is a starter (S2.1.1 "process each non-starter").
                break;
            }
        }
        backwardNumSkipped(sinceMatch);
        boolean isTopDiscontiguous = skipped.isEmpty();
        skipped.replaceMatch();
        if(isTopDiscontiguous && !skipped.isEmpty()) {
            // We did get a match after skipping one or more combining marks,
            // and we are not in a recursive discontiguous contraction.
            // Append CEs from the contraction ce32
            // and then from the combining marks that we skipped before the match.
            c = Collation.SENTINEL_CP;
            for(;;) {
                appendCEsFromCE32(d, c, ce32, true);
                // Fetch CE32s for skipped combining marks from the normal data, with fallback,
                // rather than from the CollationData where we found the contraction.
                if(!skipped.hasNext()) { break; }
                c = skipped.next();
                ce32 = getDataCE32(c);
                if(ce32 == Collation.FALLBACK_CE32) {
                    d = data.base;
                    ce32 = d.getCE32(c);
                } else {
                    d = data;
                }
                // Note: A nested discontiguous-contraction match
                // replaces consumed combining marks with newly skipped ones
                // and resets the reading position to the beginning.
            }
            skipped.clear();
            ce32 = Collation.NO_CE32;  // Signal to the caller that the result is in the ceBuffer.
        }
        return ce32;
    }
+
+ /**
+ * Returns the previous CE when data.isUnsafeBackward(c, isNumeric).
+ */
+ private final long previousCEUnsafe(int c, UVector32 offsets) {
+ // We just move through the input counting safe and unsafe code points
+ // without collecting the unsafe-backward substring into a buffer and
+ // switching to it.
+ // This is to keep the logic simple. Otherwise we would have to handle
+ // prefix matching going before the backward buffer, switching
+ // to iteration and back, etc.
+ // In the most important case of iterating over a normal string,
+ // reading from the string itself is already maximally fast.
+ // The only drawback there is that after getting the CEs we always
+ // skip backward to the safe character rather than switching out
+ // of a backwardBuffer.
+ // But this should not be the common case for previousCE(),
+ // and correctness and maintainability are more important than
+ // complex optimizations.
+ // Find the first safe character before c.
+ int numBackward = 1;
+ while((c = previousCodePoint()) >= 0) {
+ ++numBackward;
+ if(!data.isUnsafeBackward(c, isNumeric)) {
+ break;
+ }
+ }
+ // Set the forward iteration limit.
+ // Note: This counts code points.
+ // We cannot enforce a limit in the middle of a surrogate pair or similar.
+ numCpFwd = numBackward;
+ // Reset the forward iterator.
+ cesIndex = 0;
+ assert(ceBuffer.length == 0);
+ // Go forward and collect the CEs.
+ int offset = getOffset();
+ while(numCpFwd > 0) {
+ // nextCE() normally reads one code point.
+ // Contraction matching and digit specials read more and check numCpFwd.
+ --numCpFwd;
+ // Append one or more CEs to the ceBuffer.
+ nextCE();
+ assert(ceBuffer.get(ceBuffer.length - 1) != Collation.NO_CE);
+ // No need to loop for getting each expansion CE from nextCE().
+ cesIndex = ceBuffer.length;
+ // However, we need to write an offset for each CE.
+ // This is for CollationElementIterator.getOffset() to return
+ // intermediate offsets from the unsafe-backwards segment.
+ assert(offsets.size() < ceBuffer.length);
+ offsets.addElement(offset);
+ // For an expansion, the offset of each non-initial CE is the limit offset,
+ // consistent with forward iteration.
+ offset = getOffset();
+ while(offsets.size() < ceBuffer.length) {
+ offsets.addElement(offset);
+ };
+ }
+ assert(offsets.size() == ceBuffer.length);
+ // End offset corresponding to just after the unsafe-backwards segment.
+ offsets.addElement(offset);
+ // Reset the forward iteration limit
+ // and move backward to before the segment for which we fetched CEs.
+ numCpFwd = -1;
+ backwardNumCodePoints(numBackward);
+ // Use the collected CEs and return the last one.
+ cesIndex = 0; // Avoid cesIndex > ceBuffer.length when that gets decremented.
+ return ceBuffer.get(--ceBuffer.length);
+ }
+
    /**
     * Turns a string of digits (bytes 0..9)
     * into a sequence of CEs that will sort in numeric order.
     *
     * Starts from this ce32's digit value and consumes the following/preceding digits.
     * The digits string must not be empty and must not have leading zeros.
     */
    private final void appendNumericCEs(int ce32, boolean forward) {
        // Collect digits.
        // Note: The digits StringBuilder stores digit *values* 0..9, not ASCII '0'..'9'.
        // TODO: Use some kind of a byte buffer? We only store values 0..9.
        StringBuilder digits = new StringBuilder();
        if(forward) {
            for(;;) {
                char digit = Collation.digitFromCE32(ce32);
                digits.append(digit);
                if(numCpFwd == 0) { break; }
                int c = nextCodePoint();
                if(c < 0) { break; }
                ce32 = data.getCE32(c);
                if(ce32 == Collation.FALLBACK_CE32) {
                    ce32 = data.base.getCE32(c);
                }
                if(!Collation.hasCE32Tag(ce32, Collation.DIGIT_TAG)) {
                    // Not a digit: put it back and stop collecting.
                    backwardNumCodePoints(1);
                    break;
                }
                if(numCpFwd > 0) { --numCpFwd; }
            }
        } else {
            for(;;) {
                char digit = Collation.digitFromCE32(ce32);
                digits.append(digit);
                int c = previousCodePoint();
                if(c < 0) { break; }
                ce32 = data.getCE32(c);
                if(ce32 == Collation.FALLBACK_CE32) {
                    ce32 = data.base.getCE32(c);
                }
                if(!Collation.hasCE32Tag(ce32, Collation.DIGIT_TAG)) {
                    // Not a digit: put it back and stop collecting.
                    forwardNumCodePoints(1);
                    break;
                }
            }
            // Reverse the digit string.
            digits.reverse();
        }
        int pos = 0;
        do {
            // Skip leading zeros.
            while(pos < (digits.length() - 1) && digits.charAt(pos) == 0) { ++pos; }
            // Write a sequence of CEs for at most 254 digits at a time.
            int segmentLength = digits.length() - pos;
            if(segmentLength > 254) { segmentLength = 254; }
            appendNumericSegmentCEs(digits.subSequence(pos, pos + segmentLength));
            pos += segmentLength;
        } while(pos < digits.length());
    }
+
    /**
     * Turns 1..254 digits into a sequence of CEs.
     * Called by appendNumericCEs() for each segment of at most 254 digits.
     * The digits CharSequence contains digit values 0..9 (not ASCII).
     */
    private final void appendNumericSegmentCEs(CharSequence digits) {
        int length = digits.length();
        assert(1 <= length && length <= 254);
        assert(length == 1 || digits.charAt(0) != 0);
        long numericPrimary = data.numericPrimary;
        // Note: We use primary byte values 2..255: digits are not compressible.
        if(length <= 7) {
            // Very dense encoding for small numbers.
            int value = digits.charAt(0);
            for(int i = 1; i < length; ++i) {
                value = value * 10 + digits.charAt(i);
            }
            // Primary weight second byte values:
            //     74 byte values   2.. 75 for small numbers in two-byte primary weights.
            //     40 byte values  76..115 for medium numbers in three-byte primary weights.
            //     16 byte values 116..131 for large numbers in four-byte primary weights.
            //    124 byte values 132..255 for very large numbers with 4..127 digit pairs.
            int firstByte = 2;
            int numBytes = 74;
            if(value < numBytes) {
                // Two-byte primary for 0..73, good for day & month numbers etc.
                long primary = numericPrimary | ((firstByte + value) << 16);
                ceBuffer.append(Collation.makeCE(primary));
                return;
            }
            value -= numBytes;
            firstByte += numBytes;
            numBytes = 40;
            if(value < numBytes * 254) {
                // Three-byte primary for 74..10233=74+40*254-1, good for year numbers and more.
                long primary = numericPrimary |
                    ((firstByte + value / 254) << 16) | ((2 + value % 254) << 8);
                ceBuffer.append(Collation.makeCE(primary));
                return;
            }
            value -= numBytes * 254;
            firstByte += numBytes;
            numBytes = 16;
            if(value < numBytes * 254 * 254) {
                // Four-byte primary for 10234..1042489=10234+16*254*254-1.
                long primary = numericPrimary | (2 + value % 254);
                value /= 254;
                primary |= (2 + value % 254) << 8;
                value /= 254;
                primary |= (firstByte + value % 254) << 16;
                ceBuffer.append(Collation.makeCE(primary));
                return;
            }
            // original value > 1042489
        }
        assert(length >= 7);

        // The second primary byte value 132..255 indicates the number of digit pairs (4..127),
        // then we generate primary bytes with those pairs.
        // Omit trailing 00 pairs.
        // Decrement the value for the last pair.

        // Set the exponent. 4 pairs.132, 5 pairs.133, ..., 127 pairs.255.
        int numPairs = (length + 1) / 2;
        long primary = numericPrimary | ((132 - 4 + numPairs) << 16);
        // Find the length without trailing 00 pairs.
        while(digits.charAt(length - 1) == 0 && digits.charAt(length - 2) == 0) {
            length -= 2;
        }
        // Read the first pair.
        int pair;
        int pos;
        if((length & 1) != 0) {
            // Only "half a pair" if we have an odd number of digits.
            pair = digits.charAt(0);
            pos = 1;
        } else {
            pair = digits.charAt(0) * 10 + digits.charAt(1);
            pos = 2;
        }
        pair = 11 + 2 * pair;
        // Add the pairs of digits between pos and length.
        int shift = 8;
        while(pos < length) {
            if(shift == 0) {
                // Every three pairs/bytes we need to store a 4-byte-primary CE
                // and start with a new CE with the '0' primary lead byte.
                primary |= pair;
                ceBuffer.append(Collation.makeCE(primary));
                primary = numericPrimary;
                shift = 16;
            } else {
                primary |= pair << shift;
                shift -= 8;
            }
            pair = 11 + 2 * (digits.charAt(pos) * 10 + digits.charAt(pos + 1));
            pos += 2;
        }
        // The last pair is decremented so that numbers with trailing zeros sort correctly.
        primary |= (pair - 1) << shift;
        ceBuffer.append(Collation.makeCE(primary));
    }
+
+ private CEBuffer ceBuffer;
+ private int cesIndex;
+
+ private SkippedState skipped;
+
+ // Number of code points to read forward, or -1.
+ // Used as a forward iteration limit in previousCEUnsafe().
+ private int numCpFwd;
+ // Numeric collation (CollationSettings.NUMERIC).
+ private boolean isNumeric;
+}
--- /dev/null
+/*
+ *******************************************************************************
+ * Copyright (C) 2012-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *******************************************************************************
+ * CollationKeys.java, ported from collationkeys.h/.cpp
+ *
+ * C++ version created on: 2012sep02
+ * created by: Markus W. Scherer
+ */
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.text.Collator;
+
+public final class CollationKeys /* all methods are static */ {
+
+ // Java porting note: C++ SortKeyByteSink class extends a common class ByteSink,
+ // which is not available in Java. We don't need a super class created for implementing
+ // collation features.
+    public static abstract class SortKeyByteSink {
+        protected byte[] buffer_;
+        // protected int capacity_; == buffer_.length
+        // Total number of bytes appended so far, including bytes that did not fit
+        // into buffer_; compare with buffer_.length to detect overflow.
+        private int appended_ = 0;
+        // not used in Java -- private int ignore_ = 0;
+
+        public SortKeyByteSink(byte[] dest) {
+            buffer_ = dest;
+        }
+
+        /**
+         * Needed in Java for when we write to the buffer directly.
+         * In C++, the SortKeyByteSink is a subclass of ByteSink and lower-level code can write to that.
+         * TODO: Can we make Java SortKeyByteSink have-a ByteArrayWrapper and write through to it?
+         * Or maybe create interface ByteSink, have SortKeyByteSink implement it, and have BOCSU write to that??
+         */
+        public void setBufferAndAppended(byte[] dest, int app) {
+            buffer_ = dest;
+            appended_ = app;
+        }
+
+        /* not used in Java -- public void IgnoreBytes(int numIgnore) {
+            ignore_ = numIgnore;
+        } */
+
+        /**
+         * Appends the first n bytes of the array to the sink.
+         *
+         * @param bytes
+         *            the array of byte
+         * @param n
+         *            the length of bytes to be appended
+         */
+        public void Append(byte[] bytes, int n) {
+            if (n <= 0 || bytes == null) {
+                return;
+            }
+
+            /* not used in Java -- if (ignore_ > 0) {
+                int ignoreRest = ignore_ - n;
+                if (ignoreRest >= 0) {
+                    ignore_ = ignoreRest;
+                    return;
+                } else {
+                    start = ignore_;
+                    n = -ignoreRest;
+                    ignore_ = 0;
+                }
+            } */
+
+            // Count the bytes first; appended_ may exceed the capacity (overflow accounting).
+            int length = appended_;
+            appended_ += n;
+
+            int available = buffer_.length - length;
+            if (n <= available) {
+                System.arraycopy(bytes, 0, buffer_, length, n);
+            } else {
+                // Delegate to the subclass, which may grow the buffer or just count bytes.
+                AppendBeyondCapacity(bytes, 0, n, length);
+            }
+        }
+
+        /** Appends one byte; the low 8 bits of b are stored when there is room. */
+        public void Append(int b) {
+            /* not used in Java -- if (ignore_ > 0) {
+                --ignore_;
+            } else */ {
+                if (appended_ < buffer_.length || Resize(1, appended_)) {
+                    buffer_[appended_] = (byte) b;
+                }
+                // Counted even when the byte was not stored, so Overflowed() can report it.
+                ++appended_;
+            }
+        }
+
+        // Java porting note: This method is not used by collator implementation.
+        //
+        // virtual char *GetAppendBuffer(int min_capacity,
+        //         int desired_capacity_hint,
+        //         char *scratch, int scratch_capacity,
+        //         int *result_capacity);
+
+        public int NumberOfBytesAppended() {
+            return appended_;
+        }
+
+        public int GetRemainingCapacity() {
+            return /* not used in Java -- ignore_ + */ buffer_.length - appended_;
+        }
+
+        /** Returns true if more bytes were appended than the buffer can hold. */
+        public boolean Overflowed() {
+            return appended_ > buffer_.length;
+        }
+
+        /* not used in Java -- public boolean IsOk() {
+            return true;
+        } */
+
+        /**
+         * Called when an Append does not fit; the subclass decides whether to grow or drop.
+         *
+         * @param bytes
+         *            the array of byte
+         * @param start
+         *            the start index within the array to be appended
+         * @param n
+         *            the length of bytes to be appended
+         * @param length
+         *            the length of buffer required to store the entire data (i.e. already appended
+         *            bytes + bytes to be appended by this method)
+         */
+        protected abstract void AppendBeyondCapacity(byte[] bytes, int start, int n, int length);
+
+        protected abstract boolean Resize(int appendCapacity, int length);
+    }
+
+    // Hook for writeSortKeyUpToQuaternary(): lets a caller stop before any level
+    // beyond the primary is written (e.g. for partial sort keys).
+    public static class LevelCallback {
+        /**
+         * @param level
+         *            The next level about to be written to the ByteSink.
+         * @return true if the level is to be written (the base class implementation always returns
+         *         true)
+         */
+        boolean needToWrite(int level) {
+            return true;
+        }
+    }
+    // Default callback: writes every level.
+    public static final LevelCallback SIMPLE_LEVEL_FALLBACK = new LevelCallback();
+
+    // Growable byte buffer for one sort key level (secondary/case/tertiary/quaternary),
+    // accumulated while the primary level is written directly to the sink.
+    private static final class SortKeyLevel {
+        private static final int INITIAL_CAPACITY = 40;
+
+        byte[] buffer = new byte[INITIAL_CAPACITY];
+        int len = 0;
+        // not used in Java -- private static final boolean ok = true; // In C++ "ok" is reset when memory allocations fail.
+
+        SortKeyLevel() {
+        }
+
+        /* not used in Java -- boolean isOk() {
+            return ok;
+        } */
+
+        boolean isEmpty() {
+            return len == 0;
+        }
+
+        int length() {
+            return len;
+        }
+
+        // Java porting note: Java uses this instead of C++ operator [] overload
+        // uint8_t operator[](int index)
+        // NOTE(review): returns the raw signed byte while C++ returns uint8_t;
+        // callers comparing the result against constants must beware sign extension
+        // for stored values >= 0x80 -- verify against ICU4C collationkeys.cpp.
+        byte getAt(int index) {
+            return buffer[index];
+        }
+
+        byte[] data() {
+            return buffer;
+        }
+
+        // Appends the low 8 bits of b, growing the buffer as needed.
+        void appendByte(int b) {
+            if (len < buffer.length || ensureCapacity(1)) {
+                buffer[len++] = (byte) b;
+            }
+        }
+
+        // Appends a 16-bit weight, omitting a zero trailing byte.
+        void appendWeight16(int w) {
+            assert ((w & 0xffff) != 0);
+            byte b0 = (byte) (w >>> 8);
+            byte b1 = (byte) w;
+            int appendLength = (b1 == 0) ? 1 : 2;
+            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
+                buffer[len++] = b0;
+                if (b1 != 0) {
+                    buffer[len++] = b1;
+                }
+            }
+        }
+
+        // Appends a 32-bit weight, omitting trailing zero bytes.
+        void appendWeight32(long w) {
+            assert (w != 0);
+            byte[] bytes = new byte[] { (byte) (w >>> 24), (byte) (w >>> 16), (byte) (w >>> 8),
+                    (byte) w };
+            int appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
+            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
+                buffer[len++] = bytes[0];
+                if (bytes[1] != 0) {
+                    buffer[len++] = bytes[1];
+                    if (bytes[2] != 0) {
+                        buffer[len++] = bytes[2];
+                        if (bytes[3] != 0) {
+                            buffer[len++] = bytes[3];
+                        }
+                    }
+                }
+            }
+        }
+
+        // Appends a 16-bit weight with its two bytes swapped; the whole level is
+        // re-reversed later for backwards (French) secondary ordering.
+        void appendReverseWeight16(int w) {
+            assert ((w & 0xffff) != 0);
+            byte b0 = (byte) (w >>> 8);
+            byte b1 = (byte) w;
+            int appendLength = (b1 == 0) ? 1 : 2;
+            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
+                if (b1 == 0) {
+                    buffer[len++] = b0;
+                } else {
+                    buffer[len] = b1;
+                    buffer[len + 1] = b0;
+                    len += 2;
+                }
+            }
+        }
+
+        // Appends all but the last byte to the sink. The last byte should be the 01 terminator.
+        void appendTo(SortKeyByteSink sink) {
+            assert (len > 0 && buffer[len - 1] == 1);
+            sink.Append(buffer, len - 1);
+        }
+
+        // Grows the buffer: at least double, at least len + 2 * appendCapacity,
+        // and at least 200 bytes. Always returns true in Java (allocation throws on failure).
+        private boolean ensureCapacity(int appendCapacity) {
+            /* not used in Java -- if (!ok) {
+                return false;
+            } */
+            int newCapacity = 2 * buffer.length;
+            int altCapacity = len + 2 * appendCapacity;
+            if (newCapacity < altCapacity) {
+                newCapacity = altCapacity;
+            }
+            if (newCapacity < 200) {
+                newCapacity = 200;
+            }
+            byte[] newbuf = new byte[newCapacity];
+            System.arraycopy(buffer, 0, newbuf, 0, len);
+            buffer = newbuf;
+
+            return true;
+        }
+    }
+
+    // Returns a fresh buffer for the given level if its bit is set in levels, else null
+    // (the level is then never touched because the corresponding flag checks guard all uses).
+    private static SortKeyLevel getSortKeyLevel(int levels, int level) {
+        return (levels & level) != 0 ? new SortKeyLevel() : null;
+    }
+
+    private CollationKeys() {
+    } // no instantiation: all members are static
+
+    // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
+    private static final int SEC_COMMON_LOW = Collation.COMMON_BYTE;
+    private static final int SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
+    static final int SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; // read by CollationDataReader
+    private static final int SEC_COMMON_MAX_COUNT = 0x21;
+
+    // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
+    private static final int CASE_LOWER_FIRST_COMMON_LOW = 1;
+    private static final int CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
+    private static final int CASE_LOWER_FIRST_COMMON_HIGH = 13;
+    private static final int CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
+
+    // Case level, upperFirst: Compress up to 13 common weights as 3..15.
+    private static final int CASE_UPPER_FIRST_COMMON_LOW = 3;
+    @SuppressWarnings("unused")
+    private static final int CASE_UPPER_FIRST_COMMON_HIGH = 15;
+    private static final int CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
+
+    // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
+    private static final int TER_ONLY_COMMON_LOW = Collation.COMMON_BYTE;
+    private static final int TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
+    private static final int TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
+    private static final int TER_ONLY_COMMON_MAX_COUNT = 0x61;
+
+    // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
+    private static final int TER_LOWER_FIRST_COMMON_LOW = Collation.COMMON_BYTE;
+    private static final int TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
+    private static final int TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
+    private static final int TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
+
+    // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
+    private static final int TER_UPPER_FIRST_COMMON_LOW = Collation.COMMON_BYTE + 0x80;
+    private static final int TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
+    private static final int TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
+    private static final int TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
+
+    // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
+    private static final int QUAT_COMMON_LOW = 0x1c;
+    private static final int QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
+    private static final int QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
+    private static final int QUAT_COMMON_MAX_COUNT = 0x71;
+    // Primary weights shifted to quaternary level must be encoded with
+    // a lead byte below the common-weight compression range.
+    private static final int QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
+
+    /**
+     * Map from collation strength (UColAttributeValue) to a mask of Collation.Level bits up to that
+     * strength, excluding the CASE_LEVEL which is independent of the strength, and excluding
+     * IDENTICAL_LEVEL which this function does not write.
+     */
+    private static final int levelMasks[] = new int[] {
+        2, // UCOL_PRIMARY -> PRIMARY_LEVEL
+        6, // UCOL_SECONDARY -> up to SECONDARY_LEVEL
+        0x16, // UCOL_TERTIARY -> up to TERTIARY_LEVEL
+        0x36, // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
+        // Unused strength slots between QUATERNARY and IDENTICAL.
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0,
+        0x36 // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
+    };
+
+    /**
+     * Writes the sort key bytes for minLevel up to the iterator data's strength. Optionally writes
+     * the case level. Stops writing levels when callback.needToWrite(level) returns false.
+     * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE.
+     *
+     * @param iter produces the CEs; consumed until the NO_CE sentinel
+     * @param compressibleBytes indexed by the un-reordered primary lead byte
+     * @param settings supplies strength, alternate handling, reordering and case options
+     * @param sink receives the primary level immediately and the other levels at the end
+     * @param minLevel lowest Collation.Level to write
+     * @param callback consulted before each level after the primary is written
+     * @param preflight true when the whole sort key length must be computed; false allows
+     *            an early return once the primary level has overflowed the sink
+     */
+    public static void writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes,
+            CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback,
+            boolean preflight) {
+
+        int options = settings.options;
+        // Set of levels to process and write.
+        int levels = levelMasks[CollationSettings.getStrength(options)];
+        if ((options & CollationSettings.CASE_LEVEL) != 0) {
+            levels |= Collation.CASE_LEVEL_FLAG;
+        }
+        // Minus the levels below minLevel.
+        levels &= ~((1 << minLevel) - 1);
+        if (levels == 0) {
+            return;
+        }
+
+        long variableTop;
+        if ((options & CollationSettings.ALTERNATE_MASK) == 0) {
+            variableTop = 0;
+        } else {
+            // +1 so that we can use "<" and primary ignorables test out early.
+            variableTop = settings.variableTop + 1;
+        }
+        byte[] reorderTable = settings.reorderTable;
+
+        int tertiaryMask = CollationSettings.getTertiaryMask(options);
+
+        // Scratch array for the 2nd..4th primary bytes, reused per CE.
+        byte[] p234 = new byte[3];
+        // Per-level buffers; null when the corresponding level bit is not set.
+        SortKeyLevel cases = getSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG);
+        SortKeyLevel secondaries = getSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG);
+        SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG);
+        SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG);
+
+        int compressedP1 = 0; // 0==no compression; otherwise reordered compressible lead byte
+        // Run-length counters for pending common weights on each level.
+        int commonCases = 0;
+        int commonSecondaries = 0;
+        int commonTertiaries = 0;
+        int commonQuaternaries = 0;
+
+        int prevSecondary = 0;
+        boolean anyMergeSeparators = false;
+
+        for (;;) {
+            // No need to keep all CEs in the buffer when we write a sort key.
+            iter.clearCEsIfNoneRemaining();
+            long ce = iter.nextCE();
+            long p = ce >>> 32;
+            if (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY) {
+                // Variable CE, shift it to quaternary level.
+                // Ignore all following primary ignorables, and shift further variable CEs.
+                if (commonQuaternaries != 0) {
+                    --commonQuaternaries;
+                    while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
+                        quaternaries.appendByte(QUAT_COMMON_MIDDLE);
+                        commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
+                    }
+                    // Shifted primary weights are lower than the common weight.
+                    quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
+                    commonQuaternaries = 0;
+                }
+                do {
+                    if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
+                        int p1 = (int) p >>> 24;
+                        if (reorderTable != null) {
+                            p1 = reorderTable[p1] & 0xff;
+                        }
+                        if (p1 >= QUAT_SHIFTED_LIMIT_BYTE) {
+                            // Prevent shifted primary lead bytes from
+                            // overlapping with the common compression range.
+                            quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
+                        }
+                        quaternaries.appendWeight32((p1 << 24) | (p & 0xffffff));
+                    }
+                    // Skip following primary ignorables.
+                    do {
+                        ce = iter.nextCE();
+                        p = ce >>> 32;
+                    } while (p == 0);
+                } while (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY);
+            }
+            // ce could be primary ignorable, or NO_CE, or the merge separator,
+            // or a regular primary CE, but it is not variable.
+            // If ce==NO_CE, then write nothing for the primary level but
+            // terminate compression on all levels and then exit the loop.
+            if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) {
+                int p1 = (int) p >>> 24;
+                if (reorderTable != null) {
+                    p1 = reorderTable[p1] & 0xff;
+                }
+                if (p1 != compressedP1) {
+                    if (compressedP1 != 0) {
+                        if (p1 < compressedP1) {
+                            // No primary compression terminator
+                            // at the end of the level or merged segment.
+                            if (p1 > Collation.MERGE_SEPARATOR_BYTE) {
+                                sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE);
+                            }
+                        } else {
+                            sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE);
+                        }
+                    }
+                    sink.Append(p1);
+                    // Test the un-reordered lead byte for compressibility but
+                    // remember the reordered lead byte.
+                    if (compressibleBytes[(int) p >>> 24]) {
+                        compressedP1 = p1;
+                    } else {
+                        compressedP1 = 0;
+                    }
+                }
+                byte p2 = (byte) (p >>> 16);
+                if (p2 != 0) {
+                    // Append the 2nd..4th primary bytes, omitting trailing zeros.
+                    p234[0] = p2;
+                    p234[1] = (byte) (p >>> 8);
+                    p234[2] = (byte) p;
+                    sink.Append(p234, (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3);
+                }
+                // Optimization for internalNextSortKeyPart():
+                // When the primary level overflows we can stop because we need not
+                // calculate (preflight) the whole sort key length.
+                if (!preflight && sink.Overflowed()) {
+                    // not used in Java -- if (!sink.IsOk()) {
+                    // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
+                    // C implementation. IsOk() in Java always returns true, so this
+                    // is a dead code.
+                    return;
+                }
+            }
+
+            int lower32 = (int) ce;
+            if (lower32 == 0) {
+                continue;
+            } // completely ignorable, no secondary/case/tertiary/quaternary
+
+            if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
+                int s = lower32 >>> 16; // 16 bits
+                if (s == 0) {
+                    // secondary ignorable
+                } else if (s == Collation.COMMON_WEIGHT16) {
+                    ++commonSecondaries;
+                } else if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) {
+                    // Forward secondary: flush any pending run of common weights first.
+                    if (commonSecondaries != 0) {
+                        --commonSecondaries;
+                        while (commonSecondaries >= SEC_COMMON_MAX_COUNT) {
+                            secondaries.appendByte(SEC_COMMON_MIDDLE);
+                            commonSecondaries -= SEC_COMMON_MAX_COUNT;
+                        }
+                        int b;
+                        if (s < Collation.COMMON_WEIGHT16) {
+                            b = SEC_COMMON_LOW + commonSecondaries;
+                        } else {
+                            b = SEC_COMMON_HIGH - commonSecondaries;
+                        }
+                        secondaries.appendByte(b);
+                        commonSecondaries = 0;
+                    }
+                    secondaries.appendWeight16(s);
+                } else {
+                    if (commonSecondaries != 0) {
+                        --commonSecondaries;
+                        // Append reverse weights. The level will be re-reversed later.
+                        int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
+                        int b;
+                        if (prevSecondary < Collation.COMMON_WEIGHT16) {
+                            b = SEC_COMMON_LOW + remainder;
+                        } else {
+                            b = SEC_COMMON_HIGH - remainder;
+                        }
+                        secondaries.appendByte(b);
+                        commonSecondaries -= remainder;
+                        // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
+                        while (commonSecondaries > 0) { // same as >= SEC_COMMON_MAX_COUNT
+                            secondaries.appendByte(SEC_COMMON_MIDDLE);
+                            commonSecondaries -= SEC_COMMON_MAX_COUNT;
+                        }
+                        // commonSecondaries == 0
+                    }
+                    // Reduce separators so that we can look for byte<=1 later.
+                    if (s <= Collation.MERGE_SEPARATOR_WEIGHT16) {
+                        if (s == Collation.MERGE_SEPARATOR_WEIGHT16) {
+                            anyMergeSeparators = true;
+                        }
+                        secondaries.appendByte((s >>> 8) - 1);
+                    } else {
+                        secondaries.appendReverseWeight16(s);
+                    }
+                    prevSecondary = s;
+                }
+            }
+
+            if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
+                if ((CollationSettings.getStrength(options) == Collator.PRIMARY) ? p == 0
+                        : (lower32 >>> 16) == 0) {
+                    // Primary+caseLevel: Ignore case level weights of primary ignorables.
+                    // Otherwise: Ignore case level weights of secondary ignorables.
+                    // For details see the comments in the CollationCompare class.
+                } else {
+                    int c = (lower32 >>> 8) & 0xff; // case bits & tertiary lead byte
+                    assert ((c & 0xc0) != 0xc0);
+                    if ((c & 0xc0) == 0 && c > Collation.MERGE_SEPARATOR_BYTE) {
+                        ++commonCases;
+                    } else {
+                        if ((options & CollationSettings.UPPER_FIRST) == 0) {
+                            // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14,
+                            // upper=15.
+                            if (commonCases != 0) {
+                                --commonCases;
+                                while (commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
+                                    cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
+                                    commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
+                                }
+                                int b;
+                                if (c <= Collation.MERGE_SEPARATOR_BYTE) {
+                                    b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
+                                } else {
+                                    b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
+                                }
+                                cases.appendByte(b << 4);
+                                commonCases = 0;
+                            }
+                            if (c > Collation.MERGE_SEPARATOR_BYTE) {
+                                c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >>> 6)) << 4; // 14 or 15
+                            }
+                        } else {
+                            // upperFirst: Compress common weights to nibbles 3..15, mixed=2,
+                            // upper=1.
+                            // The compressed common case weights only go up from the "low" value
+                            // because with upperFirst the common weight is the highest one.
+                            if (commonCases != 0) {
+                                --commonCases;
+                                while (commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
+                                    cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
+                                    commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
+                                }
+                                cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
+                                commonCases = 0;
+                            }
+                            if (c > Collation.MERGE_SEPARATOR_BYTE) {
+                                c = (CASE_UPPER_FIRST_COMMON_LOW - (c >>> 6)) << 4; // 2 or 1
+                            }
+                        }
+                        // c is a separator byte 01 or 02,
+                        // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
+                        cases.appendByte(c);
+                    }
+                }
+            }
+
+            if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
+                int t = lower32 & tertiaryMask;
+                assert ((lower32 & 0xc000) != 0xc000);
+                if (t == Collation.COMMON_WEIGHT16) {
+                    ++commonTertiaries;
+                } else if ((tertiaryMask & 0x8000) == 0) {
+                    // Tertiary weights without case bits.
+                    // Move lead bytes 06..3F to C6..FF for a large common-weight range.
+                    if (commonTertiaries != 0) {
+                        --commonTertiaries;
+                        while (commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
+                            tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
+                            commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
+                        }
+                        int b;
+                        if (t < Collation.COMMON_WEIGHT16) {
+                            b = TER_ONLY_COMMON_LOW + commonTertiaries;
+                        } else {
+                            b = TER_ONLY_COMMON_HIGH - commonTertiaries;
+                        }
+                        tertiaries.appendByte(b);
+                        commonTertiaries = 0;
+                    }
+                    if (t > Collation.COMMON_WEIGHT16) {
+                        t += 0xc000;
+                    }
+                    tertiaries.appendWeight16(t);
+                } else if ((options & CollationSettings.UPPER_FIRST) == 0) {
+                    // Tertiary weights with caseFirst=lowerFirst.
+                    // Move lead bytes 06..BF to 46..FF for the common-weight range.
+                    if (commonTertiaries != 0) {
+                        --commonTertiaries;
+                        while (commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
+                            tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
+                            commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
+                        }
+                        int b;
+                        if (t < Collation.COMMON_WEIGHT16) {
+                            b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
+                        } else {
+                            b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
+                        }
+                        tertiaries.appendByte(b);
+                        commonTertiaries = 0;
+                    }
+                    if (t > Collation.COMMON_WEIGHT16) {
+                        t += 0x4000;
+                    }
+                    tertiaries.appendWeight16(t);
+                } else {
+                    // Tertiary weights with caseFirst=upperFirst.
+                    // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
+                    // to keep tertiary CEs well-formed.
+                    // Their case+tertiary weights must be greater than those of
+                    // primary and secondary CEs.
+                    //
+                    // Separators 01..02 -> 01..02 (unchanged)
+                    // Lowercase 03..04 -> 83..84 (includes uncased)
+                    // Common weight 05 -> 85..C5 (common-weight compression range)
+                    // Lowercase 06..3F -> C6..FF
+                    // Mixed case 43..7F -> 43..7F
+                    // Uppercase 83..BF -> 03..3F
+                    // Tertiary CE 86..BF -> C6..FF
+                    if (t <= Collation.MERGE_SEPARATOR_WEIGHT16) {
+                        // Keep separators unchanged.
+                    } else if ((lower32 >>> 16) != 0) {
+                        // Invert case bits of primary & secondary CEs.
+                        t ^= 0xc000;
+                        if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
+                            t -= 0x4000;
+                        }
+                    } else {
+                        // Keep uppercase bits of tertiary CEs.
+                        assert (0x8600 <= t && t <= 0xbfff);
+                        t += 0x4000;
+                    }
+                    if (commonTertiaries != 0) {
+                        --commonTertiaries;
+                        while (commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
+                            tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
+                            commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
+                        }
+                        int b;
+                        if (t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
+                            b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
+                        } else {
+                            b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
+                        }
+                        tertiaries.appendByte(b);
+                        commonTertiaries = 0;
+                    }
+                    tertiaries.appendWeight16(t);
+                }
+            }
+
+            if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
+                int q = lower32 & 0xffff;
+                if ((q & 0xc0) == 0 && q > Collation.MERGE_SEPARATOR_WEIGHT16) {
+                    ++commonQuaternaries;
+                } else if (q <= Collation.MERGE_SEPARATOR_WEIGHT16
+                        && (options & CollationSettings.ALTERNATE_MASK) == 0
+                        && (quaternaries.isEmpty() || quaternaries.getAt(quaternaries.length() - 1) == Collation.MERGE_SEPARATOR_BYTE)) {
+                    // If alternate=non-ignorable and there are only
+                    // common quaternary weights between two separators,
+                    // then we need not write anything between these separators.
+                    // The only weights greater than the merge separator and less than the common
+                    // weight
+                    // are shifted primary weights, which are not generated for
+                    // alternate=non-ignorable.
+                    // There are also exactly as many quaternary weights as tertiary weights,
+                    // so level length differences are handled already on tertiary level.
+                    // Any above-common quaternary weight will compare greater regardless.
+                    quaternaries.appendByte(q >>> 8);
+                } else {
+                    if (q <= Collation.MERGE_SEPARATOR_WEIGHT16) {
+                        q >>>= 8;
+                    } else {
+                        // Encode the two case/continuation bits as FC..FF.
+                        q = 0xfc + ((q >>> 6) & 3);
+                    }
+                    if (commonQuaternaries != 0) {
+                        --commonQuaternaries;
+                        while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
+                            quaternaries.appendByte(QUAT_COMMON_MIDDLE);
+                            commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
+                        }
+                        int b;
+                        if (q < QUAT_COMMON_LOW) {
+                            b = QUAT_COMMON_LOW + commonQuaternaries;
+                        } else {
+                            b = QUAT_COMMON_HIGH - commonQuaternaries;
+                        }
+                        quaternaries.appendByte(b);
+                        commonQuaternaries = 0;
+                    }
+                    quaternaries.appendByte(q);
+                }
+            }
+
+            if ((lower32 >>> 24) == Collation.LEVEL_SEPARATOR_BYTE) {
+                break;
+            } // ce == NO_CE
+        }
+
+        // Append the beyond-primary levels.
+        // not used in Java -- boolean ok = true;
+        if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
+            if (!callback.needToWrite(Collation.SECONDARY_LEVEL)) {
+                return;
+            }
+            // not used in Java -- ok &= secondaries.isOk();
+            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
+            byte[] secs = secondaries.data();
+            int length = secondaries.length() - 1; // Ignore the trailing NO_CE.
+            if ((options & CollationSettings.BACKWARD_SECONDARY) != 0) {
+                // The backwards secondary level compares secondary weights backwards
+                // within segments separated by the merge separator (U+FFFE, weight 02).
+                // The separator weights 01 & 02 were reduced to 00 & 01 so that
+                // we do not accidentally separate at a _second_ weight byte of 02.
+                int start = 0;
+                for (;;) {
+                    // Find the merge separator or the NO_CE terminator.
+                    int limit;
+                    if (anyMergeSeparators) {
+                        limit = start;
+                        while (((int)secs[limit] & 0xff) > 1) {
+                            ++limit;
+                        }
+                    } else {
+                        limit = length;
+                    }
+                    // Reverse this segment.
+                    if (start < limit) {
+                        for (int i = start, j = limit - 1; i < j; i++, j--) {
+                            byte tmp = secs[i];
+                            secs[i] = secs[j];
+                            secs[j] = tmp;
+                        }
+                    }
+                    // Did we reach the end of the string?
+                    if (secs[limit] == 0) {
+                        break;
+                    }
+                    // Restore the merge separator.
+                    secs[limit] = 2;
+                    // Skip the merge separator and continue.
+                    start = limit + 1;
+                }
+            }
+            sink.Append(secs, length);
+        }
+
+        if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
+            if (!callback.needToWrite(Collation.CASE_LEVEL)) {
+                return;
+            }
+            // not used in Java -- ok &= cases.isOk();
+            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
+            // Write pairs of nibbles as bytes, except separator bytes as themselves.
+            int length = cases.length() - 1; // Ignore the trailing NO_CE.
+            byte b = 0;
+            // NOTE(review): b and c are signed bytes while the C++ original uses uint8_t.
+            // Nibble values 0x80..0xF0 are negative in Java, so `c <= MERGE_SEPARATOR_BYTE`
+            // would match them and `c >>> 4` sign-extends before shifting -- verify this
+            // pairing against ICU4C collationkeys.cpp for case weights with the high bit set.
+            for (int i = 0; i < length; ++i) {
+                byte c = cases.getAt(i);
+                if (c <= Collation.MERGE_SEPARATOR_BYTE) {
+                    assert (c != 0);
+                    if (b != 0) {
+                        sink.Append(b);
+                        b = 0;
+                    }
+                    sink.Append(c);
+                } else {
+                    assert ((c & 0xf) == 0);
+                    if (b == 0) {
+                        b = c;
+                    } else {
+                        sink.Append(b | (c >>> 4));
+                        b = 0;
+                    }
+                }
+            }
+            if (b != 0) {
+                sink.Append(b);
+            }
+        }
+
+        if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
+            if (!callback.needToWrite(Collation.TERTIARY_LEVEL)) {
+                return;
+            }
+            // not used in Java -- ok &= tertiaries.isOk();
+            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
+            tertiaries.appendTo(sink);
+        }
+
+        if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
+            if (!callback.needToWrite(Collation.QUATERNARY_LEVEL)) {
+                return;
+            }
+            // not used in Java -- ok &= quaternaries.isOk();
+            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
+            quaternaries.appendTo(sink);
+        }
+
+        // not used in Java -- if (!ok || !sink.IsOk()) {
+        // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
+        // C implementation. IsOk() in Java always returns true, so this
+        // is a dead code.
+    }
+}
--- /dev/null
+/*
+*******************************************************************************
+*
+* Copyright (C) 1996-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* CollationLoader.java, ported from ucol_res.cpp
+*
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.MissingResourceException;
+
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+
+/**
+ * Loads collation data: the root collation rules and per-locale collation
+ * tailorings from the ICU collation resource-bundle tree.
+ * Ported from ucol_res.cpp.
+ */
+public final class CollationLoader {
+
+    // Not instantiable; all methods are static.
+    private CollationLoader() {
+    }
+
+    // Cached root collation rules string; written once under the class lock,
+    // volatile so the fast path in loadRootRules() reads it safely.
+    private static volatile String rootRules = null;
+
+    // Lazily loads the root "UCARules" string using double-checked locking:
+    // the unsynchronized volatile read avoids locking after first initialization.
+    private static void loadRootRules() {
+        if (rootRules != null) {
+            return;
+        }
+        synchronized(CollationLoader.class) {
+            // Re-check under the lock: another thread may have loaded it first.
+            if (rootRules == null) {
+                UResourceBundle rootBundle = UResourceBundle.getBundleInstance(
+                        ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ROOT);
+                rootRules = rootBundle.getString("UCARules");
+            }
+        }
+    }
+
+    // C++: static void appendRootRules(UnicodeString &s)
+    /** Returns the root collation (UCA) rules string, loading it on first use. */
+    public static String getRootRules() {
+        loadRootRules();
+        return rootRules;
+    }
+
+    /**
+     * Returns the tailoring rules ("Sequence" string) for the given locale and
+     * collation type, using the locale fallback chain of the collation bundle tree.
+     * Throws MissingResourceException if the bundle or entry is absent.
+     */
+    static String loadRules(ULocale locale, CharSequence collationType) {
+        ICUResourceBundle bundle = (ICUResourceBundle) UResourceBundle.getBundleInstance(
+                ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale);
+        return bundle.getWithFallback("collations/" + collationType).getString("Sequence");
+    }
+
+ private static final UResourceBundle getWithFallback(UResourceBundle table, String entryName) {
+ try {
+ return ((ICUResourceBundle)table).getWithFallback(entryName);
+ } catch(MissingResourceException e) {
+ return null;
+ }
+ }
+
+ public static CollationTailoring loadTailoring(ULocale locale, Output<ULocale> outValidLocale) {
+
+ // Java porting note: ICU4J getWithFallback/getStringWithFallback currently does not
+ // work well when alias table is involved in a resource path, unless full path is specified.
+ // For now, collation resources does not contain such data, so the code below should work fine.
+
+ CollationTailoring root = CollationRoot.getRoot();
+ String localeName = locale.getName();
+ if (localeName.length() == 0 || localeName.equals("root")) {
+ outValidLocale.value = ULocale.ROOT;
+ return root;
+ }
+
+ UResourceBundle bundle = null;
+ try {
+ bundle = UResourceBundle.getBundleInstance(
+ ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale);
+ } catch (MissingResourceException e) {
+ outValidLocale.value = ULocale.ROOT;
+ return root;
+ }
+
+ ULocale validLocale = bundle.getULocale();
+ // Normalize the root locale. See
+ // http://bugs.icu-project.org/trac/ticket/10715
+ String validLocaleName = validLocale.getName();
+ if (validLocaleName.length() == 0 || validLocaleName.equals("root")) {
+ validLocale = ULocale.ROOT;
+ }
+ outValidLocale.value = validLocale;
+
+ // There are zero or more tailorings in the collations table.
+ UResourceBundle collations;
+ try {
+ collations = ((ICUResourceBundle)bundle).get("collations");
+ if (collations == null) {
+ return root;
+ }
+ } catch(MissingResourceException ignored) {
+ return root;
+ }
+
+ // Fetch the collation type from the locale ID and the default type from the data.
+ String type = locale.getKeywordValue("collation");
+ String defaultType = "standard";
+
+ try {
+ String defT = ((ICUResourceBundle)collations).getStringWithFallback("default");
+ if (defT != null) {
+ defaultType = defT;
+ }
+ } catch(MissingResourceException ignored) {
+ }
+
+ if (type == null || type.equals("default")) {
+ type = defaultType;
+ }
+
+ // Load the collations/type tailoring, with type fallback.
+
+ // Java porting note: typeFallback is used for setting U_USING_DEFAULT_WARNING in
+ // ICU4C, but not used by ICU4J
+
+ // boolean typeFallback = false;
+ UResourceBundle data = getWithFallback(collations, type);
+ if (data == null &&
+ type.length() > 6 && type.startsWith("search")) {
+ // fall back from something like "searchjl" to "search"
+ // typeFallback = true;
+ type = "search";
+ data = getWithFallback(collations, type);
+ }
+
+ if (data == null && !type.equals(defaultType)) {
+ // fall back to the default type
+ // typeFallback = true;
+ type = defaultType;
+ data = getWithFallback(collations, type);
+ }
+
+ if (data == null && !type.equals("standard")) {
+ // fall back to the "standard" type
+ // typeFallback = true;
+ type = "standard";
+ data = getWithFallback(collations, type);
+ }
+
+ if (data == null) {
+ return root;
+ }
+
+ // Is this the same as the root collator? If so, then use that instead.
+ ULocale actualLocale = data.getULocale();
+ // http://bugs.icu-project.org/trac/ticket/10715 ICUResourceBundle(root).getULocale() != ULocale.ROOT
+ // Therefore not just if (actualLocale.equals(ULocale.ROOT) && type.equals("standard")) {
+ String actualLocaleName = actualLocale.getName();
+ if (actualLocaleName.length() == 0 || actualLocaleName.equals("root")) {
+ actualLocale = ULocale.ROOT;
+ if (type.equals("standard")) {
+ return root;
+ }
+ }
+
+ CollationTailoring t = new CollationTailoring(root.settings);
+ t.actualLocale = actualLocale;
+
+ // deserialize
+ UResourceBundle binary = ((ICUResourceBundle)data).get("%%CollationBin");
+ byte[] inBytes = binary.getBinary(null);
+ ByteArrayInputStream inStream = new ByteArrayInputStream(inBytes);
+ try {
+ CollationDataReader.read(root, inStream, t);
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to load collation tailoring data for locale:"
+ + actualLocale + " type:" + type, e);
+ } // No need to close BAIS.
+
+ // Try to fetch the optional rules string.
+ try {
+ String s = ((ICUResourceBundle)data).getString("Sequence");
+ if (s != null) {
+ t.rules = s;
+ }
+ } catch(MissingResourceException ignored) {
+ }
+
+ // Set the collation types on the informational locales,
+ // except when they match the default types (for brevity and backwards compatibility).
+ // For the valid locale, suppress the default type.
+ if (!type.equals(defaultType)) {
+ outValidLocale.value = validLocale.setKeywordValue("collation", type);
+ }
+
+ // For the actual locale, suppress the default type *according to the actual locale*.
+ // For example, zh has default=pinyin and contains all of the Chinese tailorings.
+ // zh_Hant has default=stroke but has no other data.
+ // For the valid locale "zh_Hant" we need to suppress stroke.
+ // For the actual locale "zh" we need to suppress pinyin instead.
+ if (!actualLocale.equals(validLocale)) {
+ // Opening a bundle for the actual locale should always succeed.
+ UResourceBundle actualBundle = UResourceBundle.getBundleInstance(
+ ICUResourceBundle.ICU_COLLATION_BASE_NAME, actualLocale);
+ try {
+ String defT = ((ICUResourceBundle)actualBundle).getStringWithFallback("collations/default");
+ if (defT != null) {
+ defaultType = defT;
+ }
+ } catch(MissingResourceException ignored) {
+ }
+ }
+
+ if (!type.equals(defaultType)) {
+ t.actualLocale = t.actualLocale.setKeywordValue("collation", type);
+ }
+
+ // if (typeFallback) {
+ // ICU4C implementation sets U_USING_DEFAULT_WARNING here
+ // }
+
+ return t;
+ }
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2012-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationRoot.java, ported from collationroot.h/.cpp
+*
+* C++ version created on: 2012dec17
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.MissingResourceException;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+
+/**
+ * Collation root provider.
+ * Loads the root collation data (coll/ucadata.icu) once, in a static
+ * initializer, and exposes the shared root tailoring/data/settings.
+ */
+public final class CollationRoot { // purely static
+ // Set exactly once by the static initializer below.
+ // Exactly one of the two fields is non-null after class initialization.
+ private static final CollationTailoring rootSingleton;
+ private static final RuntimeException exception;
+
+ /**
+ * Returns the root collation tailoring.
+ * If loading the root data failed at class-initialization time,
+ * the captured exception is re-thrown on every call.
+ */
+ public static final CollationTailoring getRoot() {
+ if(exception != null) {
+ throw exception;
+ }
+ return rootSingleton;
+ }
+ /** Returns the root collation data. */
+ public static final CollationData getData() {
+ CollationTailoring root = getRoot();
+ return root.data;
+ }
+ /** Returns the root collation settings, frozen/read-only. */
+ static final CollationSettings getSettings() {
+ CollationTailoring root = getRoot();
+ return root.settings.readOnly();
+ }
+
+ static { // Corresponds to C++ load() function.
+ CollationTailoring t = new CollationTailoring(null);
+ String path = ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu";
+ InputStream inBytes = ICUData.getRequiredStream(path);
+ RuntimeException e2 = null;
+ try {
+ CollationDataReader.read(null, inBytes, t);
+ } catch(IOException e) {
+ // Never throw from a static initializer: remember the failure
+ // and report it from getRoot() instead.
+ t = null;
+ e2 = new MissingResourceException(
+ "IOException while reading CLDR root data",
+ "CollationRoot", path);
+ } catch(RuntimeException e) {
+ t = null;
+ e2 = e;
+ }
+ // NOTE(review): inBytes is not explicitly closed here — presumably
+ // CollationDataReader.read() consumes/closes the stream; confirm.
+ rootSingleton = t;
+ exception = e2;
+ }
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationRootElements.java, ported from collationrootelements.h/.cpp
+*
+* C++ version created on: 2013mar01
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+/**
+ * Container and access methods for collation elements and weights
+ * that occur in the root collator.
+ * Needed for finding boundaries for building a tailoring.
+ *
+ * This class takes and returns 16-bit secondary and tertiary weights.
+ */
+public final class CollationRootElements {
+ /**
+ * @param rootElements the root elements table; the array is aliased,
+ * not copied, and must not be modified by the caller
+ */
+ public CollationRootElements(long[] rootElements) {
+ elements = rootElements;
+ }
+
+ /**
+ * Higher than any root primary.
+ */
+ public static final long PRIMARY_SENTINEL = 0xffffff00L;
+
+ /**
+ * Flag in a root element, set if the element contains secondary & tertiary weights,
+ * rather than a primary.
+ */
+ public static final int SEC_TER_DELTA_FLAG = 0x80;
+ /**
+ * Mask for getting the primary range step value from a primary-range-end element.
+ */
+ public static final int PRIMARY_STEP_MASK = 0x7f;
+
+ /**
+ * Index of the first CE with a non-zero tertiary weight.
+ * Same as the start of the compact root elements table.
+ */
+ public static final int IX_FIRST_TERTIARY_INDEX = 0;
+ /**
+ * Index of the first CE with a non-zero secondary weight.
+ */
+ static final int IX_FIRST_SECONDARY_INDEX = 1;
+ /**
+ * Index of the first CE with a non-zero primary weight.
+ */
+ static final int IX_FIRST_PRIMARY_INDEX = 2;
+ /**
+ * Must match Collation.COMMON_SEC_AND_TER_CE.
+ */
+ static final int IX_COMMON_SEC_AND_TER_CE = 3;
+ /**
+ * Secondary & tertiary boundaries.
+ * Bits 31..24: [fixed last secondary common byte 45]
+ * Bits 23..16: [fixed first ignorable secondary byte 80]
+ * Bits 15.. 8: reserved, 0
+ * Bits 7.. 0: [fixed first ignorable tertiary byte 3C]
+ */
+ static final int IX_SEC_TER_BOUNDARIES = 4;
+ /**
+ * The current number of indexes.
+ * Currently the same as elements[IX_FIRST_TERTIARY_INDEX].
+ */
+ static final int IX_COUNT = 5;
+
+ /**
+ * Returns the boundary between tertiary weights of primary/secondary CEs
+ * and those of tertiary CEs.
+ * This is the upper limit for tertiaries of primary/secondary CEs.
+ * This minus one is the lower limit for tertiaries of tertiary CEs.
+ */
+ public int getTertiaryBoundary() {
+ // Bits 7..0 of the boundaries word, shifted into the high byte of a weight16.
+ return ((int)elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00;
+ }
+
+ /**
+ * Returns the first assigned tertiary CE.
+ */
+ long getFirstTertiaryCE() {
+ return elements[(int)elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
+ }
+
+ /**
+ * Returns the last assigned tertiary CE.
+ */
+ long getLastTertiaryCE() {
+ return elements[(int)elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
+ }
+
+ /**
+ * Returns the last common secondary weight.
+ * This is the lower limit for secondaries of primary CEs.
+ */
+ public int getLastCommonSecondary() {
+ // Bits 31..24 of the boundaries word, shifted into the high byte of a weight16.
+ return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00;
+ }
+
+ /**
+ * Returns the boundary between secondary weights of primary CEs
+ * and those of secondary CEs.
+ * This is the upper limit for secondaries of primary CEs.
+ * This minus one is the lower limit for secondaries of secondary CEs.
+ */
+ public int getSecondaryBoundary() {
+ // Bits 23..16 of the boundaries word, shifted into the high byte of a weight16.
+ return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00;
+ }
+
+ /**
+ * Returns the first assigned secondary CE.
+ */
+ long getFirstSecondaryCE() {
+ return elements[(int)elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
+ }
+
+ /**
+ * Returns the last assigned secondary CE.
+ */
+ long getLastSecondaryCE() {
+ return elements[(int)elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
+ }
+
+ /**
+ * Returns the first assigned primary weight.
+ */
+ long getFirstPrimary() {
+ return elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]; // step=0: cannot be a range end
+ }
+
+ /**
+ * Returns the first assigned primary CE.
+ */
+ long getFirstPrimaryCE() {
+ return Collation.makeCE(getFirstPrimary());
+ }
+
+ /**
+ * Returns the last root CE with a primary weight before p.
+ * Intended only for reordering group boundaries.
+ */
+ long lastCEWithPrimaryBefore(long p) {
+ if(p == 0) { return 0; }
+ assert(p > elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]);
+ int index = findP(p);
+ long q = elements[index];
+ long secTer;
+ if(p == (q & 0xffffff00L)) {
+ // p == elements[index] is a root primary. Find the CE before it.
+ // We must not be in a primary range.
+ assert((q & PRIMARY_STEP_MASK) == 0);
+ secTer = elements[index - 1];
+ if((secTer & SEC_TER_DELTA_FLAG) == 0) {
+ // Primary CE just before p.
+ p = secTer & 0xffffff00L;
+ secTer = Collation.COMMON_SEC_AND_TER_CE;
+ } else {
+ // secTer = last secondary & tertiary for the previous primary
+ index -= 2;
+ for(;;) {
+ // Scan backwards over the sec/ter deltas to their primary.
+ p = elements[index];
+ if((p & SEC_TER_DELTA_FLAG) == 0) {
+ p &= 0xffffff00L;
+ break;
+ }
+ --index;
+ }
+ }
+ } else {
+ // p > elements[index] which is the previous primary.
+ // Find the last secondary & tertiary weights for it.
+ p = q & 0xffffff00L;
+ secTer = Collation.COMMON_SEC_AND_TER_CE;
+ for(;;) {
+ q = elements[++index];
+ if((q & SEC_TER_DELTA_FLAG) == 0) {
+ // We must not be in a primary range.
+ assert((q & PRIMARY_STEP_MASK) == 0);
+ break;
+ }
+ secTer = q;
+ }
+ }
+ // Combine the primary and its last secondary/tertiary into one 64-bit CE.
+ return (p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
+ }
+
+ /**
+ * Returns the first root CE with a primary weight of at least p.
+ * Intended only for reordering group boundaries.
+ */
+ long firstCEWithPrimaryAtLeast(long p) {
+ if(p == 0) { return 0; }
+ int index = findP(p);
+ if(p != (elements[index] & 0xffffff00L)) {
+ for(;;) {
+ p = elements[++index];
+ if((p & SEC_TER_DELTA_FLAG) == 0) {
+ // First primary after p. We must not be in a primary range.
+ assert((p & PRIMARY_STEP_MASK) == 0);
+ break;
+ }
+ }
+ }
+ // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
+ return (p << 32) | Collation.COMMON_SEC_AND_TER_CE;
+ }
+
+ /**
+ * Returns the primary weight before p.
+ * p must be greater than the first root primary.
+ */
+ long getPrimaryBefore(long p, boolean isCompressible) {
+ int index = findPrimary(p);
+ int step;
+ long q = elements[index];
+ if(p == (q & 0xffffff00L)) {
+ // Found p itself. Return the previous primary.
+ // See if p is at the end of a previous range.
+ step = (int)q & PRIMARY_STEP_MASK;
+ if(step == 0) {
+ // p is not at the end of a range. Look for the previous primary.
+ do {
+ p = elements[--index];
+ } while((p & SEC_TER_DELTA_FLAG) != 0);
+ return p & 0xffffff00L;
+ }
+ } else {
+ // p is in a range, and not at the start.
+ long nextElement = elements[index + 1];
+ assert(isEndOfPrimaryRange(nextElement));
+ step = (int)nextElement & PRIMARY_STEP_MASK;
+ }
+ // Return the previous range primary.
+ if((p & 0xffff) == 0) {
+ return Collation.decTwoBytePrimaryByOneStep(p, isCompressible, step);
+ } else {
+ return Collation.decThreeBytePrimaryByOneStep(p, isCompressible, step);
+ }
+ }
+
+ /** Returns the secondary weight before [p, s]. */
+ int getSecondaryBefore(long p, int s) {
+ int index;
+ int previousSec, sec;
+ if(p == 0) {
+ index = (int)elements[IX_FIRST_SECONDARY_INDEX];
+ // Gap at the beginning of the secondary CE range.
+ previousSec = 0;
+ sec = (int)(elements[index] >> 16);
+ } else {
+ index = findPrimary(p) + 1;
+ previousSec = Collation.MERGE_SEPARATOR_WEIGHT16;
+ sec = Collation.COMMON_WEIGHT16;
+ }
+ assert(s >= sec);
+ // Linear scan of the sec/ter deltas until we reach s.
+ while(s > sec) {
+ previousSec = sec;
+ assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
+ sec = (int)(elements[index++] >> 16);
+ }
+ assert(sec == s);
+ return previousSec;
+ }
+
+ /** Returns the tertiary weight before [p, s, t]. */
+ int getTertiaryBefore(long p, int s, int t) {
+ assert((t & ~Collation.ONLY_TERTIARY_MASK) == 0);
+ int index;
+ int previousTer;
+ long secTer;
+ if(p == 0) {
+ if(s == 0) {
+ index = (int)elements[IX_FIRST_TERTIARY_INDEX];
+ // Gap at the beginning of the tertiary CE range.
+ previousTer = 0;
+ } else {
+ index = (int)elements[IX_FIRST_SECONDARY_INDEX];
+ previousTer = Collation.MERGE_SEPARATOR_WEIGHT16;
+ }
+ secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
+ } else {
+ index = findPrimary(p) + 1;
+ previousTer = Collation.MERGE_SEPARATOR_WEIGHT16;
+ secTer = Collation.COMMON_SEC_AND_TER_CE;
+ }
+ // Compare against the combined secondary-and-tertiary value.
+ long st = ((long)s << 16) | t;
+ while(st > secTer) {
+ if((int)(secTer >> 16) == s) { previousTer = (int)secTer; }
+ assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
+ secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
+ }
+ assert(secTer == st);
+ return previousTer & 0xffff;
+ }
+
+ /**
+ * Finds the index of the input primary.
+ * p must occur as a root primary, and must not be 0.
+ */
+ int findPrimary(long p) {
+ // Requirement: p must occur as a root primary.
+ assert((p & 0xff) == 0); // at most a 3-byte primary
+ int index = findP(p);
+ // If p is in a range, then we just assume that p is an actual primary in this range.
+ // (Too cumbersome/expensive to check.)
+ // Otherwise, it must be an exact match.
+ assert(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00L));
+ return index;
+ }
+
+ /**
+ * Returns the primary weight after p where index=findPrimary(p).
+ * p must be at least the first root primary.
+ */
+ long getPrimaryAfter(long p, int index, boolean isCompressible) {
+ assert(p == (elements[index] & 0xffffff00L) || isEndOfPrimaryRange(elements[index + 1]));
+ long q = elements[++index];
+ int step;
+ if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int)q & PRIMARY_STEP_MASK) != 0) {
+ // Return the next primary in this range.
+ if((p & 0xffff) == 0) {
+ return Collation.incTwoBytePrimaryByOffset(p, isCompressible, step);
+ } else {
+ return Collation.incThreeBytePrimaryByOffset(p, isCompressible, step);
+ }
+ } else {
+ // Return the next primary in the list.
+ while((q & SEC_TER_DELTA_FLAG) != 0) {
+ q = elements[++index];
+ }
+ assert((q & PRIMARY_STEP_MASK) == 0);
+ return q;
+ }
+ }
+ /**
+ * Returns the secondary weight after [p, s] where index=findPrimary(p)
+ * except use index=0 for p=0.
+ * Returns the gap limit if there is no secondary after s for this primary.
+ */
+ int getSecondaryAfter(int index, int s) {
+ int secLimit;
+ if(index == 0) {
+ // primary = 0
+ index = (int)elements[IX_FIRST_SECONDARY_INDEX];
+ // Gap at the end of the secondary CE range.
+ secLimit = 0x10000;
+ } else {
+ assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
+ ++index;
+ // Gap for secondaries of primary CEs.
+ secLimit = getSecondaryBoundary();
+ }
+ for(;;) {
+ long secTer = elements[index];
+ if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
+ int sec = (int)(secTer >> 16);
+ if(sec > s) { return sec; }
+ ++index;
+ }
+ }
+ /**
+ * Returns the tertiary weight after [p, s, t] where index=findPrimary(p)
+ * except use index=0 for p=0.
+ * Returns the gap limit if there is no tertiary after t for this primary+secondary.
+ */
+ int getTertiaryAfter(int index, int s, int t) {
+ int terLimit;
+ if(index == 0) {
+ // primary = 0
+ if(s == 0) {
+ index = (int)elements[IX_FIRST_TERTIARY_INDEX];
+ // Gap at the end of the tertiary CE range.
+ terLimit = 0x4000;
+ } else {
+ index = (int)elements[IX_FIRST_SECONDARY_INDEX];
+ // Gap for tertiaries of primary/secondary CEs.
+ terLimit = getTertiaryBoundary();
+ }
+ } else {
+ assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
+ ++index;
+ terLimit = getTertiaryBoundary();
+ }
+ long st = (((long)s & 0xffffffffL) << 16) | t;
+ for(;;) {
+ long secTer = elements[index];
+ // No tertiary greater than t for this primary+secondary.
+ if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
+ secTer &= ~SEC_TER_DELTA_FLAG;
+ if(secTer > st) { return (int)secTer & 0xffff; }
+ ++index;
+ }
+ }
+
+ /**
+ * Finds the largest index i where elements[i]<=p.
+ * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).
+ * Does not require that p is a root collator primary.
+ */
+ private int findP(long p) {
+ // p need not occur as a root primary.
+ // For example, it might be a reordering group boundary.
+ assert((p >> 24) != Collation.UNASSIGNED_IMPLICIT_BYTE);
+ // modified binary search
+ int start = (int)elements[IX_FIRST_PRIMARY_INDEX];
+ assert(p >= elements[start]);
+ int limit = elements.length - 1;
+ assert(elements[limit] >= PRIMARY_SENTINEL);
+ assert(p < elements[limit]);
+ while((start + 1) < limit) {
+ // Invariant: elements[start] and elements[limit] are primaries,
+ // and elements[start]<=p<=elements[limit].
+ // (start+limit)/2 cannot overflow: both are valid array indices.
+ int i = (start + limit) / 2;
+ long q = elements[i];
+ if((q & SEC_TER_DELTA_FLAG) != 0) {
+ // The midpoint is a sec/ter delta, not a primary:
+ // find a nearby primary so the invariant can be maintained.
+ // Find the next primary.
+ int j = i + 1;
+ for(;;) {
+ if(j == limit) { break; }
+ q = elements[j];
+ if((q & SEC_TER_DELTA_FLAG) == 0) {
+ i = j;
+ break;
+ }
+ ++j;
+ }
+ if((q & SEC_TER_DELTA_FLAG) != 0) {
+ // Find the preceding primary.
+ j = i - 1;
+ for(;;) {
+ if(j == start) { break; }
+ q = elements[j];
+ if((q & SEC_TER_DELTA_FLAG) == 0) {
+ i = j;
+ break;
+ }
+ --j;
+ }
+ if((q & SEC_TER_DELTA_FLAG) != 0) {
+ // No primary between start and limit.
+ break;
+ }
+ }
+ }
+ if(p < (q & 0xffffff00L)) { // Reset the "step" bits of a range end primary.
+ limit = i;
+ } else {
+ start = i;
+ }
+ }
+ return start;
+ }
+
+ private static boolean isEndOfPrimaryRange(long q) {
+ return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0;
+ }
+
+ /**
+ * Data structure: See ICU4C source/i18n/collationrootelements.h.
+ * Aliased from the constructor argument; never modified by this class.
+ */
+ // NOTE(review): could be declared final; it is never reassigned here.
+ private long[] elements;
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationRuleParser.java, ported from collationruleparser.h/.cpp
+*
+* C++ version created on: 2013apr10
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.text.ParseException;
+import java.util.ArrayList;
+
+import com.ibm.icu.impl.IllegalIcuArgumentException;
+import com.ibm.icu.impl.PatternProps;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.Normalizer2;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.ULocale;
+
+public final class CollationRuleParser {
+ /** Special reset positions. */
+ // The ordinal order must be kept in sync with the "positions" strings table
+ // below: parseSpecialPosition() encodes a position as (POS_BASE + ordinal).
+ enum Position {
+ FIRST_TERTIARY_IGNORABLE,
+ LAST_TERTIARY_IGNORABLE,
+ FIRST_SECONDARY_IGNORABLE,
+ LAST_SECONDARY_IGNORABLE,
+ FIRST_PRIMARY_IGNORABLE,
+ LAST_PRIMARY_IGNORABLE,
+ FIRST_VARIABLE,
+ LAST_VARIABLE,
+ FIRST_REGULAR,
+ LAST_REGULAR,
+ FIRST_IMPLICIT,
+ LAST_IMPLICIT,
+ FIRST_TRAILING,
+ LAST_TRAILING
+ }
+ // Cached Position.values() array, to avoid re-allocating it per lookup.
+ static final Position[] POSITION_VALUES = Position.values();
+
+ /**
+ * First character of contractions that encode special reset positions.
+ * U+FFFE cannot be tailored via rule syntax.
+ *
+ * The second contraction character is POS_BASE + Position.
+ */
+ static final char POS_LEAD = 0xfffe;
+ /**
+ * Base for the second character of contractions that encode special reset positions.
+ * Braille characters U+28xx are printable and normalization-inert.
+ * @see POS_LEAD
+ */
+ static final char POS_BASE = 0x2800;
+
+ /**
+ * Receiver of parsed rule events: resets, relations, and optional hooks.
+ * Must be set via setSink() before parsing.
+ */
+ static abstract class Sink {
+ /**
+ * Adds a reset.
+ * strength=UCOL_IDENTICAL for &str.
+ * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3.
+ */
+ abstract void addReset(int strength, CharSequence str);
+ /**
+ * Adds a relation with strength and prefix | str / extension.
+ */
+ abstract void addRelation(int strength, CharSequence prefix,
+ CharSequence str, CharSequence extension);
+
+ // Optional hook; default implementation does nothing.
+ // NOTE(review): presumably invoked for the [suppressContractions set]
+ // rule setting — confirm against the setting parser.
+ void suppressContractions(UnicodeSet set) {}
+
+ // Optional hook; default implementation does nothing.
+ // NOTE(review): presumably invoked for the [optimize set] rule setting — confirm.
+ void optimize(UnicodeSet set) {}
+ }
+
+ /**
+ * Supplies the rules of another locale's collation type,
+ * for the [import localeID] rule syntax. Optional; see setImporter().
+ */
+ interface Importer {
+ String getRules(String localeID, String collationType);
+ }
+
+ /**
+ * Constructor.
+ * The Sink must be set before parsing.
+ * The Importer can be set, otherwise [import locale] syntax is not supported.
+ *
+ * @param base base collation data; aliased, used e.g. to resolve
+ * [maxVariable] group primaries
+ */
+ CollationRuleParser(CollationData base) {
+ baseData = base;
+ }
+
+ /**
+ * Sets the pointer to a Sink object.
+ * The pointer is aliased: Pointer copy without cloning or taking ownership.
+ * Must be called before parse().
+ */
+ void setSink(Sink sinkAlias) {
+ sink = sinkAlias;
+ }
+
+ /**
+ * Sets the pointer to an Importer object.
+ * The pointer is aliased: Pointer copy without cloning or taking ownership.
+ * Optional; without it, [import locale] syntax is not supported.
+ */
+ void setImporter(Importer importerAlias) {
+ importer = importerAlias;
+ }
+
+ /**
+ * Parses the rule string, feeding resets/relations to the Sink and
+ * writing setting changes (strength, alternate, etc.) into outSettings.
+ * @throws ParseException on a syntax or semantic error in ruleString
+ */
+ void parse(String ruleString, CollationSettings outSettings) throws ParseException {
+ settings = outSettings;
+ parse(ruleString);
+ }
+
+ private static final int UCOL_DEFAULT = -1;
+ private static final int UCOL_OFF = 0;
+ private static final int UCOL_ON = 1;
+
+ /** UCOL_PRIMARY=0 .. UCOL_IDENTICAL=15 */
+ private static final int STRENGTH_MASK = 0xf;
+ private static final int STARRED_FLAG = 0x10;
+ private static final int OFFSET_SHIFT = 8;
+
+ private static final String BEFORE = "[before";
+
+ // In C++, we parse into temporary UnicodeString objects named "raw" or "str".
+ // In Java, we reuse this StringBuilder.
+ private final StringBuilder rawBuilder = new StringBuilder();
+
+ /**
+ * Top-level parse loop: dispatches on the first character of each token —
+ * rule chains ('&'), settings ('['), comments ('#'), and the legacy
+ * '@'/'!' options — until the whole rule string is consumed.
+ */
+ private void parse(String ruleString) throws ParseException {
+ rules = ruleString;
+ ruleIndex = 0;
+
+ while(ruleIndex < rules.length()) {
+ char c = rules.charAt(ruleIndex);
+ if(PatternProps.isWhiteSpace(c)) {
+ ++ruleIndex;
+ continue;
+ }
+ switch(c) {
+ case 0x26: // '&'
+ parseRuleChain();
+ break;
+ case 0x5b: // '['
+ parseSetting();
+ break;
+ case 0x23: // '#' starts a comment, until the end of the line
+ ruleIndex = skipComment(ruleIndex + 1);
+ break;
+ case 0x40: // '@' is equivalent to [backwards 2]
+ settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true);
+ ++ruleIndex;
+ break;
+ case 0x21: // '!' used to turn on Thai/Lao character reversal
+ // Accept but ignore. The root collator has contractions
+ // that are equivalent to the character reversal, where appropriate.
+ ++ruleIndex;
+ break;
+ default:
+ setParseError("expected a reset or setting or comment");
+ break;
+ }
+ }
+ }
+
+ /**
+ * Parses one reset ("&...") followed by its sequence of relations.
+ * Enforces the reset-before strength constraints: the first relation must
+ * match the [before n] strength, later ones must not be stronger.
+ */
+ private void parseRuleChain() throws ParseException {
+ int resetStrength = parseResetAndPosition();
+ boolean isFirstRelation = true;
+ for(;;) {
+ int result = parseRelationOperator();
+ if(result < 0) {
+ // No relation operator here: either a comment, or the chain ends.
+ if(ruleIndex < rules.length() && rules.charAt(ruleIndex) == 0x23) {
+ // '#' starts a comment, until the end of the line
+ ruleIndex = skipComment(ruleIndex + 1);
+ continue;
+ }
+ if(isFirstRelation) {
+ setParseError("reset not followed by a relation");
+ }
+ return;
+ }
+ int strength = result & STRENGTH_MASK;
+ if(resetStrength < Collator.IDENTICAL) {
+ // reset-before rule chain
+ if(isFirstRelation) {
+ if(strength != resetStrength) {
+ setParseError("reset-before strength differs from its first relation");
+ return;
+ }
+ } else {
+ if(strength < resetStrength) {
+ setParseError("reset-before strength followed by a stronger relation");
+ return;
+ }
+ }
+ }
+ int i = ruleIndex + (result >> OFFSET_SHIFT); // skip over the relation operator
+ if((result & STARRED_FLAG) == 0) {
+ parseRelationStrings(strength, i);
+ } else {
+ parseStarredCharacters(strength, i);
+ }
+ isFirstRelation = false;
+ }
+ }
+
+ /**
+ * Parses "&str", "&[before n]str", or "&[special position]",
+ * and forwards the reset to the Sink.
+ * @return the reset strength: Collator.IDENTICAL for a plain reset, or
+ * PRIMARY/SECONDARY/TERTIARY for [before 1/2/3];
+ * UCOL_DEFAULT on error
+ */
+ private int parseResetAndPosition() throws ParseException {
+ int i = skipWhiteSpace(ruleIndex + 1);
+ int j;
+ char c;
+ int resetStrength;
+ if(rules.regionMatches(i, BEFORE, 0, BEFORE.length()) &&
+ (j = i + BEFORE.length()) < rules.length() &&
+ PatternProps.isWhiteSpace(rules.charAt(j)) &&
+ ((j = skipWhiteSpace(j + 1)) + 1) < rules.length() &&
+ 0x31 <= (c = rules.charAt(j)) && c <= 0x33 &&
+ rules.charAt(j + 1) == 0x5d) {
+ // &[before n] with n=1 or 2 or 3
+ resetStrength = Collator.PRIMARY + (c - 0x31);
+ i = skipWhiteSpace(j + 2);
+ } else {
+ resetStrength = Collator.IDENTICAL;
+ }
+ if(i >= rules.length()) {
+ setParseError("reset without position");
+ return UCOL_DEFAULT;
+ }
+ // A '[' here starts a special reset position like [last regular].
+ if(rules.charAt(i) == 0x5b) { // '['
+ i = parseSpecialPosition(i, rawBuilder);
+ } else {
+ i = parseTailoringString(i, rawBuilder);
+ }
+ try {
+ sink.addReset(resetStrength, rawBuilder);
+ } catch(Exception e) {
+ setParseError("adding reset failed", e);
+ return UCOL_DEFAULT;
+ }
+ ruleIndex = i;
+ return resetStrength;
+ }
+
+ /**
+ * Parses a relation operator: one of "&lt;", "&lt;&lt;", "&lt;&lt;&lt;", "&lt;&lt;&lt;&lt;",
+ * ";" (same as &lt;&lt;), "," (same as &lt;&lt;&lt;), "=", each optionally followed by '*'.
+ * @return UCOL_DEFAULT (negative) if there is no relation operator at ruleIndex,
+ * otherwise ((number of chars consumed) &lt;&lt; OFFSET_SHIFT) | strength,
+ * with STARRED_FLAG set if '*' followed the operator
+ */
+ private int parseRelationOperator() {
+ ruleIndex = skipWhiteSpace(ruleIndex);
+ if(ruleIndex >= rules.length()) { return UCOL_DEFAULT; }
+ int strength;
+ int i = ruleIndex;
+ char c = rules.charAt(i++);
+ switch(c) {
+ case 0x3c: // '<'
+ if(i < rules.length() && rules.charAt(i) == 0x3c) { // <<
+ ++i;
+ if(i < rules.length() && rules.charAt(i) == 0x3c) { // <<<
+ ++i;
+ if(i < rules.length() && rules.charAt(i) == 0x3c) { // <<<<
+ ++i;
+ strength = Collator.QUATERNARY;
+ } else {
+ strength = Collator.TERTIARY;
+ }
+ } else {
+ strength = Collator.SECONDARY;
+ }
+ } else {
+ strength = Collator.PRIMARY;
+ }
+ if(i < rules.length() && rules.charAt(i) == 0x2a) { // '*'
+ ++i;
+ strength |= STARRED_FLAG;
+ }
+ break;
+ case 0x3b: // ';' same as <<
+ strength = Collator.SECONDARY;
+ break;
+ case 0x2c: // ',' same as <<<
+ strength = Collator.TERTIARY;
+ break;
+ case 0x3d: // '='
+ strength = Collator.IDENTICAL;
+ if(i < rules.length() && rules.charAt(i) == 0x2a) { // '*'
+ ++i;
+ strength |= STARRED_FLAG;
+ }
+ break;
+ default:
+ return UCOL_DEFAULT;
+ }
+ return ((i - ruleIndex) << OFFSET_SHIFT) | strength;
+ }
+
+ /**
+ * Parses the strings of one (non-starred) relation and forwards it to the Sink.
+ * @param strength relation strength from the operator
+ * @param i rule index just after the relation operator
+ */
+ private void parseRelationStrings(int strength, int i) throws ParseException {
+ // Parse
+ // prefix | str / extension
+ // where prefix and extension are optional.
+ String prefix = "";
+ CharSequence extension = "";
+ i = parseTailoringString(i, rawBuilder);
+ char next = (i < rules.length()) ? rules.charAt(i) : 0;
+ if(next == 0x7c) { // '|' separates the context prefix from the string.
+ prefix = rawBuilder.toString();
+ i = parseTailoringString(i + 1, rawBuilder);
+ next = (i < rules.length()) ? rules.charAt(i) : 0;
+ }
+ // str = rawBuilder (do not modify rawBuilder any more in this function)
+ if(next == 0x2f) { // '/' separates the string from the extension.
+ StringBuilder extBuilder = new StringBuilder();
+ i = parseTailoringString(i + 1, extBuilder);
+ extension = extBuilder;
+ }
+ if(prefix.length() != 0) {
+ // Contextual tailoring requires NFC boundaries at both string starts.
+ int prefix0 = prefix.codePointAt(0);
+ int c = rawBuilder.codePointAt(0);
+ if(!nfc.hasBoundaryBefore(prefix0) || !nfc.hasBoundaryBefore(c)) {
+ setParseError("in 'prefix|str', prefix and str must each start with an NFC boundary");
+ return;
+ }
+ }
+ try {
+ sink.addRelation(strength, prefix, rawBuilder, extension);
+ } catch(Exception e) {
+ setParseError("adding relation failed", e);
+ return;
+ }
+ ruleIndex = i;
+ }
+
+ /**
+ * Parses a starred relation (e.g. "&lt;* abc" or "&lt;* a-z"): adds one relation
+ * at the given strength for every NFD-inert code point in the string,
+ * expanding '-' ranges code point by code point.
+ * @param strength relation strength from the operator
+ * @param i rule index just after the starred relation operator
+ */
+ private void parseStarredCharacters(int strength, int i) throws ParseException {
+ String empty = "";
+ i = parseString(skipWhiteSpace(i), rawBuilder);
+ if(rawBuilder.length() == 0) {
+ setParseError("missing starred-relation string");
+ return;
+ }
+ // prev = last code point added, or -1 when a range start is not available.
+ int prev = -1;
+ int j = 0;
+ for(;;) {
+ while(j < rawBuilder.length()) {
+ int c = rawBuilder.codePointAt(j);
+ if(!nfd.isInert(c)) {
+ setParseError("starred-relation string is not all NFD-inert");
+ return;
+ }
+ try {
+ sink.addRelation(strength, empty, UTF16.valueOf(c), empty);
+ } catch(Exception e) {
+ setParseError("adding relation failed", e);
+ return;
+ }
+ j += Character.charCount(c);
+ prev = c;
+ }
+ if(i >= rules.length() || rules.charAt(i) != 0x2d) { // '-'
+ break;
+ }
+ if(prev < 0) {
+ setParseError("range without start in starred-relation string");
+ return;
+ }
+ i = parseString(i + 1, rawBuilder);
+ if(rawBuilder.length() == 0) {
+ setParseError("range without end in starred-relation string");
+ return;
+ }
+ int c = rawBuilder.codePointAt(0);
+ if(c < prev) {
+ setParseError("range start greater than end in starred-relation string");
+ return;
+ }
+ // range prev-c
+ while(++prev <= c) {
+ if(!nfd.isInert(prev)) {
+ setParseError("starred-relation string range is not all NFD-inert");
+ return;
+ }
+ if(isSurrogate(prev)) {
+ setParseError("starred-relation string range contains a surrogate");
+ return;
+ }
+ if(0xfffd <= prev && prev <= 0xffff) {
+ setParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF");
+ return;
+ }
+ try {
+ sink.addRelation(strength, empty, UTF16.valueOf(prev), empty);
+ } catch(Exception e) {
+ setParseError("adding relation failed", e);
+ return;
+ }
+ }
+ prev = -1;
+ // Continue with the rest of the just-parsed string after the range end.
+ j = Character.charCount(c);
+ }
+ ruleIndex = skipWhiteSpace(i);
+ }
+
+ /**
+ * Parses a non-empty tailoring string (after skipping leading white space) into raw.
+ * @return the rule index after the string and any trailing white space
+ */
+ private int parseTailoringString(int i, StringBuilder raw) throws ParseException {
+ i = parseString(skipWhiteSpace(i), raw);
+ if(raw.length() == 0) {
+ setParseError("missing relation string");
+ }
+ return skipWhiteSpace(i);
+ }
+
+ /**
+ * Parses one string token into raw, handling 'quoted literal text',
+ * '' (an escaped apostrophe), and backslash escapes.
+ * The string ends at unquoted white space or at any other syntax character.
+ * Rejects unpaired surrogates and U+FFFD..U+FFFF.
+ * @return the rule index after the string
+ */
+ private int parseString(int i, StringBuilder raw) throws ParseException {
+ raw.setLength(0);
+ while(i < rules.length()) {
+ char c = rules.charAt(i++);
+ if(isSyntaxChar(c)) {
+ if(c == 0x27) { // apostrophe
+ if(i < rules.length() && rules.charAt(i) == 0x27) {
+ // Double apostrophe, encodes a single one.
+ raw.append((char)0x27);
+ ++i;
+ continue;
+ }
+ // Quote literal text until the next single apostrophe.
+ for(;;) {
+ if(i == rules.length()) {
+ setParseError("quoted literal text missing terminating apostrophe");
+ return i;
+ }
+ c = rules.charAt(i++);
+ if(c == 0x27) {
+ if(i < rules.length() && rules.charAt(i) == 0x27) {
+ // Double apostrophe inside quoted literal text,
+ // still encodes a single apostrophe.
+ ++i;
+ } else {
+ break;
+ }
+ }
+ raw.append(c);
+ }
+ } else if(c == 0x5c) { // backslash
+ if(i == rules.length()) {
+ setParseError("backslash escape at the end of the rule string");
+ return i;
+ }
+ // Escape the whole code point, not just one char (surrogate pair safe).
+ int cp = rules.codePointAt(i);
+ raw.appendCodePoint(cp);
+ i += Character.charCount(cp);
+ } else {
+ // Any other syntax character terminates a string.
+ --i;
+ break;
+ }
+ } else if(PatternProps.isWhiteSpace(c)) {
+ // Unquoted white space terminates a string.
+ --i;
+ break;
+ } else {
+ raw.append(c);
+ }
+ }
+ // Validate the collected string: no lone surrogates, no U+FFFD..U+FFFF.
+ for(int j = 0; j < raw.length();) {
+ int c = raw.codePointAt(j);
+ if(isSurrogate(c)) {
+ setParseError("string contains an unpaired surrogate");
+ return i;
+ }
+ if(0xfffd <= c && c <= 0xffff) {
+ setParseError("string contains U+FFFD, U+FFFE or U+FFFF");
+ return i;
+ }
+ j += Character.charCount(c);
+ }
+ return i;
+ }
+
+ // TODO: Widen UTF16.isSurrogate(char16) to take an int.
+ // True for any surrogate code point U+D800..U+DFFF (the mask test
+ // matches exactly that 0x800-wide range).
+ private static final boolean isSurrogate(int c) {
+ return (c & 0xfffff800) == 0xd800;
+ }
+
+ // Names of the special reset positions.
+ // Must be kept in sync with the Position enum: positions[i] corresponds to
+ // POSITION_VALUES[i]; parseSpecialPosition() maps index i to (POS_BASE + i).
+ private static final String[] positions = {
+ "first tertiary ignorable",
+ "last tertiary ignorable",
+ "first secondary ignorable",
+ "last secondary ignorable",
+ "first primary ignorable",
+ "last primary ignorable",
+ "first variable",
+ "last variable",
+ "first regular",
+ "last regular",
+ "first implicit",
+ "last implicit",
+ "first trailing",
+ "last trailing"
+ };
+
+ /**
+ * Sets str to a contraction of U+FFFE and (U+2800 + Position).
+ * Also accepts the aliases [top] (= [last regular]) and
+ * [variable top] (= [last variable]).
+ * @return rule index after the special reset position
+ * @throws ParseException if the bracketed words are not a known position
+ */
+ private int parseSpecialPosition(int i, StringBuilder str) throws ParseException {
+ int j = readWords(i + 1, rawBuilder);
+ if(j > i && rules.charAt(j) == 0x5d && rawBuilder.length() != 0) { // words end with ]
+ ++j;
+ String raw = rawBuilder.toString();
+ str.setLength(0);
+ for(int pos = 0; pos < positions.length; ++pos) {
+ if(raw.equals(positions[pos])) {
+ str.append(POS_LEAD).append((char)(POS_BASE + pos));
+ return j;
+ }
+ }
+ if(raw.equals("top")) {
+ str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_REGULAR.ordinal()));
+ return j;
+ }
+ if(raw.equals("variable top")) {
+ str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_VARIABLE.ordinal()));
+ return j;
+ }
+ }
+ setParseError("not a valid special reset position");
+ return i;
+ }
+
+ /**
+ * Parses one [setting] or [option] with ruleIndex pointing at the opening '['.
+ * On success, advances ruleIndex past the closing ']'.
+ * @throws ParseException if the setting is unknown or its value is invalid
+ */
+ private void parseSetting() throws ParseException {
+ int i = ruleIndex + 1;
+ int j = readWords(i, rawBuilder);
+ if(j <= i || rawBuilder.length() == 0) {
+ setParseError("expected a setting/option at '['");
+ }
+ // startsWith() etc. are available for String but not CharSequence/StringBuilder.
+ String raw = rawBuilder.toString();
+ if(rules.charAt(j) == 0x5d) { // words end with ]
+ ++j;
+ // [reorder] or [reorder code code ...]
+ if(raw.startsWith("reorder") &&
+ (raw.length() == 7 || raw.charAt(7) == 0x20)) {
+ parseReordering(raw);
+ ruleIndex = j;
+ return;
+ }
+ if(raw.equals("backwards 2")) {
+ settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true);
+ ruleIndex = j;
+ return;
+ }
+ // Split "name value" on the last space; v is empty if there is no value.
+ String v;
+ int valueIndex = raw.lastIndexOf(0x20);
+ if(valueIndex >= 0) {
+ v = raw.substring(valueIndex + 1);
+ raw = raw.substring(0, valueIndex);
+ } else {
+ v = "";
+ }
+ // [strength 1]..[strength 4], or [strength I] for identical level.
+ if(raw.equals("strength") && v.length() == 1) {
+ int value = UCOL_DEFAULT;
+ char c = v.charAt(0);
+ if(0x31 <= c && c <= 0x34) { // 1..4
+ value = Collator.PRIMARY + (c - 0x31);
+ } else if(c == 0x49) { // 'I'
+ value = Collator.IDENTICAL;
+ }
+ if(value != UCOL_DEFAULT) {
+ settings.setStrength(value);
+ ruleIndex = j;
+ return;
+ }
+ } else if(raw.equals("alternate")) {
+ int value = UCOL_DEFAULT;
+ if(v.equals("non-ignorable")) {
+ value = 0; // UCOL_NON_IGNORABLE
+ } else if(v.equals("shifted")) {
+ value = 1; // UCOL_SHIFTED
+ }
+ if(value != UCOL_DEFAULT) {
+ settings.setAlternateHandlingShifted(value > 0);
+ ruleIndex = j;
+ return;
+ }
+ } else if(raw.equals("maxVariable")) {
+ int value = UCOL_DEFAULT;
+ if(v.equals("space")) {
+ value = CollationSettings.MAX_VAR_SPACE;
+ } else if(v.equals("punct")) {
+ value = CollationSettings.MAX_VAR_PUNCT;
+ } else if(v.equals("symbol")) {
+ value = CollationSettings.MAX_VAR_SYMBOL;
+ } else if(v.equals("currency")) {
+ value = CollationSettings.MAX_VAR_CURRENCY;
+ }
+ if(value != UCOL_DEFAULT) {
+ settings.setMaxVariable(value, 0);
+ // variableTop tracks the last primary of the chosen reorder group.
+ settings.variableTop = baseData.getLastPrimaryForGroup(
+ Collator.ReorderCodes.FIRST + value);
+ assert(settings.variableTop != 0);
+ ruleIndex = j;
+ return;
+ }
+ } else if(raw.equals("caseFirst")) {
+ int value = UCOL_DEFAULT;
+ if(v.equals("off")) {
+ value = UCOL_OFF;
+ } else if(v.equals("lower")) {
+ value = CollationSettings.CASE_FIRST; // UCOL_LOWER_FIRST
+ } else if(v.equals("upper")) {
+ value = CollationSettings.CASE_FIRST_AND_UPPER_MASK; // UCOL_UPPER_FIRST
+ }
+ if(value != UCOL_DEFAULT) {
+ settings.setCaseFirst(value);
+ ruleIndex = j;
+ return;
+ }
+ } else if(raw.equals("caseLevel")) {
+ int value = getOnOffValue(v);
+ if(value != UCOL_DEFAULT) {
+ settings.setFlag(CollationSettings.CASE_LEVEL, value > 0);
+ ruleIndex = j;
+ return;
+ }
+ } else if(raw.equals("normalization")) {
+ int value = getOnOffValue(v);
+ if(value != UCOL_DEFAULT) {
+ settings.setFlag(CollationSettings.CHECK_FCD, value > 0);
+ ruleIndex = j;
+ return;
+ }
+ } else if(raw.equals("numericOrdering")) {
+ int value = getOnOffValue(v);
+ if(value != UCOL_DEFAULT) {
+ settings.setFlag(CollationSettings.NUMERIC, value > 0);
+ ruleIndex = j;
+ return;
+ }
+ } else if(raw.equals("hiraganaQ")) {
+ int value = getOnOffValue(v);
+ if(value != UCOL_DEFAULT) {
+ if(value == UCOL_ON) {
+ setParseError("[hiraganaQ on] is not supported");
+ }
+ ruleIndex = j;
+ return;
+ }
+ } else if(raw.equals("import")) {
+ // BCP 47 language tag -> ICU locale ID
+ ULocale localeID;
+ try {
+ localeID = new ULocale.Builder().setLanguageTag(v).build();
+ } catch(Exception e) {
+ setParseError("expected language tag in [import langTag]", e);
+ return;
+ }
+ // localeID minus all keywords
+ String baseID = localeID.getBaseName();
+ // @collation=type, or length=0 if not specified
+ String collationType = localeID.getKeywordValue("collation");
+ if(importer == null) {
+ setParseError("[import langTag] is not supported");
+ } else {
+ String importedRules;
+ try {
+ importedRules =
+ importer.getRules(baseID,
+ collationType != null ? collationType : "standard");
+ } catch(Exception e) {
+ setParseError("[import langTag] failed", e);
+ return;
+ }
+ // Recursively parse the imported rules with this same parser,
+ // then restore our own rule string and index.
+ String outerRules = rules;
+ int outerRuleIndex = ruleIndex;
+ try {
+ parse(importedRules);
+ } catch(Exception e) {
+ ruleIndex = outerRuleIndex; // Restore the original index for error reporting.
+ setParseError("parsing imported rules failed", e);
+ }
+ rules = outerRules;
+ ruleIndex = j;
+ }
+ return;
+ }
+ } else if(rules.charAt(j) == 0x5b) { // words end with [
+ // [optimize set] / [suppressContractions set] take a UnicodeSet pattern.
+ UnicodeSet set = new UnicodeSet();
+ j = parseUnicodeSet(j, set);
+ if(raw.equals("optimize")) {
+ try {
+ sink.optimize(set);
+ } catch(Exception e) {
+ setParseError("[optimize set] failed", e);
+ }
+ ruleIndex = j;
+ return;
+ } else if(raw.equals("suppressContractions")) {
+ try {
+ sink.suppressContractions(set);
+ } catch(Exception e) {
+ setParseError("[suppressContractions set] failed", e);
+ }
+ ruleIndex = j;
+ return;
+ }
+ }
+ setParseError("not a valid setting/option");
+ }
+
+ /**
+ * Parses the codes of a [reorder aa bb cc] option.
+ * @param raw the option words starting with "reorder"; readWords() has already
+ * collapsed runs of white space into single ' ' separators
+ * @throws ParseException if a word is neither a script nor a reorder code
+ */
+ private void parseReordering(CharSequence raw) throws ParseException {
+ int i = 7; // after "reorder"
+ if(i == raw.length()) {
+ // empty [reorder] with no codes
+ settings.resetReordering();
+ return;
+ }
+ // Parse the codes in [reorder aa bb cc].
+ ArrayList<Integer> reorderCodes = new ArrayList<Integer>();
+ while(i < raw.length()) {
+ ++i; // skip the word-separating space
+ int limit = i;
+ while(limit < raw.length() && raw.charAt(limit) != ' ') { ++limit; }
+ String word = raw.subSequence(i, limit).toString();
+ int code = getReorderCode(word);
+ if(code < 0) {
+ setParseError("unknown script or reorder code");
+ return;
+ }
+ reorderCodes.add(code);
+ i = limit;
+ }
+ int length = reorderCodes.size();
+ if(length == 1 && reorderCodes.get(0) == Collator.ReorderCodes.DEFAULT) {
+ // The root collator does not have a reordering, by definition.
+ settings.resetReordering();
+ return;
+ }
+ // Copy the codes into an int[] and build the primary-lead-byte permutation.
+ int[] codes = new int[reorderCodes.size()];
+ int j = 0;
+ for(Integer code : reorderCodes) { codes[j++] = code; }
+ byte[] table = new byte[256];
+ baseData.makeReorderTable(codes, table);
+ settings.setReordering(codes, table);
+ }
+
+ // Non-script reorder group names, in the order of Collator.ReorderCodes.FIRST + index.
+ private static final String[] gSpecialReorderCodes = {
+ "space", "punct", "symbol", "currency", "digit"
+ };
+
+ /**
+ * Gets a script or reorder code from its string representation.
+ * Special group names are matched first, then UScript names/aliases,
+ * then the literal word "default". Matching is case-insensitive.
+ * @return the script/reorder code, or
+ * -1==Collator.ReorderCodes.REORDER_CODE_DEFAULT, or
+ * -2 if not recognized
+ */
+ public static int getReorderCode(String word) {
+ for(int i = 0; i < gSpecialReorderCodes.length; ++i) {
+ if(word.equalsIgnoreCase(gSpecialReorderCodes[i])) {
+ return Collator.ReorderCodes.FIRST + i;
+ }
+ }
+ try {
+ int script = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word);
+ if(script >= 0) {
+ return script;
+ }
+ } catch (IllegalIcuArgumentException e) {
+ // fall through: not a script name, try "default" below
+ }
+ if(word.equalsIgnoreCase("default")) {
+ return Collator.ReorderCodes.DEFAULT;
+ }
+ return -2;
+ }
+
+ /** Maps "on" to UCOL_ON and "off" to UCOL_OFF; any other string yields UCOL_DEFAULT. */
+ private static int getOnOffValue(String s) {
+ return s.equals("on") ? UCOL_ON
+ : s.equals("off") ? UCOL_OFF
+ : UCOL_DEFAULT;
+ }
+
+ /**
+ * Parses a UnicodeSet pattern for [optimize set] / [suppressContractions set].
+ * @param i rule index of the '[' that opens the set pattern
+ * @param set receives the parsed set
+ * @return rule index after the option-terminating ']'
+ * @throws ParseException if the pattern or the option syntax is invalid
+ */
+ private int parseUnicodeSet(int i, UnicodeSet set) throws ParseException {
+ // Collect a UnicodeSet pattern between a balanced pair of [brackets].
+ int level = 0;
+ int j = i;
+ for(;;) {
+ if(j == rules.length()) {
+ setParseError("unbalanced UnicodeSet pattern brackets");
+ return j;
+ }
+ char c = rules.charAt(j++);
+ if(c == 0x5b) { // '['
+ ++level;
+ } else if(c == 0x5d) { // ']'
+ if(--level == 0) { break; }
+ }
+ }
+ try {
+ set.applyPattern(rules.substring(i, j));
+ } catch(Exception e) {
+ setParseError("not a valid UnicodeSet pattern: " + e.getMessage());
+ }
+ j = skipWhiteSpace(j);
+ // The set pattern's own ']' is not the option terminator; require one more.
+ if(j == rules.length() || rules.charAt(j) != 0x5d) {
+ setParseError("missing option-terminating ']' after UnicodeSet pattern");
+ return j;
+ }
+ return ++j;
+ }
+
+ /**
+ * Collects a sequence of words into raw, collapsing runs of white space
+ * into single ' ' separators and stopping before the next syntax character
+ * ('-' and '_' count as word characters).
+ * @param i rule index where scanning starts
+ * @param raw receives the words; cleared first
+ * @return rule index of the terminating syntax character,
+ * or 0 (failure sentinel) if the end of the rule string is reached
+ */
+ private int readWords(int i, StringBuilder raw) {
+ raw.setLength(0);
+ i = skipWhiteSpace(i);
+ for(;;) {
+ if(i >= rules.length()) { return 0; }
+ char c = rules.charAt(i);
+ if(isSyntaxChar(c) && c != 0x2d && c != 0x5f) { // syntax except -_
+ if(raw.length() == 0) { return i; }
+ int lastIndex = raw.length() - 1;
+ if(raw.charAt(lastIndex) == ' ') { // remove trailing space
+ raw.setLength(lastIndex);
+ }
+ return i;
+ }
+ if(PatternProps.isWhiteSpace(c)) {
+ raw.append(' ');
+ i = skipWhiteSpace(i + 1);
+ } else {
+ raw.append(c);
+ ++i;
+ }
+ }
+ }
+
+ /**
+ * Skips the rest of a comment line.
+ * @param i rule index just after the comment-introducing character
+ * @return rule index just past the next line terminator (or rules.length())
+ */
+ private int skipComment(int i) {
+ // skip to past the newline
+ while(i < rules.length()) {
+ char c = rules.charAt(i++);
+ // LF or FF or CR or NEL or LS or PS
+ if(c == 0xa || c == 0xc || c == 0xd || c == 0x85 || c == 0x2028 || c == 0x2029) {
+ // Unicode Newline Guidelines: "A readline function should stop at NLF, LS, FF, or PS."
+ // NLF (new line function) = CR or LF or CR+LF or NEL.
+ // No need to collect all of CR+LF because a following LF will be ignored anyway.
+ break;
+ }
+ }
+ return i;
+ }
+
+ /** Reports a parse error at the current ruleIndex by throwing a ParseException. */
+ private void setParseError(String reason) throws ParseException {
+ throw makeParseException(reason);
+ }
+
+ /** Same as setParseError(String), chaining the causal exception. */
+ private void setParseError(String reason, Exception e) throws ParseException {
+ ParseException newExc = makeParseException(reason + ": " + e.getMessage());
+ newExc.initCause(e);
+ throw newExc;
+ }
+
+ /** Builds a ParseException with rule-string context around ruleIndex. */
+ private ParseException makeParseException(String reason) {
+ return new ParseException(appendErrorContext(reason), ruleIndex);
+ }
+
+ // Maximum number of context chars quoted on either side of the error position.
+ private static final int U_PARSE_CONTEXT_LEN = 16;
+
+ // C++ setErrorContext()
+ /**
+ * Appends the rule index and up to U_PARSE_CONTEXT_LEN-1 characters of
+ * rule-string context on each side of ruleIndex (marked with '!') to reason.
+ * Context boundaries are adjusted so surrogate pairs are not split.
+ */
+ private String appendErrorContext(String reason) {
+ // Note: This relies on the calling code maintaining the ruleIndex
+ // at a position that is useful for debugging.
+ // For example, at the beginning of a reset or relation etc.
+ StringBuilder msg = new StringBuilder(reason);
+ msg.append(" at index ").append(ruleIndex);
+ // We are not counting line numbers.
+
+ msg.append(" near \"");
+ // before ruleIndex
+ int start = ruleIndex - (U_PARSE_CONTEXT_LEN - 1);
+ if(start < 0) {
+ start = 0;
+ } else if(start > 0 && Character.isLowSurrogate(rules.charAt(start))) {
+ ++start;
+ }
+ msg.append(rules, start, ruleIndex);
+
+ msg.append('!');
+ // starting from ruleIndex
+ int length = rules.length() - ruleIndex;
+ if(length >= U_PARSE_CONTEXT_LEN) {
+ length = U_PARSE_CONTEXT_LEN - 1;
+ if(Character.isHighSurrogate(rules.charAt(ruleIndex + length - 1))) {
+ --length;
+ }
+ }
+ msg.append(rules, ruleIndex, ruleIndex + length);
+ return msg.append('\"').toString();
+ }
+
+ /**
+ * Returns true if c is an ASCII punctuation or symbol character, i.e.
+ * ASCII [:P:] and [:S:]:
+ * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E]
+ */
+ private static boolean isSyntaxChar(int c) {
+ return 0x21 <= c && c <= 0x7e &&
+ (c <= 0x2f || (0x3a <= c && c <= 0x40) ||
+ (0x5b <= c && c <= 0x60) || (0x7b <= c));
+ }
+
+ /** Returns the index of the first non-white-space character at or after i. */
+ private int skipWhiteSpace(int i) {
+ while(i < rules.length() && PatternProps.isWhiteSpace(rules.charAt(i))) {
+ ++i;
+ }
+ return i;
+ }
+
+ // Normalizer instances used while parsing rule strings.
+ private Normalizer2 nfd = Normalizer2.getNFDInstance();
+ private Normalizer2 nfc = Normalizer2.getNFCInstance();
+
+ // The rule string being parsed; temporarily replaced while parsing [import] rules.
+ private String rules;
+ // Base (root) collation data, e.g. for reorder tables and variable-top lookup.
+ private final CollationData baseData;
+ // Settings modified in place by in-rule options such as [strength 2].
+ private CollationSettings settings;
+
+ // Receives parsed tokens and option sets.
+ private Sink sink;
+ // Provides rules for [import langTag]; if null, [import] reports an error.
+ private Importer importer;
+
+ // Current parse position in rules; also the position used for error context.
+ private int ruleIndex;
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationSettings.java, ported from collationsettings.h/.cpp
+*
+* C++ version created on: 2013feb07
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.util.Arrays;
+
+import com.ibm.icu.text.Collator;
+
+/**
+ * Collation settings/options/attributes.
+ * These are the values that can be changed via API.
+ */
+public final class CollationSettings extends SharedObject {
+ /**
+ * Options bit 0: Perform the FCD check on the input text and deliver normalized text.
+ */
+ public static final int CHECK_FCD = 1;
+ /**
+ * Options bit 1: Numeric collation.
+ * Also known as CODAN = COllate Digits As Numbers.
+ *
+ * Treat digit sequences as numbers with CE sequences in numeric order,
+ * rather than returning a normal CE for each digit.
+ */
+ public static final int NUMERIC = 2;
+ /**
+ * "Shifted" alternate handling, see ALTERNATE_MASK.
+ */
+ static final int SHIFTED = 4;
+ /**
+ * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable.
+ * Reserve values 8 and 0xc for shift-trimmed and blanked.
+ */
+ static final int ALTERNATE_MASK = 0xc;
+ /**
+ * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value.
+ */
+ static final int MAX_VARIABLE_SHIFT = 4;
+ /** maxVariable options bit mask before shifting. */
+ static final int MAX_VARIABLE_MASK = 0x70;
+ /** Options bit 7: Reserved/unused/0. */
+ /**
+ * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on.
+ */
+ static final int UPPER_FIRST = 0x100;
+ /**
+ * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values)
+ * unless case level is on (when they are *moved* into the separate case level).
+ * By default, the case bits are removed from the tertiary weight (ignored).
+ *
+ * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
+ * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST.
+ */
+ public static final int CASE_FIRST = 0x200;
+ /**
+ * Options bit mask for caseFirst and upperFirst, before shifting.
+ * Same value as caseFirst==upperFirst.
+ */
+ public static final int CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST;
+ /**
+ * Options bit 10: Insert the case level between the secondary and tertiary levels.
+ */
+ public static final int CASE_LEVEL = 0x400;
+ /**
+ * Options bit 11: Compare secondary weights backwards. ("French secondary")
+ */
+ public static final int BACKWARD_SECONDARY = 0x800;
+ /**
+ * Options bits 15..12: The 4-bit strength value bit field is shifted by this value.
+ * It is the top used bit field in the options. (No need to mask after shifting.)
+ */
+ static final int STRENGTH_SHIFT = 12;
+ /** Strength options bit mask before shifting. */
+ static final int STRENGTH_MASK = 0xf000;
+
+ /** maxVariable values */
+ static final int MAX_VAR_SPACE = 0;
+ static final int MAX_VAR_PUNCT = 1;
+ static final int MAX_VAR_SYMBOL = 2;
+ static final int MAX_VAR_CURRENCY = 3;
+
+ /** Creates settings with defaults; see the options field initializer below. */
+ CollationSettings() {}
+
+ /**
+ * Deep-copies only the mutable fastLatinPrimaries array;
+ * other arrays are shared because they are replaced, never modified.
+ */
+ @Override
+ public CollationSettings clone() {
+ CollationSettings newSettings = (CollationSettings)super.clone();
+ // Note: The reorderTable and reorderCodes need not be cloned
+ // because, in Java, they only get replaced but not modified.
+ newSettings.fastLatinPrimaries = fastLatinPrimaries.clone();
+ return newSettings;
+ }
+
+ /**
+ * Settings are equal when they have the same options, the same reorder codes,
+ * and — only when alternate handling is enabled — the same variableTop.
+ */
+ @Override
+ public boolean equals(Object other) {
+ // equals(null) must return false per the Object.equals() contract;
+ // previously this dereferenced 'other' and threw a NullPointerException.
+ if(other == null) { return false; }
+ if(!this.getClass().equals(other.getClass())) { return false; }
+ CollationSettings o = (CollationSettings)other;
+ if(options != o.options) { return false; }
+ // variableTop only matters when alternate handling is not non-ignorable.
+ if((options & ALTERNATE_MASK) != 0 && variableTop != o.variableTop) { return false; }
+ if(!Arrays.equals(reorderCodes, o.reorderCodes)) { return false; }
+ return true;
+ }
+
+ /** Consistent with equals(): mixes options, variableTop (if relevant), and reorder codes. */
+ @Override
+ public int hashCode() {
+ int h = options << 8;
+ if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
+ h ^= reorderCodes.length;
+ for(int i = 0; i < reorderCodes.length; ++i) {
+ h ^= (reorderCodes[i] << i);
+ }
+ return h;
+ }
+
+ /** Turns off script reordering entirely. */
+ public void resetReordering() {
+ // When we turn off reordering, we want to set a null permutation
+ // rather than a no-op permutation.
+ reorderTable = null;
+ reorderCodes = EMPTY_INT_ARRAY;
+ }
+ // No aliasReordering() in Java. Use setReordering(). See comments near reorderCodes.
+ /**
+ * Installs a reordering. codes and table must be consistent:
+ * a non-empty codes array requires a non-null 256-byte permutation table.
+ */
+ public void setReordering(int[] codes, byte[] table) {
+ if(codes == null) {
+ codes = EMPTY_INT_ARRAY;
+ }
+ assert (codes.length == 0) == (table == null);
+ reorderTable = table;
+ reorderCodes = codes;
+ }
+
+ // In C++, we use enums for attributes and their values, with a special value for the default.
+ // Combined getter/setter methods handle many attributes.
+ // In Java, we have specific methods for getting, setting, and set-to-default,
+ // except that this class uses bits in its own bit set for simple values.
+
+ /**
+ * Sets the comparison strength.
+ * @param value one of Collator.PRIMARY..QUATERNARY or Collator.IDENTICAL
+ * @throws IllegalArgumentException for any other value
+ */
+ public void setStrength(int value) {
+ int noStrength = options & ~STRENGTH_MASK;
+ switch(value) {
+ case Collator.PRIMARY:
+ case Collator.SECONDARY:
+ case Collator.TERTIARY:
+ case Collator.QUATERNARY:
+ case Collator.IDENTICAL:
+ options = noStrength | (value << STRENGTH_SHIFT);
+ break;
+ default:
+ throw new IllegalArgumentException("illegal strength value " + value);
+ }
+ }
+
+ /** Copies the strength bits from defaultOptions. */
+ public void setStrengthDefault(int defaultOptions) {
+ int noStrength = options & ~STRENGTH_MASK;
+ options = noStrength | (defaultOptions & STRENGTH_MASK);
+ }
+
+ /** Extracts the strength from an options word. */
+ static int getStrength(int options) {
+ return options >> STRENGTH_SHIFT;
+ }
+
+ public int getStrength() {
+ return getStrength(options);
+ }
+
+ /** Sets the options bit for an on/off attribute. */
+ public void setFlag(int bit, boolean value) {
+ if(value) {
+ options |= bit;
+ } else {
+ options &= ~bit;
+ }
+ }
+
+ /** Copies the given bit from defaultOptions. */
+ public void setFlagDefault(int bit, int defaultOptions) {
+ options = (options & ~bit) | (defaultOptions & bit);
+ }
+
+ public boolean getFlag(int bit) {
+ return (options & bit) != 0;
+ }
+
+ /**
+ * Sets the caseFirst attribute bits.
+ * @param value 0 (off), CASE_FIRST (lower first), or CASE_FIRST_AND_UPPER_MASK (upper first)
+ */
+ public void setCaseFirst(int value) {
+ assert value == 0 || value == CASE_FIRST || value == CASE_FIRST_AND_UPPER_MASK;
+ int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
+ options = noCaseFirst | value;
+ }
+
+ /** Copies the caseFirst bits from defaultOptions. */
+ public void setCaseFirstDefault(int defaultOptions) {
+ int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
+ options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
+ }
+
+ public int getCaseFirst() {
+ return options & CASE_FIRST_AND_UPPER_MASK;
+ }
+
+ /** Turns "shifted" alternate handling on or off (off == non-ignorable). */
+ public void setAlternateHandlingShifted(boolean value) {
+ int noAlternate = options & ~ALTERNATE_MASK;
+ if(value) {
+ options = noAlternate | SHIFTED;
+ } else {
+ options = noAlternate;
+ }
+ }
+
+ /** Copies the alternate-handling bits from defaultOptions. */
+ public void setAlternateHandlingDefault(int defaultOptions) {
+ int noAlternate = options & ~ALTERNATE_MASK;
+ options = noAlternate | (defaultOptions & ALTERNATE_MASK);
+ }
+
+ public boolean getAlternateHandling() {
+ return (options & ALTERNATE_MASK) != 0;
+ }
+
+ /**
+ * Sets the maxVariable group.
+ * @param value one of MAX_VAR_SPACE..MAX_VAR_CURRENCY, or -1 to copy from defaultOptions
+ * @throws IllegalArgumentException for any other value
+ */
+ public void setMaxVariable(int value, int defaultOptions) {
+ int noMax = options & ~MAX_VARIABLE_MASK;
+ switch(value) {
+ case MAX_VAR_SPACE:
+ case MAX_VAR_PUNCT:
+ case MAX_VAR_SYMBOL:
+ case MAX_VAR_CURRENCY:
+ options = noMax | (value << MAX_VARIABLE_SHIFT);
+ break;
+ case -1:
+ options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
+ break;
+ default:
+ throw new IllegalArgumentException("illegal maxVariable value " + value);
+ }
+ }
+
+ public int getMaxVariable() {
+ return (options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT;
+ }
+
+ /**
+ * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off.
+ */
+ static boolean isTertiaryWithCaseBits(int options) {
+ return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST;
+ }
+ /** Returns the mask to apply to tertiary weights for the given options. */
+ static int getTertiaryMask(int options) {
+ // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
+ return isTertiaryWithCaseBits(options) ?
+ Collation.CASE_AND_TERTIARY_MASK : Collation.ONLY_TERTIARY_MASK;
+ }
+
+ static boolean sortsTertiaryUpperCaseFirst(int options) {
+ // On tertiary level, consider case bits and sort uppercase first
+ // if caseLevel is off and caseFirst==upperFirst.
+ return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK;
+ }
+
+ public boolean dontCheckFCD() {
+ return (options & CHECK_FCD) == 0;
+ }
+
+ boolean hasBackwardSecondary() {
+ return (options & BACKWARD_SECONDARY) != 0;
+ }
+
+ public boolean isNumeric() {
+ return (options & NUMERIC) != 0;
+ }
+
+ /** CHECK_FCD etc. Defaults: tertiary strength, maxVariable=punct. */
+ public int options = (Collator.TERTIARY << STRENGTH_SHIFT) | // DEFAULT_STRENGTH
+ (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT);
+ /** Variable-top primary weight. */
+ public long variableTop;
+ /** 256-byte table for reordering permutation of primary lead bytes; null if no reordering. */
+ public byte[] reorderTable;
+ /** Array of reorder codes; ignored if length == 0. */
+ public int[] reorderCodes = EMPTY_INT_ARRAY;
+ // Note: In C++, we keep a memory block around for the reorder codes and the permutation table,
+ // and modify them for new codes.
+ // In Java, we simply copy references and then never modify the array contents.
+ // The caller must abandon the arrays.
+ // Reorder codes from the public setter API must be cloned.
+ private static final int[] EMPTY_INT_ARRAY = new int[0];
+
+ /** Options for CollationFastLatin. Negative if disabled. */
+ public int fastLatinOptions = -1;
+ // fastLatinPrimaries.length must be equal to CollationFastLatin.LATIN_LIMIT,
+ // but we do not import CollationFastLatin to reduce circular dependencies.
+ public char[] fastLatinPrimaries = new char[0x180]; // mutable contents
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* CollationTailoring.java, ported from collationtailoring.h/.cpp
+*
+* C++ version created on: 2013mar12
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.util.Map;
+
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.Normalizer2Impl;
+import com.ibm.icu.impl.Trie2_32;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+ * Collation tailoring data & settings.
+ * This is a container of values for a collation tailoring
+ * built from rules or deserialized from binary data.
+ *
+ * It is logically immutable: Do not modify its values.
+ * The fields are public for convenience.
+ */
+public final class CollationTailoring {
+ /**
+ * Creates a tailoring that starts from a clone of baseSettings
+ * (which must not carry a reordering), or from fresh default settings.
+ */
+ CollationTailoring(SharedObject.Reference<CollationSettings> baseSettings) {
+ if(baseSettings != null) {
+ assert(baseSettings.readOnly().reorderCodes.length == 0);
+ assert(baseSettings.readOnly().reorderTable == null);
+ settings = baseSettings.clone();
+ } else {
+ settings = new SharedObject.Reference<CollationSettings>(new CollationSettings());
+ }
+ }
+
+ /** Lazily creates ownedData (backed by NFC data) and points data at it. */
+ void ensureOwnedData() {
+ if(ownedData == null) {
+ Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
+ ownedData = new CollationData(nfcImpl);
+ }
+ data = ownedData;
+ }
+
+ /** Packs a UCA version u.v.w into the base-version format; see the version field comments. */
+ static VersionInfo makeBaseVersion(VersionInfo ucaVersion) {
+ return VersionInfo.getInstance(
+ VersionInfo.UCOL_BUILDER_VERSION.getMajor(),
+ (ucaVersion.getMajor() << 3) + ucaVersion.getMinor(),
+ ucaVersion.getMilli() << 6,
+ 0);
+ }
+ /** Combines the base version with a rules version r.s.t.q; see the version field comments. */
+ void setVersion(VersionInfo baseVersion, VersionInfo rulesVersion) {
+ version = VersionInfo.getInstance(
+ VersionInfo.UCOL_BUILDER_VERSION.getMajor(),
+ baseVersion.getMinor(),
+ (baseVersion.getMilli() & 0xc0) + ((rulesVersion.getMajor() + (rulesVersion.getMajor() >> 6)) & 0x3f),
+ (rulesVersion.getMinor() << 3) + (rulesVersion.getMinor() >> 5) + rulesVersion.getMilli() +
+ (rulesVersion.getMicro() << 4) + (rulesVersion.getMicro() >> 4));
+ }
+ /** Recovers the UCA version number (u.v) from the packed version field. */
+ int getUCAVersion() {
+ return (version.getMinor() << 4) | (version.getMilli() >> 6);
+ }
+
+ // data for sorting etc.
+ public CollationData data; // == base data or ownedData
+ public SharedObject.Reference<CollationSettings> settings; // reference-counted
+ public String rules = "";
+ // The locale is null (C++: bogus) when built from rules or constructed from a binary blob.
+ // It can then be set by the service registration code which is thread-safe.
+ public ULocale actualLocale = ULocale.ROOT;
+ // UCA version u.v.w & rules version r.s.t.q:
+ // version[0]: builder version (runtime version is mixed in at runtime)
+ // version[1]: bits 7..3=u, bits 2..0=v
+ // version[2]: bits 7..6=w, bits 5..0=r
+ // version[3]= (s<<3)+(s>>5)+t+(q<<4)+(q>>4) -- matches setVersion() above
+ public VersionInfo version = ZERO_VERSION;
+ private static final VersionInfo ZERO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
+
+ // owned objects
+ CollationData ownedData;
+ Trie2_32 trie;
+ UnicodeSet unsafeBackwardSet;
+ public Map<Integer, Integer> maxExpansions;
+
+ /*
+ * Not Cloneable: A CollationTailoring cannot be copied.
+ * It is immutable, and the data trie cannot be copied either.
+ */
+}
--- /dev/null
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* CollationWeights.java, ported from collationweights.h/.cpp
+*
+* C++ version created on: 2001mar08 as ucol_wgt.h
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.util.Arrays;
+
+/**
+ * Allocates n collation element weights between two exclusive limits.
+ * Used only internally by the collation tailoring builder.
+ */
+public final class CollationWeights {
+ public CollationWeights() {}
+
+ // The minBytes[]/maxBytes[] arrays are indexed by weight byte position 1..4
+ // (byte 1 is the lead byte); index 0 is unused.
+
+ /** Configures byte ranges for allocating primary weights (1..4 bytes). */
+ public void initForPrimary(boolean compressible) {
+ middleLength=1;
+ minBytes[1] = Collation.MERGE_SEPARATOR_BYTE + 1;
+ maxBytes[1] = Collation.TRAIL_WEIGHT_BYTE;
+ if(compressible) {
+ minBytes[2] = Collation.PRIMARY_COMPRESSION_LOW_BYTE + 1;
+ maxBytes[2] = Collation.PRIMARY_COMPRESSION_HIGH_BYTE - 1;
+ } else {
+ minBytes[2] = 2;
+ maxBytes[2] = 0xff;
+ }
+ minBytes[3] = 2;
+ maxBytes[3] = 0xff;
+ minBytes[4] = 2;
+ maxBytes[4] = 0xff;
+ }
+
+ /** Configures byte ranges for allocating secondary weights. */
+ public void initForSecondary() {
+ // We use only the lower 16 bits for secondary weights.
+ middleLength=3;
+ minBytes[1] = 0;
+ maxBytes[1] = 0;
+ minBytes[2] = 0;
+ maxBytes[2] = 0;
+ minBytes[3] = Collation.MERGE_SEPARATOR_BYTE + 1;
+ maxBytes[3] = 0xff;
+ minBytes[4] = 2;
+ maxBytes[4] = 0xff;
+ }
+
+ /** Configures byte ranges for allocating tertiary weights. */
+ public void initForTertiary() {
+ // We use only the lower 16 bits for tertiary weights.
+ middleLength=3;
+ minBytes[1] = 0;
+ maxBytes[1] = 0;
+ minBytes[2] = 0;
+ maxBytes[2] = 0;
+ // We use only 6 bits per byte.
+ // The other bits are used for case & quaternary weights.
+ minBytes[3] = Collation.MERGE_SEPARATOR_BYTE + 1;
+ maxBytes[3] = 0x3f;
+ minBytes[4] = 2;
+ maxBytes[4] = 0x3f;
+ }
+
+ /**
+ * Determine heuristically
+ * what ranges to use for a given number of weights between (excluding)
+ * two limits.
+ *
+ * @param lowerLimit A collation element weight; the ranges will be filled to cover
+ * weights greater than this one.
+ * @param upperLimit A collation element weight; the ranges will be filled to cover
+ * weights less than this one.
+ * @param n The number of collation element weights w necessary such that
+ * lowerLimit<w<upperLimit in lexical order.
+ * @return true if it is possible to fit n elements between the limits
+ */
+ public boolean allocWeights(long lowerLimit, long upperLimit, int n) {
+ // Call getWeightRanges() and then determine heuristically
+ // which ranges to use for a given number of weights between (excluding)
+ // two limits.
+ // puts("");
+
+ if(!getWeightRanges(lowerLimit, upperLimit)) {
+ // printf("error: unable to get Weight ranges\n");
+ return false;
+ }
+
+ /* try until we find suitably large ranges */
+ for(;;) {
+ /* get the smallest number of bytes in a range */
+ // ranges[] is maintained sorted with the shortest (by byte length) ranges first.
+ int minLength=ranges[0].length;
+
+ if(allocWeightsInShortRanges(n, minLength)) { break; }
+
+ if(minLength == 4) {
+ // 4-byte ranges cannot be lengthened any further.
+ // printf("error: the maximum number of %ld weights is insufficient for n=%ld\n",
+ // minLengthCount, n);
+ return false;
+ }
+
+ if(allocWeightsInMinLengthRanges(n, minLength)) { break; }
+
+ /* no good match, lengthen all minLength ranges and iterate */
+ // printf("lengthen the short ranges from %ld bytes to %ld and iterate\n", minLength, minLength+1);
+ // NOTE(review): this loop assumes the run of minLength ranges is terminated by a
+ // range of a different length before the array end -- confirm rangeCount invariant.
+ for(int i=0; ranges[i].length==minLength; ++i) {
+ lengthenRange(ranges[i]);
+ }
+ }
+
+ /* puts("final ranges:");
+ for(int i=0; i<rangeCount; ++i) {
+ printf("ranges[%ld] .start=0x%08lx .end=0x%08lx .length=%ld .count=%ld\n",
+ i, ranges[i].start, ranges[i].end, ranges[i].length, ranges[i].count);
+ } */
+
+ // Prepare for iteration via nextWeight().
+ rangeIndex = 0;
+ if(rangeCount < ranges.length) {
+ ranges[rangeCount] = null; // force a crash when going out of bounds
+ }
+ return true;
+ }
+
+ /**
+ * Given a set of ranges calculated by allocWeights(),
+ * iterate through the weights.
+ * The ranges are modified to keep the current iteration state.
+ *
+ * @return The next weight in the ranges, or 0xffffffff if there is none left.
+ */
+ public long nextWeight() {
+ if(rangeIndex >= rangeCount) {
+ return 0xffffffffL;
+ } else {
+ /* get the next weight */
+ WeightRange range = ranges[rangeIndex];
+ long weight = range.start;
+ if(--range.count == 0) {
+ /* this range is finished */
+ ++rangeIndex;
+ } else {
+ /* increment the weight for the next value */
+ range.start = incWeight(weight, range.length);
+ assert(range.start <= range.end);
+ }
+
+ return weight;
+ }
+ }
+
+ /**
+ * A contiguous range of weights [start..end] of the same byte length,
+ * with count values remaining. Ranges order by ascending start weight.
+ * @internal
+ */
+ private static final class WeightRange implements Comparable<WeightRange> {
+ long start, end;
+ int length, count;
+
+ // Java 6: @Override
+ public int compareTo(WeightRange other) {
+ if(start < other.start) { return -1; }
+ if(start > other.start) { return 1; }
+ return 0;
+ }
+ }
+
+ /* helper functions for CE weights */
+ // A weight is a 32-bit value stored in a long; its 1..4 bytes are
+ // left-aligned (byte 1 is bits 31..24) and unused trailing bytes are 0.
+
+ /** Returns the number of significant bytes (1..4) in a weight. */
+ public static int lengthOfWeight(long weight) {
+ if((weight&0xffffff)==0) {
+ return 1;
+ } else if((weight&0xffff)==0) {
+ return 2;
+ } else if((weight&0xff)==0) {
+ return 3;
+ } else {
+ return 4;
+ }
+ }
+
+ /** Returns byte number 'length' (the last significant byte) of the weight. */
+ private static int getWeightTrail(long weight, int length) {
+ return (int)(weight>>(8*(4-length)))&0xff;
+ }
+
+ /** Replaces byte number 'length' of the weight with trail, clearing later bytes. */
+ private static long setWeightTrail(long weight, int length, int trail) {
+ length=8*(4-length);
+ return (weight&(0xffffff00L<<length))|((long)trail<<length);
+ }
+
+ private static int getWeightByte(long weight, int idx) {
+ return getWeightTrail(weight, idx); /* same calculation */
+ }
+
+ /** Replaces byte idx (1..4) of the weight with b, keeping all other bytes. */
+ private static long setWeightByte(long weight, int idx, int b) {
+ long mask; /* 0xffffffff except a 00 "hole" for the index-th byte */
+
+ idx*=8;
+ if(idx<32) {
+ mask=0xffffffffL>>idx;
+ } else {
+ // Do not use int>>32 because on some platforms that does not shift at all
+ // while we need it to become 0.
+ // PowerPC: 0xffffffff>>32 = 0 (wanted)
+ // x86: 0xffffffff>>32 = 0xffffffff (not wanted)
+ //
+ // ANSI C99 6.5.7 Bitwise shift operators:
+ // "If the value of the right operand is negative
+ // or is greater than or equal to the width of the promoted left operand,
+ // the behavior is undefined."
+ mask=0;
+ }
+ idx=32-idx;
+ mask|=0xffffff00L<<idx;
+ return (weight&mask)|((long)b<<idx);
+ }
+
+ /** Keeps only the first 'length' bytes of the weight; zeroes the rest. */
+ private static long truncateWeight(long weight, int length) {
+ return weight&(0xffffffffL<<(8*(4-length)));
+ }
+
+ /** Increments byte number 'length' of the weight by one (no carry handling). */
+ private static long incWeightTrail(long weight, int length) {
+ return weight+(1L<<(8*(4-length)));
+ }
+
+ /** Decrements byte number 'length' of the weight by one (no borrow handling). */
+ private static long decWeightTrail(long weight, int length) {
+ return weight-(1L<<(8*(4-length)));
+ }
+
+ /** @return number of usable byte values for byte idx */
+ private int countBytes(int idx) {
+ return maxBytes[idx] - minBytes[idx] + 1;
+ }
+
+ /**
+ * Increments a weight of the given byte length to the next usable value,
+ * carrying into earlier bytes when a byte exceeds its maxBytes limit.
+ */
+ private long incWeight(long weight, int length) {
+ for(;;) {
+ int b=getWeightByte(weight, length);
+ if(b<maxBytes[length]) {
+ return setWeightByte(weight, length, b+1);
+ } else {
+ // Roll over, set this byte to the minimum and increment the previous one.
+ weight=setWeightByte(weight, length, minBytes[length]);
+ --length;
+ assert(length > 0);
+ }
+ }
+ }
+
+ /**
+ * Advances a weight of the given byte length by offset usable values,
+ * distributing the carry across bytes according to each byte's usable count.
+ */
+ private long incWeightByOffset(long weight, int length, int offset) {
+ for(;;) {
+ offset += getWeightByte(weight, length);
+ if(offset <= maxBytes[length]) {
+ return setWeightByte(weight, length, offset);
+ } else {
+ // Split the offset between this byte and the previous one.
+ offset -= minBytes[length];
+ weight = setWeightByte(weight, length, minBytes[length] + offset % countBytes(length));
+ offset /= countBytes(length);
+ --length;
+ assert(length > 0);
+ }
+ }
+ }
+
+ /**
+ * Extends a range by one byte: appends the full usable byte range,
+ * multiplying its count accordingly.
+ */
+ private void lengthenRange(WeightRange range) {
+ int length=range.length+1;
+ range.start=setWeightTrail(range.start, length, minBytes[length]);
+ range.end=setWeightTrail(range.end, length, maxBytes[length]);
+ range.count*=countBytes(length);
+ range.length=length;
+ }
+
+ /**
+ * Takes two CE weights and calculates the
+ * possible ranges of weights between the two limits, excluding them.
+ * For weights with up to 4 bytes there are up to 2*4-1=7 ranges.
+ *
+ * The resulting ranges are stored in this.ranges[0..rangeCount-1],
+ * shortest weights first.
+ *
+ * @param lowerLimit weight below the gap (exclusive); must not be 0
+ * @param upperLimit weight above the gap (exclusive); must not be 0
+ * @return true if at least one non-empty allocation range was found
+ */
+ private boolean getWeightRanges(long lowerLimit, long upperLimit) {
+ assert(lowerLimit != 0);
+ assert(upperLimit != 0);
+
+ /* get the lengths of the limits */
+ int lowerLength=lengthOfWeight(lowerLimit);
+ int upperLength=lengthOfWeight(upperLimit);
+
+ // printf("length of lower limit 0x%08lx is %ld\n", lowerLimit, lowerLength);
+ // printf("length of upper limit 0x%08lx is %ld\n", upperLimit, upperLength);
+ assert(lowerLength>=middleLength);
+ // Permit upperLength<middleLength: The upper limit for secondaries is 0x10000.
+
+ if(lowerLimit>=upperLimit) {
+ // printf("error: no space between lower & upper limits\n");
+ return false;
+ }
+
+ /* check that neither is a prefix of the other */
+ if(lowerLength<upperLength) {
+ if(lowerLimit==truncateWeight(upperLimit, lowerLength)) {
+ // printf("error: lower limit 0x%08lx is a prefix of upper limit 0x%08lx\n", lowerLimit, upperLimit);
+ return false;
+ }
+ }
+ /* if the upper limit is a prefix of the lower limit then the earlier test lowerLimit>=upperLimit has caught it */
+
+ WeightRange[] lower = new WeightRange[5]; /* [0] and [1] are not used - this simplifies indexing */
+ WeightRange middle = new WeightRange();
+ WeightRange[] upper = new WeightRange[5];
+
+ /*
+ * With the limit lengths of 1..4, there are up to 7 ranges for allocation:
+ * range minimum length
+ * lower[4] 4
+ * lower[3] 3
+ * lower[2] 2
+ * middle 1
+ * upper[2] 2
+ * upper[3] 3
+ * upper[4] 4
+ *
+ * We are now going to calculate up to 7 ranges.
+ * Some of them will typically overlap, so we will then have to merge and eliminate ranges.
+ */
+ // Walk up from the lower limit: at each length > middleLength, the weights
+ // whose trail byte is above the limit's trail byte form a candidate range.
+ long weight=lowerLimit;
+ for(int length=lowerLength; length>middleLength; --length) {
+ int trail=getWeightTrail(weight, length);
+ if(trail<maxBytes[length]) {
+ lower[length] = new WeightRange();
+ lower[length].start=incWeightTrail(weight, length);
+ lower[length].end=setWeightTrail(weight, length, maxBytes[length]);
+ lower[length].length=length;
+ lower[length].count=maxBytes[length]-trail;
+ }
+ weight=truncateWeight(weight, length-1);
+ }
+ if(weight<0xff000000L) {
+ middle.start=incWeightTrail(weight, middleLength);
+ } else {
+ // Prevent overflow for primary lead byte FF
+ // which would yield a middle range starting at 0.
+ middle.start=0xffffffffL; // no middle range
+ }
+
+ // Walk down from the upper limit: at each length > middleLength, the weights
+ // whose trail byte is below the limit's trail byte form a candidate range.
+ weight=upperLimit;
+ for(int length=upperLength; length>middleLength; --length) {
+ int trail=getWeightTrail(weight, length);
+ if(trail>minBytes[length]) {
+ upper[length] = new WeightRange();
+ upper[length].start=setWeightTrail(weight, length, minBytes[length]);
+ upper[length].end=decWeightTrail(weight, length);
+ upper[length].length=length;
+ upper[length].count=trail-minBytes[length];
+ }
+ weight=truncateWeight(weight, length-1);
+ }
+ middle.end=decWeightTrail(weight, middleLength);
+
+ /* set the middle range */
+ middle.length=middleLength;
+ if(middle.end>=middle.start) {
+ middle.count=(int)((middle.end-middle.start)>>(8*(4-middleLength)))+1;
+ } else {
+ /* no middle range, eliminate overlaps */
+
+ /* reduce or remove the lower ranges that go beyond upperLimit */
+ for(int length=4; length>middleLength; --length) {
+ if(lower[length] != null && upper[length] != null &&
+ lower[length].count>0 && upper[length].count>0) {
+ long start=upper[length].start;
+ long end=lower[length].end;
+
+ if(end>=start || incWeight(end, length)==start) {
+ /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */
+ start=lower[length].start;
+ end=lower[length].end=upper[length].end;
+ /*
+ * merging directly adjacent ranges needs to subtract the 0/1 gaps in between;
+ * it may result in a range with count>countBytes
+ */
+ lower[length].count=
+ getWeightTrail(end, length)-getWeightTrail(start, length)+1+
+ countBytes(length)*(getWeightByte(end, length-1)-getWeightByte(start, length-1));
+ upper[length].count=0;
+ // Shorter ranges are fully contained in the merged one; drop them.
+ while(--length>middleLength) {
+ if(lower[length] != null) {
+ lower[length].count = 0;
+ }
+ if(upper[length] != null) {
+ upper[length].count = 0;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ /* print ranges
+ for(int length=4; length>=2; --length) {
+ if(lower[length].count>0) {
+ printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, lower[length].start, lower[length].end, lower[length].count);
+ }
+ }
+ if(middle.count>0) {
+ printf("middle .start=0x%08lx .end=0x%08lx .count=%ld\n", middle.start, middle.end, middle.count);
+ }
+ for(int length=2; length<=4; ++length) {
+ if(upper[length].count>0) {
+ printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, upper[length].start, upper[length].end, upper[length].count);
+ }
+ } */
+
+ /* copy the ranges, shortest first, into the result array */
+ rangeCount=0;
+ if(middle.count>0) {
+ ranges[0] = middle;
+ rangeCount=1;
+ }
+ for(int length=middleLength+1; length<=4; ++length) {
+ /* copy upper first so that later the middle range is more likely the first one to use */
+ if(upper[length] != null && upper[length].count>0) {
+ ranges[rangeCount++]=upper[length];
+ }
+ if(lower[length] != null && lower[length].count>0) {
+ ranges[rangeCount++]=lower[length];
+ }
+ }
+ return rangeCount>0;
+ }
+
+ /**
+ * Tries to satisfy a request for n weights from the leading ranges whose
+ * weight length is minLength or minLength+1.
+ * On success, trims ranges[] to exactly the ranges used and sorts them.
+ *
+ * @return true if the first few short ranges hold at least n weights
+ */
+ private boolean allocWeightsInShortRanges(int n, int minLength) {
+ int i = 0;
+ while (i < rangeCount && ranges[i].length <= (minLength + 1)) {
+ if (n <= ranges[i].count) {
+ // ranges[0..i] together provide enough weights.
+ if (ranges[i].length > minLength) {
+ // Reduce the number of weights from the last minLength+1 range
+ // which might sort before some minLength ranges,
+ // so that we use all weights in the minLength ranges.
+ ranges[i].count = n;
+ }
+ rangeCount = i + 1;
+ // printf("take first %ld ranges\n", rangeCount);
+
+ if (rangeCount > 1) {
+ /* sort the ranges by weight values */
+ Arrays.sort(ranges, 0, rangeCount);
+ }
+ return true;
+ }
+ n -= ranges[i].count; // still >0
+ ++i;
+ }
+ return false;
+ }
+
+ /**
+ * Tries to satisfy a request for n weights from the minLength ranges by
+ * merging them into one span and splitting that span into a minLength part
+ * (count1 weights) and a lengthened minLength+1 part (count2 weights, each
+ * expanded into countBytes(minLength+1) longer weights).
+ *
+ * @return true if count * countBytes(minLength+1) >= n, i.e. lengthening suffices
+ */
+ private boolean allocWeightsInMinLengthRanges(int n, int minLength) {
+ // See if the minLength ranges have enough weights
+ // when we split one and lengthen the following ones.
+ int count = 0;
+ int minLengthRangeCount;
+ for(minLengthRangeCount = 0;
+ minLengthRangeCount < rangeCount &&
+ ranges[minLengthRangeCount].length == minLength;
+ ++minLengthRangeCount) {
+ count += ranges[minLengthRangeCount].count;
+ }
+
+ int nextCountBytes = countBytes(minLength + 1);
+ if(n > count * nextCountBytes) { return false; }
+
+ // Use the minLength ranges. Merge them, and then split again as necessary.
+ long start = ranges[0].start;
+ long end = ranges[0].end;
+ for(int i = 1; i < minLengthRangeCount; ++i) {
+ if(ranges[i].start < start) { start = ranges[i].start; }
+ if(ranges[i].end > end) { end = ranges[i].end; }
+ }
+
+ // Calculate how to split the range between minLength (count1) and minLength+1 (count2).
+ // Goal:
+ // count1 + count2 * nextCountBytes = n
+ // count1 + count2 = count
+ // These turn into
+ // (count - count2) + count2 * nextCountBytes = n
+ // and then into the following count1 & count2 computations.
+ int count2 = (n - count) / (nextCountBytes - 1); // number of weights to be lengthened
+ int count1 = count - count2; // number of minLength weights
+ if(count2 == 0 || (count1 + count2 * nextCountBytes) < n) {
+ // round up
+ ++count2;
+ --count1;
+ assert((count1 + count2 * nextCountBytes) >= n);
+ }
+
+ ranges[0].start = start;
+
+ if(count1 == 0) {
+ // Make one long range.
+ ranges[0].end = end;
+ ranges[0].count = count;
+ lengthenRange(ranges[0]);
+ rangeCount = 1;
+ } else {
+ // Split the range, lengthen the second part.
+ // printf("split the range number %ld (out of %ld minLength ranges) by %ld:%ld\n",
+ // splitRange, rangeCount, count1, count2);
+
+ // Next start = start + count1. First end = 1 before that.
+ ranges[0].end = incWeightByOffset(start, minLength, count1 - 1);
+ ranges[0].count = count1;
+
+ if(ranges[1] == null) {
+ ranges[1] = new WeightRange();
+ }
+ ranges[1].start = incWeight(ranges[0].end, minLength);
+ ranges[1].end = end;
+ ranges[1].length = minLength; // +1 when lengthened
+ ranges[1].count = count2; // *countBytes when lengthened
+ lengthenRange(ranges[1]);
+ rangeCount = 2;
+ }
+ return true;
+ }
+
+ // Shortest allowed weight length for the current allocation.
+ private int middleLength;
+ private int[] minBytes = new int[5]; // minimum value for weight byte 1..4; [0] unused
+ private int[] maxBytes = new int[5]; // maximum value for weight byte 1..4; [0] unused
+ private WeightRange[] ranges = new WeightRange[7]; // allocation ranges, shortest weights first
+ private int rangeIndex; // next range to take weights from
+ private int rangeCount; // number of valid entries in ranges[]
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* ContractionsAndExpansions.java, ported from collationsets.h/.cpp
+*
+* C++ version created on: 2013feb09
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.util.Iterator;
+
+import com.ibm.icu.impl.Trie2;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.CharsTrie;
+import com.ibm.icu.util.CharsTrie.Entry;
+
+/**
+ * Walks collation data and reports contraction strings, expansion strings/code points,
+ * and (optionally) their CEs to the given UnicodeSets and CESink.
+ * Ported from ICU4C collationsets.h/.cpp.
+ */
+public final class ContractionsAndExpansions {
+ // C++: The following fields are @internal, only public for access by callback.
+ private CollationData data;
+ private UnicodeSet contractions;
+ private UnicodeSet expansions;
+ private CESink sink;
+ private boolean addPrefixes;
+ private int checkTailored = 0; // -1: collected tailored +1: exclude tailored
+ private UnicodeSet tailored = new UnicodeSet();
+ private UnicodeSet ranges;
+ private StringBuilder unreversedPrefix = new StringBuilder();
+ private String suffix;
+ private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH];
+
+ /** Receives the CEs of enumerated mappings. */
+ public static interface CESink {
+ void handleCE(long ce);
+ void handleExpansion(long ces[], int start, int length);
+ }
+
+ /**
+ * @param con set that receives contraction strings, or null
+ * @param exp set that receives expansion strings/code points, or null
+ * @param s receives CEs, or null
+ * @param prefixes whether prefix (pre-context) mappings are enumerated too
+ */
+ public ContractionsAndExpansions(UnicodeSet con, UnicodeSet exp, CESink s, boolean prefixes) {
+ contractions = con;
+ expansions = exp;
+ sink = s;
+ addPrefixes = prefixes;
+ }
+
+ public void forData(CollationData d) {
+ // Add all from the data, can be tailoring or base.
+ if (d.base != null) {
+ checkTailored = -1;
+ }
+ data = d;
+ Iterator<Trie2.Range> trieIterator = data.trie.iterator();
+ Trie2.Range range;
+ while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
+ enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this);
+ }
+ if (d.base == null) {
+ return;
+ }
+ // Add all from the base data but only for un-tailored code points.
+ tailored.freeze();
+ checkTailored = 1;
+ data = d.base;
+ trieIterator = data.trie.iterator();
+ while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
+ enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this);
+ }
+ }
+
+ private void enumCnERange(int start, int end, int ce32, ContractionsAndExpansions cne) {
+ if (cne.checkTailored == 0) {
+ // There is no tailoring.
+ // No need to collect nor check the tailored set.
+ } else if (cne.checkTailored < 0) {
+ // Collect the set of code points with mappings in the tailoring data.
+ if (ce32 == Collation.FALLBACK_CE32) {
+ return; // fallback to base, not tailored
+ } else {
+ cne.tailored.add(start, end);
+ }
+ // checkTailored > 0: Exclude tailored ranges from the base data enumeration.
+ } else if (start == end) {
+ if (cne.tailored.contains(start)) {
+ return;
+ }
+ } else if (cne.tailored.containsSome(start, end)) {
+ if (cne.ranges == null) {
+ cne.ranges = new UnicodeSet();
+ }
+ cne.ranges.set(start, end).removeAll(cne.tailored);
+ int count = cne.ranges.getRangeCount();
+ for (int i = 0; i < count; ++i) {
+ cne.handleCE32(cne.ranges.getRangeStart(i), cne.ranges.getRangeEnd(i), ce32);
+ }
+ // Fix: the untailored sub-ranges have been handled above; without this
+ // return, the full range (including tailored code points) would be
+ // handled again below, defeating the exclusion and double-processing
+ // the untailored code points. Matches ICU4C collationsets.cpp.
+ return;
+ }
+ cne.handleCE32(start, end, ce32);
+ }
+
+ public void forCodePoint(CollationData d, int c) {
+ int ce32 = d.getCE32(c);
+ if (ce32 == Collation.FALLBACK_CE32) {
+ d = d.base;
+ ce32 = d.getCE32(c);
+ }
+ data = d;
+ handleCE32(c, c, ce32);
+ }
+
+ private void handleCE32(int start, int end, int ce32) {
+ for (;;) {
+ if ((ce32 & 0xff) < Collation.SPECIAL_CE32_LOW_BYTE) {
+ // !isSpecialCE32()
+ if (sink != null) {
+ sink.handleCE(Collation.ceFromSimpleCE32(ce32));
+ }
+ return;
+ }
+ switch (Collation.tagFromCE32(ce32)) {
+ case Collation.FALLBACK_TAG:
+ return;
+ case Collation.RESERVED_TAG_3:
+ case Collation.BUILDER_DATA_TAG:
+ case Collation.LEAD_SURROGATE_TAG:
+ // Java porting note: U_INTERNAL_PROGRAM_ERROR is set to errorCode in ICU4C.
+ throw new AssertionError(
+ String.format("Unexpected CE32 tag type %d for ce32=0x%08x",
+ Collation.tagFromCE32(ce32), ce32));
+ case Collation.LONG_PRIMARY_TAG:
+ if (sink != null) {
+ sink.handleCE(Collation.ceFromLongPrimaryCE32(ce32));
+ }
+ return;
+ case Collation.LONG_SECONDARY_TAG:
+ if (sink != null) {
+ sink.handleCE(Collation.ceFromLongSecondaryCE32(ce32));
+ }
+ return;
+ case Collation.LATIN_EXPANSION_TAG:
+ if (sink != null) {
+ ces[0] = Collation.latinCE0FromCE32(ce32);
+ ces[1] = Collation.latinCE1FromCE32(ce32);
+ sink.handleExpansion(ces, 0, 2);
+ }
+ // Optimization: If we have a prefix,
+ // then the relevant strings have been added already.
+ if (unreversedPrefix.length() == 0) {
+ addExpansions(start, end);
+ }
+ return;
+ case Collation.EXPANSION32_TAG:
+ if (sink != null) {
+ int idx = Collation.indexFromCE32(ce32);
+ int length = Collation.lengthFromCE32(ce32);
+ for (int i = 0; i < length; ++i) {
+ ces[i] = Collation.ceFromCE32(data.ce32s[idx + i]);
+ }
+ sink.handleExpansion(ces, 0, length);
+ }
+ // Optimization: If we have a prefix,
+ // then the relevant strings have been added already.
+ if (unreversedPrefix.length() == 0) {
+ addExpansions(start, end);
+ }
+ return;
+ case Collation.EXPANSION_TAG:
+ if (sink != null) {
+ int idx = Collation.indexFromCE32(ce32);
+ int length = Collation.lengthFromCE32(ce32);
+ sink.handleExpansion(data.ces, idx, length);
+ }
+ // Optimization: If we have a prefix,
+ // then the relevant strings have been added already.
+ if (unreversedPrefix.length() == 0) {
+ addExpansions(start, end);
+ }
+ return;
+ case Collation.PREFIX_TAG:
+ handlePrefixes(start, end, ce32);
+ return;
+ case Collation.CONTRACTION_TAG:
+ handleContractions(start, end, ce32);
+ return;
+ case Collation.DIGIT_TAG:
+ // Fetch the non-numeric-collation CE32 and continue.
+ ce32 = data.ce32s[Collation.indexFromCE32(ce32)];
+ break;
+ case Collation.U0000_TAG:
+ assert (start == 0 && end == 0);
+ // Fetch the normal ce32 for U+0000 and continue.
+ ce32 = data.ce32s[0];
+ break;
+ case Collation.HANGUL_TAG:
+ if (sink != null) {
+ // TODO: This should be optimized,
+ // especially if [start..end] is the complete Hangul range. (assert that)
+ UTF16CollationIterator iter = new UTF16CollationIterator(data);
+ StringBuilder hangul = new StringBuilder(1);
+ for (int c = start; c <= end; ++c) {
+ hangul.setLength(0);
+ hangul.appendCodePoint(c);
+ iter.setText(false, hangul, 0);
+ int length = iter.fetchCEs();
+ // Ignore the terminating non-CE.
+ assert (length >= 2 && iter.getCE(length - 1) == Collation.NO_CE);
+ sink.handleExpansion(iter.getCEs(), 0, length - 1);
+ }
+ }
+ // Optimization: If we have a prefix,
+ // then the relevant strings have been added already.
+ if (unreversedPrefix.length() == 0) {
+ addExpansions(start, end);
+ }
+ return;
+ case Collation.OFFSET_TAG:
+ // Currently no need to send offset CEs to the sink.
+ return;
+ case Collation.IMPLICIT_TAG:
+ // Currently no need to send implicit CEs to the sink.
+ return;
+ }
+ }
+ }
+
+ private void handlePrefixes(int start, int end, int ce32) {
+ int index = Collation.indexFromCE32(ce32);
+ ce32 = data.getCE32FromContexts(index); // Default if no prefix match.
+ handleCE32(start, end, ce32);
+ if (!addPrefixes) {
+ return;
+ }
+ CharsTrie.Iterator prefixes = new CharsTrie(data.contexts, index + 2).iterator();
+ while (prefixes.hasNext()) {
+ Entry e = prefixes.next();
+ setPrefix(e.chars);
+ // Prefix/pre-context mappings are special kinds of contractions
+ // that always yield expansions.
+ addStrings(start, end, contractions);
+ addStrings(start, end, expansions);
+ handleCE32(start, end, e.value);
+ }
+ resetPrefix();
+ }
+
+ void handleContractions(int start, int end, int ce32) {
+ int index = Collation.indexFromCE32(ce32);
+ if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
+ // No match on the single code point.
+ // We are underneath a prefix, and the default mapping is just
+ // a fallback to the mappings for a shorter prefix.
+ assert (unreversedPrefix.length() != 0);
+ } else {
+ ce32 = data.getCE32FromContexts(index); // Default if no suffix match.
+ assert (!Collation.isContractionCE32(ce32));
+ handleCE32(start, end, ce32);
+ }
+ CharsTrie.Iterator suffixes = new CharsTrie(data.contexts, index + 2).iterator();
+ while (suffixes.hasNext()) {
+ Entry e = suffixes.next();
+ suffix = e.chars.toString();
+ addStrings(start, end, contractions);
+ if (unreversedPrefix.length() != 0) {
+ addStrings(start, end, expansions);
+ }
+ handleCE32(start, end, e.value);
+ }
+ suffix = null;
+ }
+
+ void addExpansions(int start, int end) {
+ if (unreversedPrefix.length() == 0 && suffix == null) {
+ if (expansions != null) {
+ expansions.add(start, end);
+ }
+ } else {
+ addStrings(start, end, expansions);
+ }
+ }
+
+ // Adds prefix+codePoint+suffix strings for every code point in [start..end].
+ void addStrings(int start, int end, UnicodeSet set) {
+ if (set == null) {
+ return;
+ }
+ StringBuilder s = new StringBuilder(unreversedPrefix);
+ do {
+ s.appendCodePoint(start);
+ if (suffix != null) {
+ s.append(suffix);
+ }
+ set.add(s);
+ s.setLength(unreversedPrefix.length());
+ } while (++start <= end);
+ }
+
+ // Prefixes are reversed in the data structure.
+ private void setPrefix(CharSequence pfx) {
+ unreversedPrefix.setLength(0);
+ unreversedPrefix.append(pfx).reverse();
+ }
+
+ private void resetPrefix() {
+ unreversedPrefix.setLength(0);
+ }
+}
\ No newline at end of file
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2012-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* FCDIterCollationIterator.java, ported from uitercollationiterator.h/.cpp
+*
+* C++ version created on: 2012sep23 (from utf16collationiterator.h)
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.impl.Normalizer2Impl;
+import com.ibm.icu.text.UCharacterIterator;
+
+/**
+ * Incrementally checks the input text for FCD and normalizes where necessary.
+ */
+public final class FCDIterCollationIterator extends IterCollationIterator {
+ /**
+ * @param data collation data (also provides the NFC implementation)
+ * @param numeric whether numeric collation is on
+ * @param ui the source text iterator
+ * @param startIndex initial text index, recorded as the FCD segment start
+ */
+ public FCDIterCollationIterator(CollationData data, boolean numeric,
+ UCharacterIterator ui, int startIndex) {
+ super(data, numeric, ui);
+ state = State.ITER_CHECK_FWD;
+ start = startIndex;
+ nfcImpl = data.nfcImpl;
+ }
+
+ @Override
+ public void resetToOffset(int newOffset) {
+ super.resetToOffset(newOffset);
+ start = newOffset;
+ state = State.ITER_CHECK_FWD;
+ }
+
+ @Override
+ public int getOffset() {
+ if(state.compareTo(State.ITER_CHECK_BWD) <= 0) {
+ // Checking directly on the text iterator.
+ return iter.getIndex();
+ } else if(state == State.ITER_IN_FCD_SEGMENT) {
+ return pos;
+ } else if(pos == 0) {
+ // Inside a normalized buffer: report the segment boundary.
+ return start;
+ } else {
+ return limit;
+ }
+ }
+
+ @Override
+ public int nextCodePoint() {
+ int c;
+ for(;;) {
+ if(state == State.ITER_CHECK_FWD) {
+ c = iter.next();
+ if(c < 0) {
+ return c;
+ }
+ if(CollationFCD.hasTccc(c)) {
+ // Possible FCD discontinuity; re-check the segment around c.
+ if(CollationFCD.maybeTibetanCompositeVowel(c) ||
+ CollationFCD.hasLccc(iter.current())) {
+ iter.previous();
+ if(!nextSegment()) {
+ return Collation.SENTINEL_CP;
+ }
+ continue;
+ }
+ }
+ if(isLeadSurrogate(c)) {
+ // Combine a surrogate pair into one code point.
+ int trail = iter.next();
+ if(isTrailSurrogate(trail)) {
+ return Character.toCodePoint((char)c, (char)trail);
+ } else if(trail >= 0) {
+ iter.previous();
+ }
+ }
+ return c;
+ } else if(state == State.ITER_IN_FCD_SEGMENT && pos != limit) {
+ c = iter.nextCodePoint();
+ pos += Character.charCount(c);
+ assert(c >= 0);
+ return c;
+ } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 &&
+ pos != normalized.length()) {
+ // Reading from the normalized buffer.
+ c = normalized.codePointAt(pos);
+ pos += Character.charCount(c);
+ return c;
+ } else {
+ switchToForward();
+ }
+ }
+ }
+
+ @Override
+ public int previousCodePoint() {
+ int c;
+ for(;;) {
+ if(state == State.ITER_CHECK_BWD) {
+ c = iter.previous();
+ if(c < 0) {
+ start = pos = 0;
+ state = State.ITER_IN_FCD_SEGMENT;
+ return Collation.SENTINEL_CP;
+ }
+ if(CollationFCD.hasLccc(c)) {
+ // Possible FCD discontinuity; re-check the segment around c.
+ int prev = Collation.SENTINEL_CP;
+ if(CollationFCD.maybeTibetanCompositeVowel(c) ||
+ CollationFCD.hasTccc(prev = iter.previous())) {
+ iter.next();
+ if(prev >= 0) {
+ iter.next();
+ }
+ if(!previousSegment()) {
+ return Collation.SENTINEL_CP;
+ }
+ continue;
+ }
+ // hasLccc(trail)=true for all trail surrogates
+ if(isTrailSurrogate(c)) {
+ if(prev < 0) {
+ prev = iter.previous();
+ }
+ if(isLeadSurrogate(prev)) {
+ return Character.toCodePoint((char)prev, (char)c);
+ }
+ }
+ if(prev >= 0) {
+ iter.next();
+ }
+ }
+ return c;
+ } else if(state == State.ITER_IN_FCD_SEGMENT && pos != start) {
+ c = iter.previousCodePoint();
+ pos -= Character.charCount(c);
+ assert(c >= 0);
+ return c;
+ } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos != 0) {
+ // Reading backward from the normalized buffer.
+ c = normalized.codePointBefore(pos);
+ pos -= Character.charCount(c);
+ return c;
+ } else {
+ switchToBackward();
+ }
+ }
+ }
+
+ @Override
+ protected long handleNextCE32() {
+ int c;
+ for(;;) {
+ if(state == State.ITER_CHECK_FWD) {
+ c = iter.next();
+ if(c < 0) {
+ return NO_CP_AND_CE32;
+ }
+ if(CollationFCD.hasTccc(c)) {
+ if(CollationFCD.maybeTibetanCompositeVowel(c) ||
+ CollationFCD.hasLccc(iter.current())) {
+ iter.previous();
+ if(!nextSegment()) {
+ // NOTE(review): returns the bare CE32 without the code point
+ // encoded in the upper half; nextSegment() in this port always
+ // returns true, so this path appears unreachable — confirm.
+ c = Collation.SENTINEL_CP;
+ return Collation.FALLBACK_CE32;
+ }
+ continue;
+ }
+ }
+ break;
+ } else if(state == State.ITER_IN_FCD_SEGMENT && pos != limit) {
+ c = iter.next();
+ ++pos;
+ assert(c >= 0);
+ break;
+ } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 &&
+ pos != normalized.length()) {
+ c = normalized.charAt(pos++);
+ break;
+ } else {
+ switchToForward();
+ }
+ }
+ return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead((char)c));
+ }
+
+ @Override
+ protected char handleGetTrailSurrogate() {
+ // Fetches the trail surrogate that follows a lead surrogate already consumed
+ // by handleNextCE32(), from whichever source is currently active.
+ if(state.compareTo(State.ITER_IN_FCD_SEGMENT) <= 0) {
+ int trail = iter.next();
+ if(isTrailSurrogate(trail)) {
+ if(state == State.ITER_IN_FCD_SEGMENT) { ++pos; }
+ } else if(trail >= 0) {
+ iter.previous();
+ }
+ return (char)trail;
+ } else {
+ assert(pos < normalized.length());
+ char trail;
+ if(Character.isLowSurrogate(trail = normalized.charAt(pos))) { ++pos; }
+ return trail;
+ }
+ }
+
+ @Override
+ protected void forwardNumCodePoints(int num) {
+ // Specify the class to avoid a virtual-function indirection.
+ // In Java, we would declare this class final.
+ while(num > 0 && nextCodePoint() >= 0) {
+ --num;
+ }
+ }
+
+ @Override
+ protected void backwardNumCodePoints(int num) {
+ // Specify the class to avoid a virtual-function indirection.
+ // In Java, we would declare this class final.
+ while(num > 0 && previousCodePoint() >= 0) {
+ --num;
+ }
+ }
+
+ /**
+ * Switches to forward checking if possible.
+ */
+ private void switchToForward() {
+ assert(state == State.ITER_CHECK_BWD ||
+ (state == State.ITER_IN_FCD_SEGMENT && pos == limit) ||
+ (state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos == normalized.length()));
+ if(state == State.ITER_CHECK_BWD) {
+ // Turn around from backward checking.
+ start = pos = iter.getIndex();
+ if(pos == limit) {
+ state = State.ITER_CHECK_FWD; // Check forward.
+ } else { // pos < limit
+ state = State.ITER_IN_FCD_SEGMENT; // Stay in FCD segment.
+ }
+ } else {
+ // Reached the end of the FCD segment.
+ if(state == State.ITER_IN_FCD_SEGMENT) {
+ // The input text segment is FCD, extend it forward.
+ } else {
+ // The input text segment needed to be normalized.
+ // Switch to checking forward from it.
+ if(state == State.IN_NORM_ITER_AT_START) {
+ iter.moveIndex(limit - start);
+ }
+ start = limit;
+ }
+ state = State.ITER_CHECK_FWD;
+ }
+ }
+
+ /**
+ * Extends the FCD text segment forward or normalizes around pos.
+ * @return true if success
+ */
+ private boolean nextSegment() {
+ assert(state == State.ITER_CHECK_FWD);
+ // The input text [start..(iter index)[ passes the FCD check.
+ pos = iter.getIndex();
+ // Collect the characters being checked, in case they need to be normalized.
+ if(s == null) {
+ s = new StringBuilder();
+ } else {
+ s.setLength(0);
+ }
+ int prevCC = 0;
+ for(;;) {
+ // Fetch the next character and its fcd16 value.
+ int c = iter.nextCodePoint();
+ if(c < 0) { break; }
+ int fcd16 = nfcImpl.getFCD16(c);
+ int leadCC = fcd16 >> 8;
+ if(leadCC == 0 && s.length() != 0) {
+ // FCD boundary before this character.
+ iter.previousCodePoint();
+ break;
+ }
+ s.appendCodePoint(c);
+ if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
+ // Fails FCD check. Find the next FCD boundary and normalize.
+ for(;;) {
+ c = iter.nextCodePoint();
+ if(c < 0) { break; }
+ if(nfcImpl.getFCD16(c) <= 0xff) {
+ iter.previousCodePoint();
+ break;
+ }
+ s.appendCodePoint(c);
+ }
+ normalize(s);
+ start = pos;
+ limit = pos + s.length();
+ state = State.IN_NORM_ITER_AT_LIMIT;
+ pos = 0;
+ return true;
+ }
+ prevCC = fcd16 & 0xff;
+ if(prevCC == 0) {
+ // FCD boundary after the last character.
+ break;
+ }
+ }
+ limit = pos + s.length();
+ assert(pos != limit);
+ iter.moveIndex(-s.length());
+ state = State.ITER_IN_FCD_SEGMENT;
+ return true;
+ }
+
+ /**
+ * Switches to backward checking.
+ */
+ private void switchToBackward() {
+ assert(state == State.ITER_CHECK_FWD ||
+ (state == State.ITER_IN_FCD_SEGMENT && pos == start) ||
+ (state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos == 0));
+ if(state == State.ITER_CHECK_FWD) {
+ // Turn around from forward checking.
+ limit = pos = iter.getIndex();
+ if(pos == start) {
+ state = State.ITER_CHECK_BWD; // Check backward.
+ } else { // pos > start
+ state = State.ITER_IN_FCD_SEGMENT; // Stay in FCD segment.
+ }
+ } else {
+ // Reached the start of the FCD segment.
+ if(state == State.ITER_IN_FCD_SEGMENT) {
+ // The input text segment is FCD, extend it backward.
+ } else {
+ // The input text segment needed to be normalized.
+ // Switch to checking backward from it.
+ if(state == State.IN_NORM_ITER_AT_LIMIT) {
+ iter.moveIndex(start - limit);
+ }
+ limit = start;
+ }
+ state = State.ITER_CHECK_BWD;
+ }
+ }
+
+ /**
+ * Extends the FCD text segment backward or normalizes around pos.
+ * @return true if success
+ */
+ private boolean previousSegment() {
+ assert(state == State.ITER_CHECK_BWD);
+ // The input text [(iter index)..limit[ passes the FCD check.
+ pos = iter.getIndex();
+ // Collect the characters being checked, in case they need to be normalized.
+ if(s == null) {
+ s = new StringBuilder();
+ } else {
+ s.setLength(0);
+ }
+ int nextCC = 0;
+ for(;;) {
+ // Fetch the previous character and its fcd16 value.
+ int c = iter.previousCodePoint();
+ if(c < 0) { break; }
+ int fcd16 = nfcImpl.getFCD16(c);
+ int trailCC = fcd16 & 0xff;
+ if(trailCC == 0 && s.length() != 0) {
+ // FCD boundary after this character.
+ iter.nextCodePoint();
+ break;
+ }
+ s.appendCodePoint(c);
+ if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
+ CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
+ // Fails FCD check. Find the previous FCD boundary and normalize.
+ while(fcd16 > 0xff) {
+ c = iter.previousCodePoint();
+ if(c < 0) { break; }
+ fcd16 = nfcImpl.getFCD16(c);
+ if(fcd16 == 0) {
+ iter.nextCodePoint();
+ break;
+ }
+ s.appendCodePoint(c);
+ }
+ // s was collected backward; restore text order before normalizing.
+ s.reverse();
+ normalize(s);
+ limit = pos;
+ start = pos - s.length();
+ state = State.IN_NORM_ITER_AT_START;
+ pos = normalized.length();
+ return true;
+ }
+ nextCC = fcd16 >> 8;
+ if(nextCC == 0) {
+ // FCD boundary before the following character.
+ break;
+ }
+ }
+ start = pos - s.length();
+ assert(pos != start);
+ iter.moveIndex(s.length());
+ state = State.ITER_IN_FCD_SEGMENT;
+ return true;
+ }
+
+ private void normalize(CharSequence s) {
+ if(normalized == null) {
+ normalized = new StringBuilder();
+ }
+ // NFD without argument checking.
+ nfcImpl.decompose(s, normalized);
+ }
+
+ private enum State {
+ /**
+ * The input text [start..(iter index)[ passes the FCD check.
+ * Moving forward checks incrementally.
+ * pos & limit are undefined.
+ */
+ ITER_CHECK_FWD,
+ /**
+ * The input text [(iter index)..limit[ passes the FCD check.
+ * Moving backward checks incrementally.
+ * start & pos are undefined.
+ */
+ ITER_CHECK_BWD,
+ /**
+ * The input text [start..limit[ passes the FCD check.
+ * pos tracks the current text index.
+ */
+ ITER_IN_FCD_SEGMENT,
+ /**
+ * The input text [start..limit[ failed the FCD check and was normalized.
+ * pos tracks the current index in the normalized string.
+ * The text iterator is at the limit index.
+ */
+ IN_NORM_ITER_AT_LIMIT,
+ /**
+ * The input text [start..limit[ failed the FCD check and was normalized.
+ * pos tracks the current index in the normalized string.
+ * The text iterator is at the start index.
+ */
+ IN_NORM_ITER_AT_START
+ }
+
+ // Current mode of the FCD check state machine; see State docs above.
+ private State state;
+
+ // Boundaries of the current FCD segment / normalized buffer (meaning per State).
+ private int start;
+ private int pos;
+ private int limit;
+
+ private final Normalizer2Impl nfcImpl;
+ private StringBuilder s; // scratch buffer for characters under FCD check
+ private StringBuilder normalized; // NFD form of a failing segment
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2010-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* FCDUTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp
+*
+* C++ version created on: 2010oct27
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.impl.Normalizer2Impl;
+
+/**
+ * Incrementally checks the input text for FCD and normalizes where necessary.
+ */
+public final class FCDUTF16CollationIterator extends UTF16CollationIterator {
+ /**
+ * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}.
+ */
+ public FCDUTF16CollationIterator(CollationData d) {
+ super(d);
+ // Cache the NFC implementation used for incremental FCD checks.
+ nfcImpl = d.nfcImpl;
+ }
+
+ // Full constructor: remembers the raw (un-normalized) text and starts in
+ // forward-checking mode (checkDir=1).
+ public FCDUTF16CollationIterator(CollationData data, boolean numeric, CharSequence s, int p) {
+ super(data, numeric, s, p);
+ rawSeq = s;
+ segmentStart = p;
+ rawLimit = s.length();
+ nfcImpl = data.nfcImpl;
+ checkDir = 1;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ // Skip the UTF16CollationIterator and call its parent.
+ // NOTE(review): in Java, casting `this` does not change dynamic dispatch,
+ // so ((CollationIterator)this).equals(other) invokes the most-derived
+ // override — i.e. this very method — which looks like infinite recursion.
+ // The C++ original calls CollationIterator::operator== non-virtually;
+ // confirm the intended Java equivalent and fix if needed.
+ if(!((CollationIterator)this).equals(other)) { return false; }
+ FCDUTF16CollationIterator o = (FCDUTF16CollationIterator)other;
+ // Compare the iterator state but not the text: Assume that the caller does that.
+ if(checkDir != o.checkDir) { return false; }
+ if(checkDir == 0 && (seq == rawSeq) != (o.seq == o.rawSeq)) { return false; }
+ if(checkDir != 0 || seq == rawSeq) {
+ return (pos - rawStart) == (o.pos - /*o.*/ rawStart);
+ } else {
+ return (segmentStart - rawStart) == (o.segmentStart - /*o.*/ rawStart) &&
+ (pos - start) == (o.pos - o.start);
+ }
+ }
+
+ @Override
+ public void resetToOffset(int newOffset) {
+ reset();
+ // Go back to iterating the raw text and re-check FCD from the new offset.
+ seq = rawSeq;
+ start = segmentStart = pos = rawStart + newOffset;
+ limit = rawLimit;
+ checkDir = 1;
+ }
+
+ @Override
+ public int getOffset() {
+ // While checking, or while iterating the raw text, pos is a raw-text index.
+ if(checkDir != 0 || seq == rawSeq) {
+ return pos - rawStart;
+ }
+ // Inside a normalized buffer: report the nearest segment boundary.
+ return (pos == start) ? (segmentStart - rawStart) : (segmentLimit - rawStart);
+ }
+
+ @Override
+ public void setText(boolean numeric, CharSequence s, int p) {
+ super.setText(numeric, s, p);
+ // Track the raw text separately from the (possibly normalized) seq,
+ // and restart forward FCD checking.
+ rawSeq = s;
+ segmentStart = p;
+ rawLimit = limit = s.length();
+ checkDir = 1;
+ }
+
+ @Override
+ public int nextCodePoint() {
+ char c;
+ for(;;) {
+ if(checkDir > 0) {
+ if(pos == limit) {
+ return Collation.SENTINEL_CP;
+ }
+ c = seq.charAt(pos++);
+ if(CollationFCD.hasTccc(c)) {
+ // Possible FCD discontinuity: re-check/normalize this segment,
+ // then re-read the character (seq/pos may now point into the
+ // normalized buffer).
+ if(CollationFCD.maybeTibetanCompositeVowel(c) ||
+ (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
+ --pos;
+ nextSegment();
+ c = seq.charAt(pos++);
+ }
+ }
+ break;
+ } else if(checkDir == 0 && pos != limit) {
+ // Inside an already-checked segment or normalized buffer.
+ c = seq.charAt(pos++);
+ break;
+ } else {
+ switchToForward();
+ }
+ }
+ // Combine a surrogate pair into a single code point.
+ char trail;
+ if(Character.isHighSurrogate(c) && pos != limit &&
+ Character.isLowSurrogate(trail = seq.charAt(pos))) {
+ ++pos;
+ return Character.toCodePoint(c, trail);
+ } else {
+ return c;
+ }
+ }
+
+ @Override
+ public int previousCodePoint() {
+ char c;
+ for(;;) {
+ if(checkDir < 0) {
+ if(pos == start) {
+ return Collation.SENTINEL_CP;
+ }
+ c = seq.charAt(--pos);
+ if(CollationFCD.hasLccc(c)) {
+ if(CollationFCD.maybeTibetanCompositeVowel(c) ||
+ (pos != start && CollationFCD.hasTccc(seq.charAt(pos - 1)))) {
+ ++pos;
+ previousSegment();
+ c = seq.charAt(--pos);
+ }
+ }
+ break;
+ } else if(checkDir == 0 && pos != start) {
+ c = seq.charAt(--pos);
+ break;
+ } else {
+ switchToBackward();
+ }
+ }
+ char lead;
+ if(Character.isLowSurrogate(c) && pos != start &&
+ Character.isHighSurrogate(lead = seq.charAt(pos - 1))) {
+ --pos;
+ return Character.toCodePoint(lead, c);
+ } else {
+ return c;
+ }
+ }
+
+ @Override
+ protected long handleNextCE32() {
+ char c;
+ for(;;) {
+ if(checkDir > 0) {
+ if(pos == limit) {
+ return NO_CP_AND_CE32;
+ }
+ c = seq.charAt(pos++);
+ if(CollationFCD.hasTccc(c)) {
+ if(CollationFCD.maybeTibetanCompositeVowel(c) ||
+ (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
+ --pos;
+ nextSegment();
+ c = seq.charAt(pos++);
+ }
+ }
+ break;
+ } else if(checkDir == 0 && pos != limit) {
+ c = seq.charAt(pos++);
+ break;
+ } else {
+ switchToForward();
+ }
+ }
+ return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c));
+ }
+
+ /* boolean foundNULTerminator(); */
+
+ @Override
+ protected void forwardNumCodePoints(int num) {
+ // Specify the class to avoid a virtual-function indirection.
+ // In Java, we would declare this class final.
+ while(num > 0 && nextCodePoint() >= 0) {
+ --num;
+ }
+ }
+
+ @Override
+ protected void backwardNumCodePoints(int num) {
+ // Specify the class to avoid a virtual-function indirection.
+ // In Java, we would declare this class final.
+ while(num > 0 && previousCodePoint() >= 0) {
+ --num;
+ }
+ }
+
+ /**
+ * Switches to forward checking if possible.
+ * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
+ * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
+ */
+ private void switchToForward() {
+ assert((checkDir < 0 && seq == rawSeq) || (checkDir == 0 && pos == limit));
+ if(checkDir < 0) {
+ // Turn around from backward checking.
+ start = segmentStart = pos;
+ if(pos == segmentLimit) {
+ limit = rawLimit;
+ checkDir = 1; // Check forward.
+ } else { // pos < segmentLimit
+ checkDir = 0; // Stay in FCD segment.
+ }
+ } else {
+ // Reached the end of the FCD segment.
+ if(seq == rawSeq) {
+ // The input text segment is FCD, extend it forward.
+ } else {
+ // The input text segment needed to be normalized.
+ // Switch to checking forward from it.
+ seq = rawSeq;
+ pos = start = segmentStart = segmentLimit;
+ // Note: If this segment is at the end of the input text,
+ // then it might help to return false to indicate that, so that
+ // we do not have to re-check and normalize when we turn around and go backwards.
+ // However, that would complicate the call sites for an optimization of an unusual case.
+ }
+ limit = rawLimit;
+ checkDir = 1;
+ }
+ }
+
+ /**
+ * Extend the FCD text segment forward or normalize around pos.
+ * To be called when checkDir > 0 && pos != limit.
+ * Returns with checkDir == 0 and pos != limit.
+ */
+ private void nextSegment() {
+ assert(checkDir > 0 && seq == rawSeq && pos != limit);
+ // The input text [segmentStart..pos[ passes the FCD check.
+ int p = pos;
+ int prevCC = 0;
+ for(;;) {
+ // Fetch the next character's fcd16 value.
+ int q = p;
+ int c = Character.codePointAt(seq, p);
+ p += Character.charCount(c);
+ int fcd16 = nfcImpl.getFCD16(c);
+ int leadCC = fcd16 >> 8;
+ if(leadCC == 0 && q != pos) {
+ // FCD boundary before the [q, p[ character.
+ limit = segmentLimit = q;
+ break;
+ }
+ if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
+ // Fails FCD check. Find the next FCD boundary and normalize.
+ do {
+ q = p;
+ if(p == rawLimit) { break; }
+ c = Character.codePointAt(seq, p);
+ p += Character.charCount(c);
+ } while(nfcImpl.getFCD16(c) > 0xff);
+ normalize(pos, q);
+ pos = start;
+ break;
+ }
+ prevCC = fcd16 & 0xff;
+ if(p == rawLimit || prevCC == 0) {
+ // FCD boundary after the last character.
+ limit = segmentLimit = p;
+ break;
+ }
+ }
+ assert(pos != limit);
+ checkDir = 0;
+ }
+
+ /**
+ * Switches to backward checking.
+ * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
+ * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
+ */
+ private void switchToBackward() {
+ assert((checkDir > 0 && seq == rawSeq) || (checkDir == 0 && pos == start));
+ if(checkDir > 0) {
+ // Turn around from forward checking.
+ limit = segmentLimit = pos;
+ if(pos == segmentStart) {
+ start = rawStart;
+ checkDir = -1; // Check backward.
+ } else { // pos > segmentStart
+ checkDir = 0; // Stay in FCD segment.
+ }
+ } else {
+ // Reached the start of the FCD segment.
+ if(seq == rawSeq) {
+ // The input text segment is FCD, extend it backward.
+ } else {
+ // The input text segment needed to be normalized.
+ // Switch to checking backward from it.
+ seq = rawSeq;
+ pos = limit = segmentLimit = segmentStart;
+ }
+ start = rawStart;
+ checkDir = -1;
+ }
+ }
+
+ /**
+ * Extend the FCD text segment backward or normalize around pos.
+ * To be called when checkDir < 0 && pos != start.
+ * Returns with checkDir == 0 and pos != start.
+ */
+ private void previousSegment() {
+ assert(checkDir < 0 && seq == rawSeq && pos != start);
+ // The input text [pos..segmentLimit[ passes the FCD check.
+ int p = pos;
+ int nextCC = 0;
+ for(;;) {
+ // Fetch the previous character's fcd16 value.
+ int q = p;
+ int c = Character.codePointBefore(seq, p);
+ p -= Character.charCount(c);
+ int fcd16 = nfcImpl.getFCD16(c);
+ int trailCC = fcd16 & 0xff;
+ if(trailCC == 0 && q != pos) {
+ // FCD boundary after the [p, q[ character.
+ start = segmentStart = q;
+ break;
+ }
+ if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
+ CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
+ // Fails FCD check. Find the previous FCD boundary and normalize.
+ do {
+ q = p;
+ if(fcd16 <= 0xff || p == rawStart) { break; }
+ c = Character.codePointBefore(seq, p);
+ p -= Character.charCount(c);
+ } while((fcd16 = nfcImpl.getFCD16(c)) != 0);
+ normalize(q, pos);
+ pos = limit;
+ break;
+ }
+ nextCC = fcd16 >> 8;
+ if(p == rawStart || nextCC == 0) {
+ // FCD boundary before the following character.
+ start = segmentStart = p;
+ break;
+ }
+ }
+ assert(pos != start);
+ checkDir = 0;
+ }
+
+ private void normalize(int from, int to) {
+ if(normalized == null) {
+ normalized = new StringBuilder();
+ }
+ // NFD without argument checking.
+ nfcImpl.decompose(rawSeq, from, to, normalized, to - from);
+ // Switch collation processing into the FCD buffer
+ // with the result of normalizing [segmentStart, segmentLimit[.
+ segmentStart = from;
+ segmentLimit = to;
+ seq = normalized;
+ start = 0;
+ limit = start + normalized.length();
+ }
+
+ // Text pointers: The input text is rawSeq[rawStart, rawLimit[.
+ // (In C++, these are const UChar * pointers.
+ // In Java, we use CharSequence rawSeq and the parent class' seq
+ // together with int indexes.)
+ //
+ // checkDir > 0:
+ //
+ // The input text rawSeq[segmentStart..pos[ passes the FCD check.
+ // Moving forward checks incrementally.
+ // segmentLimit is undefined. seq == rawSeq. limit == rawLimit.
+ //
+ // checkDir < 0:
+ // The input text rawSeq[pos..segmentLimit[ passes the FCD check.
+ // Moving backward checks incrementally.
+ // segmentStart is undefined. seq == rawSeq. start == rawStart.
+ //
+ // checkDir == 0:
+ //
+ // The input text rawSeq[segmentStart..segmentLimit[ is being processed.
+ // These pointers are at FCD boundaries.
+ // Either this text segment already passes the FCD check
+ // and seq==rawSeq && segmentStart==start<=pos<=limit==segmentLimit,
+ // or the current segment had to be normalized so that
+ // rawSeq[segmentStart..segmentLimit[ turned into the normalized string,
+ // corresponding to seq==normalized && 0==start<=pos<=limit==start+normalized.length().
+ private CharSequence rawSeq;
+ private static final int rawStart = 0;
+ private int segmentStart;
+ private int segmentLimit;
+ private int rawLimit;
+
+ private final Normalizer2Impl nfcImpl;
+ private StringBuilder normalized;
+ // Direction of incremental FCD check. See comments before rawStart.
+ private int checkDir;
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2012-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* IterCollationIterator.java, ported from uitercollationiterator.h/.cpp
+*
+* C++ version created on: 2012sep23 (from utf16collationiterator.h)
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import com.ibm.icu.text.UCharacterIterator;
+
+/**
+ * UCharIterator-based collation element and character iterator.
+ * Handles normalized text, with length or NUL-terminated.
+ * Unnormalized text is handled by a subclass.
+ */
+public class IterCollationIterator extends CollationIterator {
+ public IterCollationIterator(CollationData d, boolean numeric, UCharacterIterator ui) {
+ super(d, numeric);
+ iter = ui;
+ }
+
+ @Override
+ public void resetToOffset(int newOffset) {
+ reset();
+ iter.setIndex(newOffset);
+ }
+
+ @Override
+ public int getOffset() {
+ return iter.getIndex();
+ }
+
+ @Override
+ public int nextCodePoint() {
+ return iter.nextCodePoint();
+ }
+
+ @Override
+ public int previousCodePoint() {
+ return iter.previousCodePoint();
+ }
+
+ @Override
+ protected long handleNextCE32() {
+ int c = iter.next();
+ if(c < 0) {
+ return NO_CP_AND_CE32;
+ }
+ return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead((char)c));
+ }
+
+ @Override
+ protected char handleGetTrailSurrogate() {
+ int trail = iter.next();
+ if(!isTrailSurrogate(trail) && trail >= 0) { iter.previous(); }
+ return (char)trail;
+ }
+
+ @Override
+ protected void forwardNumCodePoints(int num) {
+ iter.moveCodePointIndex(num);
+ }
+
+ @Override
+ protected void backwardNumCodePoints(int num) {
+ iter.moveCodePointIndex(-num);
+ }
+
+ protected UCharacterIterator iter;
+}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* SharedObject.java, ported from sharedobject.h/.cpp
+*
+* C++ version created on: 2013dec19
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Base class for shared, reference-counted, auto-deleted objects.
+ * Java subclasses are mutable and must implement clone().
+ *
+ * <p>In C++, the SharedObject base class is used for both memory and ownership management.
+ * In Java, memory management (deletion after last reference is gone)
+ * is up to the garbage collector,
+ * but the reference counter is still used to see whether the referent is the sole owner.
+ *
+ * <p>Usage:
+ * <pre>
+ * class S extends SharedObject {
+ * public clone() { ... }
+ * }
+ *
+ * // Either use the nested class Reference (which costs an extra allocation),
+ * // or duplicate its code in the class that uses S
+ * // (which duplicates code and is more error-prone).
+ * class U {
+ * // For read-only access, use s.readOnly().
+ * // For writable access, use S ownedS = s.copyOnWrite();
+ * private SharedObject.Reference<S> s;
+ * // Returns a writable version of s.
+ * // If there is exactly one owner, then s itself is returned.
+ * // If there are multiple owners, then s is replaced with a clone,
+ * // and that is returned.
+ * private S getOwnedS() {
+ * return s.copyOnWrite();
+ * }
+ * public U clone() {
+ * ...
+ * c.s = s.clone();
+ * ...
+ * }
+ * }
+ *
+ * class V {
+ * // For read-only access, use s directly.
+ * // For writable access, use S ownedS = getOwnedS();
+ * private S s;
+ * // Returns a writable version of s.
+ * // If there is exactly one owner, then s itself is returned.
+ * // If there are multiple owners, then s is replaced with a clone,
+ * // and that is returned.
+ * private S getOwnedS() {
+ * if(s.getRefCount() > 1) {
+ * S ownedS = s.clone();
+ * s.removeRef();
+ * s = ownedS;
+ * ownedS.addRef();
+ * }
+ * return s;
+ * }
+ * public V clone() {
+ * ...
+ * s.addRef();
+ * ...
+ * }
+ * protected void finalize() {
+ * ...
+ * if(s != null) {
+ * s.removeRef();
+ * s = null;
+ * }
+ * ...
+ * }
+ * }
+ * </pre>
+ *
+ * Either use only Java memory management, or use addRef()/removeRef().
+ * Sharing requires reference-counting.
+ *
+ * TODO: Consider making this more widely available inside ICU,
+ * or else adopting a different model.
+ */
public class SharedObject implements Cloneable {
    /**
     * Similar to a smart pointer, basically a port of the static methods of C++ SharedObject.
     */
    public static final class Reference<T extends SharedObject> implements Cloneable {
        private T ref;

        /** Takes ownership of one reference to r (if r is not null). */
        public Reference(T r) {
            ref = r;
            if(r != null) {
                r.addRef();
            }
        }

        @SuppressWarnings("unchecked")
        @Override
        public Reference<T> clone() {
            Reference<T> copy;
            try {
                copy = (Reference<T>)super.clone();
            } catch (CloneNotSupportedException e) {
                // Unreachable: this class implements Cloneable.
                throw new RuntimeException(e);
            }
            // The clone shares the referent, which therefore gains one owner.
            if(ref != null) {
                ref.addRef();
            }
            return copy;
        }

        /** Returns the referent for read-only use; does not change its reference count. */
        public T readOnly() { return ref; }

        /**
         * Returns a writable version of the reference.
         * If there is exactly one owner, then the reference itself is returned.
         * If there are multiple owners, then the reference is replaced with a clone,
         * and that is returned.
         */
        public T copyOnWrite() {
            T referent = ref;
            if(referent.getRefCount() <= 1) { return referent; }
            @SuppressWarnings("unchecked")
            T cloned = (T)referent.clone();
            referent.removeRef();
            ref = cloned;
            cloned.addRef();
            return cloned;
        }

        /** Releases this Reference's count on the referent and forgets it. */
        public void clear() {
            if(ref != null) {
                ref.removeRef();
                ref = null;
            }
        }

        @Override
        protected void finalize() throws Throwable {
            super.finalize();
            clear();
        }
    }

    /** Initializes refCount to 0. */
    public SharedObject() {}

    /** Clones this object; the clone starts with its own refCount of 0. */
    @Override
    public SharedObject clone() {
        SharedObject copy;
        try {
            copy = (SharedObject)super.clone();
        } catch (CloneNotSupportedException e) {
            // Unreachable: this class implements Cloneable.
            throw new RuntimeException(e);
        }
        // The copy is not yet shared: give it a fresh counter instead of sharing ours.
        copy.refCount = new AtomicInteger();
        return copy;
    }

    /**
     * Increments the number of references to this object. Thread-safe.
     */
    public final void addRef() { refCount.incrementAndGet(); }

    /**
     * Decrements the number of references to this object,
     * and auto-deletes "this" if the number becomes 0. Thread-safe.
     * (In Java, actual deletion is left to the garbage collector.)
     */
    public final void removeRef() {
        refCount.decrementAndGet();
    }

    /**
     * Returns the reference counter. Uses a memory barrier.
     */
    public final int getRefCount() { return refCount.get(); }

    /** No-op in Java: deletion is up to the garbage collector. */
    public final void deleteIfZeroRefCount() {
    }

    private AtomicInteger refCount = new AtomicInteger();
}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* TailoredSet.java, ported from collationsets.h/.cpp
+*
+* C++ version created on: 2013feb09
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+import java.util.Iterator;
+
+import com.ibm.icu.impl.Normalizer2Impl.Hangul;
+import com.ibm.icu.impl.Trie2;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.CharsTrie;
+import com.ibm.icu.util.CharsTrie.Entry;
+
+/**
+ * Finds the set of characters and strings that sort differently in the tailoring
+ * from the base data.
+ *
+ * Every mapping in the tailoring needs to be compared to the base,
+ * because some mappings are copied for optimization, and
+ * all contractions for a character are copied if any contractions for that character
+ * are added, modified or removed.
+ *
+ * It might be simpler to re-parse the rule string, but:
+ * - That would require duplicating some of the from-rules builder code.
+ * - That would make the runtime code depend on the builder.
+ * - That would only work if we have the rule string, and we allow users to
+ * omit the rule string from data files.
+ */
public final class TailoredSet {

    private CollationData data;
    private CollationData baseData;
    // Output: collects characters and strings that sort differently from the base.
    private UnicodeSet tailored;
    // Current prefix context, un-reversed (the data stores prefixes reversed).
    private StringBuilder unreversedPrefix = new StringBuilder();
    // Current contraction suffix context, or null when there is none.
    private String suffix;

    public TailoredSet(UnicodeSet t) {
        tailored = t;
    }

    /** Compares every mapping in d against its base data and collects the differences. */
    public void forData(CollationData d) {
        data = d;
        baseData = d.base;
        assert (baseData != null);
        // utrie2_enum(data->trie, NULL, enumTailoredRange, this);
        Iterator<Trie2.Range> trieIterator = data.trie.iterator();
        Trie2.Range range;
        // Stop at the first lead-surrogate pseudo-range; those are not code points.
        while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
            enumTailoredRange(range.startCodePoint, range.endCodePoint, range.value, this);
        }
    }

    // Per-range callback: skips ranges that fall back to the base data.
    private void enumTailoredRange(int start, int end, int ce32, TailoredSet ts) {
        if (ce32 == Collation.FALLBACK_CE32) {
            return; // fallback to base, not tailored
        }
        ts.handleCE32(start, end, ce32);
    }

    // Java porting note: ICU4C returns U_SUCCESS(error) and it's not applicable to ICU4J.
    // Also, ICU4C requires handleCE32() to be public because it is used by the callback
    // function (enumTailoredRange()). This is not necessary for Java implementation.
    private void handleCE32(int start, int end, int ce32) {
        assert (ce32 != Collation.FALLBACK_CE32);
        if (Collation.isSpecialCE32(ce32)) {
            ce32 = data.getIndirectCE32(ce32);
            if (ce32 == Collation.FALLBACK_CE32) {
                return;
            }
        }
        // Compare each code point of the range individually against the base.
        do {
            int baseCE32 = baseData.getFinalCE32(baseData.getCE32(start));
            // Do not just continue if ce32 == baseCE32 because
            // contractions and expansions in different data objects
            // normally differ even if they have the same data offsets.
            if (Collation.isSelfContainedCE32(ce32) && Collation.isSelfContainedCE32(baseCE32)) {
                // fastpath
                if (ce32 != baseCE32) {
                    tailored.add(start);
                }
            } else {
                compare(start, ce32, baseCE32);
            }
        } while (++start <= end);
    }

    /**
     * Deep comparison of code point c's mapping (in the current prefix/suffix
     * context) against the corresponding base mapping.
     */
    private void compare(int c, int ce32, int baseCE32) {
        // Prefix contexts: compare in parallel when both sides have prefixes,
        // otherwise everything with a prefix on one side only is a difference.
        if (Collation.isPrefixCE32(ce32)) {
            int dataIndex = Collation.indexFromCE32(ce32);
            ce32 = data.getFinalCE32(data.getCE32FromContexts(dataIndex));
            if (Collation.isPrefixCE32(baseCE32)) {
                int baseIndex = Collation.indexFromCE32(baseCE32);
                baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
                comparePrefixes(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
            } else {
                addPrefixes(data, c, data.contexts, dataIndex + 2);
            }
        } else if (Collation.isPrefixCE32(baseCE32)) {
            int baseIndex = Collation.indexFromCE32(baseCE32);
            baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
            addPrefixes(baseData, c, baseData.contexts, baseIndex + 2);
        }

        // Contraction (suffix) contexts, handled analogously to prefixes.
        if (Collation.isContractionCE32(ce32)) {
            int dataIndex = Collation.indexFromCE32(ce32);
            if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
                // No match on the single code point itself.
                ce32 = Collation.NO_CE32;
            } else {
                ce32 = data.getFinalCE32(data.getCE32FromContexts(dataIndex));
            }
            if (Collation.isContractionCE32(baseCE32)) {
                int baseIndex = Collation.indexFromCE32(baseCE32);
                if ((baseCE32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
                    baseCE32 = Collation.NO_CE32;
                } else {
                    baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
                }
                compareContractions(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
            } else {
                addContractions(c, data.contexts, dataIndex + 2);
            }
        } else if (Collation.isContractionCE32(baseCE32)) {
            int baseIndex = Collation.indexFromCE32(baseCE32);
            baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
            addContractions(c, baseData.contexts, baseIndex + 2);
        }

        int tag;
        if (Collation.isSpecialCE32(ce32)) {
            tag = Collation.tagFromCE32(ce32);
            assert (tag != Collation.PREFIX_TAG);
            assert (tag != Collation.CONTRACTION_TAG);
            // Currently, the tailoring data builder does not write offset tags.
            // They might be useful for saving space,
            // but they would complicate the builder,
            // and in tailorings we assume that performance of tailored characters is more important.
            assert (tag != Collation.OFFSET_TAG);
        } else {
            tag = -1;
        }
        int baseTag;
        if (Collation.isSpecialCE32(baseCE32)) {
            baseTag = Collation.tagFromCE32(baseCE32);
            assert (baseTag != Collation.PREFIX_TAG);
            assert (baseTag != Collation.CONTRACTION_TAG);
        } else {
            baseTag = -1;
        }

        // Non-contextual mappings, expansions, etc.
        if (baseTag == Collation.OFFSET_TAG) {
            // We might be comparing a tailoring CE which is a copy of
            // a base offset-tag CE, via the [optimize [set]] syntax
            // or when a single-character mapping was copied for tailored contractions.
            // Offset tags always result in long-primary CEs,
            // with common secondary/tertiary weights.
            if (!Collation.isLongPrimaryCE32(ce32)) {
                add(c);
                return;
            }
            long dataCE = baseData.ces[Collation.indexFromCE32(baseCE32)];
            long p = Collation.getThreeBytePrimaryForOffsetData(c, dataCE);
            if (Collation.primaryFromLongPrimaryCE32(ce32) != p) {
                add(c);
                return;
            }
        }

        if (tag != baseTag) {
            add(c);
            return;
        }

        if (tag == Collation.EXPANSION32_TAG) {
            // Compare the CE32 expansion lists element by element.
            int length = Collation.lengthFromCE32(ce32);
            int baseLength = Collation.lengthFromCE32(baseCE32);

            if (length != baseLength) {
                add(c);
                return;
            }

            int idx0 = Collation.indexFromCE32(ce32);
            int idx1 = Collation.indexFromCE32(baseCE32);

            for (int i = 0; i < length; ++i) {
                if (data.ce32s[idx0 + i] != baseData.ce32s[idx1 + i]) {
                    add(c);
                    break;
                }
            }
        } else if (tag == Collation.EXPANSION_TAG) {
            // Compare the 64-bit CE expansion lists element by element.
            int length = Collation.lengthFromCE32(ce32);
            int baseLength = Collation.lengthFromCE32(baseCE32);

            if (length != baseLength) {
                add(c);
                return;
            }

            int idx0 = Collation.indexFromCE32(ce32);
            int idx1 = Collation.indexFromCE32(baseCE32);

            for (int i = 0; i < length; ++i) {
                if (data.ces[idx0 + i] != baseData.ces[idx1 + i]) {
                    add(c);
                    break;
                }
            }
        } else if (tag == Collation.HANGUL_TAG) {
            // A Hangul syllable differs if any of its conjoining Jamo are tailored.
            StringBuilder jamos = new StringBuilder();
            int length = Hangul.decompose(c, jamos);
            if (tailored.contains(jamos.charAt(0)) || tailored.contains(jamos.charAt(1))
                    || (length == 3 && tailored.contains(jamos.charAt(2)))) {
                add(c);
            }
        } else if (ce32 != baseCE32) {
            add(c);
        }
    }

    /** Merge-compares the prefix tries of tailoring and base for code point c. */
    private void comparePrefixes(int c, CharSequence p, int pidx, CharSequence q, int qidx) {
        // Parallel iteration over prefixes of both tables.
        CharsTrie.Iterator prefixes = new CharsTrie(p, pidx).iterator();
        CharsTrie.Iterator basePrefixes = new CharsTrie(q, qidx).iterator();
        String tp = null; // Tailoring prefix.
        String bp = null; // Base prefix.
        // Use a string with a U+FFFF as the limit sentinel.
        // U+FFFF is untailorable and will not occur in prefixes.
        String none = "\uffff";
        Entry te = null, be = null;
        for (;;) {
            if (tp == null) {
                if (prefixes.hasNext()) {
                    te = prefixes.next();
                    tp = te.chars.toString();
                } else {
                    te = null;
                    tp = none;
                }
            }
            if (bp == null) {
                if (basePrefixes.hasNext()) {
                    be = basePrefixes.next();
                    bp = be.chars.toString();
                } else {
                    be = null;
                    bp = none;
                }
            }
            if (tp == none && bp == none) {
                break;
            }
            int cmp = tp.compareTo(bp);
            if (cmp < 0) {
                // tp occurs in the tailoring but not in the base.
                assert (te != null);
                addPrefix(data, tp, c, te.value);
                te = null;
                tp = null;
            } else if (cmp > 0) {
                // bp occurs in the base but not in the tailoring.
                assert (be != null);
                addPrefix(baseData, bp, c, be.value);
                be = null;
                bp = null;
            } else {
                // Same prefix on both sides: compare the mappings in this context.
                setPrefix(tp);
                assert (te != null && be != null);
                compare(c, te.value, be.value);
                resetPrefix();
                te = be = null;
                tp = bp = null;
            }
        }
    }

    /** Merge-compares the contraction-suffix tries of tailoring and base for code point c. */
    private void compareContractions(int c, CharSequence p, int pidx, CharSequence q, int qidx) {
        // Parallel iteration over suffixes of both tables.
        CharsTrie.Iterator suffixes = new CharsTrie(p, pidx).iterator();
        CharsTrie.Iterator baseSuffixes = new CharsTrie(q, qidx).iterator();
        String ts = null; // Tailoring suffix.
        String bs = null; // Base suffix.
        // Use a string with two U+FFFF as the limit sentinel.
        // U+FFFF is untailorable and will not occur in contractions except maybe
        // as a single suffix character for a root-collator boundary contraction.
        String none = "\uffff\uffff";
        Entry te = null, be = null;
        for (;;) {
            if (ts == null) {
                if (suffixes.hasNext()) {
                    te = suffixes.next();
                    ts = te.chars.toString();
                } else {
                    te = null;
                    ts = none;
                }
            }
            if (bs == null) {
                if (baseSuffixes.hasNext()) {
                    be = baseSuffixes.next();
                    bs = be.chars.toString();
                } else {
                    be = null;
                    bs = none;
                }
            }
            if (ts == none && bs == none) {
                break;
            }
            int cmp = ts.compareTo(bs);
            if (cmp < 0) {
                // ts occurs in the tailoring but not in the base.
                addSuffix(c, ts);
                te = null;
                ts = null;
            } else if (cmp > 0) {
                // bs occurs in the base but not in the tailoring.
                addSuffix(c, bs);
                be = null;
                bs = null;
            } else {
                // Same suffix on both sides: compare the mappings in this context.
                suffix = ts;
                compare(c, te.value, be.value);
                suffix = null;
                te = be = null;
                ts = bs = null;
            }
        }
    }

    /** Adds every prefix+c string from one side's prefix trie to the tailored set. */
    private void addPrefixes(CollationData d, int c, CharSequence p, int pidx) {
        CharsTrie.Iterator prefixes = new CharsTrie(p, pidx).iterator();
        while (prefixes.hasNext()) {
            Entry e = prefixes.next();
            addPrefix(d, e.chars, c, e.value);
        }
    }

    /** Adds pfx+c (and its contractions, if any) to the tailored set. */
    private void addPrefix(CollationData d, CharSequence pfx, int c, int ce32) {
        setPrefix(pfx);
        ce32 = d.getFinalCE32(ce32);
        if (Collation.isContractionCE32(ce32)) {
            int idx = Collation.indexFromCE32(ce32);
            addContractions(c, d.contexts, idx + 2);
        }
        // appendCodePoint() mutates unreversedPrefix, but resetPrefix() clears it below.
        tailored.add(new StringBuilder(unreversedPrefix.appendCodePoint(c)));
        resetPrefix();
    }

    /** Adds every c+suffix string from one side's contraction trie to the tailored set. */
    private void addContractions(int c, CharSequence p, int pidx) {
        CharsTrie.Iterator suffixes = new CharsTrie(p, pidx).iterator();
        while (suffixes.hasNext()) {
            Entry e = suffixes.next();
            addSuffix(c, e.chars);
        }
    }

    /** Adds prefix+c+sfx (with the current prefix context) to the tailored set. */
    private void addSuffix(int c, CharSequence sfx) {
        tailored.add(new StringBuilder(unreversedPrefix).appendCodePoint(c).append(sfx));
    }

    /** Adds c, wrapped in the current prefix/suffix context if any, to the tailored set. */
    private void add(int c) {
        if (unreversedPrefix.length() == 0 && suffix == null) {
            tailored.add(c);
        } else {
            StringBuilder s = new StringBuilder(unreversedPrefix);
            s.appendCodePoint(c);
            if (suffix != null) {
                s.append(suffix);
            }
            tailored.add(s);
        }
    }

    // Prefixes are reversed in the data structure.
    private void setPrefix(CharSequence pfx) {
        unreversedPrefix.setLength(0);
        unreversedPrefix.append(pfx).reverse();
    }

    private void resetPrefix() {
        unreversedPrefix.setLength(0);
    }
}
+
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2010-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* UTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp
+*
+* C++ version created on: 2010oct27
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.impl.coll;
+
+/**
+ * UTF-16 collation element and character iterator.
+ * Handles normalized UTF-16 text, with length or NUL-terminated.
+ * Unnormalized text is handled by a subclass.
+ */
+public class UTF16CollationIterator extends CollationIterator {
+ /**
+ * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}.
+ */
+ public UTF16CollationIterator(CollationData d) {
+ super(d);
+ }
+
+ public UTF16CollationIterator(CollationData d, boolean numeric, CharSequence s, int p) {
+ super(d, numeric);
+ seq = s;
+ start = 0;
+ pos = p;
+ limit = s.length();
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if(!super.equals(other)) { return false; }
+ UTF16CollationIterator o = (UTF16CollationIterator)other;
+ // Compare the iterator state but not the text: Assume that the caller does that.
+ return (pos - start) == (o.pos - o.start);
+ }
+
+ @Override
+ public void resetToOffset(int newOffset) {
+ reset();
+ pos = start + newOffset;
+ }
+
+ @Override
+ public int getOffset() {
+ return pos - start;
+ }
+
+ public void setText(boolean numeric, CharSequence s, int p) {
+ reset(numeric);
+ seq = s;
+ start = 0;
+ pos = p;
+ limit = s.length();
+ }
+
+ @Override
+ public int nextCodePoint() {
+ if(pos == limit) {
+ return Collation.SENTINEL_CP;
+ }
+ char c = seq.charAt(pos++);
+ char trail;
+ if(Character.isHighSurrogate(c) && pos != limit &&
+ Character.isLowSurrogate(trail = seq.charAt(pos))) {
+ ++pos;
+ return Character.toCodePoint(c, trail);
+ } else {
+ return c;
+ }
+ }
+
+ @Override
+ public int previousCodePoint() {
+ if(pos == start) {
+ return Collation.SENTINEL_CP;
+ }
+ char c = seq.charAt(--pos);
+ char lead;
+ if(Character.isLowSurrogate(c) && pos != start &&
+ Character.isHighSurrogate(lead = seq.charAt(pos - 1))) {
+ --pos;
+ return Character.toCodePoint(lead, c);
+ } else {
+ return c;
+ }
+ }
+
+ @Override
+ protected long handleNextCE32() {
+ if(pos == limit) {
+ return NO_CP_AND_CE32;
+ }
+ char c = seq.charAt(pos++);
+ return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c));
+ }
+
+ @Override
+ protected char handleGetTrailSurrogate() {
+ if(pos == limit) { return 0; }
+ char trail;
+ if(Character.isLowSurrogate(trail = seq.charAt(pos))) { ++pos; }
+ return trail;
+ }
+
+ /* boolean foundNULTerminator(); */
+
+ @Override
+ protected void forwardNumCodePoints(int num) {
+ while(num > 0 && pos != limit) {
+ char c = seq.charAt(pos++);
+ --num;
+ if(Character.isHighSurrogate(c) && pos != limit &&
+ Character.isLowSurrogate(seq.charAt(pos))) {
+ ++pos;
+ }
+ }
+ }
+
+ @Override
+ protected void backwardNumCodePoints(int num) {
+ while(num > 0 && pos != start) {
+ char c = seq.charAt(--pos);
+ --num;
+ if(Character.isLowSurrogate(c) && pos != start &&
+ Character.isHighSurrogate(seq.charAt(pos-1))) {
+ --pos;
+ }
+ }
+ }
+
+ protected CharSequence seq;
+ protected int start;
+ protected int pos;
+ protected int limit;
+}
--- /dev/null
+/*
+ *******************************************************************************
+ * Copyright (C) 2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ *
+ * created on: 2014feb10
+ * created by: Markus W. Scherer
+ */
+package com.ibm.icu.impl.coll;
+
+// TODO: There must be a Java class for a growable array of ints without auto-boxing to Integer?!
+// Keep the API parallel to the C++ version for ease of porting. Port methods only as needed.
+// If & when we start using something else, we might keep this as a thin wrapper for porting.
public final class UVector32 {
    public UVector32() {}
    public boolean isEmpty() { return count == 0; }
    public int size() { return count; }
    public int elementAti(int i) { return elements[i]; }
    public int[] getBuffer() { return elements; }
    /** Appends e, growing the backing array if necessary. */
    public void addElement(int e) {
        growIfFull();
        elements[count++] = e;
    }
    public void setElementAt(int elem, int index) { elements[index] = elem; }
    /** Inserts elem at index, shifting the tail [index, size()) up by one slot. */
    public void insertElementAt(int elem, int index) {
        growIfFull();
        System.arraycopy(elements, index, elements, index + 1, count - index);
        elements[index] = elem;
        ++count;
    }
    /** Logically clears the vector; the backing array is kept for reuse. */
    public void removeAllElements() {
        count = 0;
    }

    // Grows the backing array when it is full:
    // aggressively (4x) while small (<= 64K), then by doubling.
    private void growIfFull() {
        if(count >= elements.length) {
            int newCapacity = elements.length <= 0xffff ? 4 * elements.length : 2 * elements.length;
            int[] grown = new int[newCapacity];
            System.arraycopy(elements, 0, grown, 0, count);
            elements = grown;
        }
    }
    private int[] elements = new int[32];
    private int count = 0;
}
--- /dev/null
+/*
+ *******************************************************************************
+ * Copyright (C) 2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ *
+ * created on: 2014feb10
+ * created by: Markus W. Scherer
+ */
+package com.ibm.icu.impl.coll;
+
+// TODO: There must be a Java class for a growable array of longs without auto-boxing to Long?!
+// Keep the API parallel to the C++ version for ease of porting. Port methods only as needed.
+// If & when we start using something else, we might keep this as a thin wrapper for porting.
public final class UVector64 {
    public UVector64() {}
    public boolean isEmpty() { return count == 0; }
    public int size() { return count; }
    public long elementAti(int i) { return elements[i]; }
    public long[] getBuffer() { return elements; }
    /** Appends e, growing the backing array if necessary. */
    public void addElement(long e) {
        growIfFull();
        elements[count++] = e;
    }
    public void setElementAt(long elem, int index) { elements[index] = elem; }
    /** Inserts elem at index, shifting the tail [index, size()) up by one slot. */
    public void insertElementAt(long elem, int index) {
        growIfFull();
        System.arraycopy(elements, index, elements, index + 1, count - index);
        elements[index] = elem;
        ++count;
    }
    /** Logically clears the vector; the backing array is kept for reuse. */
    public void removeAllElements() {
        count = 0;
    }

    // Grows the backing array when it is full:
    // aggressively (4x) while small (<= 64K), then by doubling.
    private void growIfFull() {
        if(count >= elements.length) {
            int newCapacity = elements.length <= 0xffff ? 4 * elements.length : 2 * elements.length;
            long[] grown = new long[newCapacity];
            System.arraycopy(elements, 0, grown, 0, count);
            elements = grown;
        }
    }
    private long[] elements = new long[32];
    private int count = 0;
}
package com.ibm.icu.text;
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
private AlphabeticIndex(ULocale locale, RuleBasedCollator collator) {
collatorOriginal = collator != null ? collator : (RuleBasedCollator) Collator.getInstance(locale);
try {
- collatorPrimaryOnly = (RuleBasedCollator) (collatorOriginal.clone());
+ collatorPrimaryOnly = (RuleBasedCollator) (collatorOriginal.cloneAsThawed());
} catch (Exception e) {
// should never happen
throw new IllegalStateException("Collator cannot be cloned", e);
collatorPrimaryOnly.setStrength(Collator.PRIMARY);
collatorPrimaryOnly.freeze();
- firstCharsInScripts = new ArrayList<String>(HACK_FIRST_CHARS_IN_SCRIPTS);
+ firstCharsInScripts = getFirstCharactersInScripts();
Collections.sort(firstCharsInScripts, collatorPrimaryOnly);
- if (collatorPrimaryOnly.compare("\u4E00", "\u1112") <= 0 &&
- collatorPrimaryOnly.compare("\u1100", "\u4E00") <= 0) {
- // The standard Korean tailoring sorts Hanja (Han characters)
- // as secondary differences from Hangul syllables.
- // This makes U+4E00 not useful as a Han-script boundary.
- // TODO: This becomes obsolete when the root collator gets
- // reliable script-first-primary mappings.
- int hanIndex = Collections.binarySearch(
- firstCharsInScripts, "\u4E00", collatorPrimaryOnly);
- if (hanIndex >= 0) {
- firstCharsInScripts.remove(hanIndex);
- }
- }
// Guard against a degenerate collator where
// some script boundary strings are primary ignorable.
for (;;) {
}
}
- if (locale != null) {
+ // Chinese index characters, which are specific to each of the several Chinese tailorings,
+ // take precedence over the single locale data exemplar set per language.
+ if (!addChineseIndexCharacters() && locale != null) {
addIndexExemplars(locale);
}
}
if (collatorPrimaryOnly.compare(item, firstScriptBoundary) < 0) {
// Ignore a primary-ignorable or non-alphabetic index character.
} else if (collatorPrimaryOnly.compare(item, overflowBoundary) >= 0) {
- // Ignore an index characters that will land in the overflow bucket.
+ // Ignore an index character that will land in the overflow bucket.
} else if (checkDistinct && collatorPrimaryOnly.compare(item, separated(item)) == 0) {
// Ignore a multi-code point index character that does not sort distinctly
// from the sequence of its separate characters.
* but if they aren't available, we have to synthesize them.
*/
private void addIndexExemplars(ULocale locale) {
- // Chinese index characters, which are specific to each of the several Chinese tailorings,
- // take precedence over the single locale data exemplar set per language.
- final String language = locale.getLanguage();
- if (language.equals("zh") || language.equals("ja") || language.equals("ko")) {
- // TODO: This should be done regardless of the language, but it's expensive.
- // We should add a Collator function (can be @internal)
- // to enumerate just the contractions that start with a given code point or string.
- if (addChineseIndexCharacters()) {
- return;
- }
- }
-
UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX);
if (exemplars != null) {
initialLabels.addAll(exemplars);
private boolean addChineseIndexCharacters() {
UnicodeSet contractions = new UnicodeSet();
try {
- collatorPrimaryOnly.getContractionsAndExpansions(contractions, null, false);
+ // Collect only the contractions that start with BASE (the Han index base).
+ collatorPrimaryOnly.internalAddContractions(BASE.charAt(0), contractions);
} catch (Exception e) {
return false;
}
- String firstHanBoundary = null;
- boolean hasPinyin = false;
+ // No contractions starting with BASE: nothing Chinese-specific to add here.
+ if (contractions.isEmpty()) { return false; }
+ initialLabels.addAll(contractions);
for (String s : contractions) {
- if (s.startsWith(BASE)) {
- initialLabels.add(s);
- if (firstHanBoundary == null ||
- collatorPrimaryOnly.compare(s, firstHanBoundary) < 0) {
- firstHanBoundary = s;
- }
- char c = s.charAt(s.length() - 1);
- if ('A' <= c && c <= 'Z') {
- hasPinyin = true;
- }
- }
- }
- if (hasPinyin) {
- initialLabels.add('A', 'Z');
- }
- if (firstHanBoundary != null) {
- // The hardcoded list of script boundaries includes U+4E00
- // which is tailored to not be the first primary
- // in all Chinese tailorings except "unihan".
- // Replace U+4E00 with the first boundary string from the tailoring.
- // TODO: This becomes obsolete when the root collator gets
- // reliable script-first-primary mappings.
- int hanIndex = Collections.binarySearch(
- firstCharsInScripts, "\u4E00", collatorPrimaryOnly);
- if (hanIndex >= 0) {
- firstCharsInScripts.set(hanIndex, firstHanBoundary);
+ assert(s.startsWith(BASE));
+ char c = s.charAt(s.length() - 1);
+ if (0x41 <= c && c <= 0x5A) { // A-Z
+ // There are Pinyin labels, add ASCII A-Z labels as well.
+ initialLabels.add(0x41, 0x5A); // A-Z
+ break;
}
- return true;
- } else {
- return false;
}
+ return true;
}
/**
List<String> indexCharacters = initLabels();
// Variables for hasMultiplePrimaryWeights().
- CollationElementIterator cei = collatorPrimaryOnly.getCollationElementIterator("");
- int variableTop;
+ long variableTop;
if (collatorPrimaryOnly.isAlternateHandlingShifted()) {
- variableTop = CollationElementIterator.primaryOrder(collatorPrimaryOnly.getVariableTop());
+ variableTop = collatorPrimaryOnly.getVariableTop() & 0xffffffffL;
} else {
variableTop = 0;
}
}
// Check for multiple primary weights.
if (!current.startsWith(BASE) &&
- hasMultiplePrimaryWeights(cei, variableTop, current) &&
+ hasMultiplePrimaryWeights(collatorPrimaryOnly, variableTop, current) &&
!current.endsWith("\uffff")) {
// "Æ" or "Sch" etc.
for (int i = bucketList.size() - 2;; --i) {
break;
}
if (singleBucket.displayBucket == null &&
- !hasMultiplePrimaryWeights(cei, variableTop, singleBucket.lowerBoundary)) {
+ !hasMultiplePrimaryWeights(collatorPrimaryOnly, variableTop, singleBucket.lowerBoundary)) {
// Add an invisible bucket that redirects strings greater than the expansion
// to the previous single-character bucket.
// For example, after ... Q R S Sch we add Sch\uFFFF->S
}
private static boolean hasMultiplePrimaryWeights(
- CollationElementIterator cei, int variableTop, String s) {
- cei.setText(s);
+ RuleBasedCollator coll, long variableTop, String s) {
+ long[] ces = coll.internalGetCEs(s);
boolean seenPrimary = false;
- for (;;) {
- int ce32 = cei.next();
- if (ce32 == CollationElementIterator.NULLORDER) {
- break;
- }
- int p = CollationElementIterator.primaryOrder(ce32);
- if (p > variableTop && (ce32 & 0xc0) != 0xc0) {
- // not primary ignorable, and not a continuation CE
+ for (int i = 0; i < ces.length; ++i) {
+ long ce = ces[i];
+ long p = ce >>> 32;
+ if (p > variableTop) {
+ // not primary ignorable
if (seenPrimary) {
return true;
}
return false;
}
- /**
- * This list contains one character per script that has the
- * lowest primary weight for that script in the root collator.
- * This list will be copied and sorted to account for script reordering.
- *
- * <p>TODO: This is fragile. If the first character of a script is tailored
- * so that it does not map to the script's lowest primary weight any more,
- * then the buckets will be off.
- * There are hacks in the code to handle the known CJK tailorings of U+4E00.
- *
- * <p>We use "A" not "a" because the en_US_POSIX tailoring sorts A primary-before a.
- */
- private static final List<String> HACK_FIRST_CHARS_IN_SCRIPTS =
- Arrays.asList(new String[] {
- "A", "\u03B1", "\u2C81", "\u0430", "\u2C30", "\u10D0", "\u0561", "\u05D0", "\uD802\uDD00", "\u0800", "\u0621",
- "\u0710", // Syriac
- "\u0840", // Mandaic
- "\u0780", "\u07CA", "\u2D30", "\u1200", "\u0950", "\u0985", "\u0A74", "\u0AD0", "\u0B05", "\u0BD0",
- "\u0C05", "\u0C85", "\u0D05", "\u0D85",
- "\uAAF2", // Meetei Mayek
- "\uA800", "\uA882", "\uD804\uDC83",
- UCharacter.toString(0x111C4), // Sharada
- UCharacter.toString(0x11680), // Takri
- "\u1B83", // Sundanese
- "\uD804\uDC05", // Brahmi (U+11005)
- "\uD802\uDE00", "\u0E01",
- "\u0EDE", // Lao
- "\uAA80", "\u0F40", "\u1C00", "\uA840", "\u1900", "\u1700", "\u1720", "\u1740", "\u1760",
- "\u1A00", // Buginese
- "\u1BC0", // Batak
- "\uA930", "\uA90A", "\u1000",
- UCharacter.toString(0x11103), // Chakma
- "\u1780", "\u1950", "\u1980", "\u1A20", "\uAA00", "\u1B05", "\uA984", "\u1880", "\u1C5A", "\u13A0", "\u1401", "\u1681", "\u16A0", "\uD803\uDC00", "\uA500", "\uA6A0", "\u1100",
- "\u3041", "\u30A1", "\u3105", "\uA000", "\uA4F8",
- UCharacter.toString(0x16F00), // Miao
- "\uD800\uDE80", "\uD800\uDEA0", "\uD802\uDD20", "\uD800\uDF00", "\uD800\uDF30", "\uD801\uDC28", "\uD801\uDC50", "\uD801\uDC80",
- UCharacter.toString(0x110D0), // Sora Sompeng
- "\uD800\uDC00", "\uD802\uDC00", "\uD802\uDE60", "\uD802\uDF00", "\uD802\uDC40",
- "\uD802\uDF40", "\uD802\uDF60", "\uD800\uDF80", "\uD800\uDFA0", "\uD808\uDC00", "\uD80C\uDC00",
- UCharacter.toString(0x109A0), // Meroitic Cursive
- UCharacter.toString(0x10980), // Meroitic Hieroglyphs
- "\u4E00",
- // TODO: The overflow bucket's lowerBoundary string should be the
- // first item after the last reordering group in the collator's script order.
- // This should normally be the first Unicode code point
- // that is unassigned (U+0378 in Unicode 6.3) and untailored.
- // However, at least up to ICU 51 the Hani reordering group includes
- // unassigned code points,
- // and there is no stable string for the start of the trailing-weights range.
- // The only known string that sorts "high" is U+FFFF.
- // When ICU separates Hani vs. unassigned reordering groups, we need to fix this,
- // and fix relevant test code.
- // Ideally, FractionalUCA.txt will have a "script first primary"
- // for unassigned code points.
- "\uFFFF"
- });
+ // Single-bit masks over UCharacter.getType() values; OR-able into
+ // general-category sets, tested via (1 << getType(c)) & mask.
+ // TODO: Surely we have at least a ticket for porting these mask values to UCharacter.java?!
+ private static final int GC_LU_MASK = 1 << UCharacter.UPPERCASE_LETTER;
+ private static final int GC_LL_MASK = 1 << UCharacter.LOWERCASE_LETTER;
+ private static final int GC_LT_MASK = 1 << UCharacter.TITLECASE_LETTER;
+ private static final int GC_LM_MASK = 1 << UCharacter.MODIFIER_LETTER;
+ private static final int GC_LO_MASK = 1 << UCharacter.OTHER_LETTER;
+ private static final int GC_L_MASK =
+ GC_LU_MASK|GC_LL_MASK|GC_LT_MASK|GC_LM_MASK|GC_LO_MASK;
+ private static final int GC_CN_MASK = 1 << UCharacter.GENERAL_OTHER_TYPES;
/**
* Return a list of the first character in each script. Only exposed for testing.
* @deprecated This API is ICU internal, only for testing.
*/
@Deprecated
- public static Collection<String> getFirstCharactersInScripts() {
- return HACK_FIRST_CHARS_IN_SCRIPTS;
+ public List<String> getFirstCharactersInScripts() {
+ List<String> dest = new ArrayList<String>(200);
+ // Fetch the script-first-primary contractions which are defined in the root collator.
+ // They all start with U+FDD1.
+ UnicodeSet set = new UnicodeSet();
+ collatorPrimaryOnly.internalAddContractions(0xFDD1, set);
+ if (set.isEmpty()) {
+ throw new UnsupportedOperationException(
+ "AlphabeticIndex requires script-first-primary contractions");
+ }
+ for (String boundary : set) {
+ // Each boundary string is U+FDD1 followed by the script's sample character;
+ // classify that sample character (code point at index 1).
+ int gcMask = 1 << UCharacter.getType(boundary.codePointAt(1));
+ if ((gcMask & (GC_L_MASK | GC_CN_MASK)) == 0) {
+ // Ignore boundaries for the special reordering groups.
+ // Take only those for "real scripts" (where the sample character is a Letter)
+ // and the one for unassigned implicit weights (Cn).
+ continue;
+ }
+ dest.add(boundary);
+ }
+ return dest;
}
}
/**
*******************************************************************************
-* Copyright (C) 1996-2014, International Business Machines Corporation and *
-* others. All Rights Reserved. *
-*******************************************************************************
-*
-*
+* Copyright (C) 1996-2014, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;
-/***
- * import java.text.StringCharacterIterator;
- * import java.text.CharacterIterator;
- */
import java.text.CharacterIterator;
-import java.util.MissingResourceException;
+import java.util.HashMap;
+import java.util.Map;
import com.ibm.icu.impl.CharacterIteratorWrapper;
-import com.ibm.icu.impl.ICUDebug;
-import com.ibm.icu.impl.Norm2AllModes;
-import com.ibm.icu.impl.Normalizer2Impl;
-import com.ibm.icu.impl.StringUCharacterIterator;
-import com.ibm.icu.impl.UCharacterProperty;
-import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.impl.coll.Collation;
+import com.ibm.icu.impl.coll.CollationData;
+import com.ibm.icu.impl.coll.CollationIterator;
+import com.ibm.icu.impl.coll.ContractionsAndExpansions;
+import com.ibm.icu.impl.coll.FCDIterCollationIterator;
+import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
+import com.ibm.icu.impl.coll.IterCollationIterator;
+import com.ibm.icu.impl.coll.UTF16CollationIterator;
+import com.ibm.icu.impl.coll.UVector32;
/**
* <p><code>CollationElementIterator</code> is an iterator created by
* a RuleBasedCollator to walk through a string. The return result of
- * each iteration is a 32-bit collation element that defines the
+ * each iteration is a 32-bit collation element (CE) that defines the
* ordering priority of the next character or sequence of characters
* in the source string.</p>
*
- * <p>For illustration, consider the following in Spanish:
+ * <p>For illustration, consider the following in Slovak and in traditional Spanish collation:
* <blockquote>
* <pre>
- * "ca" -> the first collation element is collation_element('c') and second
- * collation element is collation_element('a').
- *
- * Since "ch" in Spanish sorts as one entity, the below example returns one
- * collation element for the two characters 'c' and 'h'
- *
- * "cha" -> the first collation element is collation_element('ch') and second
- * collation element is collation_element('a').
+ * "ca" -> the first collation element is CE('c') and the second
+ * collation element is CE('a').
+ * "cha" -> the first collation element is CE('ch') and the second
+ * collation element is CE('a').
* </pre>
* </blockquote>
- * And in German,
+ * And in German phonebook collation,
* <blockquote>
* <pre>
* Since the character 'æ' is a composed character of 'a' and 'e', the
* </p>
*
* <p>For collation ordering comparison, the collation element results
- * can not be compared simply by using basic arithmetric operators,
+ * can not be compared simply by using basic arithmetic operators,
* e.g. <, == or >, further processing has to be done. Details
* can be found in the ICU
- * <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html">
- * user guide</a>. An example of using the CollationElementIterator
+ * <a href="http://userguide.icu-project.org/collation/architecture">
+ * User Guide</a>. An example of using the CollationElementIterator
* for collation ordering comparison is the class
- * <a href=StringSearch.html> com.ibm.icu.text.StringSearch</a>.</p>
+ * {@link com.ibm.icu.text.StringSearch}.</p>
*
* <p>To construct a CollationElementIterator object, users
* call the method getCollationElementIterator() on a
*/
public final class CollationElementIterator
{
-
-
- // public data members --------------------------------------------------
+ private CollationIterator iter_; // owned
+ private RuleBasedCollator rbc_; // aliased
+ /**
+ * The second 32-bit half of a 64-bit CE that next()/previous() split in two;
+ * 0 when no second half is pending.
+ */
+ private int otherHalf_;
+ /**
+ * <0: backwards; 0: just after reset() (previous() begins from end);
+ * 1: just after setOffset(); >1: forward
+ */
+ private byte dir_;
+ /**
+ * Stores offsets from expansions and from unsafe-backwards iteration,
+ * so that getOffset() returns intermediate offsets for the CEs
+ * that are consistent with forward iteration.
+ */
+ private UVector32 offsets_;
+
+ private String string_; // TODO: needed in Java? if so, then add a UCharacterIterator field too?
+
/**
* <p>This constant is returned by the iterator in the methods
* @see #previous */
public static final int IGNORABLE = 0;
- // public methods -------------------------------------------------------
+ /**
+ * Return the primary order of the specified collation element,
+ * i.e. the first 16 bits. This value is unsigned.
+ * @param ce the collation element
+ * @return the element's 16 bits primary order.
+ * @stable ICU 2.8
+ */
+ public final static int primaryOrder(int ce) {
+ return (ce >>> 16) & 0xffff;
+ }
+
+ /**
+ * Return the secondary order of the specified collation element,
+ * i.e. the 16th to 23rd bits, inclusive. This value is unsigned.
+ * @param ce the collation element
+ * @return the element's 8 bits secondary order
+ * @stable ICU 2.8
+ */
+ public final static int secondaryOrder(int ce) {
+ return (ce >>> 8) & 0xff;
+ }
+
+ /**
+ * Return the tertiary order of the specified collation element, i.e. the last
+ * 8 bits. This value is unsigned.
+ * @param ce the collation element
+ * @return the element's 8 bits tertiary order
+ * @stable ICU 2.8
+ */
+ public final static int tertiaryOrder(int ce) {
+ return ce & 0xff;
+ }
+
+
+ // Builds the first old-style 32-bit CE from a 64-bit CE: the high 16 bits of
+ // the primary, the high byte of the secondary, and the high byte of the tertiary.
+ private static final int getFirstHalf(long p, int lower32) {
+ return ((int)p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff);
+ }
+
+ // Builds the second (continuation) 32-bit CE: the low 16 bits of the primary,
+ // the low byte of the secondary, and the low 6 bits of the tertiary.
+ private static final int getSecondHalf(long p, int lower32) {
+ return ((int)p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
+ }
+
+ // true if any of the bits that getSecondHalf() extracts are set,
+ // i.e. the 64-bit CE must be split into two 32-bit CEs.
+ private static final boolean ceNeedsTwoParts(long ce) {
+ return (ce & 0xffff00ff003fL) != 0;
+ }
+
+ // Partial initialization shared by the public constructors;
+ // iter_ stays null until setText() is called.
+ private CollationElementIterator(RuleBasedCollator collator) {
+ iter_ = null;
+ rbc_ = collator;
+ otherHalf_ = 0;
+ dir_ = 0;
+ offsets_ = null;
+ }
+
+ /**
+ * <p>CollationElementIterator constructor. This takes a source
+ * string and a RuleBasedCollator. The iterator will walk through
+ * the source string based on the rules defined by the
+ * collator. If the source string is empty, NULLORDER will be
+ * returned on the first call to next().</p>
+ *
+ * @param source the source string.
+ * @param collator the RuleBasedCollator
+ * @stable ICU 2.8
+ */
+ CollationElementIterator(String source, RuleBasedCollator collator) {
+ this(collator);
+ setText(source);
+ }
+ // Note: The constructors should take settings & tailoring, not a collator,
+ // to avoid circular dependencies.
+ // However, for equals() we would need to be able to compare tailoring data for equality
+ // without making CollationData or CollationTailoring depend on TailoredSet.
+ // (See the implementation of RuleBasedCollator.equals().)
+ // That might require creating an intermediate class that would be used
+ // by both CollationElementIterator and RuleBasedCollator
+ // but only contain the part of RBC.equals() related to data and rules.
+
+ /**
+ * <p>CollationElementIterator constructor. This takes a source
+ * character iterator and a RuleBasedCollator. The iterator will
+ * walk through the source string based on the rules defined by
+ * the collator. If the source string is empty, NULLORDER will be
+ * returned on the first call to next().</p>
+ *
+ * @param source the source string iterator.
+ * @param collator the RuleBasedCollator
+ * @stable ICU 2.8
+ */
+ CollationElementIterator(CharacterIterator source, RuleBasedCollator collator) {
+ this(collator);
+ setText(source);
+ }
- // public getters -------------------------------------------------------
+ /**
+ * <p>CollationElementIterator constructor. This takes a source
+ * character iterator and a RuleBasedCollator. The iterator will
+ * walk through the source string based on the rules defined by
+ * the collator. If the source string is empty, NULLORDER will be
+ * returned on the first call to next().</p>
+ *
+ * @param source the source string iterator.
+ * @param collator the RuleBasedCollator
+ * @stable ICU 2.8
+ */
+ CollationElementIterator(UCharacterIterator source, RuleBasedCollator collator) {
+ this(collator);
+ setText(source);
+ }
/**
* <p>Returns the character offset in the source string
* next() or previous().
* @stable ICU 2.8
*/
- public int getOffset()
- {
- if (m_bufferOffset_ != -1) {
- if (m_isForwards_) {
- return m_FCDLimit_;
- }
- return m_FCDStart_;
- }
- return m_source_.getIndex();
- }
-
-
- /**
- * <p> Returns the maximum length of any expansion sequence that ends with
- * the specified collation element. If there is no expansion with this
- * collation element as the last element, returns 1.
- * </p>
- * @param ce a collation element returned by previous() or next().
- * @return the maximum length of any expansion sequence ending
- * with the specified collation element.
- * @stable ICU 2.8
- */
- public int getMaxExpansion(int ce)
- {
- int start = 0;
- int limit = m_collator_.m_expansionEndCE_.length;
- long unsignedce = ce & 0xFFFFFFFFl;
- while (start < limit - 1) {
- int mid = start + ((limit - start) >> 1);
- long midce = m_collator_.m_expansionEndCE_[mid] & 0xFFFFFFFFl;
- if (unsignedce <= midce) {
- limit = mid;
+ public int getOffset() {
+ // While iterating backwards, return offsets recorded by previousCE()
+ // so that they stay consistent with forward iteration (see offsets_).
+ if (dir_ < 0 && offsets_ != null && !offsets_.isEmpty()) {
+ // CollationIterator.previousCE() decrements the CEs length
+ // while it pops CEs from its internal buffer.
+ int i = iter_.getCEsLength();
+ if (otherHalf_ != 0) {
+ // Return the trailing CE offset while we are in the middle of a 64-bit CE.
+ ++i;
}
- else {
- start = mid;
- }
- }
- int result = 1;
- if (m_collator_.m_expansionEndCE_[start] == ce) {
- result = m_collator_.m_expansionEndCEMaxSize_[start];
+ assert (i < offsets_.size());
+ return offsets_.elementAti(i);
}
- else if (limit < m_collator_.m_expansionEndCE_.length &&
- m_collator_.m_expansionEndCE_[limit] == ce) {
- result = m_collator_.m_expansionEndCEMaxSize_[limit];
- }
- else if ((ce & 0xFFFF) == 0x00C0) {
- result = 2;
- }
- return result;
- }
-
- // public other methods -------------------------------------------------
-
- /**
- * <p> Resets the cursor to the beginning of the string. The next
- * call to next() or previous() will return the first and last
- * collation element in the string, respectively.</p>
- *
- * <p>If the RuleBasedCollator used by this iterator has had its
- * attributes changed, calling reset() will reinitialize the
- * iterator to use the new attributes.</p>
- *
- * @stable ICU 2.8
- */
- public void reset()
- {
- m_source_.setToStart();
- updateInternalState();
- m_direction = 0; // initial state
+ return iter_.getOffset();
}
/**
* iteration has been reached.
* @stable ICU 2.8
*/
- public int next()
- {
- assert m_direction >= 0;
- m_direction = 1;
-
- m_isForwards_ = true;
- if (m_CEBufferSize_ > 0) {
- if (m_CEBufferOffset_ < m_CEBufferSize_) {
- // if there are expansions left in the buffer, we return it
- return m_CEBuffer_[m_CEBufferOffset_ ++];
- }
- m_CEBufferSize_ = 0;
- m_CEBufferOffset_ = 0;
- }
-
- int result = NULLORDER;
- char ch = 0;
- do {
- int ch_int = nextChar();
- if (ch_int == UCharacterIterator.DONE) {
- return NULLORDER;
- }
- ch = (char)ch_int;
- if (m_collator_.m_isHiragana4_) {
- /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
- * based on whether the previous codepoint was Hiragana or Katakana.
- */
- m_isCodePointHiragana_ = (m_isCodePointHiragana_ && (ch >= 0x3099 && ch <= 0x309C)) ||
- ((ch >= 0x3040 && ch <= 0x309e) && !(ch > 0x3094 && ch < 0x309d));
- }
-
- if (ch <= 0xFF) {
- // For latin-1 characters we never need to fall back to the UCA
- // table because all of the UCA data is replicated in the
- // latinOneMapping array.
- // Except: Special CEs can result in CE_NOT_FOUND_,
- // for example if the default entry for a prefix-special is "not found",
- // and we do need to fall back to the UCA in such a case.
- // TODO: It would be better if tailoring specials never resulted in "not found"
- // unless the corresponding UCA result is also "not found".
- // That would require a change in the ICU4J collator-from-rule builder.
- result = m_collator_.m_trie_.getLatin1LinearValue(ch);
- } else {
- result = m_collator_.m_trie_.getLeadValue(ch);
- }
- if (!RuleBasedCollator.isSpecial(result)) {
- return result;
- }
- if (result != CE_NOT_FOUND_) {
- result = nextSpecial(m_collator_, result, ch);
- }
- if (result == CE_NOT_FOUND_) {
- // couldn't find a good CE in the tailoring
- if (RuleBasedCollator.UCA_ != null) {
- result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
- if (RuleBasedCollator.isSpecial(result)) {
- // UCA also gives us a special CE
- result = nextSpecial(RuleBasedCollator.UCA_, result, ch);
- }
- }
- if(result == CE_NOT_FOUND_) {
- // maybe there is no UCA, unlikely in Java, but ported for consistency
- result = nextImplicit(ch);
- }
- }
- } while (result == IGNORABLE && ch >= 0xAC00 && ch <= 0xD7AF);
-
- return result;
+ public int next() {
+ if (dir_ > 1) {
+ // Continue forward iteration. Test this first.
+ if (otherHalf_ != 0) {
+ int oh = otherHalf_;
+ otherHalf_ = 0;
+ return oh;
+ }
+ } else if (dir_ == 1) {
+ // next() after setOffset()
+ dir_ = 2;
+ } else if (dir_ == 0) {
+ // The iter_ is already reset to the start of the text.
+ dir_ = 2;
+ } else /* dir_ < 0 */{
+ // illegal change of direction
+ throw new IllegalStateException("Illegal change of direction");
+ // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
+ }
+ // No need to keep all CEs in the buffer when we iterate.
+ iter_.clearCEsIfNoneRemaining();
+ long ce = iter_.nextCE();
+ if (ce == Collation.NO_CE) {
+ return NULLORDER;
+ }
+ // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
+ long p = ce >>> 32;
+ int lower32 = (int) ce;
+ int firstHalf = getFirstHalf(p, lower32);
+ int secondHalf = getSecondHalf(p, lower32);
+ if (secondHalf != 0) {
+ // Save the second half for the following call to next();
+ // 0xc0 in the low bits marks it as a continuation CE.
+ otherHalf_ = secondHalf | 0xc0; // continuation CE
+ }
+ return firstHalf;
}
/**
* the iteration has been reached.
* @stable ICU 2.8
*/
- public int previous()
- {
- assert m_direction <= 0;
- m_direction = -1;
-
- if (m_source_.getIndex() <= 0 && m_isForwards_) {
- // if iterator is new or reset, we can immediate perform backwards
- // iteration even when the offset is not right.
- m_source_.setToLimit();
- updateInternalState();
- }
- m_isForwards_ = false;
- if (m_CEBufferSize_ > 0) {
- if (m_CEBufferOffset_ > 0) {
- return m_CEBuffer_[-- m_CEBufferOffset_];
- }
- m_CEBufferSize_ = 0;
- m_CEBufferOffset_ = 0;
- }
-
- int result = NULLORDER;
- char ch = 0;
- do {
- int ch_int = previousChar();
- if (ch_int == UCharacterIterator.DONE) {
- return NULLORDER;
- }
- ch = (char)ch_int;
- if (m_collator_.m_isHiragana4_) {
- m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f);
- }
- if (m_collator_.isContractionEnd(ch) && !isBackwardsStart()) {
- result = previousSpecial(m_collator_, CE_CONTRACTION_, ch);
- }
- else {
- if (ch <= 0xFF) {
- result = m_collator_.m_trie_.getLatin1LinearValue(ch);
- }
- else {
- result = m_collator_.m_trie_.getLeadValue(ch);
- }
- if (RuleBasedCollator.isSpecial(result)) {
- result = previousSpecial(m_collator_, result, ch);
- }
- if (result == CE_NOT_FOUND_) {
- if (!isBackwardsStart()
- && m_collator_.isContractionEnd(ch)) {
- result = CE_CONTRACTION_;
- }
- else {
- if(RuleBasedCollator.UCA_ != null) {
- result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
- }
- }
-
- if (RuleBasedCollator.isSpecial(result)) {
- if(RuleBasedCollator.UCA_ != null) {
- result = previousSpecial(RuleBasedCollator.UCA_, result, ch);
- }
- }
- }
- }
- } while (result == IGNORABLE && ch >= 0xAC00 && ch <= 0xD7AF);
- if(result == CE_NOT_FOUND_) {
- result = previousImplicit(ch);
- }
- return result;
- }
-
- /**
- * Return the primary order of the specified collation element,
- * i.e. the first 16 bits. This value is unsigned.
- * @param ce the collation element
- * @return the element's 16 bits primary order.
- * @stable ICU 2.8
- */
- public final static int primaryOrder(int ce)
- {
- return (ce & RuleBasedCollator.CE_PRIMARY_MASK_)
- >>> RuleBasedCollator.CE_PRIMARY_SHIFT_;
- }
- /**
- * Return the secondary order of the specified collation element,
- * i.e. the 16th to 23th bits, inclusive. This value is unsigned.
- * @param ce the collation element
- * @return the element's 8 bits secondary order
- * @stable ICU 2.8
- */
- public final static int secondaryOrder(int ce)
- {
- return (ce & RuleBasedCollator.CE_SECONDARY_MASK_)
- >> RuleBasedCollator.CE_SECONDARY_SHIFT_;
+ public int previous() {
+ if (dir_ < 0) {
+ // Continue backwards iteration. Test this first.
+ if (otherHalf_ != 0) {
+ int oh = otherHalf_;
+ otherHalf_ = 0;
+ return oh;
+ }
+ } else if (dir_ == 0) {
+ iter_.resetToOffset(string_.length());
+ dir_ = -1;
+ } else if (dir_ == 1) {
+ // previous() after setOffset()
+ dir_ = -1;
+ } else /* dir_ > 1 */{
+ // illegal change of direction
+ throw new IllegalStateException("Illegal change of direction");
+ // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
+ }
+ if (offsets_ == null) {
+ offsets_ = new UVector32();
+ }
+ // If we already have expansion CEs, then we also have offsets.
+ // Otherwise remember the trailing offset in case we need to
+ // write offsets for an artificial expansion.
+ int limitOffset = iter_.getCEsLength() == 0 ? iter_.getOffset() : 0;
+ long ce = iter_.previousCE(offsets_);
+ if (ce == Collation.NO_CE) {
+ return NULLORDER;
+ }
+ // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
+ long p = ce >>> 32;
+ int lower32 = (int) ce;
+ int firstHalf = getFirstHalf(p, lower32);
+ int secondHalf = getSecondHalf(p, lower32);
+ // Backward iteration returns the trailing (second) half first;
+ // the leading half is saved in otherHalf_ for the next call.
+ if (secondHalf != 0) {
+ if (offsets_.isEmpty()) {
+ // When we convert a single 64-bit CE into two 32-bit CEs,
+ // we need to make this artificial expansion behave like a normal expansion.
+ // See CollationIterator.previousCE().
+ offsets_.addElement(iter_.getOffset());
+ offsets_.addElement(limitOffset);
+ }
+ otherHalf_ = firstHalf;
+ return secondHalf | 0xc0; // continuation CE
+ }
+ return firstHalf;
}
/**
- * Return the tertiary order of the specified collation element, i.e. the last
- * 8 bits. This value is unsigned.
- * @param ce the collation element
- * @return the element's 8 bits tertiary order
+ * <p> Resets the cursor to the beginning of the string. The next
+ * call to next() or previous() will return the first and last
+ * collation element in the string, respectively.</p>
+ *
+ * <p>If the RuleBasedCollator used by this iterator has had its
+ * attributes changed, calling reset() will reinitialize the
+ * iterator to use the new attributes.</p>
+ *
* @stable ICU 2.8
*/
- public final static int tertiaryOrder(int ce)
- {
- return ce & RuleBasedCollator.CE_TERTIARY_MASK_;
+ public void reset() {
+ // Rewind to the start of the text and discard any pending second CE half.
+ iter_ .resetToOffset(0);
+ otherHalf_ = 0;
+ dir_ = 0;
+ }
/**
* iteration. The user must ensure that the offset is not in the
* middle of a decomposable range.</p>
*
- * @param offset the character offset into the original source string to
+ * @param newOffset the character offset into the original source string to
* set. Note that this is not an offset into the corresponding
* sequence of collation elements.
* @stable ICU 2.8
*/
- public void setOffset(int offset)
- {
- m_direction = 0; // reset to initial state
-
- m_source_.setIndex(offset);
- int ch_int = m_source_.current();
- char ch = (char)ch_int;
- if (ch_int != UCharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
- // if it is unsafe we need to check if it is part of a contraction
- // or a surrogate character
- if (UTF16.isTrailSurrogate(ch)) {
- // if it is a surrogate pair we move up one character
- char prevch = (char)m_source_.previous();
- if (!UTF16.isLeadSurrogate(prevch)) {
- m_source_.setIndex(offset); // go back to the same index
+ public void setOffset(int newOffset) {
+ if (0 < newOffset && newOffset < string_.length()) {
+ int offset = newOffset;
+ do {
+ char c = string_.charAt(offset);
+ if (!rbc_.isUnsafe(c) ||
+ (Character.isHighSurrogate(c) && !rbc_.isUnsafe(string_.codePointAt(offset)))) {
+ break;
}
- }
- else {
- // could be part of a contraction
- // backup to a safe point and iterate till we pass offset
- while (m_source_.getIndex() > 0) {
- if (!m_collator_.isUnsafe(ch)) {
- break;
+ // Back up to before this unsafe character.
+ --offset;
+ } while (offset > 0);
+ if (offset < newOffset) {
+ // We might have backed up more than necessary.
+ // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
+ // but for text "chu" setOffset(2) should remain at 2
+ // although we initially back up to offset 0.
+ // Find the last safe offset no greater than newOffset by iterating forward.
+ int lastSafeOffset = offset;
+ do {
+ iter_.resetToOffset(lastSafeOffset);
+ do {
+ iter_.nextCE();
+ } while ((offset = iter_.getOffset()) == lastSafeOffset);
+ if (offset <= newOffset) {
+ lastSafeOffset = offset;
}
- ch = (char)m_source_.previous();
- }
- updateInternalState();
- int prevoffset = 0;
- while (m_source_.getIndex() <= offset) {
- prevoffset = m_source_.getIndex();
- next();
- }
- m_source_.setIndex(prevoffset);
+ } while (offset < newOffset);
+ newOffset = lastSafeOffset;
}
}
- updateInternalState();
- // direction code to prevent next and previous from returning a
- // character if we are already at the ends
- offset = m_source_.getIndex();
- if (offset == 0/* m_source_.getBeginIndex() */) {
- // preventing previous() from returning characters from the end of
- // the string again if we are at the beginning
- m_isForwards_ = false;
- }
- else if (offset == m_source_.getLength()) {
- // preventing next() from returning characters from the start of
- // the string again if we are at the end
- m_isForwards_ = true;
- }
+ iter_.resetToOffset(newOffset);
+ otherHalf_ = 0;
+ dir_ = 1;
}
/**
* @param source the new source string for iteration.
* @stable ICU 2.8
*/
- public void setText(String source)
- {
- m_srcUtilIter_.setText(source);
- m_source_ = m_srcUtilIter_;
- updateInternalState();
-
- m_direction = 0; // reset to initial state
+ public void setText(String source) {
+ string_ = source; // TODO: do we need to remember the source string in a field?
+ CollationIterator newIter;
+ boolean numeric = rbc_.settings.readOnly().isNumeric();
+ if (rbc_.settings.readOnly().dontCheckFCD()) {
+ newIter = new UTF16CollationIterator(rbc_.data, numeric, string_, 0);
+ } else {
+ newIter = new FCDUTF16CollationIterator(rbc_.data, numeric, string_, 0);
+ }
+ iter_ = newIter;
+ otherHalf_ = 0;
+ dir_ = 0;
}
-
+
/**
* <p>Set a new source string iterator for iteration, and reset the
* offset to the beginning of the text.
* @param source the new source string iterator for iteration.
* @stable ICU 2.8
*/
- public void setText(UCharacterIterator source)
- {
- m_srcUtilIter_.setText(source.getText());
- m_source_ = m_srcUtilIter_;
- updateInternalState();
-
- m_direction = 0; // reset to initial state
+ public void setText(UCharacterIterator source) {
+ string_ = source.getText(); // TODO: do we need to remember the source string in a field?
+ // Note: In C++, we just setText(source.getText()).
+ // In Java, we actually operate on a character iterator.
+ // (The old code apparently did so only for a CharacterIterator;
+ // for a UCharacterIterator it also just used source.getText()).
+ // TODO: do we need to remember the cloned iterator in a field?
+ UCharacterIterator src;
+ try {
+ src = (UCharacterIterator) source.clone();
+ } catch (CloneNotSupportedException e) {
+ // Fall back to ICU 52 behavior of iterating over the text contents
+ // of the UCharacterIterator.
+ setText(source.getText());
+ return;
+ }
+ src.setToStart();
+ CollationIterator newIter;
+ boolean numeric = rbc_.settings.readOnly().isNumeric();
+ if (rbc_.settings.readOnly().dontCheckFCD()) {
+ newIter = new IterCollationIterator(rbc_.data, numeric, src);
+ } else {
+ newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
+ }
+ iter_ = newIter;
+ otherHalf_ = 0;
+ dir_ = 0;
}
/**
* @param source the new source string iterator for iteration.
* @stable ICU 2.8
*/
- public void setText(CharacterIterator source)
- {
- m_source_ = new CharacterIteratorWrapper(source);
- m_source_.setToStart();
- updateInternalState();
+ public void setText(CharacterIterator source) {
+ // Note: In C++, we just setText(source.getText()).
+ // In Java, we actually operate on a character iterator.
+ // TODO: do we need to remember the iterator in a field?
+ // TODO: apparently we don't clone a CharacterIterator in Java,
+ // we only clone the text for a UCharacterIterator?? see the old code in the constructors
+ UCharacterIterator src = new CharacterIteratorWrapper(source);
+ src.setToStart();
+ string_ = src.getText(); // TODO: do we need to remember the source string in a field?
+ CollationIterator newIter;
+ boolean numeric = rbc_.settings.readOnly().isNumeric();
+ if (rbc_.settings.readOnly().dontCheckFCD()) {
+ newIter = new IterCollationIterator(rbc_.data, numeric, src);
+ } else {
+ newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
+ }
+ iter_ = newIter;
+ otherHalf_ = 0;
+ dir_ = 0;
+ }
+
+ // Java porting note: This method is @stable ICU 2.0 in ICU4C, but not available
+ // in ICU4J. For now, keep it package local.
+ /**
+ * Gets the comparison order in the desired strength. Ignore the other
+ * differences.
+ * @param order The order value
+ */
+ int strengthOrder(int order) {
+ int s = rbc_.settings.readOnly().getStrength();
+ // Mask off the unwanted differences.
+ if (s == Collator.PRIMARY) {
+ order &= 0xffff0000;
+ }
+ else if (s == Collator.SECONDARY) {
+ order &= 0xffffff00;
+ }
+
+ return order;
+ }
+
+
+ private static final class MaxExpSink implements ContractionsAndExpansions.CESink {
+ MaxExpSink(Map<Integer, Integer> h) {
+ maxExpansions = h;
+ }
+
+ // Java 6: @Override
+ public void handleCE(long ce) {
+ }
+
+ // Java 6: @Override
+ public void handleExpansion(long ces[], int start, int length) {
+ if (length <= 1) {
+ // We do not need to add single CEs into the map.
+ return;
+ }
+ int count = 0; // number of CE "halves"
+ for (int i = 0; i < length; ++i) {
+ count += ceNeedsTwoParts(ces[start + i]) ? 2 : 1;
+ }
+ // last "half" of the last CE
+ long ce = ces[start + length - 1];
+ long p = ce >>> 32;
+ int lower32 = (int) ce;
+ int lastHalf = getSecondHalf(p, lower32);
+ if (lastHalf == 0) {
+ lastHalf = getFirstHalf(p, lower32);
+ assert (lastHalf != 0);
+ } else {
+ lastHalf |= 0xc0; // old-style continuation CE
+ }
+ Integer oldCount = maxExpansions.get(lastHalf);
+ if (oldCount == null || count > oldCount) {
+ maxExpansions.put(lastHalf, count);
+ }
+ }
+
+ private Map<Integer, Integer> maxExpansions;
+ }
+
+ static final Map<Integer, Integer> computeMaxExpansions(CollationData data) {
+ Map<Integer, Integer> maxExpansions = new HashMap<Integer, Integer>();
+ MaxExpSink sink = new MaxExpSink(maxExpansions);
+ new ContractionsAndExpansions(null, null, sink, true).forData(data);
+ return maxExpansions;
+ }
+
+ /**
+ * <p> Returns the maximum length of any expansion sequence that ends with
+ * the specified collation element. If there is no expansion with this
+ * collation element as the last element, returns 1.
+ * </p>
+ * @param ce a collation element returned by previous() or next().
+ * @return the maximum length of any expansion sequence ending
+ * with the specified collation element.
+ * @stable ICU 2.8
+ */
+ public int getMaxExpansion(int ce) {
+ return getMaxExpansion(rbc_.tailoring.maxExpansions, ce);
+ }
- m_direction = 0; // reset to initial state
+ static int getMaxExpansion(Map<Integer, Integer> maxExpansions, int order) {
+ if (order == 0) {
+ return 1;
+ }
+ Integer max;
+ if (maxExpansions != null && (max = maxExpansions.get(order)) != null) {
+ return max;
+ }
+ if ((order & 0xc0) == 0xc0) {
+ // old-style continuation CE
+ return 2;
+ } else {
+ return 1;
+ }
}
- // public miscellaneous methods -----------------------------------------
+ /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
+ private byte normalizeDir() {
+ return dir_ == 1 ? 0 : dir_;
+ }
/**
* Tests that argument object is equals to this CollationElementIterator.
* CollationElementIterator
* @stable ICU 2.8
*/
- public boolean equals(Object that)
- {
+ public boolean equals(Object that) {
if (that == this) {
return true;
}
if (that instanceof CollationElementIterator) {
- CollationElementIterator thatceiter
- = (CollationElementIterator)that;
- if (!m_collator_.equals(thatceiter.m_collator_)) {
- return false;
- }
- // checks the text
- return m_source_.getIndex() == thatceiter.m_source_.getIndex()
- && m_source_.getText().equals(
- thatceiter.m_source_.getText());
+ CollationElementIterator thatceiter = (CollationElementIterator) that;
+ return rbc_.equals(thatceiter.rbc_)
+ && otherHalf_ == thatceiter.otherHalf_
+ && normalizeDir() == thatceiter.normalizeDir()
+ && string_.equals(thatceiter.string_)
+ && iter_.equals(thatceiter.iter_);
}
return false;
}
-
+
/**
* Mock implementation of hashCode(). This implementation always returns a constant
* value. When Java assertion is enabled, this method triggers an assertion failure.
* @internal
* @deprecated This API is ICU internal only.
*/
- @Deprecated
public int hashCode() {
assert false : "hashCode not designed";
return 42;
}
- // package private constructors ------------------------------------------
-
- private CollationElementIterator(RuleBasedCollator collator) {
- m_utilStringBuffer_ = new StringBuilder();
- m_collator_ = collator;
- m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
- m_buffer_ = new StringBuilder();
- m_utilSpecialBackUp_ = new Backup();
- }
-
/**
- * <p>CollationElementIterator constructor. This takes a source
- * string and a RuleBasedCollator. The iterator will walk through
- * the source string based on the rules defined by the
- * collator. If the source string is empty, NULLORDER will be
- * returned on the first call to next().</p>
- *
- * @param source the source string.
- * @param collator the RuleBasedCollator
- * @stable ICU 2.8
+ * @internal
+ * @deprecated This API is ICU internal only.
*/
- CollationElementIterator(String source, RuleBasedCollator collator)
- {
- this(collator);
- m_source_ = m_srcUtilIter_ = new StringUCharacterIterator(source);
- updateInternalState();
- }
-
- /**
- * <p>CollationElementIterator constructor. This takes a source
- * character iterator and a RuleBasedCollator. The iterator will
- * walk through the source string based on the rules defined by
- * the collator. If the source string is empty, NULLORDER will be
- * returned on the first call to next().</p>
- *
- * @param source the source string iterator.
- * @param collator the RuleBasedCollator
- * @stable ICU 2.8
- */
- CollationElementIterator(CharacterIterator source,
- RuleBasedCollator collator)
- {
- this(collator);
- m_srcUtilIter_ = new StringUCharacterIterator();
- m_source_ = new CharacterIteratorWrapper(source);
- updateInternalState();
- }
-
- /**
- * <p>CollationElementIterator constructor. This takes a source
- * character iterator and a RuleBasedCollator. The iterator will
- * walk through the source string based on the rules defined by
- * the collator. If the source string is empty, NULLORDER will be
- * returned on the first call to next().</p>
- *
- * @param source the source string iterator.
- * @param collator the RuleBasedCollator
- * @stable ICU 2.8
- */
- CollationElementIterator(UCharacterIterator source,
- RuleBasedCollator collator)
- {
- this(collator);
- m_srcUtilIter_ = new StringUCharacterIterator();
- m_srcUtilIter_.setText(source.getText());
- m_source_ = m_srcUtilIter_;
- updateInternalState();
- }
-
- // package private data members -----------------------------------------
-
- /**
- * true if current codepoint was Hiragana
- */
- boolean m_isCodePointHiragana_;
- /**
- * Position in the original string that starts with a non-FCD sequence
- */
- int m_FCDStart_;
- /**
- * This is the CE from CEs buffer that should be returned.
- * Initial value is 0.
- * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_,
- * backwards will end with m_CEBufferOffset_ == 0.
- * The next/previous after we reach the end/beginning of the m_CEBuffer_
- * will cause this value to be reset to 0.
- */
- int m_CEBufferOffset_;
-
- /**
- * This is the position to which we have stored processed CEs.
- * Initial value is 0.
- * The next/previous after we reach the end/beginning of the m_CEBuffer_
- * will cause this value to be reset to 0.
- */
- int m_CEBufferSize_;
- static final int CE_NOT_FOUND_ = 0xF0000000;
- static final int CE_EXPANSION_TAG_ = 1;
- static final int CE_CONTRACTION_TAG_ = 2;
- /**
- * Collate Digits As Numbers (CODAN) implementation
- */
- static final int CE_DIGIT_TAG_ = 13;
-
- // package private methods ----------------------------------------------
-
- /**
- * Sets the collator used.
- * Internal use, all data members will be reset to the default values
- * @param collator to set
- */
- void setCollator(RuleBasedCollator collator)
- {
- m_collator_ = collator;
- updateInternalState();
- }
-
- /**
- * <p>Sets the iterator to point to the collation element corresponding to
- * the specified character (the parameter is a CHARACTER offset in the
- * original string, not an offset into its corresponding sequence of
- * collation elements). The value returned by the next call to next()
- * will be the collation element corresponding to the specified position
- * in the text. Unlike the public method setOffset(int), this method does
- * not try to readjust the offset to the start of a contracting sequence.
- * getOffset() is guaranteed to return the same value as was passed to a
- * preceding call to setOffset().</p>
- * @param offset new character offset into the original text to set.
- */
- void setExactOffset(int offset)
- {
- m_source_.setIndex(offset);
- updateInternalState();
-
- m_direction = 0; // reset to initial state
- }
-
- /**
- * Checks if iterator is in the buffer zone
- * @return true if iterator is in buffer zone, false otherwise
- */
- boolean isInBuffer()
- {
- return m_bufferOffset_ > 0;
- }
-
-
- /**
- * <p>Sets the iterator to point to the collation element corresponding to
- * the specified character (the parameter is a CHARACTER offset in the
- * original string, not an offset into its corresponding sequence of
- * collation elements). The value returned by the next call to next()
- * will be the collation element corresponding to the specified position
- * in the text. Unlike the public method setOffset(int), this method does
- * not try to readjust the offset to the start of a contracting sequence.
- * getOffset() is guaranteed to return the same value as was passed to a
- * preceding call to setOffset().</p>
- * </p>
- * @param source the new source string iterator for iteration.
- * @param offset to the source
- */
- void setText(UCharacterIterator source, int offset)
- {
- m_srcUtilIter_.setText(source.getText());
- m_source_ = m_srcUtilIter_;
- m_source_.setIndex(offset);
- updateInternalState();
-
- m_direction = 0; // reset to initial state
- }
-
- // private inner class --------------------------------------------------
-
- /**
- * Backup data class
- */
- private static final class Backup
- {
- // protected data members -------------------------------------------
-
- /**
- * Backup non FCD sequence limit
- */
- protected int m_FCDLimit_;
- /**
- * Backup non FCD sequence start
- */
- protected int m_FCDStart_;
- /**
- * Backup if previous Codepoint is Hiragana quatenary
- */
- protected boolean m_isCodePointHiragana_;
- /**
- * Backup buffer position
- */
- protected int m_bufferOffset_;
- /**
- * Backup source iterator offset
- */
- protected int m_offset_;
- /**
- * Backup buffer contents
- */
- protected StringBuffer m_buffer_;
-
- // protected constructor --------------------------------------------
-
- /**
- * Empty constructor
- */
- protected Backup()
- {
- m_buffer_ = new StringBuffer();
- }
- }
- // end inner class ------------------------------------------------------
-
- /**
- * Direction of travel
- */
- private boolean m_isForwards_;
- /**
- * Source string iterator
- */
- private UCharacterIterator m_source_;
- /**
- * This is position to the m_buffer_, -1 if iterator is not in m_buffer_
- */
- private int m_bufferOffset_;
- /**
- * Buffer for temporary storage of normalized characters, discontiguous
- * characters and Thai characters
- */
- private StringBuilder m_buffer_;
- /**
- * Position in the original string to continue forward FCD check from.
- */
- private int m_FCDLimit_;
- /**
- * The collator this iterator is based on
- */
- private RuleBasedCollator m_collator_;
- /**
- * true if Hiragana quatenary is on
- */
- //private boolean m_isHiragana4_;
- /**
- * CE buffer
- */
- private int m_CEBuffer_[];
- /**
- * In reality we should not have to deal with expansion sequences longer
- * then 16. However this value can be change if a bigger buffer is needed.
- * Note, if the size is change to too small a number, BIG trouble.
- * Reasonable small value is around 10, if there's no Arabic or other
- * funky collations that have long expansion sequence. This is the longest
- * expansion sequence this can handle without bombing out.
- */
- private static final int CE_BUFFER_INIT_SIZE_ = 512;
- /**
- * Backup storage for special processing inner cases
- */
- private Backup m_utilSpecialBackUp_;
- /**
- * Backup storage in special processing entry state
- */
- private Backup m_utilSpecialEntryBackUp_;
- /**
- * Backup storage in special processing discontiguous state
- */
- private Backup m_utilSpecialDiscontiguousBackUp_;
- /**
- * Utility
- */
- private StringUCharacterIterator m_srcUtilIter_;
- private StringBuilder m_utilStringBuffer_;
- private StringBuilder m_utilSkippedBuffer_;
- private CollationElementIterator m_utilColEIter_;
- private static final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
- private StringBuilder m_unnormalized_;
- private Normalizer2Impl.ReorderingBuffer m_n2Buffer_;
- /**
- * The first non-zero combining class character
- */
- private static final int FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0xC0;
- /**
- * One character before the first character with leading non-zero combining
- * class
- */
- private static final int LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0x300;
- /**
- * Mask for the last byte
- */
- private static final int LAST_BYTE_MASK_ = 0xFF;
- /**
- * Shift value for the second last byte
- */
- private static final int SECOND_LAST_BYTE_SHIFT_ = 8;
-
- // special ce values and tags -------------------------------------------
-
-// private static final int CE_EXPANSION_ = 0xF1000000;
- private static final int CE_CONTRACTION_ = 0xF2000000;
- /**
- * Indicates the last ce has been consumed. Compare with NULLORDER.
- * NULLORDER is returned if error occurs.
- */
-/* private static final int CE_NO_MORE_CES_ = 0x00010101;
- private static final int CE_NO_MORE_CES_PRIMARY_ = 0x00010000;
- private static final int CE_NO_MORE_CES_SECONDARY_ = 0x00000100;
- private static final int CE_NO_MORE_CES_TERTIARY_ = 0x00000001;
-*/
- private static final int CE_NOT_FOUND_TAG_ = 0;
- /**
- * Charset processing, not yet implemented
- */
- private static final int CE_CHARSET_TAG_ = 4;
- /**
- * AC00-D7AF
- */
- private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
- /**
- * D800-DBFF
- */
- private static final int CE_LEAD_SURROGATE_TAG_ = 7;
- /**
- * DC00-DFFF
- */
- private static final int CE_TRAIL_SURROGATE_TAG_ = 8;
- /**
- * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
- */
- private static final int CE_CJK_IMPLICIT_TAG_ = 9;
- private static final int CE_IMPLICIT_TAG_ = 10;
- static final int CE_SPEC_PROC_TAG_ = 11;
- /**
- * This is a 3 byte primary with starting secondaries and tertiaries.
- * It fits in a single 32 bit CE and is used instead of expansion to save
- * space without affecting the performance (hopefully).
- */
- private static final int CE_LONG_PRIMARY_TAG_ = 12;
-
-// private static final int CE_CE_TAGS_COUNT = 14;
- private static final int CE_BYTE_COMMON_ = 0x05;
-
- // end special ce values and tags ---------------------------------------
-
- private static final int HANGUL_SBASE_ = 0xAC00;
- private static final int HANGUL_LBASE_ = 0x1100;
- private static final int HANGUL_VBASE_ = 0x1161;
- private static final int HANGUL_TBASE_ = 0x11A7;
- private static final int HANGUL_VCOUNT_ = 21;
- private static final int HANGUL_TCOUNT_ = 28;
-
- // CJK stuff ------------------------------------------------------------
-
-/* private static final int CJK_BASE_ = 0x4E00;
- private static final int CJK_LIMIT_ = 0x9FFF+1;
- private static final int CJK_COMPAT_USED_BASE_ = 0xFA0E;
- private static final int CJK_COMPAT_USED_LIMIT_ = 0xFA2F + 1;
- private static final int CJK_A_BASE_ = 0x3400;
- private static final int CJK_A_LIMIT_ = 0x4DBF + 1;
- private static final int CJK_B_BASE_ = 0x20000;
- private static final int CJK_B_LIMIT_ = 0x2A6DF + 1;
- private static final int NON_CJK_OFFSET_ = 0x110000;
-*/
- private static final boolean DEBUG = ICUDebug.enabled("collator");
-
- // Field tracking the current direction. This field was added
- // just for making sure that reset()/setOffset()/setText() is called
- // before switching the iterator direction.
- // We used to allow changing direction without calling reset()/setOffset()
- // setText() in ICU4J, but the API specification was updated to match the
- // ICU4C's specification. The current implementation seems to handle
- // direction change (or not), but it will be completely replaced with
- // a new implementation not allowing this. Until then, we use this field
- // to trigger assertion and make sure our implementation is not depending on
- // the assumption. See ticket#9104.
- private byte m_direction = 0; // -1: backward, 0: initial state, 1: forward
-
- // private methods ------------------------------------------------------
-
- /**
- * Reset the iterator internally
- */
- private void updateInternalState()
- {
- m_isCodePointHiragana_ = false;
- m_buffer_.setLength(0);
- m_bufferOffset_ = -1;
- m_CEBufferOffset_ = 0;
- m_CEBufferSize_ = 0;
- m_FCDLimit_ = -1;
- m_FCDStart_ = m_source_.getLength();
- //m_isHiragana4_ = m_collator_.m_isHiragana4_;
- m_isForwards_ = true;
- }
-
- /**
- * Backup the current internal state
- * @param backup object to store the data
- */
- private void backupInternalState(Backup backup)
- {
- backup.m_offset_ = m_source_.getIndex();
- backup.m_FCDLimit_ = m_FCDLimit_;
- backup.m_FCDStart_ = m_FCDStart_;
- backup.m_isCodePointHiragana_ = m_isCodePointHiragana_;
- backup.m_bufferOffset_ = m_bufferOffset_;
- backup.m_buffer_.setLength(0);
- if (m_bufferOffset_ >= 0) {
- backup.m_buffer_.append(m_buffer_);
- }
- }
-
- /**
- * Update the iterator internally with backed-up state
- * @param backup object that stored the data
- */
- private void updateInternalState(Backup backup)
- {
- m_source_.setIndex(backup.m_offset_);
- m_isCodePointHiragana_ = backup.m_isCodePointHiragana_;
- m_bufferOffset_ = backup.m_bufferOffset_;
- m_FCDLimit_ = backup.m_FCDLimit_;
- m_FCDStart_ = backup.m_FCDStart_;
- m_buffer_.setLength(0);
- if (m_bufferOffset_ >= 0) {
- m_buffer_.append(backup.m_buffer_);
- }
- }
-
- /**
- * A fast combining class retrieval system.
- * @param ch UTF16 character
- * @return combining class of ch
- */
- private int getCombiningClass(int ch)
- {
- if (ch >= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ &&
- m_collator_.isUnsafe((char)ch) || ch > 0xFFFF
- ) {
- return m_nfcImpl_.getCC(m_nfcImpl_.getNorm16(ch));
- }
- return 0;
- }
-
- /**
- * <p>Incremental normalization, this is an essential optimization.
- * Assuming FCD checks has been done, normalize the non-FCD characters into
- * the buffer.
- * Source offsets points to the current processing character.
- * </p>
- */
- private void normalize()
- {
- if (m_unnormalized_ == null) {
- m_unnormalized_ = new StringBuilder();
- m_n2Buffer_ = new Normalizer2Impl.ReorderingBuffer(m_nfcImpl_, m_buffer_, 10);
- } else {
- m_unnormalized_.setLength(0);
- m_n2Buffer_.remove();
- }
- int size = m_FCDLimit_ - m_FCDStart_;
- m_source_.setIndex(m_FCDStart_);
- for (int i = 0; i < size; i ++) {
- m_unnormalized_.append((char)m_source_.next());
- }
- m_nfcImpl_.decomposeShort(m_unnormalized_, 0, size, m_n2Buffer_);
- }
-
- /**
- * <p>Incremental FCD check and normalization. Gets the next base character
- * position and determines if the in-between characters needs normalization.
- * </p>
- * <p>When entering, the state is known to be this:
- * <ul>
- * <li>We are working on source string, not the buffer.
- * <li>The leading combining class from the current character is 0 or the
- * trailing combining class of the previous char was zero.
- * </ul>
- * Incoming source offsets points to the current processing character.
- * Return source offsets points to the current processing character.
- * </p>
- * @param ch current character (lead unit)
- * @param offset offset of ch +1
- * @return true if FCDCheck passes, false otherwise
- */
- private boolean FCDCheck(int ch, int offset)
- {
- boolean result = true;
-
- // Get the trailing combining class of the current character.
- // If it's zero, we are OK.
- m_FCDStart_ = offset - 1;
- m_source_.setIndex(offset);
- // trie access
- int fcd;
- if (ch < 0x180) {
- fcd = m_nfcImpl_.getFCD16FromBelow180(ch);
- } else if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(ch)) {
- if (Character.isHighSurrogate((char)ch)) {
- int c2 = m_source_.next();
- if (c2 < 0) {
- fcd = 0; // end of input
- } else if (Character.isLowSurrogate((char)c2)) {
- fcd = m_nfcImpl_.getFCD16FromNormData(Character.toCodePoint((char)ch, (char)c2));
- } else {
- m_source_.moveIndex(-1);
- fcd = 0;
- }
- } else {
- fcd = m_nfcImpl_.getFCD16FromNormData(ch);
- }
- } else {
- fcd = 0;
- }
-
- int prevTrailCC = fcd & LAST_BYTE_MASK_;
-
- if (prevTrailCC == 0) {
- offset = m_source_.getIndex();
- } else {
- // The current char has a non-zero trailing CC. Scan forward until
- // we find a char with a leading cc of zero.
- while (true) {
- ch = m_source_.nextCodePoint();
- if (ch < 0) {
- offset = m_source_.getIndex();
- break;
- }
- // trie access
- fcd = m_nfcImpl_.getFCD16(ch);
- int leadCC = fcd >> SECOND_LAST_BYTE_SHIFT_;
- if (leadCC == 0) {
- // this is a base character, we stop the FCD checks
- offset = m_source_.getIndex() - Character.charCount(ch);
- break;
- }
-
- if (leadCC < prevTrailCC) {
- result = false;
- }
-
- prevTrailCC = fcd & LAST_BYTE_MASK_;
- }
- }
- m_FCDLimit_ = offset;
- m_source_.setIndex(m_FCDStart_ + 1);
- return result;
- }
-
- /**
- * <p>Method tries to fetch the next character that is in fcd form.</p>
- * <p>Normalization is done if required.</p>
- * <p>Offsets are returned at the next character.</p>
- * @return next fcd character
- */
- private int nextChar()
- {
- int result;
-
- // loop handles the next character whether it is in the buffer or not.
- if (m_bufferOffset_ < 0) {
- // we're working on the source and not normalizing. fast path.
- // note Thai pre-vowel reordering uses buffer too
- result = m_source_.next();
- }
- else {
- // we are in the buffer, buffer offset will never be 0 here
- if (m_bufferOffset_ >= m_buffer_.length()) {
- // Null marked end of buffer, revert to the source string and
- // loop back to top to try again to get a character.
- m_source_.setIndex(m_FCDLimit_);
- m_bufferOffset_ = -1;
- m_buffer_.setLength(0);
- return nextChar();
- }
- return m_buffer_.charAt(m_bufferOffset_ ++);
- }
- int startoffset = m_source_.getIndex();
- if (result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_
- // Fast fcd safe path. trail combining class == 0.
- || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
- || m_bufferOffset_ >= 0 || m_FCDLimit_ >= startoffset) {
- // skip the fcd checks
- return result;
- }
-
- if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
- // We need to peek at the next character in order to tell if we are
- // FCD
- int next = m_source_.current();
- if (next == UCharacterIterator.DONE
- || next < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
- return result; // end of source string and if next character
- // starts with a base character is always fcd.
- }
- }
-
- // Need a more complete FCD check and possible normalization.
- if (!FCDCheck(result, startoffset)) {
- normalize();
- result = m_buffer_.charAt(0);
- m_bufferOffset_ = 1;
- }
- return result;
- }
-
- /**
- * <p>Incremental normalization, this is an essential optimization.
- * Assuming FCD checks has been done, normalize the non-FCD characters into
- * the buffer.
- * Source offsets points to the current processing character.</p>
- */
- private void normalizeBackwards()
- {
- normalize();
- m_bufferOffset_ = m_buffer_.length();
- }
-
- /**
- * <p>Incremental backwards FCD check and normalization. Gets the previous
- * base character position and determines if the in-between characters
- * needs normalization.
- * </p>
- * <p>When entering, the state is known to be this:
- * <ul>
- * <li>We are working on source string, not the buffer.
- * <li>The trailing combining class from the current character is 0 or the
- * leading combining class of the next char was zero.
- * </ul>
- * Input source offsets points to the previous character.
- * Return source offsets points to the current processing character.
- * </p>
- * @param ch current character
- * @param offset current character offset
- * @return true if FCDCheck passes, false otherwise
- */
- private boolean FCDCheckBackwards(int ch, int offset)
- {
- int fcd;
- m_FCDLimit_ = offset + 1;
- m_source_.setIndex(offset);
- if (ch < 0x180) {
- fcd = m_nfcImpl_.getFCD16FromBelow180(ch);
- } else if (!Character.isLowSurrogate((char)ch)) {
- if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(ch)) {
- fcd = m_nfcImpl_.getFCD16FromNormData(ch);
- } else {
- fcd = 0;
- }
- } else {
- int c2 = m_source_.previous();
- if (c2 < 0) {
- fcd = 0; // start of input
- } else if (Character.isHighSurrogate((char)c2)) {
- ch = Character.toCodePoint((char)c2, (char)ch);
- fcd = m_nfcImpl_.getFCD16FromNormData(ch);
- --offset;
- } else {
- m_source_.moveIndex(1);
- fcd = 0;
- }
- }
-
- // Scan backward until we find a char with a leading cc of zero.
- boolean result = true;
- if (fcd != 0) {
- int leadCC;
- for (;;) {
- leadCC = fcd >> SECOND_LAST_BYTE_SHIFT_;
- if (leadCC == 0 || (ch = m_source_.previousCodePoint()) < 0) {
- offset = m_source_.getIndex();
- break;
- }
- fcd = m_nfcImpl_.getFCD16(ch);
- int prevTrailCC = fcd & LAST_BYTE_MASK_;
- if (leadCC < prevTrailCC) {
- result = false;
- } else if (fcd == 0) {
- offset = m_source_.getIndex() + Character.charCount(ch);
- break;
- }
- }
- }
-
- // storing character with 0 lead fcd or the 1st accent with a base
- // character before it
- m_FCDStart_ = offset;
- m_source_.setIndex(m_FCDLimit_);
- return result;
- }
-
- /**
- * <p>Method tries to fetch the previous character that is in fcd form.</p>
- * <p>Normalization is done if required.</p>
- * <p>Offsets are returned at the current character.</p>
- * @return previous fcd character
- */
- private int previousChar()
- {
- if (m_bufferOffset_ >= 0) {
- m_bufferOffset_ --;
- if (m_bufferOffset_ >= 0) {
- return m_buffer_.charAt(m_bufferOffset_);
- }
- else {
- // At the start of buffer, route back to string.
- m_buffer_.setLength(0);
- if (m_FCDStart_ == 0) {
- m_FCDStart_ = -1;
- m_source_.setIndex(0);
- return UCharacterIterator.DONE;
- }
- else {
- m_FCDLimit_ = m_FCDStart_;
- m_source_.setIndex(m_FCDStart_);
- return previousChar();
- }
- }
- }
- int result = m_source_.previous();
- int startoffset = m_source_.getIndex();
- if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
- || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
- || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
- return result;
- }
- int ch = m_source_.previous();
- if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
- // if previous character is FCD
- m_source_.next();
- return result;
- }
- // Need a more complete FCD check and possible normalization.
- if (!FCDCheckBackwards(result, startoffset)) {
- normalizeBackwards();
- m_bufferOffset_ --;
- result = m_buffer_.charAt(m_bufferOffset_);
- }
- else {
- // fcd checks always reset m_source_ to the limit of the FCD
- m_source_.setIndex(startoffset);
- }
- return result;
- }
-
- /**
- * Determines if it is at the start of source iteration
- * @return true if iterator at the start, false otherwise
- */
- private final boolean isBackwardsStart()
- {
- return (m_bufferOffset_ < 0 && m_source_.getIndex() == 0)
- || (m_bufferOffset_ == 0 && m_FCDStart_ <= 0);
- }
-
- /**
- * Checks if iterator is at the end of its source string.
- * @return true if it is at the end, false otherwise
- */
- private final boolean isEnd()
- {
- if (m_bufferOffset_ >= 0) {
- if (m_bufferOffset_ != m_buffer_.length()) {
- return false;
- }
- else {
- // at end of buffer. check if fcd is at the end
- return m_FCDLimit_ == m_source_.getLength();
- }
- }
- return m_source_.getLength() == m_source_.getIndex();
- }
-
- /**
- * <p>Special CE management for surrogates</p>
- * <p>Lead surrogate is encountered. CE to be retrieved by using the
- * following code unit. If the next code unit is a trail surrogate, both
- * units will be combined to retrieve the CE,
- * otherwise we treat it like an unassigned code point.</p>
- * @param collator collator to use
- * @param ce current CE
- * @param trail character
- * @return next CE for the surrogate characters
- */
- private final int nextSurrogate(RuleBasedCollator collator, int ce,
- char trail)
- {
- if (!UTF16.isTrailSurrogate(trail)) {
- updateInternalState(m_utilSpecialBackUp_);
- return CE_NOT_FOUND_;
- }
- // TODO: CE contain the data from the previous CE + the mask.
- // It should at least be unmasked
- int result = collator.m_trie_.getTrailValue(ce, trail);
- if (result == CE_NOT_FOUND_) {
- updateInternalState(m_utilSpecialBackUp_);
- }
- return result;
- }
-
- /**
- * Gets the CE expansion offset
- * @param collator current collator
- * @param ce ce to test
- * @return expansion offset
- */
- private int getExpansionOffset(RuleBasedCollator collator, int ce)
- {
- return ((ce & 0xFFFFF0) >> 4) - collator.m_expansionOffset_;
- }
-
-
- /**
- * Gets the contraction ce offset
- * @param collator current collator
- * @param ce current ce
- * @return contraction offset
- */
- private int getContractionOffset(RuleBasedCollator collator, int ce)
- {
- return (ce & 0xFFFFFF) - collator.m_contractionOffset_;
- }
-
- /**
- * Checks if CE is a special tag CE
- * @param ce to check
- * @return true if CE is a special tag CE, false otherwise
- */
- private boolean isSpecialPrefixTag(int ce)
- {
- return RuleBasedCollator.isSpecial(ce) &&
- RuleBasedCollator.getTag(ce) == CE_SPEC_PROC_TAG_;
- }
-
- /**
- * <p>Special processing getting a CE that is preceded by a certain
- * prefix.</p>
- * <p>Used for optimizing Japanese length and iteration marks. When a
- * special processing tag is encountered, iterate backwards to see if
- * there's a match.</p>
- * <p>Contraction tables are used, prefix data is stored backwards in the
- * table.</p>
- * @param collator collator to use
- * @param ce current ce
- * @param entrybackup entry backup iterator status
- * @return next collation element
- */
- private int nextSpecialPrefix(RuleBasedCollator collator, int ce,
- Backup entrybackup)
- {
- backupInternalState(m_utilSpecialBackUp_);
- updateInternalState(entrybackup);
- previousChar();
- // We want to look at the character where we entered
-
- while (true) {
- // This loop will run once per source string character, for as
- // long as we are matching a potential contraction sequence
- // First we position ourselves at the begining of contraction
- // sequence
- int entryoffset = getContractionOffset(collator, ce);
- int offset = entryoffset;
- if (isBackwardsStart()) {
- ce = collator.m_contractionCE_[offset];
- break;
- }
- char previous = (char)previousChar();
- while (previous > collator.m_contractionIndex_[offset]) {
- // contraction characters are ordered, skip smaller characters
- offset ++;
- }
-
- if (previous == collator.m_contractionIndex_[offset]) {
- // Found the source string char in the table.
- // Pick up the corresponding CE from the table.
- ce = collator.m_contractionCE_[offset];
- }
- else {
- // Source string char was not in the table, prefix not found
- ce = collator.m_contractionCE_[entryoffset];
- }
-
- if (!isSpecialPrefixTag(ce)) {
- // The source string char was in the contraction table, and
- // the corresponding CE is not a prefix CE. We found the
- // prefix, break out of loop, this CE will end up being
- // returned. This is the normal way out of prefix handling
- // when the source actually contained the prefix.
- break;
- }
- }
- if (ce != CE_NOT_FOUND_) {
- // we found something and we can merilly continue
- updateInternalState(m_utilSpecialBackUp_);
- }
- else { // prefix search was a failure, we have to backup all the way to
- // the start
- updateInternalState(entrybackup);
- }
- return ce;
- }
-
- /**
- * Checks if the ce is a contraction tag
- * @param ce ce to check
- * @return true if ce is a contraction tag, false otherwise
- */
- private boolean isContractionTag(int ce)
- {
- return RuleBasedCollator.isSpecial(ce) &&
- RuleBasedCollator.getTag(ce) == CE_CONTRACTION_TAG_;
- }
-
- /**
- * Method to copy skipped characters into the buffer and sets the fcd
- * position. To ensure that the skipped characters are considered later,
- * we need to place it in the appropriate position in the buffer and
- * reassign the source index. simple case if index reside in string,
- * simply copy to buffer and fcdposition = pos, pos = start of buffer.
- * if pos in normalization buffer, we'll insert the copy infront of pos
- * and point pos to the start of the buffer. why am i doing these copies?
- * well, so that the whole chunk of codes in the getNextCE,
- * ucol_prv_getSpecialCE does not require any changes, which will be
- * really painful.
- * @param skipped character buffer
- */
- private void setDiscontiguous(StringBuilder skipped)
- {
- if (m_bufferOffset_ >= 0) {
- m_buffer_.replace(0, m_bufferOffset_, skipped.toString());
- }
- else {
- m_FCDLimit_ = m_source_.getIndex();
- m_buffer_.setLength(0);
- m_buffer_.append(skipped.toString());
- }
-
- m_bufferOffset_ = 0;
- }
-
- /**
- * Returns the current character for forward iteration
- * @return current character
- */
- private int currentChar()
- {
- if (m_bufferOffset_ < 0) {
- m_source_.previousCodePoint();
- return m_source_.nextCodePoint();
- }
-
- // m_bufferOffset_ is never 0 in normal circumstances except after a
- // discontiguous contraction since it is always returned and moved
- // by 1 when we do nextChar()
- return UTF16.charAt(m_buffer_, m_bufferOffset_ - 1);
- }
-
- /**
- * Method to get the discontiguous collation element within the source.
- * Note this function will set the position to the appropriate places.
- * Passed in character offset points to the second combining character
- * after the start character.
- * @param collator current collator used
- * @param entryoffset index to the start character in the contraction table
- * @return discontiguous collation element offset
- */
- private int nextDiscontiguous(RuleBasedCollator collator, int entryoffset)
- {
- int offset = entryoffset;
- boolean multicontraction = false;
- // since it will be stuffed into this iterator and ran over again
- if (m_utilSkippedBuffer_ == null) {
- m_utilSkippedBuffer_ = new StringBuilder();
- }
- else {
- m_utilSkippedBuffer_.setLength(0);
- }
- int ch = currentChar();
- m_utilSkippedBuffer_.appendCodePoint(ch);
- int prevCC = 0;
- int cc = getCombiningClass(ch);
- // accent after the first character
- if (m_utilSpecialDiscontiguousBackUp_ == null) {
- m_utilSpecialDiscontiguousBackUp_ = new Backup();
- }
- backupInternalState(m_utilSpecialDiscontiguousBackUp_);
- boolean prevWasLead = false;
- while (true) {
- // We read code units for contraction table matching
- // but have to get combining classes for code points
- // to figure out where to stop with discontiguous contraction.
- int ch_int = nextChar();
- char nextch = (char)ch_int;
- if (UTF16.isSurrogate(nextch)) {
- if (prevWasLead) {
- // trail surrogate of surrogate pair, keep previous and current cc
- prevWasLead = false;
- } else {
- prevCC = cc;
- cc = 0; // default cc for an unpaired surrogate
- prevWasLead = false;
- if (Character.isHighSurrogate(nextch)) {
- int trail = nextChar();
- if (Character.isLowSurrogate((char)trail)) {
- cc = getCombiningClass(Character.toCodePoint(nextch, (char)trail));
- prevWasLead = true;
- }
- if (trail >= 0) {
- previousChar(); // restore index after having peeked at the next code unit
- }
- }
- }
- } else {
- prevCC = cc;
- cc = getCombiningClass(ch_int);
- prevWasLead = false;
- }
- if (ch_int < 0 || cc == 0) {
- // if there are no more accents to move around
- // we don't have to shift previousChar, since we are resetting
- // the offset later
- if (multicontraction) {
- if (ch_int >= 0) {
- previousChar(); // backtrack
- }
- setDiscontiguous(m_utilSkippedBuffer_);
- return collator.m_contractionCE_[offset];
- }
- break;
- }
-
- offset ++; // skip the combining class offset
- while ((offset < collator.m_contractionIndex_.length) &&
- (nextch > collator.m_contractionIndex_[offset])) {
- offset ++;
- }
-
- int ce = CE_NOT_FOUND_;
- if ( offset >= collator.m_contractionIndex_.length) {
- break;
- }
- if (nextch != collator.m_contractionIndex_[offset] || cc == prevCC) {
- // unmatched or blocked character
- if ( (m_utilSkippedBuffer_.length()!= 1) ||
- ((m_utilSkippedBuffer_.charAt(0)!= nextch) &&
- (m_bufferOffset_<0) )) { // avoid push to skipped buffer twice
- m_utilSkippedBuffer_.append(nextch);
- }
- offset = entryoffset; // Restore the offset before checking next character.
- continue;
- }
- else {
- ce = collator.m_contractionCE_[offset];
- }
-
- if (ce == CE_NOT_FOUND_) {
- break;
- }
- else if (isContractionTag(ce)) {
- // this is a multi-contraction
- offset = getContractionOffset(collator, ce);
- if (collator.m_contractionCE_[offset] != CE_NOT_FOUND_) {
- multicontraction = true;
- backupInternalState(m_utilSpecialDiscontiguousBackUp_);
- }
- }
- else {
- setDiscontiguous(m_utilSkippedBuffer_);
- return ce;
- }
- }
-
- updateInternalState(m_utilSpecialDiscontiguousBackUp_);
- // backup is one forward of the base character, we need to move back
- // one more
- previousChar();
- return collator.m_contractionCE_[entryoffset];
- }
-
- /**
- * Gets the next contraction ce
- * @param collator collator to use
- * @param ce current ce
- * @return ce of the next contraction
- */
- private int nextContraction(RuleBasedCollator collator, int ce)
- {
- backupInternalState(m_utilSpecialBackUp_);
- int entryce = collator.m_contractionCE_[getContractionOffset(collator, ce)]; //CE_NOT_FOUND_;
- while (true) {
- int entryoffset = getContractionOffset(collator, ce);
- int offset = entryoffset;
-
- if (isEnd()) {
- ce = collator.m_contractionCE_[offset];
- if (ce == CE_NOT_FOUND_) {
- // back up the source over all the chars we scanned going
- // into this contraction.
- ce = entryce;
- updateInternalState(m_utilSpecialBackUp_);
- }
- break;
- }
-
- // get the discontiguos maximum combining class
- int maxCC = (collator.m_contractionIndex_[offset] & 0xFF);
- // checks if all characters have the same combining class
- byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8);
- char ch = (char)nextChar();
- offset ++;
- while (ch > collator.m_contractionIndex_[offset]) {
- // contraction characters are ordered, skip all smaller
- offset ++;
- }
-
- if (ch == collator.m_contractionIndex_[offset]) {
- // Found the source string char in the contraction table.
- // Pick up the corresponding CE from the table.
- ce = collator.m_contractionCE_[offset];
- }
- else {
- // Source string char was not in contraction table.
- // Unless it is a discontiguous contraction, we are done
- int miss = ch;
- // ticket 8484 - porting changes from C for 6101
- // We test whether the next two char are surrogate pairs.
- // This test is done if the iterator is not in the end.
- // If there is no surrogate pair, the iterator
- // goes back one if needed.
- if(UTF16.isLeadSurrogate(ch) && !isEnd()) {
- char surrNextChar = (char)nextChar();
- if (UTF16.isTrailSurrogate(surrNextChar)) {
- miss = UCharacterProperty.getRawSupplementary(ch, surrNextChar);
- } else {
- previousChar();
- }
- }
- int sCC;
- if (maxCC == 0 || (sCC = getCombiningClass(miss)) == 0
- || sCC > maxCC || (allSame != 0 && sCC == maxCC) ||
- isEnd()) {
- // Contraction can not be discontiguous, back up by one
- previousChar();
- if(miss > 0xFFFF) {
- previousChar();
- }
- ce = collator.m_contractionCE_[entryoffset];
- }
- else {
- // Contraction is possibly discontiguous.
- // find the next character if ch is not a base character
- int ch_int = nextChar();
- if (ch_int != UCharacterIterator.DONE) {
- previousChar();
- }
- char nextch = (char)ch_int;
- if (getCombiningClass(nextch) == 0) {
- previousChar();
- if(miss > 0xFFFF) {
- previousChar();
- }
- // base character not part of discontiguous contraction
- ce = collator.m_contractionCE_[entryoffset];
- }
- else {
- ce = nextDiscontiguous(collator, entryoffset);
- }
- }
- }
-
- if (ce == CE_NOT_FOUND_) {
- // source did not match the contraction, revert back original
- updateInternalState(m_utilSpecialBackUp_);
- ce = entryce;
- break;
- }
-
- // source was a contraction
- if (!isContractionTag(ce)) {
- break;
- }
-
- // ccontinue looping to check for the remaining contraction.
- if (collator.m_contractionCE_[entryoffset] != CE_NOT_FOUND_) {
- // there are further contractions to be performed, so we store
- // the so-far completed ce, so that if we fail in the next
- // round we just return this one.
- entryce = collator.m_contractionCE_[entryoffset];
- backupInternalState(m_utilSpecialBackUp_);
- if (m_utilSpecialBackUp_.m_bufferOffset_ >= 0) {
- m_utilSpecialBackUp_.m_bufferOffset_ --;
- }
- else {
- m_utilSpecialBackUp_.m_offset_ --;
- }
- }
- }
- return ce;
- }
-
- /**
- * Gets the next ce for long primaries, stuffs the rest of the collation
- * elements into the ce buffer
- * @param ce current ce
- * @return next ce
- */
- private int nextLongPrimary(int ce)
- {
- m_CEBuffer_[1] = ((ce & 0xFF) << 24)
- | RuleBasedCollator.CE_CONTINUATION_MARKER_;
- m_CEBufferOffset_ = 1;
- m_CEBufferSize_ = 2;
- m_CEBuffer_[0] = ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) |
- CE_BYTE_COMMON_;
- return m_CEBuffer_[0];
- }
-
- /**
- * Gets the number of expansion
- * @param ce current ce
- * @return number of expansion
- */
- private int getExpansionCount(int ce)
- {
- return ce & 0xF;
- }
-
- /**
- * Gets the next expansion ce and stuffs the rest of the collation elements
- * into the ce buffer
- * @param collator current collator
- * @param ce current ce
- * @return next expansion ce
- */
- private int nextExpansion(RuleBasedCollator collator, int ce)
- {
- // NOTE: we can encounter both continuations and expansions in an
- // expansion!
- // I have to decide where continuations are going to be dealt with
- int offset = getExpansionOffset(collator, ce);
- m_CEBufferSize_ = getExpansionCount(ce);
- m_CEBufferOffset_ = 1;
- m_CEBuffer_[0] = collator.m_expansion_[offset];
- if (m_CEBufferSize_ != 0) {
- // if there are less than 16 elements in expansion
- for (int i = 1; i < m_CEBufferSize_; i ++) {
- m_CEBuffer_[i] = collator.m_expansion_[offset + i];
- }
- }
- else {
- // ce are terminated
- m_CEBufferSize_ = 1;
- while (collator.m_expansion_[offset] != 0) {
- m_CEBuffer_[m_CEBufferSize_ ++] =
- collator.m_expansion_[++ offset];
- }
- }
- // in case of one element expansion, we
- // want to immediately return CEpos
- if (m_CEBufferSize_ == 1) {
- m_CEBufferSize_ = 0;
- m_CEBufferOffset_ = 0;
- }
- return m_CEBuffer_[0];
- }
-
- /**
- * Gets the next digit ce
- * @param collator current collator
- * @param ce current collation element
- * @param cp current codepoint
- * @return next digit ce
- */
- private int nextDigit(RuleBasedCollator collator, int ce, int cp)
- {
- // We do a check to see if we want to collate digits as numbers;
- // if so we generate a custom collation key. Otherwise we pull out
- // the value stored in the expansion table.
-
- if (m_collator_.m_isNumericCollation_){
- int collateVal = 0;
- int trailingZeroIndex = 0;
- boolean nonZeroValReached = false;
-
- // I just need a temporary place to store my generated CEs.
- // icu4c uses a unsigned byte array, i'll use a stringbuffer here
- // to avoid dealing with the sign problems and array allocation
- // clear and set initial string buffer length
- m_utilStringBuffer_.setLength(3);
-
- // We parse the source string until we hit a char that's NOT a
- // digit.
- // Use this u_charDigitValue. This might be slow because we have
- // to handle surrogates...
- int digVal = UCharacter.digit(cp);
- // if we have arrived here, we have already processed possible
- // supplementaries that trigered the digit tag -
- // all supplementaries are marked in the UCA.
- // We pad a zero in front of the first element anyways.
- // This takes care of the (probably) most common case where
- // people are sorting things followed by a single digit
- int digIndx = 1;
- for (;;) {
- // Make sure we have enough space.
- if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) {
- m_utilStringBuffer_.setLength(m_utilStringBuffer_.length()
- << 1);
- }
- // Skipping over leading zeroes.
- if (digVal != 0 || nonZeroValReached) {
- if (digVal != 0 && !nonZeroValReached) {
- nonZeroValReached = true;
- }
- // We parse the digit string into base 100 numbers
- // (this fits into a byte).
- // We only add to the buffer in twos, thus if we are
- // parsing an odd character, that serves as the
- // 'tens' digit while the if we are parsing an even
- // one, that is the 'ones' digit. We dumped the
- // parsed base 100 value (collateVal) into a buffer.
- // We multiply each collateVal by 2 (to give us room)
- // and add 5 (to avoid overlapping magic CE byte
- // values). The last byte we subtract 1 to ensure it is
- // less than all the other bytes.
- if (digIndx % 2 != 0) {
- collateVal += digVal;
- // This removes trailing zeroes.
- if (collateVal == 0 && trailingZeroIndex == 0) {
- trailingZeroIndex = ((digIndx - 1) >>> 1) + 2;
- }
- else if (trailingZeroIndex != 0) {
- trailingZeroIndex = 0;
- }
- m_utilStringBuffer_.setCharAt(
- ((digIndx - 1) >>> 1) + 2,
- (char)((collateVal << 1) + 6));
- collateVal = 0;
- }
- else {
- // We drop the collation value into the buffer so if
- // we need to do a "front patch" we don't have to
- // check to see if we're hitting the last element.
-
- collateVal = digVal * 10;
- if (collateVal == 0) {
- if (trailingZeroIndex != 0) {
- trailingZeroIndex = (digIndx >>> 1) + 2;
- }
- } else {
- trailingZeroIndex = 0;
- }
-
- m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2,
- (char)((collateVal << 1) + 6));
- }
- digIndx ++;
- }
-
- // Get next character.
- if (!isEnd()){
- backupInternalState(m_utilSpecialBackUp_);
- int char32 = nextChar();
- char ch = (char)char32;
- if (UTF16.isLeadSurrogate(ch)){
- if (!isEnd()) {
- char trail = (char)nextChar();
- if (UTF16.isTrailSurrogate(trail)) {
- char32 = UCharacterProperty.getRawSupplementary(
- ch, trail);
- }
- else {
- goBackOne();
- }
- }
- }
-
- digVal = UCharacter.digit(char32);
- if (digVal == -1) {
- // Resetting position to point to the next unprocessed
- // char. We overshot it when doing our test/set for
- // numbers.
- updateInternalState(m_utilSpecialBackUp_);
- break;
- }
- }
- else {
- break;
- }
- }
-
- if (nonZeroValReached == false){
- digIndx = 2;
- m_utilStringBuffer_.setCharAt(2, (char)6);
- }
-
- int endIndex = trailingZeroIndex != 0 ? trailingZeroIndex
- : (digIndx >>> 1) + 2;
-
- if (digIndx % 2 != 0){
- // We missed a value. Since digIndx isn't even, stuck too many
- // values into the buffer (this is what we get for padding the
- // first byte with a zero). "Front-patch" now by pushing all
- // nybbles forward.
- // Doing it this way ensures that at least 50% of the time
- // (statistically speaking) we'll only be doing a single pass
- // and optimizes for strings with single digits. I'm just
- // assuming that's the more common case.
- for (int i = 2; i < endIndex; i ++){
- m_utilStringBuffer_.setCharAt(i,
- (char)((((((m_utilStringBuffer_.charAt(i) - 6) >>> 1)
- % 10) * 10)
- + (((m_utilStringBuffer_.charAt(i + 1) - 6)
- >>> 1) / 10) << 1) + 6));
- }
- -- digIndx;
- }
-
- // Subtract one off of the last byte.
- m_utilStringBuffer_.setCharAt(endIndex - 1,
- (char)(m_utilStringBuffer_.charAt(endIndex - 1) - 1));
-
- // We want to skip over the first two slots in the buffer.
- // The first slot is reserved for the header byte CODAN_PLACEHOLDER.
- // The second slot is for the sign/exponent byte:
- // 0x80 + (decimalPos/2) & 7f.
- m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER);
- m_utilStringBuffer_.setCharAt(1,
- (char)(0x80 + ((digIndx >>> 1) & 0x7F)));
-
- // Now transfer the collation key to our collIterate struct.
- // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
- ce = (((m_utilStringBuffer_.charAt(0) << 8)
- // Primary weight
- | m_utilStringBuffer_.charAt(1))
- << RuleBasedCollator.CE_PRIMARY_SHIFT_)
- // Secondary weight
- | (RuleBasedCollator.BYTE_COMMON_
- << RuleBasedCollator.CE_SECONDARY_SHIFT_)
- | RuleBasedCollator.BYTE_COMMON_; // Tertiary weight.
- int i = 2; // Reset the index into the buffer.
-
- m_CEBuffer_[0] = ce;
- m_CEBufferSize_ = 1;
- m_CEBufferOffset_ = 1;
- while (i < endIndex)
- {
- int primWeight = m_utilStringBuffer_.charAt(i ++) << 8;
- if (i < endIndex) {
- primWeight |= m_utilStringBuffer_.charAt(i ++);
- }
- m_CEBuffer_[m_CEBufferSize_ ++]
- = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_)
- | RuleBasedCollator.CE_CONTINUATION_MARKER_;
- }
- return ce;
- }
-
- // no numeric mode, we'll just switch to whatever we stashed and
- // continue
- // find the offset to expansion table
- return collator.m_expansion_[getExpansionOffset(collator, ce)];
- }
-
- /**
- * Gets the next implicit ce for codepoints
- * @param codepoint current codepoint
- * @return implicit ce
- */
- private int nextImplicit(int codepoint)
- {
- int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint);
- m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_)
- | 0x00000505;
- m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0;
- m_CEBufferOffset_ = 1;
- m_CEBufferSize_ = 2;
- return m_CEBuffer_[0];
- }
-
- /**
- * Returns the next ce associated with the following surrogate characters
- * @param ch current character
- * @return ce
- */
- private int nextSurrogate(char ch)
- {
- int ch_int = nextChar();
- char nextch = (char)ch_int;
- if (ch_int != CharacterIterator.DONE &&
- UTF16.isTrailSurrogate(nextch)) {
- int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch);
- return nextImplicit(codepoint);
- }
- if (nextch != CharacterIterator.DONE) {
- previousChar(); // reverts back to the original position
- }
- return CE_NOT_FOUND_; // treat like unassigned
- }
-
- /**
- * Returns the next ce for a hangul character, this is an implicit
- * calculation
- * @param collator current collator
- * @param ch current character
- * @return hangul ce
- */
- private int nextHangul(RuleBasedCollator collator, char ch)
- {
- char L = (char)(ch - HANGUL_SBASE_);
-
- // divide into pieces
- // do it in this order since some compilers can do % and / in one
- // operation
- char T = (char)(L % HANGUL_TCOUNT_);
- L /= HANGUL_TCOUNT_;
- char V = (char)(L % HANGUL_VCOUNT_);
- L /= HANGUL_VCOUNT_;
-
- // offset them
- L += HANGUL_LBASE_;
- V += HANGUL_VBASE_;
- T += HANGUL_TBASE_;
-
- // return the first CE, but first put the rest into the expansion
- // buffer
- m_CEBufferSize_ = 0;
- if (!m_collator_.m_isJamoSpecial_) { // FAST PATH
- m_CEBuffer_[m_CEBufferSize_ ++] =
- collator.m_trie_.getLeadValue(L);
- m_CEBuffer_[m_CEBufferSize_ ++] =
- collator.m_trie_.getLeadValue(V);
-
- if (T != HANGUL_TBASE_) {
- m_CEBuffer_[m_CEBufferSize_ ++] =
- collator.m_trie_.getLeadValue(T);
- }
- m_CEBufferOffset_ = 1;
- return m_CEBuffer_[0];
- }
- else {
- // Jamo is Special
- // Since Hanguls pass the FCD check, it is guaranteed that we
- // won't be in the normalization buffer if something like this
- // happens
- // Move Jamos into normalization buffer
- m_buffer_.append(L);
- m_buffer_.append(V);
- if (T != HANGUL_TBASE_) {
- m_buffer_.append(T);
- }
- m_bufferOffset_ = 0;
- m_FCDLimit_ = m_source_.getIndex();
- m_FCDStart_ = m_FCDLimit_ - 1;
- // Indicate where to continue in main input string after
- // exhausting the buffer
- return IGNORABLE;
- }
- }
-
- /**
- * <p>Special CE management. Expansions, contractions etc...</p>
- * @param collator can be plain UCA
- * @param ce current ce
- * @param ch current character
- * @return next special ce
- */
- private int nextSpecial(RuleBasedCollator collator, int ce, char ch)
- {
- int codepoint = ch;
- Backup entrybackup = m_utilSpecialEntryBackUp_;
- // this is to handle recursive looping
- if (entrybackup != null) {
- m_utilSpecialEntryBackUp_ = null;
- }
- else {
- entrybackup = new Backup();
- }
- backupInternalState(entrybackup);
- try { // forces it to assign m_utilSpecialEntryBackup_
- while (true) {
- // This loop will repeat only in the case of contractions,
- // surrogate
- switch(RuleBasedCollator.getTag(ce)) {
- case CE_NOT_FOUND_TAG_:
- // impossible case for icu4j
- return ce;
- case RuleBasedCollator.CE_SURROGATE_TAG_:
- if (isEnd()) {
- return CE_NOT_FOUND_;
- }
- backupInternalState(m_utilSpecialBackUp_);
- char trail = (char)nextChar();
- ce = nextSurrogate(collator, ce, trail);
- // calculate the supplementary code point value,
- // if surrogate was not tailored we go one more round
- codepoint =
- UCharacterProperty.getRawSupplementary(ch, trail);
- break;
- case CE_SPEC_PROC_TAG_:
- ce = nextSpecialPrefix(collator, ce, entrybackup);
- break;
- case CE_CONTRACTION_TAG_:
- ce = nextContraction(collator, ce);
- break;
- case CE_LONG_PRIMARY_TAG_:
- return nextLongPrimary(ce);
- case CE_EXPANSION_TAG_:
- return nextExpansion(collator, ce);
- case CE_DIGIT_TAG_:
- ce = nextDigit(collator, ce, codepoint);
- break;
- // various implicits optimization
- case CE_CJK_IMPLICIT_TAG_:
- // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
- return nextImplicit(codepoint);
- case CE_IMPLICIT_TAG_: // everything that is not defined
- return nextImplicit(codepoint);
- case CE_TRAIL_SURROGATE_TAG_:
- return CE_NOT_FOUND_; // DC00-DFFF broken surrogate, treat like unassigned
- case CE_LEAD_SURROGATE_TAG_: // D800-DBFF
- return nextSurrogate(ch);
- case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF
- return nextHangul(collator, ch);
- case CE_CHARSET_TAG_:
- // not yet implemented probably after 1.8
- return CE_NOT_FOUND_;
- default:
- ce = IGNORABLE;
- // synwee todo, throw exception or something here.
- }
- if (!RuleBasedCollator.isSpecial(ce)) {
- break;
- }
- }
- }
- finally {
- m_utilSpecialEntryBackUp_ = entrybackup;
- }
- return ce;
- }
-
- /**
- * Special processing is getting a CE that is preceded by a certain prefix.
- * Currently this is only needed for optimizing Japanese length and
- * iteration marks. When we encouter a special processing tag, we go
- * backwards and try to see if we have a match. Contraction tables are used
- * - so the whole process is not unlike contraction. prefix data is stored
- * backwards in the table.
- * @param collator current collator
- * @param ce current ce
- * @return previous ce
- */
- private int previousSpecialPrefix(RuleBasedCollator collator, int ce)
- {
- backupInternalState(m_utilSpecialBackUp_);
- while (true) {
- // position ourselves at the begining of contraction sequence
- int offset = getContractionOffset(collator, ce);
- int entryoffset = offset;
- if (isBackwardsStart()) {
- ce = collator.m_contractionCE_[offset];
- break;
- }
- char prevch = (char)previousChar();
- while (prevch > collator.m_contractionIndex_[offset]) {
- // since contraction codepoints are ordered, we skip all that
- // are smaller
- offset ++;
- }
- if (prevch == collator.m_contractionIndex_[offset]) {
- ce = collator.m_contractionCE_[offset];
- }
- else {
- // if there is a completely ignorable code point in the middle
- // of a prefix, we need to act as if it's not there assumption:
- // 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to
- // zero)
- // lone surrogates cannot be set to zero as it would break
- // other processing
- int isZeroCE = collator.m_trie_.getLeadValue(prevch);
- // it's easy for BMP code points
- if (isZeroCE == 0) {
- continue;
- }
- else if (UTF16.isTrailSurrogate(prevch)
- || UTF16.isLeadSurrogate(prevch)) {
- // for supplementary code points, we have to check the next one
- // situations where we are going to ignore
- // 1. beginning of the string: schar is a lone surrogate
- // 2. schar is a lone surrogate
- // 3. schar is a trail surrogate in a valid surrogate
- // sequence that is explicitly set to zero.
- if (!isBackwardsStart()) {
- char lead = (char)previousChar();
- if (UTF16.isLeadSurrogate(lead)) {
- isZeroCE = collator.m_trie_.getLeadValue(lead);
- if (RuleBasedCollator.getTag(isZeroCE)
- == RuleBasedCollator.CE_SURROGATE_TAG_) {
- int finalCE = collator.m_trie_.getTrailValue(
- isZeroCE,
- prevch);
- if (finalCE == 0) {
- // this is a real, assigned completely
- // ignorable code point
- continue;
- }
- }
- }
- else {
- nextChar(); // revert to original offset
- // lone surrogate, completely ignorable
- continue;
- }
- nextChar(); // revert to original offset
- }
- else {
- // lone surrogate at the beggining, completely ignorable
- continue;
- }
- }
-
- // char was not in the table. prefix not found
- ce = collator.m_contractionCE_[entryoffset];
- }
-
- if (!isSpecialPrefixTag(ce)) {
- // char was in the contraction table, and the corresponding ce
- // is not a prefix ce. We found the prefix, break out of loop,
- // this ce will end up being returned.
- break;
- }
- }
- updateInternalState(m_utilSpecialBackUp_);
- return ce;
- }
-
- /**
- * Retrieves the previous contraction ce. To ensure that the backwards and
- * forwards iteration matches, we take the current region of most possible
- * match and pass it through the forward iteration. This will ensure that
- * the obstinate problem of overlapping contractions will not occur.
- * @param collator current collator
- * @param ce current ce
- * @param ch current character
- * @return previous contraction ce
- */
- private int previousContraction(RuleBasedCollator collator, int ce, char ch)
- {
- m_utilStringBuffer_.setLength(0);
- // since we might encounter normalized characters (from the thai
- // processing) we can't use peekCharacter() here.
- char prevch = (char)previousChar();
- boolean atStart = false;
- // TODO: address the comment above - maybe now we *can* use peekCharacter
- //while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) {
- while (collator.isUnsafe(ch)) {
- m_utilStringBuffer_.insert(0, ch);
- ch = prevch;
- if (isBackwardsStart()) {
- atStart = true;
- break;
- }
- prevch = (char)previousChar();
- }
- if (!atStart) {
- // undo the previousChar() if we didn't reach the beginning
- nextChar();
- }
- // adds the initial base character to the string
- m_utilStringBuffer_.insert(0, ch);
-
- // a new collation element iterator is used to simply things, since
- // using the current collation element iterator will mean that the
- // forward and backwards iteration will share and change the same
- // buffers. it is going to be painful.
- int originaldecomp = collator.getDecomposition();
- // for faster access, since string would have been normalized above
- collator.setDecomposition(Collator.NO_DECOMPOSITION);
- if (m_utilColEIter_ == null) {
- m_utilColEIter_ = new CollationElementIterator(
- m_utilStringBuffer_.toString(),
- collator);
- }
- else {
- m_utilColEIter_.m_collator_ = collator;
- m_utilColEIter_.setText(m_utilStringBuffer_.toString());
- }
- ce = m_utilColEIter_.next();
- m_CEBufferSize_ = 0;
- while (ce != NULLORDER) {
- if (m_CEBufferSize_ == m_CEBuffer_.length) {
- try {
- // increasing cebuffer size
- int tempbuffer[] = new int[m_CEBuffer_.length + 50];
- System.arraycopy(m_CEBuffer_, 0, tempbuffer, 0,
- m_CEBuffer_.length);
- m_CEBuffer_ = tempbuffer;
- }
- catch( MissingResourceException e)
- {
- throw e;
- }
- catch (Exception e) {
- if(DEBUG){
- e.printStackTrace();
- }
- return NULLORDER;
- }
- }
- m_CEBuffer_[m_CEBufferSize_ ++] = ce;
- ce = m_utilColEIter_.next();
- }
- collator.setDecomposition(originaldecomp);
- m_CEBufferOffset_ = m_CEBufferSize_ - 1;
- return m_CEBuffer_[m_CEBufferOffset_];
- }
-
- /**
- * Returns the previous long primary ces
- * @param ce long primary ce
- * @return previous long primary ces
- */
- private int previousLongPrimary(int ce)
- {
- m_CEBufferSize_ = 0;
- m_CEBuffer_[m_CEBufferSize_ ++] =
- ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | CE_BYTE_COMMON_;
- m_CEBuffer_[m_CEBufferSize_ ++] = ((ce & 0xFF) << 24)
- | RuleBasedCollator.CE_CONTINUATION_MARKER_;
- m_CEBufferOffset_ = m_CEBufferSize_ - 1;
- return m_CEBuffer_[m_CEBufferOffset_];
- }
-
- /**
- * Returns the previous expansion ces
- * @param collator current collator
- * @param ce current ce
- * @return previous expansion ce
- */
- private int previousExpansion(RuleBasedCollator collator, int ce)
- {
- // find the offset to expansion table
- int offset = getExpansionOffset(collator, ce);
- m_CEBufferSize_ = getExpansionCount(ce);
- if (m_CEBufferSize_ != 0) {
- // less than 16 elements in expansion
- for (int i = 0; i < m_CEBufferSize_; i ++) {
- m_CEBuffer_[i] = collator.m_expansion_[offset + i];
- }
-
- }
- else {
- // null terminated ces
- while (collator.m_expansion_[offset + m_CEBufferSize_] != 0) {
- m_CEBuffer_[m_CEBufferSize_] =
- collator.m_expansion_[offset + m_CEBufferSize_];
- m_CEBufferSize_ ++;
- }
- }
- m_CEBufferOffset_ = m_CEBufferSize_ - 1;
- return m_CEBuffer_[m_CEBufferOffset_];
- }
-
- /**
- * Getting the digit collation elements
- * @param collator
- * @param ce current collation element
- * @param ch current code point
- * @return digit collation element
- */
- private int previousDigit(RuleBasedCollator collator, int ce, char ch)
- {
- // We do a check to see if we want to collate digits as numbers; if so we generate
- // a custom collation key. Otherwise we pull out the value stored in the expansion table.
- if (m_collator_.m_isNumericCollation_){
- int leadingZeroIndex = 0;
- int collateVal = 0;
- boolean nonZeroValReached = false;
-
- // clear and set initial string buffer length
- m_utilStringBuffer_.setLength(3);
-
- // We parse the source string until we hit a char that's NOT a digit
- // Use this u_charDigitValue. This might be slow because we have to
- // handle surrogates...
- int char32 = ch;
- if (UTF16.isTrailSurrogate(ch)) {
- if (!isBackwardsStart()){
- char lead = (char)previousChar();
- if (UTF16.isLeadSurrogate(lead)) {
- char32 = UCharacterProperty.getRawSupplementary(lead,
- ch);
- }
- else {
- goForwardOne();
- }
- }
- }
- int digVal = UCharacter.digit(char32);
- int digIndx = 0;
- for (;;) {
- // Make sure we have enough space.
- if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) {
- m_utilStringBuffer_.setLength(m_utilStringBuffer_.length()
- << 1);
- }
- // Skipping over "trailing" zeroes but we still add to digIndx.
- if (digVal != 0 || nonZeroValReached) {
- if (digVal != 0 && !nonZeroValReached) {
- nonZeroValReached = true;
- }
-
- // We parse the digit string into base 100 numbers (this
- // fits into a byte).
- // We only add to the buffer in twos, thus if we are
- // parsing an odd character, that serves as the 'tens'
- // digit while the if we are parsing an even one, that is
- // the 'ones' digit. We dumped the parsed base 100 value
- // (collateVal) into a buffer. We multiply each collateVal
- // by 2 (to give us room) and add 5 (to avoid overlapping
- // magic CE byte values). The last byte we subtract 1 to
- // ensure it is less than all the other bytes.
- // Since we're doing in this reverse we want to put the
- // first digit encountered into the ones place and the
- // second digit encountered into the tens place.
-
- if (digIndx % 2 != 0){
- collateVal += digVal * 10;
-
- // This removes leading zeroes.
- if (collateVal == 0 && leadingZeroIndex == 0) {
- leadingZeroIndex = ((digIndx - 1) >>> 1) + 2;
- }
- else if (leadingZeroIndex != 0) {
- leadingZeroIndex = 0;
- }
-
- m_utilStringBuffer_.setCharAt(((digIndx - 1) >>> 1) + 2,
- (char)((collateVal << 1) + 6));
- collateVal = 0;
- }
- else {
- collateVal = digVal;
- }
- }
- digIndx ++;
-
- if (!isBackwardsStart()){
- backupInternalState(m_utilSpecialBackUp_);
- char32 = previousChar();
- if (UTF16.isTrailSurrogate(ch)){
- if (!isBackwardsStart()) {
- char lead = (char)previousChar();
- if (UTF16.isLeadSurrogate(lead)) {
- char32
- = UCharacterProperty.getRawSupplementary(
- lead, ch);
- }
- else {
- updateInternalState(m_utilSpecialBackUp_);
- }
- }
- }
-
- digVal = UCharacter.digit(char32);
- if (digVal == -1) {
- updateInternalState(m_utilSpecialBackUp_);
- break;
- }
- }
- else {
- break;
- }
- }
-
- if (nonZeroValReached == false) {
- digIndx = 2;
- m_utilStringBuffer_.setCharAt(2, (char)6);
- }
-
- if (digIndx % 2 != 0) {
- if (collateVal == 0 && leadingZeroIndex == 0) {
- // This removes the leading 0 in a odd number sequence of
- // numbers e.g. avery001
- leadingZeroIndex = ((digIndx - 1) >>> 1) + 2;
- }
- else {
- // this is not a leading 0, we add it in
- m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2,
- (char)((collateVal << 1) + 6));
- digIndx ++;
- }
- }
-
- int endIndex = leadingZeroIndex != 0 ? leadingZeroIndex
- : ((digIndx >>> 1) + 2) ;
- digIndx = ((endIndex - 2) << 1) + 1; // removing initial zeros
- // Subtract one off of the last byte.
- // Really the first byte here, but it's reversed...
- m_utilStringBuffer_.setCharAt(2,
- (char)(m_utilStringBuffer_.charAt(2) - 1));
- // We want to skip over the first two slots in the buffer.
- // The first slot is reserved for the header byte CODAN_PLACEHOLDER.
- // The second slot is for the sign/exponent byte:
- // 0x80 + (decimalPos/2) & 7f.
- m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER);
- m_utilStringBuffer_.setCharAt(1,
- (char)(0x80 + ((digIndx >>> 1) & 0x7F)));
-
- // Now transfer the collation key to our collIterate struct.
- // The total size for our collation key is endIndx bumped up to the
- // next largest even value divided by two.
- m_CEBufferSize_ = 0;
- m_CEBuffer_[m_CEBufferSize_ ++]
- = (((m_utilStringBuffer_.charAt(0) << 8)
- // Primary weight
- | m_utilStringBuffer_.charAt(1))
- << RuleBasedCollator.CE_PRIMARY_SHIFT_)
- // Secondary weight
- | (RuleBasedCollator.BYTE_COMMON_
- << RuleBasedCollator.CE_SECONDARY_SHIFT_)
- // Tertiary weight.
- | RuleBasedCollator.BYTE_COMMON_;
- int i = endIndex - 1; // Reset the index into the buffer.
- while (i >= 2) {
- int primWeight = m_utilStringBuffer_.charAt(i --) << 8;
- if (i >= 2) {
- primWeight |= m_utilStringBuffer_.charAt(i --);
- }
- m_CEBuffer_[m_CEBufferSize_ ++]
- = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_)
- | RuleBasedCollator.CE_CONTINUATION_MARKER_;
- }
- m_CEBufferOffset_ = m_CEBufferSize_ - 1;
- return m_CEBuffer_[m_CEBufferOffset_];
- }
- else {
- return collator.m_expansion_[getExpansionOffset(collator, ce)];
- }
- }
-
- /**
- * Returns previous hangul ces
- * @param collator current collator
- * @param ch current character
- * @return previous hangul ce
- */
- private int previousHangul(RuleBasedCollator collator, char ch)
- {
- char L = (char)(ch - HANGUL_SBASE_);
- // we do it in this order since some compilers can do % and / in one
- // operation
- char T = (char)(L % HANGUL_TCOUNT_);
- L /= HANGUL_TCOUNT_;
- char V = (char)(L % HANGUL_VCOUNT_);
- L /= HANGUL_VCOUNT_;
-
- // offset them
- L += HANGUL_LBASE_;
- V += HANGUL_VBASE_;
- T += HANGUL_TBASE_;
-
- m_CEBufferSize_ = 0;
- if (!m_collator_.m_isJamoSpecial_) {
- m_CEBuffer_[m_CEBufferSize_ ++] =
- collator.m_trie_.getLeadValue(L);
- m_CEBuffer_[m_CEBufferSize_ ++] =
- collator.m_trie_.getLeadValue(V);
- if (T != HANGUL_TBASE_) {
- m_CEBuffer_[m_CEBufferSize_ ++] =
- collator.m_trie_.getLeadValue(T);
- }
- m_CEBufferOffset_ = m_CEBufferSize_ - 1;
- return m_CEBuffer_[m_CEBufferOffset_];
- }
- else {
- // Since Hanguls pass the FCD check, it is guaranteed that we won't
- // be in the normalization buffer if something like this happens
- // Move Jamos into normalization buffer
- m_buffer_.append(L);
- m_buffer_.append(V);
- if (T != HANGUL_TBASE_) {
- m_buffer_.append(T);
- }
- m_bufferOffset_ = m_buffer_.length();
- m_FCDStart_ = m_source_.getIndex();
- m_FCDLimit_ = m_FCDStart_ + 1;
- return IGNORABLE;
- }
- }
-
- /**
- * Gets implicit codepoint ces
- * @param codepoint current codepoint
- * @return implicit codepoint ces
- */
- private int previousImplicit(int codepoint)
- {
- int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint);
- m_CEBufferSize_ = 2;
- m_CEBufferOffset_ = 1;
- m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_)
- | 0x00000505;
- m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0;
- return m_CEBuffer_[1];
- }
-
- /**
- * Gets the previous surrogate ce
- * @param ch current character
- * @return previous surrogate ce
- */
- private int previousSurrogate(char ch)
- {
- if (isBackwardsStart()) {
- // we are at the start of the string, wrong place to be at
- return CE_NOT_FOUND_;
- }
- char prevch = (char)previousChar();
- // Handles Han and Supplementary characters here.
- if (UTF16.isLeadSurrogate(prevch)) {
- return previousImplicit(
- UCharacterProperty.getRawSupplementary(prevch, ch));
- }
- if (prevch != CharacterIterator.DONE) {
- nextChar();
- }
- return CE_NOT_FOUND_; // treat like unassigned
- }
-
- /**
- * <p>Special CE management. Expansions, contractions etc...</p>
- * @param collator can be plain UCA
- * @param ce current ce
- * @param ch current character
- * @return previous special ce
- */
- private int previousSpecial(RuleBasedCollator collator, int ce, char ch)
- {
- while(true) {
- // the only ces that loops are thai, special prefix and
- // contractions
- switch (RuleBasedCollator.getTag(ce)) {
- case CE_NOT_FOUND_TAG_: // this tag always returns
- return ce;
- case RuleBasedCollator.CE_SURROGATE_TAG_: // unpaired lead surrogate
- return CE_NOT_FOUND_;
- case CE_SPEC_PROC_TAG_:
- ce = previousSpecialPrefix(collator, ce);
- break;
- case CE_CONTRACTION_TAG_:
- // may loop for first character e.g. "0x0f71" for english
- if (isBackwardsStart()) {
- // start of string or this is not the end of any contraction
- ce = collator.m_contractionCE_[
- getContractionOffset(collator, ce)];
- break;
- }
- return previousContraction(collator, ce, ch); // else
- case CE_LONG_PRIMARY_TAG_:
- return previousLongPrimary(ce);
- case CE_EXPANSION_TAG_: // always returns
- return previousExpansion(collator, ce);
- case CE_DIGIT_TAG_:
- ce = previousDigit(collator, ce, ch);
- break;
- case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF
- return previousHangul(collator, ch);
- case CE_LEAD_SURROGATE_TAG_: // D800-DBFF
- return CE_NOT_FOUND_; // broken surrogate sequence, treat like unassigned
- case CE_TRAIL_SURROGATE_TAG_: // DC00-DFFF
- return previousSurrogate(ch);
- case CE_CJK_IMPLICIT_TAG_:
- // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
- return previousImplicit(ch);
- case CE_IMPLICIT_TAG_: // everything that is not defined
- // UCA is filled with these. Tailorings are NOT_FOUND
- return previousImplicit(ch);
- case CE_CHARSET_TAG_: // this tag always returns
- return CE_NOT_FOUND_;
- default: // this tag always returns
- ce = IGNORABLE;
- }
- if (!RuleBasedCollator.isSpecial(ce)) {
- break;
- }
- }
- return ce;
- }
-
-// /**
-// * Gets a character from the source string at a given offset.
-// * Handles both normal and iterative cases.
-// * No error checking and does not access the normalization buffer
-// * - caller beware!
-// * @param offset offset from current position which character is to be
-// * retrieved
-// * @return character at current position + offset
-// */
-// private char peekCharacter(int offset)
-// {
-// if (offset != 0) {
-// int currentoffset = m_source_.getIndex();
-// m_source_.setIndex(currentoffset + offset);
-// char result = (char)m_source_.current();
-// m_source_.setIndex(currentoffset);
-// return result;
-// }
-// else {
-// return (char)m_source_.current();
-// }
-// }
-
- /**
- * Moves back 1 position in the source string. This is slightly less
- * complicated than previousChar in that it doesn't normalize while
- * moving back. Boundary checks are not performed.
- * This method is to be used with caution, with the assumption that
- * moving back one position will not exceed the source limits.
- * Use only with nextChar() and never call this API twice in a row without
- * nextChar() in the middle.
- */
- private void goBackOne()
- {
- if (m_bufferOffset_ >= 0) {
- m_bufferOffset_ --;
- }
- else {
- m_source_.setIndex(m_source_.getIndex() - 1);
- }
- }
-
- /**
- * Moves forward 1 position in the source string. This is slightly less
- * complicated than nextChar in that it doesn't normalize while
- * moving back. Boundary checks are not performed.
- * This method is to be used with caution, with the assumption that
- * moving back one position will not exceed the source limits.
- * Use only with previousChar() and never call this API twice in a row
- * without previousChar() in the middle.
- */
- private void goForwardOne()
- {
- if (m_bufferOffset_ < 0) {
- // we're working on the source and not normalizing. fast path.
- // note Thai pre-vowel reordering uses buffer too
- m_source_.setIndex(m_source_.getIndex() + 1);
- }
- else {
- // we are in the buffer, buffer offset will never be 0 here
- m_bufferOffset_ ++;
- }
+ public RuleBasedCollator getRuleBasedCollator() {
+ return rbc_;
}
}
/**
*******************************************************************************
-* Copyright (C) 1996-2012, International Business Machines Corporation and
+* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;
+import com.ibm.icu.impl.coll.Collation;
+
/**
* <p>A <code>CollationKey</code> represents a <code>String</code>
* under the rules of a specific <code>Collator</code>
*/
public byte[] toByteArray()
{
- int length = 0;
- while (true) {
- if (m_key_[length] == 0) {
- break;
- }
- length ++;
- }
- length ++;
+ int length = getLength() + 1;
byte result[] = new byte[length];
System.arraycopy(m_key_, 0, result, 0, length);
return result;
if (noOfLevels > Collator.PRIMARY) {
while (offset < m_key_.length && m_key_[offset] != 0) {
if (m_key_[offset ++]
- == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) {
+ == Collation.LEVEL_SEPARATOR_BYTE) {
keystrength ++;
noOfLevels --;
if (noOfLevels == Collator.PRIMARY
// if both sort keys have another level, then add a 01 level
// separator and continue
- if (m_key_[index] == RuleBasedCollator.SORT_LEVEL_TERMINATOR_
+ if (m_key_[index] == Collation.LEVEL_SEPARATOR_BYTE
&& source.m_key_[sourceindex]
- == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) {
+ == Collation.LEVEL_SEPARATOR_BYTE) {
++index;
++sourceindex;
- result[rindex++] = RuleBasedCollator.SORT_LEVEL_TERMINATOR_;
+ result[rindex++] = Collation.LEVEL_SEPARATOR_BYTE;
}
else {
break;
+++ /dev/null
-/**
- *******************************************************************************
- * Copyright (C) 1996-2011, International Business Machines Corporation and
- * others. All Rights Reserved.
- *******************************************************************************
- */
-package com.ibm.icu.text;
-
-import java.io.IOException;
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import com.ibm.icu.impl.IntTrieBuilder;
-import com.ibm.icu.impl.Norm2AllModes;
-import com.ibm.icu.impl.Normalizer2Impl;
-import com.ibm.icu.impl.TrieBuilder;
-import com.ibm.icu.impl.TrieIterator;
-import com.ibm.icu.impl.UCharacterProperty;
-import com.ibm.icu.impl.Utility;
-import com.ibm.icu.lang.UCharacter;
-import com.ibm.icu.lang.UCharacterCategory;
-import com.ibm.icu.util.RangeValueIterator;
-import com.ibm.icu.util.VersionInfo;
-
-/**
- * Class for building a collator from a list of collation rules. This class is
- * uses CollationRuleParser
- *
- * @author Syn Wee Quek
- * @since release 2.2, June 11 2002
- */
-final class CollationParsedRuleBuilder {
- // package private constructors ------------------------------------------
-
- /**
- * Constructor
- *
- * @param rules
- * collation rules
- * @exception ParseException
- * thrown when argument rules have an invalid syntax
- */
- CollationParsedRuleBuilder(String rules) throws ParseException {
- m_parser_ = new CollationRuleParser(rules);
- m_parser_.assembleTokenList();
- m_utilColEIter_ = RuleBasedCollator.UCA_
- .getCollationElementIterator("");
- }
-
- // package private inner classes -----------------------------------------
-
- /**
- * Inverse UCA wrapper
- */
- static class InverseUCA {
- // package private constructor ---------------------------------------
-
- InverseUCA() {
- }
-
- // package private data member ---------------------------------------
-
- /**
- * Array list of characters
- */
- int m_table_[];
- /**
- * Array list of continuation characters
- */
- char m_continuations_[];
-
- /**
- * UCA version of inverse UCA table
- */
- VersionInfo m_UCA_version_;
-
- // package private method --------------------------------------------
-
- /**
- * Returns the previous inverse ces of the argument ces
- *
- * @param ce
- * ce to test
- * @param contce
- * continuation ce to test
- * @param strength
- * collation strength
- * @param prevresult
- * an array to store the return results previous inverse ce
- * and previous inverse continuation ce
- * @return result of the inverse ce
- */
- final int getInversePrevCE(int ce, int contce, int strength,
- int prevresult[]) {
- int result = findInverseCE(ce, contce);
-
- if (result < 0) {
- prevresult[0] = CollationElementIterator.NULLORDER;
- return -1;
- }
-
- ce &= STRENGTH_MASK_[strength];
- contce &= STRENGTH_MASK_[strength];
-
- prevresult[0] = ce;
- prevresult[1] = contce;
-
- while ((prevresult[0] & STRENGTH_MASK_[strength]) == ce
- && (prevresult[1] & STRENGTH_MASK_[strength]) == contce
- && result > 0) {
- // this condition should prevent falling off the edge of the
- // world
- // here, we end up in a singularity - zero
- prevresult[0] = m_table_[3 * (--result)];
- prevresult[1] = m_table_[3 * result + 1];
- }
- return result;
- }
-
- final int getCEStrengthDifference(int CE, int contCE, int prevCE,
- int prevContCE) {
- int strength = Collator.TERTIARY;
- while (((prevCE & STRENGTH_MASK_[strength]) != (CE & STRENGTH_MASK_[strength]) || (prevContCE & STRENGTH_MASK_[strength]) != (contCE & STRENGTH_MASK_[strength]))
- && (strength != 0)) {
- strength--;
- }
- return strength;
- }
-
- private int compareCEs(int source0, int source1, int target0,
- int target1) {
- int s1 = source0, s2, t1 = target0, t2;
- if (RuleBasedCollator.isContinuation(source1)) {
- s2 = source1;
- } else {
- s2 = 0;
- }
- if (RuleBasedCollator.isContinuation(target1)) {
- t2 = target1;
- } else {
- t2 = 0;
- }
-
- int s = 0, t = 0;
- if (s1 == t1 && s2 == t2) {
- return 0;
- }
- s = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >>> 16);
- t = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >>> 16);
- if (s == t) {
- s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00) >> 8;
- t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00) >> 8;
- if (s == t) {
- s = (s1 & 0x000000FF) << 8 | (s2 & 0x000000FF);
- t = (t1 & 0x000000FF) << 8 | (t2 & 0x000000FF);
- return Utility.compareUnsigned(s, t);
- } else {
- return Utility.compareUnsigned(s, t);
- }
- } else {
- return Utility.compareUnsigned(s, t);
- }
- }
-
- /**
- * Finding the inverse CE of the argument CEs
- *
- * @param ce
- * CE to be tested
- * @param contce
- * continuation CE
- * @return inverse CE
- */
- int findInverseCE(int ce, int contce) {
- int bottom = 0;
- int top = m_table_.length / 3;
- int result = 0;
-
- while (bottom < top - 1) {
- result = (top + bottom) >> 1;
- int first = m_table_[3 * result];
- int second = m_table_[3 * result + 1];
- int comparison = compareCEs(first, second, ce, contce);
- if (comparison > 0) {
- top = result;
- } else if (comparison < 0) {
- bottom = result;
- } else {
- break;
- }
- }
-
- return result;
- }
-
- /**
- * Getting gap offsets in the inverse UCA
- *
- * @param listheader
- * parsed token lists
- * @exception Exception
- * thrown when error occurs while finding the collation
- * gaps
- */
- void getInverseGapPositions(
- CollationRuleParser.TokenListHeader listheader)
- throws Exception {
- // reset all the gaps
- CollationRuleParser.Token token = listheader.m_first_;
- int tokenstrength = token.m_strength_;
-
- for (int i = 0; i < 3; i++) {
- listheader.m_gapsHi_[3 * i] = 0;
- listheader.m_gapsHi_[3 * i + 1] = 0;
- listheader.m_gapsHi_[3 * i + 2] = 0;
- listheader.m_gapsLo_[3 * i] = 0;
- listheader.m_gapsLo_[3 * i + 1] = 0;
- listheader.m_gapsLo_[3 * i + 2] = 0;
- listheader.m_numStr_[i] = 0;
- listheader.m_fStrToken_[i] = null;
- listheader.m_lStrToken_[i] = null;
- listheader.m_pos_[i] = -1;
- }
-
- if ((listheader.m_baseCE_ >>> 24) >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_
- && (listheader.m_baseCE_ >>> 24) <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_) {
- // implicits -
- listheader.m_pos_[0] = 0;
- int t1 = listheader.m_baseCE_;
- int t2 = listheader.m_baseContCE_;
- listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY);
- listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY);
- listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY);
- int primaryCE = t1 & RuleBasedCollator.CE_PRIMARY_MASK_
- | (t2 & RuleBasedCollator.CE_PRIMARY_MASK_) >>> 16;
- primaryCE = RuleBasedCollator.impCEGen_
- .getImplicitFromRaw(RuleBasedCollator.impCEGen_
- .getRawFromImplicit(primaryCE) + 1);
-
- t1 = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
- t2 = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_
- | RuleBasedCollator.CE_CONTINUATION_MARKER_;
-
- // if (listheader.m_baseCE_ < 0xEF000000) {
- // // first implicits have three byte primaries, with a gap of
- // // one so we esentially need to add 2 to the top byte in
- // // listheader.m_baseContCE_
- // t2 += 0x02000000;
- // }
- // else {
- // // second implicits have four byte primaries, with a gap of
- // // IMPLICIT_LAST2_MULTIPLIER_
- // // Now, this guy is not really accessible here, so until we
- // // find a better way to pass it around, assume that the gap
- // is 1
- // t2 += 0x00020000;
- // }
- listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY);
- listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY);
- listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY);
- } else if (listheader.m_indirect_ == true
- && listheader.m_nextCE_ != 0) {
- listheader.m_pos_[0] = 0;
- int t1 = listheader.m_baseCE_;
- int t2 = listheader.m_baseContCE_;
- listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY);
- listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY);
- listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY);
- t1 = listheader.m_nextCE_;
- t2 = listheader.m_nextContCE_;
- listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY);
- listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY);
- listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY);
- } else {
- while (true) {
- if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
- listheader.m_pos_[tokenstrength] = getInverseNext(
- listheader, tokenstrength);
- if (listheader.m_pos_[tokenstrength] >= 0) {
- listheader.m_fStrToken_[tokenstrength] = token;
- } else {
- // The CE must be implicit, since it's not in the
- // table
- // Error
- throw new Exception("Internal program error");
- }
- }
-
- while (token != null && token.m_strength_ >= tokenstrength) {
- if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
- listheader.m_lStrToken_[tokenstrength] = token;
- }
- token = token.m_next_;
- }
- if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_ - 1) {
- // check if previous interval is the same and merge the
- // intervals if it is so
- if (listheader.m_pos_[tokenstrength] == listheader.m_pos_[tokenstrength + 1]) {
- listheader.m_fStrToken_[tokenstrength] = listheader.m_fStrToken_[tokenstrength + 1];
- listheader.m_fStrToken_[tokenstrength + 1] = null;
- listheader.m_lStrToken_[tokenstrength + 1] = null;
- listheader.m_pos_[tokenstrength + 1] = -1;
- }
- }
- if (token != null) {
- tokenstrength = token.m_strength_;
- } else {
- break;
- }
- }
- for (int st = 0; st < 3; st++) {
- int pos = listheader.m_pos_[st];
- if (pos >= 0) {
- int t1 = m_table_[3 * pos];
- int t2 = m_table_[3 * pos + 1];
- listheader.m_gapsHi_[3 * st] = mergeCE(t1, t2,
- Collator.PRIMARY);
- listheader.m_gapsHi_[3 * st + 1] = mergeCE(t1, t2,
- Collator.SECONDARY);
- listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24
- | (t2 & 0x3f) << 16;
- // pos --;
- // t1 = m_table_[3 * pos];
- // t2 = m_table_[3 * pos + 1];
- t1 = listheader.m_baseCE_;
- t2 = listheader.m_baseContCE_;
-
- listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2,
- Collator.PRIMARY);
- listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2,
- Collator.SECONDARY);
- listheader.m_gapsLo_[3 * st + 2] = (t1 & 0x3f) << 24
- | (t2 & 0x3f) << 16;
- }
- }
- }
- }
-
- /**
- * Gets the next CE in the inverse table
- *
- * @param listheader
- * token list header
- * @param strength
- * collation strength
- * @return next ce
- */
- private final int getInverseNext(
- CollationRuleParser.TokenListHeader listheader, int strength) {
- int ce = listheader.m_baseCE_;
- int secondce = listheader.m_baseContCE_;
- int result = findInverseCE(ce, secondce);
-
- if (result < 0) {
- return -1;
- }
-
- ce &= STRENGTH_MASK_[strength];
- secondce &= STRENGTH_MASK_[strength];
-
- int nextce = ce;
- int nextcontce = secondce;
-
- while ((nextce & STRENGTH_MASK_[strength]) == ce
- && (nextcontce & STRENGTH_MASK_[strength]) == secondce) {
- nextce = m_table_[3 * (++result)];
- nextcontce = m_table_[3 * result + 1];
- }
-
- listheader.m_nextCE_ = nextce;
- listheader.m_nextContCE_ = nextcontce;
-
- return result;
- }
- }
-
- // package private data members ------------------------------------------
-
- /**
- * Inverse UCA, instantiate only when required
- */
- static final InverseUCA INVERSE_UCA_;
-
- /**
- * UCA and Inverse UCA version do not match
- */
- private static final String INV_UCA_VERSION_MISMATCH_ = "UCA versions of UCA and inverse UCA should match";
-
- /**
- * UCA and Inverse UCA version do not match
- */
- private static final String UCA_NOT_INSTANTIATED_ = "UCA is not instantiated!";
-
- /**
- * Initializing the inverse UCA
- */
- static {
- InverseUCA temp = null;
- try {
- temp = CollatorReader.getInverseUCA();
- } catch (IOException e) {
- }
- /*
- * try { String invdat = "/com/ibm/icu/impl/data/invuca.icu";
- * InputStream i =
- * CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
- * BufferedInputStream b = new BufferedInputStream(i, 110000);
- * INVERSE_UCA_ = CollatorReader.readInverseUCA(b); b.close();
- * i.close(); } catch (Exception e) { e.printStackTrace(); throw new
- * RuntimeException(e.getMessage()); }
- */
-
- if (temp != null && RuleBasedCollator.UCA_ != null) {
- if (!temp.m_UCA_version_
- .equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
- throw new RuntimeException(INV_UCA_VERSION_MISMATCH_);
- }
- } else {
- throw new RuntimeException(UCA_NOT_INSTANTIATED_);
- }
-
- INVERSE_UCA_ = temp;
- }
-
- // package private methods -----------------------------------------------
-
- /**
- * Parse and sets the collation rules in the argument collator
- *
- * @param collator
- * to set
- * @exception Exception
- * thrown when internal program error occurs
- */
- void setRules(RuleBasedCollator collator) throws Exception {
- if (m_parser_.m_resultLength_ > 0 || m_parser_.m_removeSet_ != null) {
- // we have a set of rules, let's make something of it
- assembleTailoringTable(collator);
- } else { // no rules, but no error either must be only options
- // We will init the collator from UCA
- collator.setWithUCATables();
- }
- // And set only the options
- m_parser_.setDefaultOptionsInCollator(collator);
- }
-
- private void copyRangeFromUCA(BuildTable t, int start, int end) {
- int u = 0;
- for (u = start; u <= end; u++) {
- // if ((CE = ucmpe32_get(t.m_mapping, u)) == UCOL_NOT_FOUND
- int CE = t.m_mapping_.getValue(u);
- if (CE == CE_NOT_FOUND_
- // this test is for contractions that are missing the starting
- // element. Looks like latin-1 should be done before
- // assembling the table, even if it results in more false
- // closure elements
- || (isContractionTableElement(CE) && getCE(
- t.m_contractions_, CE, 0) == CE_NOT_FOUND_)) {
- // m_utilElement_.m_uchars_ = str.toString();
- m_utilElement_.m_uchars_ = UCharacter.toString(u);
- m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
- m_utilElement_.m_prefix_ = 0;
- m_utilElement_.m_CELength_ = 0;
- m_utilElement_.m_prefixChars_ = null;
- m_utilColEIter_.setText(m_utilElement_.m_uchars_);
- while (CE != CollationElementIterator.NULLORDER) {
- CE = m_utilColEIter_.next();
- if (CE != CollationElementIterator.NULLORDER) {
- m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
- }
- }
- addAnElement(t, m_utilElement_);
- }
- }
- }
-
- /**
- * 2. Eliminate the negative lists by doing the following for each non-null
- * negative list: o if previousCE(baseCE, strongestN) != some ListHeader X's
- * baseCE, create new ListHeader X o reverse the list, add to the end of X's
- * positive list. Reset the strength of the first item you add, based on the
- * stronger strength levels of the two lists.
- *
- * 3. For each ListHeader with a non-null positive list: o Find all
- * character strings with CEs between the baseCE and the next/previous CE,
- * at the strength of the first token. Add these to the tailoring. ? That
- * is, if UCA has ... x <<< X << x' <<< X' < y ..., and the tailoring has &
- * x < z... ? Then we change the tailoring to & x <<< X << x' <<< X' < z ...
- *
- * It is possible that this part should be done even while constructing list
- * The problem is that it is unknown what is going to be the strongest
- * weight. So we might as well do it here o Allocate CEs for each token in
- * the list, based on the total number N of the largest level difference,
- * and the gap G between baseCE and nextCE at that level. The relation *
- * between the last item and nextCE is the same as the strongest strength. o
- * Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1) ? There are 3
- * primary items: a, d, e. Fit them into the primary gap. Then fit b and c
- * into the secondary gap between a and d, then fit q into the tertiary gap
- * between b and c. o Example: baseCE << b <<< q << c * nextCE(X,2) ? There
- * are 2 secondary items: b, c. Fit them into the secondary gap. Then fit q
- * into the tertiary gap between b and c. o When incrementing primary
- * values, we will not cross high byte boundaries except where there is only
- * a single-byte primary. That is to ensure that the script reordering will
- * continue to work.
- *
- * @param collator
- * the rule based collator to update
- * @exception Exception
- * thrown when internal program error occurs
- */
- void assembleTailoringTable(RuleBasedCollator collator) throws Exception {
-
- for (int i = 0; i < m_parser_.m_resultLength_; i++) {
- // now we need to generate the CEs
- // We stuff the initial value in the buffers, and increase the
- // appropriate buffer according to strength
- if (m_parser_.m_listHeader_[i].m_first_ != null) {
- // if there are any elements
- // due to the way parser works, subsequent tailorings
- // may remove all the elements from a sequence, therefore
- // leaving an empty tailoring sequence.
- initBuffers(m_parser_.m_listHeader_[i]);
- }
- }
-
- if (m_parser_.m_variableTop_ != null) {
- // stuff the variable top value
- m_parser_.m_options_.m_variableTopValue_ = m_parser_.m_variableTop_.m_CE_[0] >>> 16;
- // remove it from the list
- if (m_parser_.m_variableTop_.m_listHeader_.m_first_ == m_parser_.m_variableTop_) { // first
- // in
- // list
- m_parser_.m_variableTop_.m_listHeader_.m_first_ = m_parser_.m_variableTop_.m_next_;
- }
- if (m_parser_.m_variableTop_.m_listHeader_.m_last_ == m_parser_.m_variableTop_) {
- // first in list
- m_parser_.m_variableTop_.m_listHeader_.m_last_ = m_parser_.m_variableTop_.m_previous_;
- }
- if (m_parser_.m_variableTop_.m_next_ != null) {
- m_parser_.m_variableTop_.m_next_.m_previous_ = m_parser_.m_variableTop_.m_previous_;
- }
- if (m_parser_.m_variableTop_.m_previous_ != null) {
- m_parser_.m_variableTop_.m_previous_.m_next_ = m_parser_.m_variableTop_.m_next_;
- }
- }
-
- BuildTable t = new BuildTable(m_parser_);
-
- // After this, we have assigned CE values to all regular CEs now we
- // will go through list once more and resolve expansions, make
- // UCAElements structs and add them to table
- for (int i = 0; i < m_parser_.m_resultLength_; i++) {
- // now we need to generate the CEs
- // We stuff the initial value in the buffers, and increase the
- // appropriate buffer according to strength */
- createElements(t, m_parser_.m_listHeader_[i]);
- }
-
- m_utilElement_.clear();
-
- // add latin-1 stuff
- copyRangeFromUCA(t, 0, 0xFF);
-
- // add stuff for copying
- if (m_parser_.m_copySet_ != null) {
- int i = 0;
- for (i = 0; i < m_parser_.m_copySet_.getRangeCount(); i++) {
- copyRangeFromUCA(t, m_parser_.m_copySet_.getRangeStart(i),
- m_parser_.m_copySet_.getRangeEnd(i));
- }
- }
-
- // copy contractions from the UCA - this is felt mostly for cyrillic
- char conts[] = RuleBasedCollator.UCA_CONTRACTIONS_;
- int maxUCAContractionLength = RuleBasedCollator.MAX_UCA_CONTRACTION_LENGTH;
- int offset = 0;
- while (conts[offset] != 0) {
- // A continuation is NUL-terminated and NUL-padded
- // except if it has the maximum length.
- int contractionLength = maxUCAContractionLength;
- while (contractionLength > 0 && conts[offset + contractionLength - 1] == 0) {
- --contractionLength;
- }
- int first = Character.codePointAt(conts, offset);
- int firstLength = Character.charCount(first);
- int tailoredCE = t.m_mapping_.getValue(first);
- Elements prefixElm = null;
- if (tailoredCE != CE_NOT_FOUND_) {
- boolean needToAdd = true;
- if (isContractionTableElement(tailoredCE)) {
- if (isTailored(t.m_contractions_, tailoredCE, conts,
- offset + firstLength) == true) {
- needToAdd = false;
- }
- }
- if (!needToAdd && isPrefix(tailoredCE)
- && conts[offset + 1] == 0) {
- // pre-context character in UCA
- // The format for pre-context character is
- // conts[0]: baseCP conts[1]:0 conts[2]:pre-context CP
- Elements elm = new Elements();
- elm.m_CELength_ = 0;
- elm.m_uchars_ = Character.toString(conts[offset]);
- elm.m_cPoints_ = m_utilElement_.m_uchars_;
- elm.m_prefixChars_ = Character.toString(conts[offset + 2]);
- elm.m_prefix_ = 0; // TODO(claireho) : confirm!
- prefixElm = t.m_prefixLookup_.get(elm);
- if ((prefixElm == null)
- || (prefixElm.m_prefixChars_.charAt(0) != conts[offset + 2])) {
- needToAdd = true;
- }
- }
- if (m_parser_.m_removeSet_ != null
- && m_parser_.m_removeSet_.contains(first)) {
- needToAdd = false;
- }
-
- if (needToAdd == true) {
- // we need to add if this contraction is not tailored.
- if (conts[offset + 1] != 0) { // not precontext
- m_utilElement_.m_prefix_ = 0;
- m_utilElement_.m_prefixChars_ = null;
- m_utilElement_.m_uchars_ = new String(conts, offset, contractionLength);
- m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
- m_utilElement_.m_CELength_ = 0;
- m_utilColEIter_.setText(m_utilElement_.m_uchars_);
- } else { // add a pre-context element
- int preKeyLen = 0;
- m_utilElement_.m_uchars_ = Character.toString(conts[offset]);
- m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
- m_utilElement_.m_CELength_ = 0;
- m_utilElement_.m_prefixChars_ = Character.toString(conts[offset + 2]);
- if (prefixElm == null) {
- m_utilElement_.m_prefix_ = 0;
- } else { // TODO (claireho): confirm!
- m_utilElement_.m_prefix_ = m_utilElement_.m_prefix_;
- // m_utilElement_.m_prefix_= prefixElm.m_prefix_;
- }
- m_utilColEIter_.setText(m_utilElement_.m_prefixChars_);
- while (m_utilColEIter_.next() != CollationElementIterator.NULLORDER) {
- // count number of keys for pre-context char.
- preKeyLen++;
- }
- m_utilColEIter_.setText(m_utilElement_.m_prefixChars_ + m_utilElement_.m_uchars_);
- // Skip the keys for prefix character, then copy the
- // rest to el.
- while ((preKeyLen-- > 0)
- && m_utilColEIter_.next() != CollationElementIterator.NULLORDER) {
- continue;
- }
-
- }
- while (true) {
- int CE = m_utilColEIter_.next();
- if (CE != CollationElementIterator.NULLORDER) {
- m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
- } else {
- break;
- }
- }
- addAnElement(t, m_utilElement_);
- }
- } else if (m_parser_.m_removeSet_ != null
- && m_parser_.m_removeSet_.contains(first)) {
- copyRangeFromUCA(t, first, first);
- }
-
- offset += maxUCAContractionLength;
- }
-
- // Add completely ignorable elements
- processUCACompleteIgnorables(t);
-
- // canonical closure
- canonicalClosure(t);
-
- // still need to produce compatibility closure
- assembleTable(t, collator);
- }
-
- // private inner classes -------------------------------------------------
-
- @SuppressWarnings("unused")
- private static class CEGenerator {
- // package private data members --------------------------------------
-
- WeightRange m_ranges_[];
- int m_rangesLength_;
- int m_byteSize_;
- int m_start_;
- int m_limit_;
- int m_maxCount_;
- int m_count_;
- int m_current_;
- int m_fLow_; // forbidden Low
- int m_fHigh_; // forbidden High
-
- // package private constructor ---------------------------------------
-
- CEGenerator() {
- m_ranges_ = new WeightRange[7];
- for (int i = 6; i >= 0; i--) {
- m_ranges_[i] = new WeightRange();
- }
- }
- }
-
- private static class WeightRange implements Comparable<WeightRange> {
- // public methods ----------------------------------------------------
-
- /**
- * Compares this object with target
- *
- * @param target object to compare with
- * @return 0 if equals, 1 if this is > target, -1 otherwise
- */
- public int compareTo(WeightRange target) {
- return Utility.compareUnsigned(m_start_, target.m_start_);
- }
-
- /**
- * Initialize
- */
- public void clear() {
- m_start_ = 0;
- m_end_ = 0;
- m_length_ = 0;
- m_count_ = 0;
- m_length2_ = 0;
- m_count2_ = 0;
- }
-
- // package private data members --------------------------------------
-
- int m_start_;
- int m_end_;
- int m_length_;
- int m_count_;
- int m_length2_;
- int m_count2_;
-
- // package private constructor ---------------------------------------
-
- WeightRange() {
- clear();
- }
-
- /**
- * Copy constructor. Cloneable is troublesome, needs to check for
- * exception
- *
- * @param source
- * to clone
- */
- WeightRange(WeightRange source) {
- m_start_ = source.m_start_;
- m_end_ = source.m_end_;
- m_length_ = source.m_length_;
- m_count_ = source.m_count_;
- m_length2_ = source.m_length2_;
- m_count2_ = source.m_count2_;
- }
- }
-
- private static class MaxJamoExpansionTable {
- // package private data members --------------------------------------
-
- List<Integer> m_endExpansionCE_;
- // vector of booleans
- List<Boolean> m_isV_;
- byte m_maxLSize_;
- byte m_maxVSize_;
- byte m_maxTSize_;
-
- // package private constructor ---------------------------------------
-
- MaxJamoExpansionTable() {
- m_endExpansionCE_ = new ArrayList<Integer>();
- m_isV_ = new ArrayList<Boolean>();
- m_endExpansionCE_.add(Integer.valueOf(0));
- m_isV_.add(Boolean.FALSE);
- m_maxLSize_ = 1;
- m_maxVSize_ = 1;
- m_maxTSize_ = 1;
- }
-
- MaxJamoExpansionTable(MaxJamoExpansionTable table) {
- m_endExpansionCE_ = new ArrayList<Integer>(table.m_endExpansionCE_);
- m_isV_ = new ArrayList<Boolean>(table.m_isV_);
- m_maxLSize_ = table.m_maxLSize_;
- m_maxVSize_ = table.m_maxVSize_;
- m_maxTSize_ = table.m_maxTSize_;
- }
- }
-
- private static class MaxExpansionTable {
- // package private constructor --------------------------------------
-
- MaxExpansionTable() {
- m_endExpansionCE_ = new ArrayList<Integer>();
- m_expansionCESize_ = new ArrayList<Byte>();
- m_endExpansionCE_.add(Integer.valueOf(0));
- m_expansionCESize_.add(Byte.valueOf((byte) 0));
- }
-
- MaxExpansionTable(MaxExpansionTable table) {
- m_endExpansionCE_ = new ArrayList<Integer>(table.m_endExpansionCE_);
- m_expansionCESize_ = new ArrayList<Byte>(table.m_expansionCESize_);
- }
-
- // package private data member --------------------------------------
-
- List<Integer> m_endExpansionCE_;
- List<Byte> m_expansionCESize_;
- }
-
- private static class BasicContractionTable {
- // package private constructors -------------------------------------
-
- BasicContractionTable() {
- m_CEs_ = new ArrayList<Integer>();
- m_codePoints_ = new StringBuilder();
- }
-
- // package private data members -------------------------------------
-
- StringBuilder m_codePoints_;
- List<Integer> m_CEs_;
- }
-
- private static class ContractionTable {
- // package private constructor --------------------------------------
-
- /**
- * Builds a contraction table
- *
- * @param mapping
- */
- ContractionTable(IntTrieBuilder mapping) {
- m_mapping_ = mapping;
- m_elements_ = new ArrayList<BasicContractionTable>();
- m_CEs_ = new ArrayList<Integer>();
- m_codePoints_ = new StringBuilder();
- m_offsets_ = new ArrayList<Integer>();
- m_currentTag_ = CE_NOT_FOUND_TAG_;
- }
-
- /**
- * Copies a contraction table. Not all data will be copied into their
- * own object.
- *
- * @param table
- */
- ContractionTable(ContractionTable table) {
- m_mapping_ = table.m_mapping_;
- m_elements_ = new ArrayList<BasicContractionTable>(table.m_elements_);
- m_codePoints_ = new StringBuilder(table.m_codePoints_);
- m_CEs_ = new ArrayList<Integer>(table.m_CEs_);
- m_offsets_ = new ArrayList<Integer>(table.m_offsets_);
- m_currentTag_ = table.m_currentTag_;
- }
-
- // package private data members ------------------------------------
-
- /**
- * Vector of BasicContractionTable
- */
- List<BasicContractionTable> m_elements_;
- IntTrieBuilder m_mapping_;
- StringBuilder m_codePoints_;
- List<Integer> m_CEs_;
- List<Integer> m_offsets_;
- int m_currentTag_;
- }
-
- /**
- * Private class for combining mark table. The table is indexed by the class
- * value(0-255).
- */
- @SuppressWarnings("unused")
- private static class CombinClassTable {
- /**
- * accumulated numbers of combining marks.
- */
- int[] index = new int[256];
-
- /**
- * code point array for combining marks.
- */
- char[] cPoints;
-
- /**
- * size of cPoints.
- */
- int size;
-
- // constructor
- CombinClassTable() {
- cPoints = null;
- size = 0;
- pos = 0;
- curClass = 1;
- }
-
- /**
- * Copy the combining mark table from ccc and index in compact way.
- *
- * @param cps
- * : code point array
- * @param size
- * : size of ccc
- * @param index
- * : index of combining classes(0-255)
- */
- void generate(char[] cps, int numOfCM, int[] ccIndex) {
- int count = 0;
-
- cPoints = new char[numOfCM];
- for (int i = 0; i < 256; i++) {
- for (int j = 0; j < ccIndex[i]; j++) {
- cPoints[count++] = cps[(i << 8) + j];
- }
- index[i] = count;
- }
- size = count;
- }
-
- /**
- * Get first CM(combining mark) with the combining class value cClass.
- *
- * @param cClass
- * : combining class value.
- * @return combining mark codepoint or 0 if no combining make with class
- * value cClass
- */
- char GetFirstCM(int cClass) {
- curClass = cClass;
- if (cPoints == null || cClass == 0
- || index[cClass] == index[cClass - 1]) {
- return 0;
- }
- pos = 1;
- return cPoints[index[cClass - 1]];
- }
-
- /**
- * Get next CM(combining mark) with the combining class value cClass.
- * Return combining mark codepoint or 0 if no next CM.
- */
- char GetNextCM() {
- if (cPoints == null
- || index[curClass] == (index[curClass - 1] + pos)) {
- return 0;
- }
- return cPoints[index[curClass - 1] + (pos++)];
- }
-
- // private data members
- int pos;
- int curClass;
- }
-
- private static final class BuildTable implements TrieBuilder.DataManipulate {
- // package private methods ------------------------------------------
-
- /**
- * For construction of the Trie tables. Has to be labeled public
- *
- * @param cp The value of the code point.
- * @param offset The value of the offset.
- * @return data offset or 0
- */
- public int getFoldedValue(int cp, int offset) {
- int limit = cp + 0x400;
- while (cp < limit) {
- int value = m_mapping_.getValue(cp);
- boolean inBlockZero = m_mapping_.isInZeroBlock(cp);
- int tag = getCETag(value);
- if (inBlockZero == true) {
- cp += TrieBuilder.DATA_BLOCK_LENGTH;
- } else if (!(isSpecial(value) && (tag == CE_IMPLICIT_TAG_ || tag == CE_NOT_FOUND_TAG_))) {
- // These are values that are starting in either UCA
- // (IMPLICIT_TAG) or in the tailorings (NOT_FOUND_TAG).
- // Presence of these tags means that there is nothing in
- // this position and that it should be skipped.
- return RuleBasedCollator.CE_SPECIAL_FLAG_
- | (CE_SURROGATE_TAG_ << 24) | offset;
- } else {
- ++cp;
- }
- }
- return 0;
- }
-
- // package private constructor --------------------------------------
-
- /**
- * Returns a table
- */
- BuildTable(CollationRuleParser parser) {
- m_collator_ = new RuleBasedCollator();
- m_collator_.setWithUCAData();
- MaxExpansionTable maxet = new MaxExpansionTable();
- MaxJamoExpansionTable maxjet = new MaxJamoExpansionTable();
- m_options_ = parser.m_options_;
- m_expansions_ = new ArrayList<Integer>();
- // Do your own mallocs for the structure, array and have linear
- // Latin 1
- int trieinitialvalue = RuleBasedCollator.CE_SPECIAL_FLAG_
- | (CE_NOT_FOUND_TAG_ << 24);
- // temporary fix for jb3822, 0x100000 -> 30000
- m_mapping_ = new IntTrieBuilder(null, 0x30000, trieinitialvalue,
- trieinitialvalue, true);
- m_prefixLookup_ = new HashMap<Elements, Elements>();
- // uhash_open(prefixLookupHash, prefixLookupComp);
- m_contractions_ = new ContractionTable(m_mapping_);
- // copy UCA's maxexpansion and merge as we go along
- m_maxExpansions_ = maxet;
- // adding an extra initial value for easier manipulation
- for (int i = 0; i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i++) {
- maxet.m_endExpansionCE_.add(Integer.valueOf(
- RuleBasedCollator.UCA_.m_expansionEndCE_[i]));
- maxet.m_expansionCESize_.add(Byte.valueOf(
- RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i]));
- }
- m_maxJamoExpansions_ = maxjet;
-
- m_unsafeCP_ = new byte[UNSAFECP_TABLE_SIZE_];
- m_contrEndCP_ = new byte[UNSAFECP_TABLE_SIZE_];
- Arrays.fill(m_unsafeCP_, (byte) 0);
- Arrays.fill(m_contrEndCP_, (byte) 0);
- }
-
- /**
- * Duplicating a BuildTable. Not all data will be duplicated into their
- * own object.
- *
- * @param table
- * to clone
- */
- BuildTable(BuildTable table) {
- m_collator_ = table.m_collator_;
- m_mapping_ = new IntTrieBuilder(table.m_mapping_);
- m_expansions_ = new ArrayList<Integer>(table.m_expansions_);
- m_contractions_ = new ContractionTable(table.m_contractions_);
- m_contractions_.m_mapping_ = m_mapping_;
- m_options_ = table.m_options_;
- m_maxExpansions_ = new MaxExpansionTable(table.m_maxExpansions_);
- m_maxJamoExpansions_ = new MaxJamoExpansionTable(
- table.m_maxJamoExpansions_);
- m_unsafeCP_ = new byte[table.m_unsafeCP_.length];
- System.arraycopy(table.m_unsafeCP_, 0, m_unsafeCP_, 0,
- m_unsafeCP_.length);
- m_contrEndCP_ = new byte[table.m_contrEndCP_.length];
- System.arraycopy(table.m_contrEndCP_, 0, m_contrEndCP_, 0,
- m_contrEndCP_.length);
- }
-
- // package private data members -------------------------------------
-
- RuleBasedCollator m_collator_;
- IntTrieBuilder m_mapping_;
- List<Integer> m_expansions_;
- ContractionTable m_contractions_;
- // UCATableHeader image;
- CollationRuleParser.OptionSet m_options_;
- MaxExpansionTable m_maxExpansions_;
- MaxJamoExpansionTable m_maxJamoExpansions_;
- byte m_unsafeCP_[];
- byte m_contrEndCP_[];
- Map<Elements, Elements> m_prefixLookup_;
- CombinClassTable cmLookup = null;
- }
-
- private static class Elements {
- // package private data members -------------------------------------
-
- String m_prefixChars_;
- int m_prefix_;
- String m_uchars_;
- /**
- * Working string
- */
- String m_cPoints_;
- /**
- * Offset to the working string
- */
- int m_cPointsOffset_;
- /**
- * These are collation elements - there could be more than one - in case
- * of expansion
- */
- int m_CEs_[];
- int m_CELength_;
- /**
- * This is the value element maps in original table
- */
- int m_mapCE_;
- int m_sizePrim_[];
- int m_sizeSec_[];
- int m_sizeTer_[];
- boolean m_variableTop_;
- boolean m_caseBit_;
-
- // package private constructors -------------------------------------
-
- /**
- * Package private constructor
- */
- Elements() {
- m_sizePrim_ = new int[128];
- m_sizeSec_ = new int[128];
- m_sizeTer_ = new int[128];
- m_CEs_ = new int[256];
- m_CELength_ = 0;
- }
-
- /**
- * Package private constructor
- */
- Elements(Elements element) {
- m_prefixChars_ = element.m_prefixChars_;
- m_prefix_ = element.m_prefix_;
- m_uchars_ = element.m_uchars_;
- m_cPoints_ = element.m_cPoints_;
- m_cPointsOffset_ = element.m_cPointsOffset_;
- m_CEs_ = element.m_CEs_;
- m_CELength_ = element.m_CELength_;
- m_mapCE_ = element.m_mapCE_;
- m_sizePrim_ = element.m_sizePrim_;
- m_sizeSec_ = element.m_sizeSec_;
- m_sizeTer_ = element.m_sizeTer_;
- m_variableTop_ = element.m_variableTop_;
- m_caseBit_ = element.m_caseBit_;
- }
-
- // package private methods -------------------------------------------
-
- /**
- * Initializing the elements
- */
- public void clear() {
- m_prefixChars_ = null;
- m_prefix_ = 0;
- m_uchars_ = null;
- m_cPoints_ = null;
- m_cPointsOffset_ = 0;
- m_CELength_ = 0;
- m_mapCE_ = 0;
- Arrays.fill(m_sizePrim_, 0);
- Arrays.fill(m_sizeSec_, 0);
- Arrays.fill(m_sizeTer_, 0);
- m_variableTop_ = false;
- m_caseBit_ = false;
- }
-
- /**
- * Hashcode calculation for token
- *
- * @return the hashcode
- */
- public int hashCode() {
- String str = m_cPoints_.substring(m_cPointsOffset_);
- return str.hashCode();
- }
-
- /**
- * Equals calculation
- *
- * @param target Object to compare
- * @return true if target is the same as this object
- */
- public boolean equals(Object target) {
- if (target == this) {
- return true;
- }
- if (target instanceof Elements) {
- Elements t = (Elements) target;
- int size = m_cPoints_.length() - m_cPointsOffset_;
- if (size == t.m_cPoints_.length() - t.m_cPointsOffset_) {
- return t.m_cPoints_.regionMatches(t.m_cPointsOffset_,
- m_cPoints_, m_cPointsOffset_, size);
- }
- }
- return false;
- }
- }
-
- // private data member ---------------------------------------------------
-
/**
 * Maximum strength used in CE building
 */
private static final int CE_BASIC_STRENGTH_LIMIT_ = 3;
/**
 * Maximum collation strength
 */
private static final int CE_STRENGTH_LIMIT_ = 16;
/**
 * Strength mask array, used in inverse UCA
 */
private static final int STRENGTH_MASK_[] = { 0xFFFF0000, 0xFFFFFF00,
        0xFFFFFFFF };
/**
 * Sentinel CE value for a code point that has no mapping (this is a full
 * CE value, unlike the *_TAG_ constants below)
 */
private static final int CE_NOT_FOUND_ = 0xF0000000;
/**
 * CE tag for not found
 */
private static final int CE_NOT_FOUND_TAG_ = 0;
/**
 * This code point results in an expansion
 */
private static final int CE_EXPANSION_TAG_ = 1;
/**
 * Start of a contraction
 */
private static final int CE_CONTRACTION_TAG_ = 2;
/*
 * Thai character - do the reordering
 */
// private static final int CE_THAI_TAG_ = 3;
/*
 * Charset processing, not yet implemented
 */
// private static final int CE_CHARSET_TAG_ = 4;
/**
 * Lead surrogate that is tailored and doesn't start a contraction
 */
private static final int CE_SURROGATE_TAG_ = 5;
/*
 * AC00-D7AF
 */
// private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
/*
 * D800-DBFF
 */
// private static final int CE_LEAD_SURROGATE_TAG_ = 7;
/*
 * DC00-DFFF
 */
// private static final int CE_TRAIL_SURROGATE_TAG_ = 8;
/*
 * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
 */
// private static final int CE_CJK_IMPLICIT_TAG_ = 9;
/**
 * Implicit CE tag (a value starting in the UCA)
 */
private static final int CE_IMPLICIT_TAG_ = 10;
/**
 * Special-processing CE tag; presumably prefix/pre-context handling -
 * confirm against its users
 */
private static final int CE_SPEC_PROC_TAG_ = 11;
/**
 * This is a three byte primary with starting secondaries and tertiaries. It
 * fits in a single 32 bit CE and is used instead of expansion to save space
 * without affecting the performance (hopefully)
 */
private static final int CE_LONG_PRIMARY_TAG_ = 12;
/**
 * Unsafe UChar hash table table size. Size is 32 bytes for 1 bit for each
 * latin 1 char + some power of two for hashing the rest of the chars. Size
 * in bytes
 */
private static final int UNSAFECP_TABLE_SIZE_ = 1056;
/**
 * Mask value down to "some power of two" -1. Number of bits, not num of
 * bytes.
 */
private static final int UNSAFECP_TABLE_MASK_ = 0x1fff;
/**
 * Case values
 */
private static final int UPPER_CASE_ = 0x80;
private static final int MIXED_CASE_ = 0x40;
private static final int LOWER_CASE_ = 0x00;
/*
 * Initial table size
 */
// private static final int INIT_TABLE_SIZE_ = 1028;
/*
 * Header size, copied from ICU4C, to be changed when that value changes
 */
// private static final int HEADER_SIZE_ = 0xC4;
/**
 * Contraction table new element indicator
 */
private static final int CONTRACTION_TABLE_NEW_ELEMENT_ = 0xFFFFFF;
/**
 * Parser for the rules
 */
private CollationRuleParser m_parser_;
/**
 * Utility UCA collation element iterator
 */
private CollationElementIterator m_utilColEIter_;
/**
 * Utility data members (scratch objects reused across builder calls to
 * avoid reallocation)
 */
private CEGenerator m_utilGens_[] = { new CEGenerator(), new CEGenerator(),
        new CEGenerator() };
private int m_utilCEBuffer_[] = new int[CE_BASIC_STRENGTH_LIMIT_];
private int m_utilIntBuffer_[] = new int[CE_STRENGTH_LIMIT_];
private Elements m_utilElement_ = new Elements();
private Elements m_utilElement2_ = new Elements();
private CollationRuleParser.Token m_utilToken_ = new CollationRuleParser.Token();
private int m_utilCountBuffer_[] = new int[6];
private long m_utilLongBuffer_[] = new long[5];
private WeightRange m_utilLowerWeightRange_[] = { new WeightRange(),
        new WeightRange(), new WeightRange(), new WeightRange(),
        new WeightRange() };
private WeightRange m_utilUpperWeightRange_[] = { new WeightRange(),
        new WeightRange(), new WeightRange(), new WeightRange(),
        new WeightRange() };
private WeightRange m_utilWeightRange_ = new WeightRange();
private final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
private CanonicalIterator m_utilCanIter_ = new CanonicalIterator("");
private StringBuilder m_utilStringBuffer_ = new StringBuilder("");
// Flag indicating a combining marks table is required or not.
// NOTE(review): static mutable state shared by all builder instances;
// presumably builds are serialized - confirm before concurrent use.
private static boolean buildCMTabFlag = false;
-
- // private methods -------------------------------------------------------
-
/**
 * Counts, for each token in a list, how many CEs must be inserted at its
 * strength, then walks the list allocating CE values from the CE
 * generators.
 *
 * @param listheader
 *            parsed rule tokens
 * @exception Exception
 *                thrown when internal error occurs
 */
private void initBuffers(CollationRuleParser.TokenListHeader listheader)
        throws Exception {
    CollationRuleParser.Token token = listheader.m_last_;
    Arrays.fill(m_utilIntBuffer_, 0, CE_STRENGTH_LIMIT_, 0);

    // Walk backwards from the last token, accumulating per-strength insert
    // counts into m_utilIntBuffer_ and stamping each token's m_toInsert_.
    token.m_toInsert_ = 1;
    m_utilIntBuffer_[token.m_strength_] = 1;
    while (token.m_previous_ != null) {
        if (token.m_previous_.m_strength_ < token.m_strength_) {
            // going up
            m_utilIntBuffer_[token.m_strength_] = 0;
            m_utilIntBuffer_[token.m_previous_.m_strength_]++;
        } else if (token.m_previous_.m_strength_ > token.m_strength_) {
            // going down
            m_utilIntBuffer_[token.m_previous_.m_strength_] = 1;
        } else {
            // same strength as the next token
            m_utilIntBuffer_[token.m_strength_]++;
        }
        token = token.m_previous_;
        token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
    }

    token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
    INVERSE_UCA_.getInverseGapPositions(listheader);

    token = listheader.m_first_;
    int fstrength = Collator.IDENTICAL;
    int initstrength = Collator.IDENTICAL;

    // Seed the CE buffer from the base CE and its continuation.
    m_utilCEBuffer_[Collator.PRIMARY] = mergeCE(listheader.m_baseCE_,
            listheader.m_baseContCE_, Collator.PRIMARY);
    m_utilCEBuffer_[Collator.SECONDARY] = mergeCE(listheader.m_baseCE_,
            listheader.m_baseContCE_, Collator.SECONDARY);
    m_utilCEBuffer_[Collator.TERTIARY] = mergeCE(listheader.m_baseCE_,
            listheader.m_baseContCE_, Collator.TERTIARY);
    while (token != null) {
        fstrength = token.m_strength_;
        if (fstrength < initstrength) {
            // First token at a stronger strength: (re)initialize the
            // generators from the inverse-UCA gaps.
            initstrength = fstrength;
            if (listheader.m_pos_[fstrength] == -1) {
                // no gap recorded at this strength; fall back to the
                // nearest weaker strength that has one
                while (listheader.m_pos_[fstrength] == -1 && fstrength > 0) {
                    fstrength--;
                }
                if (listheader.m_pos_[fstrength] == -1) {
                    throw new Exception("Internal program error");
                }
            }
            if (initstrength == Collator.TERTIARY) {
                // starting with tertiary
                m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3];
                m_utilCEBuffer_[Collator.SECONDARY] = listheader.m_gapsLo_[fstrength * 3 + 1];
                m_utilCEBuffer_[Collator.TERTIARY] = getCEGenerator(
                        m_utilGens_[Collator.TERTIARY],
                        listheader.m_gapsLo_, listheader.m_gapsHi_, token,
                        fstrength);
            } else if (initstrength == Collator.SECONDARY) {
                // secondaries
                m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3];
                m_utilCEBuffer_[Collator.SECONDARY] = getCEGenerator(
                        m_utilGens_[Collator.SECONDARY],
                        listheader.m_gapsLo_, listheader.m_gapsHi_, token,
                        fstrength);
                m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
                        m_utilGens_[Collator.TERTIARY], token,
                        Collator.TERTIARY);
            } else {
                // primaries
                m_utilCEBuffer_[Collator.PRIMARY] = getCEGenerator(
                        m_utilGens_[Collator.PRIMARY],
                        listheader.m_gapsLo_, listheader.m_gapsHi_, token,
                        fstrength);
                m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator(
                        m_utilGens_[Collator.SECONDARY], token,
                        Collator.SECONDARY);
                m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
                        m_utilGens_[Collator.TERTIARY], token,
                        Collator.TERTIARY);
            }
        } else {
            // continue at the current strength; weaker generators restart
            if (token.m_strength_ == Collator.TERTIARY) {
                m_utilCEBuffer_[Collator.TERTIARY] = getNextGenerated(m_utilGens_[Collator.TERTIARY]);
            } else if (token.m_strength_ == Collator.SECONDARY) {
                m_utilCEBuffer_[Collator.SECONDARY] = getNextGenerated(m_utilGens_[Collator.SECONDARY]);
                m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
                        m_utilGens_[Collator.TERTIARY], token,
                        Collator.TERTIARY);
            } else if (token.m_strength_ == Collator.PRIMARY) {
                m_utilCEBuffer_[Collator.PRIMARY] = getNextGenerated(m_utilGens_[Collator.PRIMARY]);
                m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator(
                        m_utilGens_[Collator.SECONDARY], token,
                        Collator.SECONDARY);
                m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
                        m_utilGens_[Collator.TERTIARY], token,
                        Collator.TERTIARY);
            }
        }
        doCE(m_utilCEBuffer_, token);
        token = token.m_next_;
    }
}
-
- /**
- * Get the next generated ce
- *
- * @param g
- * ce generator
- * @return next generated ce
- */
- private int getNextGenerated(CEGenerator g) {
- g.m_current_ = nextWeight(g);
- return g.m_current_;
- }
-
- /**
- * @param g
- * CEGenerator
- * @param token
- * rule token
- * @param strength
- * @return ce generator
- * @exception Exception
- * thrown when internal error occurs
- */
- private int getSimpleCEGenerator(CEGenerator g,
- CollationRuleParser.Token token, int strength) throws Exception {
- int high, low, count = 1;
- int maxbyte = (strength == Collator.TERTIARY) ? 0x3F : 0xFF;
-
- if (strength == Collator.SECONDARY) {
- low = RuleBasedCollator.COMMON_TOP_2_ << 24;
- high = 0xFFFFFFFF;
- count = 0xFF - RuleBasedCollator.COMMON_TOP_2_;
- } else {
- low = RuleBasedCollator.BYTE_COMMON_ << 24; // 0x05000000;
- high = 0x40000000;
- count = 0x40 - RuleBasedCollator.BYTE_COMMON_;
- }
-
- if (token.m_next_ != null && token.m_next_.m_strength_ == strength) {
- count = token.m_next_.m_toInsert_;
- }
-
- g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte,
- g.m_ranges_);
- g.m_current_ = RuleBasedCollator.BYTE_COMMON_ << 24;
-
- if (g.m_rangesLength_ == 0) {
- throw new Exception("Internal program error");
- }
- return g.m_current_;
- }
-
- /**
- * Combines 2 ce into one with respect to the argument strength
- *
- * @param ce1
- * first ce
- * @param ce2
- * second ce
- * @param strength
- * strength to use
- * @return combined ce
- */
- private static int mergeCE(int ce1, int ce2, int strength) {
- int mask = RuleBasedCollator.CE_TERTIARY_MASK_;
- if (strength == Collator.SECONDARY) {
- mask = RuleBasedCollator.CE_SECONDARY_MASK_;
- } else if (strength == Collator.PRIMARY) {
- mask = RuleBasedCollator.CE_PRIMARY_MASK_;
- }
- ce1 &= mask;
- ce2 &= mask;
- switch (strength) {
- case Collator.PRIMARY:
- return ce1 | ce2 >>> 16;
- case Collator.SECONDARY:
- return ce1 << 16 | ce2 << 8;
- default:
- return ce1 << 24 | ce2 << 16;
- }
- }
-
- /**
- * @param g
- * CEGenerator
- * @param lows
- * low gap array
- * @param highs
- * high gap array
- * @param token
- * rule token
- * @param fstrength
- * @exception Exception
- * thrown when internal error occurs
- */
- private int getCEGenerator(CEGenerator g, int lows[], int highs[],
- CollationRuleParser.Token token, int fstrength) throws Exception {
- int strength = token.m_strength_;
- int low = lows[fstrength * 3 + strength];
- int high = highs[fstrength * 3 + strength];
- int maxbyte = 0;
- if (strength == Collator.TERTIARY) {
- maxbyte = 0x3F;
- } else if (strength == Collator.PRIMARY) {
- maxbyte = 0xFE;
- } else {
- maxbyte = 0xFF;
- }
-
- int count = token.m_toInsert_;
-
- if (Utility.compareUnsigned(low, high) >= 0
- && strength > Collator.PRIMARY) {
- int s = strength;
- while (true) {
- s--;
- if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) {
- if (strength == Collator.SECONDARY) {
- if (low < (RuleBasedCollator.COMMON_TOP_2_ << 24)) {
- // Override if low range is less than
- // UCOL_COMMON_TOP2.
- low = RuleBasedCollator.COMMON_TOP_2_ << 24;
- }
- high = 0xFFFFFFFF;
- } else {
- if (low < RuleBasedCollator.COMMON_BOTTOM_3 << 24) {
- // Override if low range is less than
- // UCOL_COMMON_BOT3.
- low = RuleBasedCollator.COMMON_BOTTOM_3 << 24;
- }
- high = 0x40000000;
- }
- break;
- }
- if (s < 0) {
- throw new Exception("Internal program error");
- }
- }
- }
- if(0 <= low && low < 0x02000000) { // unsigned comparison < 0x02000000
- // We must not use CE weight byte 02, so we set it as the minimum lower bound.
- // See http://site.icu-project.org/design/collation/bytes
- low = 0x02000000;
- }
-
- if (strength == Collator.SECONDARY) { // similar as simple
- if (Utility.compareUnsigned(low,
- RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0
- && Utility.compareUnsigned(low,
- RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
- low = RuleBasedCollator.COMMON_TOP_2_ << 24;
- }
- if (Utility.compareUnsigned(high,
- RuleBasedCollator.COMMON_BOTTOM_2_ << 24) > 0
- && Utility.compareUnsigned(high,
- RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
- high = RuleBasedCollator.COMMON_TOP_2_ << 24;
- }
- if (Utility.compareUnsigned(low,
- RuleBasedCollator.COMMON_BOTTOM_2_ << 24) < 0) {
- g.m_rangesLength_ = allocateWeights(
- RuleBasedCollator.BYTE_UNSHIFTED_MIN_ << 24, high,
- count, maxbyte, g.m_ranges_);
- g.m_current_ = nextWeight(g);
- // g.m_current_ = RuleBasedCollator.COMMON_BOTTOM_2_ << 24;
- return g.m_current_;
- }
- }
-
- g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte,
- g.m_ranges_);
- if (g.m_rangesLength_ == 0) {
- throw new Exception("Internal program error");
- }
- g.m_current_ = nextWeight(g);
- return g.m_current_;
- }
-
/**
 * Packs the primary/secondary/tertiary weight parts into 32-bit CEs for a
 * token, then sets the token's case bits.
 *
 * @param ceparts
 *            list of collation elements parts
 * @param token
 *            rule token
 * @exception Exception
 *                thrown when forming case bits for expansions fails
 */
private void doCE(int ceparts[], CollationRuleParser.Token token)
        throws Exception {
    // this one makes the table and stuff
    // int noofbytes[] = new int[3];
    for (int i = 0; i < 3; i++) {
        // noofbytes[i] = countBytes(ceparts[i]);
        m_utilIntBuffer_[i] = countBytes(ceparts[i]);
    }

    // Here we have to pack CEs from parts.
    // Each CE carries 16 primary bits, 8 secondary bits and 6 tertiary
    // bits; follow-on CEs are flagged with the continuation marker.
    int cei = 0;
    int value = 0;

    while ((cei << 1) < m_utilIntBuffer_[0] || cei < m_utilIntBuffer_[1]
            || cei < m_utilIntBuffer_[2]) {
        if (cei > 0) {
            value = RuleBasedCollator.CE_CONTINUATION_MARKER_;
        } else {
            value = 0;
        }

        if ((cei << 1) < m_utilIntBuffer_[0]) {
            // next 16 bits of the primary part
            value |= ((ceparts[0] >> (32 - ((cei + 1) << 4))) & 0xFFFF) << 16;
        }
        if (cei < m_utilIntBuffer_[1]) {
            // next 8 bits of the secondary part
            value |= ((ceparts[1] >> (32 - ((cei + 1) << 3))) & 0xFF) << 8;
        }

        if (cei < m_utilIntBuffer_[2]) {
            // next 6 bits of the tertiary part
            value |= ((ceparts[2] >> (32 - ((cei + 1) << 3))) & 0x3F);
        }
        token.m_CE_[cei] = value;
        cei++;
    }
    if (cei == 0) { // totally ignorable
        token.m_CELength_ = 1;
        token.m_CE_[0] = 0;
    } else { // there is at least something
        token.m_CELength_ = cei;
    }

    // Case bits handling for expansion
    if (token.m_CE_[0] != 0) { // case bits should be set only for
        // non-ignorables
        token.m_CE_[0] &= 0xFFFFFF3F; // Clean the case bits field
        // m_source_ packs the token's length in the high byte and its
        // start offset in the rule string in the low 24 bits
        int cSize = (token.m_source_ & 0xFF000000) >>> 24;
        int startoftokenrule = token.m_source_ & 0x00FFFFFF;

        if (cSize > 1) {
            // Do it manually
            String tokenstr = token.m_rules_.substring(startoftokenrule,
                    startoftokenrule + cSize);
            token.m_CE_[0] |= getCaseBits(tokenstr);
        } else {
            // Copy it from the UCA
            int caseCE = getFirstCE(token.m_rules_.charAt(startoftokenrule));
            token.m_CE_[0] |= (caseCE & 0xC0);
        }
    }
}
-
- /**
- * Count the number of non-zero bytes used in the ce
- *
- * @param ce
- * @return number of non-zero bytes used in ce
- */
- private static final int countBytes(int ce) {
- int mask = 0xFFFFFFFF;
- int result = 0;
- while (mask != 0) {
- if ((ce & mask) != 0) {
- result++;
- }
- mask >>>= 8;
- }
- return result;
- }
-
- /**
- * We are ready to create collation elements
- *
- * @param t
- * build table to insert
- * @param lh
- * rule token list header
- */
- private void createElements(BuildTable t,
- CollationRuleParser.TokenListHeader lh) {
- CollationRuleParser.Token tok = lh.m_first_;
- m_utilElement_.clear();
- while (tok != null) {
- // first, check if there are any expansions
- // if there are expansions, we need to do a little bit more
- // processing since parts of expansion can be tailored, while
- // others are not
- if (tok.m_expansion_ != 0) {
- int len = tok.m_expansion_ >>> 24;
- int currentSequenceLen = len;
- int expOffset = tok.m_expansion_ & 0x00FFFFFF;
- m_utilToken_.m_source_ = currentSequenceLen | expOffset;
- m_utilToken_.m_rules_ = m_parser_.m_source_;
-
- while (len > 0) {
- currentSequenceLen = len;
- while (currentSequenceLen > 0) {
- m_utilToken_.m_source_ = (currentSequenceLen << 24)
- | expOffset;
- CollationRuleParser.Token expt = m_parser_.m_hashTable_.get(m_utilToken_);
- if (expt != null
- && expt.m_strength_ != CollationRuleParser.TOKEN_RESET_) {
- // expansion is tailored
- int noOfCEsToCopy = expt.m_CELength_;
- for (int j = 0; j < noOfCEsToCopy; j++) {
- tok.m_expCE_[tok.m_expCELength_ + j] = expt.m_CE_[j];
- }
- tok.m_expCELength_ += noOfCEsToCopy;
- // never try to add codepoints and CEs.
- // For some odd reason, it won't work.
- expOffset += currentSequenceLen; // noOfCEsToCopy;
- len -= currentSequenceLen; // noOfCEsToCopy;
- break;
- } else {
- currentSequenceLen--;
- }
- }
- if (currentSequenceLen == 0) {
- // couldn't find any tailored subsequence, will have to
- // get one from UCA. first, get the UChars from the
- // rules then pick CEs out until there is no more and
- // stuff them into expansion
- m_utilColEIter_.setText(m_parser_.m_source_.substring(
- expOffset, expOffset + 1));
- while (true) {
- int order = m_utilColEIter_.next();
- if (order == CollationElementIterator.NULLORDER) {
- break;
- }
- tok.m_expCE_[tok.m_expCELength_++] = order;
- }
- expOffset++;
- len--;
- }
- }
- } else {
- tok.m_expCELength_ = 0;
- }
-
- // set the ucaelement with obtained values
- m_utilElement_.m_CELength_ = tok.m_CELength_ + tok.m_expCELength_;
-
- // copy CEs
- System.arraycopy(tok.m_CE_, 0, m_utilElement_.m_CEs_, 0,
- tok.m_CELength_);
- System.arraycopy(tok.m_expCE_, 0, m_utilElement_.m_CEs_,
- tok.m_CELength_, tok.m_expCELength_);
-
- // copy UChars
- // We kept prefix and source kind of together, as it is a kind of a
- // contraction.
- // However, now we have to slice the prefix off the main thing -
- m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
- m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_;
- if (tok.m_prefix_ != 0) {
- // we will just copy the prefix here, and adjust accordingly in
- // the addPrefix function in ucol_elm. The reason is that we
- // need to add both composed AND decomposed elements to the
- // unsafe table.
- int size = tok.m_prefix_ >> 24;
- int offset = tok.m_prefix_ & 0x00FFFFFF;
- m_utilElement_.m_prefixChars_ = m_parser_.m_source_.substring(
- offset, offset + size);
- size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24);
- offset = (tok.m_source_ & 0x00FFFFFF) + (tok.m_prefix_ >> 24);
- m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
- offset, offset + size);
- } else {
- m_utilElement_.m_prefixChars_ = null;
- int offset = tok.m_source_ & 0x00FFFFFF;
- int size = tok.m_source_ >>> 24;
- m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
- offset, offset + size);
- }
- m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
-
- boolean containCombinMarks = false;
- for (int i = 0; i < m_utilElement_.m_cPoints_.length()
- - m_utilElement_.m_cPointsOffset_; i++) {
- if (isJamo(m_utilElement_.m_cPoints_.charAt(i))) {
- t.m_collator_.m_isJamoSpecial_ = true;
- break;
- }
- if (!buildCMTabFlag) {
- // check combining class
- int fcd = m_nfcImpl_.getFCD16(m_utilElement_.m_cPoints_.charAt(i)); // TODO: review for handling supplementary characters
- if ((fcd & 0xff) == 0) {
- // reset flag when current char is not combining mark.
- containCombinMarks = false;
- } else {
- containCombinMarks = true;
- }
- }
- }
-
- if (!buildCMTabFlag && containCombinMarks) {
- buildCMTabFlag = true;
- }
-
- /***
- * // Case bits handling m_utilElement_.m_CEs_[0] &= 0xFFFFFF3F; //
- * Clean the case bits field if (m_utilElement_.m_cPoints_.length()
- * - m_utilElement_.m_cPointsOffset_ > 1) { // Do it manually
- * m_utilElement_.m_CEs_[0] |=
- * getCaseBits(m_utilElement_.m_cPoints_); } else { // Copy it from
- * the UCA int caseCE =
- * getFirstCE(m_utilElement_.m_cPoints_.charAt(0));
- * m_utilElement_.m_CEs_[0] |= (caseCE & 0xC0); }
- ***/
- // and then, add it
- addAnElement(t, m_utilElement_);
- tok = tok.m_next_;
- }
- }
-
- /**
- * Testing if the string argument has case
- *
- * @param src
- * string
- * @return the case for this char array
- * @exception Exception
- * thrown when internal program error occurs
- */
- private final int getCaseBits(String src) throws Exception {
- int uCount = 0;
- int lCount = 0;
- src = Normalizer.decompose(src, true);
- m_utilColEIter_.setText(src);
- for (int i = 0; i < src.length(); i++) {
- m_utilColEIter_.setText(src.substring(i, i + 1));
- int order = m_utilColEIter_.next();
- if (RuleBasedCollator.isContinuation(order)) {
- throw new Exception("Internal program error");
- }
- if ((order & RuleBasedCollator.CE_CASE_BIT_MASK_) == UPPER_CASE_) {
- uCount++;
- } else {
- char ch = src.charAt(i);
- if (UCharacter.isLowerCase(ch)) {
- lCount++;
- } else {
- if (toSmallKana(ch) == ch && toLargeKana(ch) != ch) {
- lCount++;
- }
- }
- }
- }
-
- if (uCount != 0 && lCount != 0) {
- return MIXED_CASE_;
- } else if (uCount != 0) {
- return UPPER_CASE_;
- } else {
- return LOWER_CASE_;
- }
- }
-
- /**
- * Converts a char to the uppercase Kana
- *
- * @param ch
- * character to convert
- * @return the converted Kana character
- */
- private static final char toLargeKana(char ch) {
- if (0x3042 < ch && ch < 0x30ef) { // Kana range
- switch (ch - 0x3000) {
- case 0x41:
- case 0x43:
- case 0x45:
- case 0x47:
- case 0x49:
- case 0x63:
- case 0x83:
- case 0x85:
- case 0x8E:
- case 0xA1:
- case 0xA3:
- case 0xA5:
- case 0xA7:
- case 0xA9:
- case 0xC3:
- case 0xE3:
- case 0xE5:
- case 0xEE:
- ch++;
- break;
- case 0xF5:
- ch = 0x30AB;
- break;
- case 0xF6:
- ch = 0x30B1;
- break;
- }
- }
- return ch;
- }
-
- /**
- * Converts a char to the lowercase Kana
- *
- * @param ch
- * character to convert
- * @return the converted Kana character
- */
- private static final char toSmallKana(char ch) {
- if (0x3042 < ch && ch < 0x30ef) { // Kana range
- switch (ch - 0x3000) {
- case 0x42:
- case 0x44:
- case 0x46:
- case 0x48:
- case 0x4A:
- case 0x64:
- case 0x84:
- case 0x86:
- case 0x8F:
- case 0xA2:
- case 0xA4:
- case 0xA6:
- case 0xA8:
- case 0xAA:
- case 0xC4:
- case 0xE4:
- case 0xE6:
- case 0xEF:
- ch--;
- break;
- case 0xAB:
- ch = 0x30F5;
- break;
- case 0xB1:
- ch = 0x30F6;
- break;
- }
- }
- return ch;
- }
-
- /**
- * This should be connected to special Jamo handling.
- */
- private int getFirstCE(char ch) {
- m_utilColEIter_.setText(UCharacter.toString(ch));
- return m_utilColEIter_.next();
- }
-
- /**
- * This adds a read element, while testing for existence
- *
- * @param t
- * build table
- * @param element
- * @return ce
- */
- private int addAnElement(BuildTable t, Elements element) {
- List<Integer> expansions = t.m_expansions_;
- element.m_mapCE_ = 0;
-
- if (element.m_CELength_ == 1) {
- element.m_mapCE_ = element.m_CEs_[0];
-
- } else {
- // unfortunately, it looks like we have to look for a long primary
- // here since in canonical closure we are going to hit some long
- // primaries from the first phase, and they will come back as
- // continuations/expansions destroying the effect of the previous
- // opitimization. A long primary is a three byte primary with
- // starting secondaries and tertiaries. It can appear in long runs
- // of only primary differences (like east Asian tailorings) also,
- // it should not be an expansion, as expansions would break with
- // this
- if (element.m_CELength_ == 2 // a two CE expansion
- && RuleBasedCollator.isContinuation(element.m_CEs_[1])
- && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0 // that
- // has
- // only
- // primaries
- // in
- // continuation
- && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
- // a common secondary
- && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_) // and
- // a
- // common
- // tertiary
- ) {
- element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_
- // a long primary special
- | (CE_LONG_PRIMARY_TAG_ << 24)
- // first and second byte of primary
- | ((element.m_CEs_[0] >> 8) & 0xFFFF00)
- // third byte of primary
- | ((element.m_CEs_[1] >> 24) & 0xFF);
- } else {
- // omitting expansion offset in builder
- // (HEADER_SIZE_ >> 2)
- int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
- | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
- | (addExpansion(expansions, element.m_CEs_[0]) << 4)
- & 0xFFFFF0;
-
- for (int i = 1; i < element.m_CELength_; i++) {
- addExpansion(expansions, element.m_CEs_[i]);
- }
- if (element.m_CELength_ <= 0xF) {
- expansion |= element.m_CELength_;
- } else {
- addExpansion(expansions, 0);
- }
- element.m_mapCE_ = expansion;
- setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1],
- (byte) element.m_CELength_, t.m_maxExpansions_);
- if (isJamo(element.m_cPoints_.charAt(0))) {
- t.m_collator_.m_isJamoSpecial_ = true;
- setMaxJamoExpansion(element.m_cPoints_.charAt(0),
- element.m_CEs_[element.m_CELength_ - 1],
- (byte) element.m_CELength_, t.m_maxJamoExpansions_);
- }
- }
- }
-
- // We treat digits differently - they are "uber special" and should be
- // processed differently if numeric collation is on.
- int uniChar = 0;
- if ((element.m_uchars_.length() == 2)
- && UTF16.isLeadSurrogate(element.m_uchars_.charAt(0))) {
- uniChar = UCharacterProperty.getRawSupplementary(element.m_uchars_
- .charAt(0), element.m_uchars_.charAt(1));
- } else if (element.m_uchars_.length() == 1) {
- uniChar = element.m_uchars_.charAt(0);
- }
-
- // Here, we either have one normal CE OR mapCE is set. Therefore, we
- // stuff only one element to the expansion buffer. When we encounter a
- // digit and we don't do numeric collation, we will just pick the CE
- // we have and break out of case (see ucol.cpp ucol_prv_getSpecialCE
- // && ucol_prv_getSpecialPrevCE). If we picked a special, further
- // processing will occur. If it's a simple CE, we'll return due
- // to how the loop is constructed.
- if (uniChar != 0 && UCharacter.isDigit(uniChar)) {
- // prepare the element
- int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
- | (CollationElementIterator.CE_DIGIT_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
- | 1;
- if (element.m_mapCE_ != 0) {
- // if there is an expansion, we'll pick it here
- expansion |= (addExpansion(expansions, element.m_mapCE_) << 4);
- } else {
- expansion |= (addExpansion(expansions, element.m_CEs_[0]) << 4);
- }
- element.m_mapCE_ = expansion;
- }
-
- // here we want to add the prefix structure.
- // I will try to process it as a reverse contraction, if possible.
- // prefix buffer is already reversed.
-
- if (element.m_prefixChars_ != null
- && element.m_prefixChars_.length() - element.m_prefix_ > 0) {
- // We keep the seen prefix starter elements in a hashtable we need
- // it to be able to distinguish between the simple codepoints and
- // prefix starters. Also, we need to use it for canonical closure.
- m_utilElement2_.m_caseBit_ = element.m_caseBit_;
- m_utilElement2_.m_CELength_ = element.m_CELength_;
- m_utilElement2_.m_CEs_ = element.m_CEs_;
- m_utilElement2_.m_mapCE_ = element.m_mapCE_;
- // m_utilElement2_.m_prefixChars_ = element.m_prefixChars_;
- m_utilElement2_.m_sizePrim_ = element.m_sizePrim_;
- m_utilElement2_.m_sizeSec_ = element.m_sizeSec_;
- m_utilElement2_.m_sizeTer_ = element.m_sizeTer_;
- m_utilElement2_.m_variableTop_ = element.m_variableTop_;
- m_utilElement2_.m_prefix_ = element.m_prefix_;
- m_utilElement2_.m_prefixChars_ = Normalizer.compose(
- element.m_prefixChars_, false);
- m_utilElement2_.m_uchars_ = element.m_uchars_;
- m_utilElement2_.m_cPoints_ = element.m_cPoints_;
- m_utilElement2_.m_cPointsOffset_ = 0;
-
- if (t.m_prefixLookup_ != null) {
- Elements uCE = t.m_prefixLookup_.get(element);
- if (uCE != null) {
- // there is already a set of code points here
- element.m_mapCE_ = addPrefix(t, uCE.m_mapCE_, element);
- } else { // no code points, so this spot is clean
- element.m_mapCE_ = addPrefix(t, CE_NOT_FOUND_, element);
- uCE = new Elements(element);
- uCE.m_cPoints_ = uCE.m_uchars_;
- t.m_prefixLookup_.put(uCE, uCE);
- }
- if (m_utilElement2_.m_prefixChars_.length() != element.m_prefixChars_
- .length()
- - element.m_prefix_
- || !m_utilElement2_.m_prefixChars_.regionMatches(0,
- element.m_prefixChars_, element.m_prefix_,
- m_utilElement2_.m_prefixChars_.length())) {
- // do it!
- m_utilElement2_.m_mapCE_ = addPrefix(t, element.m_mapCE_,
- m_utilElement2_);
- }
- }
- }
-
- // We need to use the canonical iterator here
- // the way we do it is to generate the canonically equivalent strings
- // for the contraction and then add the sequences that pass FCD check
- if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1
- && !(element.m_cPoints_.length() - element.m_cPointsOffset_ == 2
- && UTF16.isLeadSurrogate(element.m_cPoints_.charAt(0)) && UTF16
- .isTrailSurrogate(element.m_cPoints_.charAt(1)))) {
- // this is a contraction, we should check whether a composed form
- // should also be included
- m_utilCanIter_.setSource(element.m_cPoints_);
- String source = m_utilCanIter_.next();
- while (source != null && source.length() > 0) {
- if (Normalizer.quickCheck(source, Normalizer.FCD, 0) != Normalizer.NO) {
- element.m_uchars_ = source;
- element.m_cPoints_ = element.m_uchars_;
- finalizeAddition(t, element);
- }
- source = m_utilCanIter_.next();
- }
-
- return element.m_mapCE_;
- } else {
- return finalizeAddition(t, element);
- }
- }
-
- /**
- * Adds an expansion ce to the expansion vector
- *
- * @param expansions
- * vector to add to
- * @param value
- * of the expansion
- * @return the current position of the new element
- */
- private static final int addExpansion(List<Integer> expansions, int value) {
- expansions.add(Integer.valueOf(value));
- return expansions.size() - 1;
- }
-
- /**
- * Looks for the maximum length of all expansion sequences ending with the
- * same collation element. The size required for maxexpansion and maxsize is
- * returned if the arrays are too small.
- *
- * @param endexpansion
- * the last expansion collation element to be added
- * @param expansionsize
- * size of the expansion
- * @param maxexpansion
- * data structure to store the maximum expansion data.
- * @returns size of the maxexpansion and maxsize used.
- */
- private static int setMaxExpansion(int endexpansion, byte expansionsize,
- MaxExpansionTable maxexpansion) {
- int start = 0;
- int limit = maxexpansion.m_endExpansionCE_.size();
- long unsigned = (long) endexpansion;
- unsigned &= 0xFFFFFFFFl;
-
- // using binary search to determine if last expansion element is
- // already in the array
- int result = -1;
- if (limit > 0) {
- while (start < limit - 1) {
- int mid = (start + limit) >> 1;
- long unsignedce = (maxexpansion.m_endExpansionCE_
- .get(mid)).intValue();
- unsignedce &= 0xFFFFFFFFl;
- if (unsigned < unsignedce) {
- limit = mid;
- } else {
- start = mid;
- }
- }
-
- if ((maxexpansion.m_endExpansionCE_.get(start)).intValue() == endexpansion) {
- result = start;
- }
- }
- if (result > -1) {
- // found the ce in expansion, we'll just modify the size if it
- // is smaller
- Object currentsize = maxexpansion.m_expansionCESize_.get(result);
- if (((Byte) currentsize).byteValue() < expansionsize) {
- maxexpansion.m_expansionCESize_.set(result, Byte.valueOf(
- expansionsize));
- }
- } else {
- // we'll need to squeeze the value into the array. initial
- // implementation. shifting the subarray down by 1
- maxexpansion.m_endExpansionCE_.add(start + 1, Integer.valueOf(endexpansion));
- maxexpansion.m_expansionCESize_.add(start + 1, Byte.valueOf(expansionsize));
- }
- return maxexpansion.m_endExpansionCE_.size();
- }
-
- /**
- * Sets the maximum length of all jamo expansion sequences ending with the
- * same collation element. The size required for maxexpansion and maxsize is
- * returned if the arrays are too small.
- *
- * @param ch
- * the jamo codepoint
- * @param endexpansion
- * the last expansion collation element to be added
- * @param expansionsize
- * size of the expansion
- * @param maxexpansion
- * data structure to store the maximum expansion data.
- * @returns size of the maxexpansion and maxsize used.
- */
- private static int setMaxJamoExpansion(char ch, int endexpansion,
- byte expansionsize, MaxJamoExpansionTable maxexpansion) {
- boolean isV = true;
- if (ch >= 0x1100 && ch <= 0x1112) {
- // determines L for Jamo, doesn't need to store this since it is
- // never at the end of a expansion
- if (maxexpansion.m_maxLSize_ < expansionsize) {
- maxexpansion.m_maxLSize_ = expansionsize;
- }
- return maxexpansion.m_endExpansionCE_.size();
- }
-
- if (ch >= 0x1161 && ch <= 0x1175) {
- // determines V for Jamo
- if (maxexpansion.m_maxVSize_ < expansionsize) {
- maxexpansion.m_maxVSize_ = expansionsize;
- }
- }
-
- if (ch >= 0x11A8 && ch <= 0x11C2) {
- isV = false;
- // determines T for Jamo
- if (maxexpansion.m_maxTSize_ < expansionsize) {
- maxexpansion.m_maxTSize_ = expansionsize;
- }
- }
-
- int pos = maxexpansion.m_endExpansionCE_.size();
- while (pos > 0) {
- pos--;
- if ((maxexpansion.m_endExpansionCE_.get(pos)).intValue() == endexpansion) {
- return maxexpansion.m_endExpansionCE_.size();
- }
- }
- maxexpansion.m_endExpansionCE_.add(Integer.valueOf(endexpansion));
- maxexpansion.m_isV_.add(isV ? Boolean.TRUE : Boolean.FALSE);
-
- return maxexpansion.m_endExpansionCE_.size();
- }
-
- /**
- * Adds a prefix to the table
- *
- * @param t
- * build table to update
- * @param CE
- * collation element to add
- * @param element
- * rule element to add
- * @return modified ce
- */
- private int addPrefix(BuildTable t, int CE, Elements element) {
- // currently the longest prefix we're supporting in Japanese is two
- // characters long. Although this table could quite easily mimic
- // complete contraction stuff there is no good reason to make a general
- // solution, as it would require some error prone messing.
- ContractionTable contractions = t.m_contractions_;
- String oldCP = element.m_cPoints_;
- int oldCPOffset = element.m_cPointsOffset_;
-
- contractions.m_currentTag_ = CE_SPEC_PROC_TAG_;
- // here, we will normalize & add prefix to the table.
- int size = element.m_prefixChars_.length() - element.m_prefix_;
- for (int j = 1; j < size; j++) {
- // First add NFD prefix chars to unsafe CP hash table
- // Unless it is a trail surrogate, which is handled algoritmically
- // and shouldn't take up space in the table.
- char ch = element.m_prefixChars_.charAt(j + element.m_prefix_);
- if (!UTF16.isTrailSurrogate(ch)) {
- unsafeCPSet(t.m_unsafeCP_, ch);
- }
- }
-
- // StringBuffer reversed = new StringBuffer();
- m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
- for (int j = 0; j < size; j++) {
- // prefixes are going to be looked up backwards
- // therefore, we will promptly reverse the prefix buffer...
- int offset = element.m_prefixChars_.length() - j - 1;
- m_utilStringBuffer_.append(element.m_prefixChars_.charAt(offset));
- }
- element.m_prefixChars_ = m_utilStringBuffer_.toString();
- element.m_prefix_ = 0;
-
- // the first codepoint is also unsafe, as it forms a 'contraction' with
- // the prefix
- if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(0))) {
- unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_.charAt(0));
- }
-
- element.m_cPoints_ = element.m_prefixChars_;
- element.m_cPointsOffset_ = element.m_prefix_;
-
- // Add the last char of the contraction to the contraction-end hash
- // table. unless it is a trail surrogate, which is handled
- // algorithmically and shouldn't be in the table
- if (!UTF16.isTrailSurrogate(element.m_cPoints_
- .charAt(element.m_cPoints_.length() - 1))) {
- ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_
- .charAt(element.m_cPoints_.length() - 1));
- }
- // First we need to check if contractions starts with a surrogate
- // int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);
-
- // If there are any Jamos in the contraction, we should turn on special
- // processing for Jamos
- if (isJamo(element.m_prefixChars_.charAt(element.m_prefix_))) {
- t.m_collator_.m_isJamoSpecial_ = true;
- }
- // then we need to deal with it
- // we could aready have something in table - or we might not
- if (!isPrefix(CE)) {
- // if it wasn't contraction, we wouldn't end up here
- int firstContractionOffset = addContraction(contractions,
- CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE);
- int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
- addContraction(contractions, firstContractionOffset,
- element.m_prefixChars_.charAt(element.m_prefix_), newCE);
- addContraction(contractions, firstContractionOffset, (char) 0xFFFF,
- CE);
- CE = constructSpecialCE(CE_SPEC_PROC_TAG_, firstContractionOffset);
- } else {
- // we are adding to existing contraction
- // there were already some elements in the table, so we need to add
- // a new contraction
- // Two things can happen here: either the codepoint is already in
- // the table, or it is not
- char ch = element.m_prefixChars_.charAt(element.m_prefix_);
- int position = findCP(contractions, CE, ch);
- if (position > 0) {
- // if it is we just continue down the chain
- int eCE = getCE(contractions, CE, position);
- int newCE = processContraction(contractions, element, eCE);
- setContraction(contractions, CE, position, ch, newCE);
- } else {
- // if it isn't, we will have to create a new sequence
- processContraction(contractions, element, CE_NOT_FOUND_);
- insertContraction(contractions, CE, ch, element.m_mapCE_);
- }
- }
-
- element.m_cPoints_ = oldCP;
- element.m_cPointsOffset_ = oldCPOffset;
-
- return CE;
- }
-
- /**
- * Checks if the argument ce is a contraction
- *
- * @param CE
- * collation element
- * @return true if argument ce is a contraction
- */
- private static final boolean isContraction(int CE) {
- return isSpecial(CE) && (getCETag(CE) == CE_CONTRACTION_TAG_);
- }
-
- /**
- * Checks if the argument ce has a prefix
- *
- * @param CE
- * collation element
- * @return true if argument ce has a prefix
- */
- private static final boolean isPrefix(int CE) {
- return isSpecial(CE) && (getCETag(CE) == CE_SPEC_PROC_TAG_);
- }
-
- /**
- * Checks if the argument ce is special
- *
- * @param CE
- * collation element
- * @return true if argument ce is special
- */
- private static final boolean isSpecial(int CE) {
- return (CE & RuleBasedCollator.CE_SPECIAL_FLAG_) == 0xF0000000;
- }
-
- /**
- * Checks if the argument ce has a prefix
- *
- * @param CE
- * collation element
- * @return true if argument ce has a prefix
- */
- private static final int getCETag(int CE) {
- return (CE & RuleBasedCollator.CE_TAG_MASK_) >>> RuleBasedCollator.CE_TAG_SHIFT_;
- }
-
- /**
- * Gets the ce at position in contraction table
- *
- * @param table
- * contraction table
- * @param position
- * offset to the contraction table
- * @return ce
- */
- private static final int getCE(ContractionTable table, int element,
- int position) {
- element &= 0xFFFFFF;
- BasicContractionTable tbl = getBasicContractionTable(table, element);
-
- if (tbl == null) {
- return CE_NOT_FOUND_;
- }
- if (position > tbl.m_CEs_.size() || position == -1) {
- return CE_NOT_FOUND_;
- } else {
- return tbl.m_CEs_.get(position).intValue();
- }
- }
-
- /**
- * Sets the unsafe character
- *
- * @param table
- * unsafe table
- * @param c
- * character to be added
- */
- private static final void unsafeCPSet(byte table[], char c) {
- int hash = c;
- if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) {
- if (hash >= 0xd800 && hash <= 0xf8ff) {
- // Part of a surrogate, or in private use area.
- // These don't go in the table
- return;
- }
- hash = (hash & UNSAFECP_TABLE_MASK_) + 256;
- }
- table[hash >> 3] |= (1 << (hash & 7));
- }
-
- /**
- * Sets the contraction end character
- *
- * @param table
- * contraction end table
- * @param c
- * character to be added
- */
- private static final void ContrEndCPSet(byte table[], char c) {
- int hash = c;
- if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) {
- hash = (hash & UNSAFECP_TABLE_MASK_) + 256;
- }
- table[hash >> 3] |= (1 << (hash & 7));
- }
-
- /**
- * Adds more contractions in table. If element is non existant, it creates
- * on. Returns element handle
- *
- * @param table
- * contraction table
- * @param element
- * offset to the contraction table
- * @param codePoint
- * codepoint to add
- * @param value
- * @return collation element
- */
- private static int addContraction(ContractionTable table, int element,
- char codePoint, int value) {
- BasicContractionTable tbl = getBasicContractionTable(table, element);
- if (tbl == null) {
- tbl = addAContractionElement(table);
- element = table.m_elements_.size() - 1;
- }
-
- tbl.m_CEs_.add(Integer.valueOf(value));
- tbl.m_codePoints_.append(codePoint);
- return constructSpecialCE(table.m_currentTag_, element);
- }
-
- /**
- * Adds a contraction element to the table
- *
- * @param table
- * contraction table to update
- * @return contraction
- */
- private static BasicContractionTable addAContractionElement(
- ContractionTable table) {
- BasicContractionTable result = new BasicContractionTable();
- table.m_elements_.add(result);
- return result;
- }
-
- /**
- * Constructs a special ce
- *
- * @param tag
- * special tag
- * @param CE
- * collation element
- * @return a contraction ce
- */
- private static final int constructSpecialCE(int tag, int CE) {
- return RuleBasedCollator.CE_SPECIAL_FLAG_
- | (tag << RuleBasedCollator.CE_TAG_SHIFT_) | (CE & 0xFFFFFF);
- }
-
- /**
- * Sets and inserts the element that has a contraction
- *
- * @param contractions
- * contraction table
- * @param element
- * contracting element
- * @param existingCE
- * @return contraction ce
- */
- private static int processContraction(ContractionTable contractions,
- Elements element, int existingCE) {
- int firstContractionOffset = 0;
- // end of recursion
- if (element.m_cPoints_.length() - element.m_cPointsOffset_ == 1) {
- if (isContractionTableElement(existingCE)
- && getCETag(existingCE) == contractions.m_currentTag_) {
- changeContraction(contractions, existingCE, (char) 0,
- element.m_mapCE_);
- changeContraction(contractions, existingCE, (char) 0xFFFF,
- element.m_mapCE_);
- return existingCE;
- } else {
- // can't do just that. existingCe might be a contraction,
- // meaning that we need to do another step
- return element.m_mapCE_;
- }
- }
-
- // this recursion currently feeds on the only element we have...
- // We will have to copy it in order to accomodate for both backward
- // and forward cycles
- // we encountered either an empty space or a non-contraction element
- // this means we are constructing a new contraction sequence
- element.m_cPointsOffset_++;
- if (!isContractionTableElement(existingCE)) {
- // if it wasn't contraction, we wouldn't end up here
- firstContractionOffset = addContraction(contractions,
- CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, existingCE);
- int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
- addContraction(contractions, firstContractionOffset,
- element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
- addContraction(contractions, firstContractionOffset, (char) 0xFFFF,
- existingCE);
- existingCE = constructSpecialCE(contractions.m_currentTag_,
- firstContractionOffset);
- } else {
- // we are adding to existing contraction
- // there were already some elements in the table, so we need to add
- // a new contraction
- // Two things can happen here: either the codepoint is already in
- // the table, or it is not
- int position = findCP(contractions, existingCE, element.m_cPoints_
- .charAt(element.m_cPointsOffset_));
- if (position > 0) {
- // if it is we just continue down the chain
- int eCE = getCE(contractions, existingCE, position);
- int newCE = processContraction(contractions, element, eCE);
- setContraction(contractions, existingCE, position,
- element.m_cPoints_.charAt(element.m_cPointsOffset_),
- newCE);
- } else {
- // if it isn't, we will have to create a new sequence
- int newCE = processContraction(contractions, element,
- CE_NOT_FOUND_);
- insertContraction(contractions, existingCE, element.m_cPoints_
- .charAt(element.m_cPointsOffset_), newCE);
- }
- }
- element.m_cPointsOffset_--;
- return existingCE;
- }
-
- /**
- * Checks if CE belongs to the contraction table
- *
- * @param CE
- * collation element to test
- * @return true if CE belongs to the contraction table
- */
- private static final boolean isContractionTableElement(int CE) {
- return isSpecial(CE)
- && (getCETag(CE) == CE_CONTRACTION_TAG_ || getCETag(CE) == CE_SPEC_PROC_TAG_);
- }
-
- /**
- * Gets the codepoint
- *
- * @param table
- * contraction table
- * @param element
- * offset to the contraction element in the table
- * @param codePoint
- * code point to look for
- * @return the offset to the code point
- */
- private static int findCP(ContractionTable table, int element,
- char codePoint) {
- BasicContractionTable tbl = getBasicContractionTable(table, element);
- if (tbl == null) {
- return -1;
- }
-
- int position = 0;
- while (codePoint > tbl.m_codePoints_.charAt(position)) {
- position++;
- if (position > tbl.m_codePoints_.length()) {
- return -1;
- }
- }
- if (codePoint == tbl.m_codePoints_.charAt(position)) {
- return position;
- } else {
- return -1;
- }
- }
-
- /**
- * Gets the contraction element out of the contraction table
- *
- * @param table
- * contraction table
- * @param offset
- * to the element in the contraction table
- * @return basic contraction element at offset in the contraction table
- */
- private static final BasicContractionTable getBasicContractionTable(
- ContractionTable table, int offset) {
- offset &= 0xFFFFFF;
- if (offset == 0xFFFFFF) {
- return null;
- }
- return table.m_elements_.get(offset);
- }
-
- /**
- * Changes the contraction element
- *
- * @param table
- * contraction table
- * @param element
- * offset to the element in the contraction table
- * @param codePoint
- * codepoint
- * @param newCE
- * new collation element
- * @return basic contraction element at offset in the contraction table
- */
- private static final int changeContraction(ContractionTable table,
- int element, char codePoint, int newCE) {
- BasicContractionTable tbl = getBasicContractionTable(table, element);
- if (tbl == null) {
- return 0;
- }
- int position = 0;
- while (codePoint > tbl.m_codePoints_.charAt(position)) {
- position++;
- if (position > tbl.m_codePoints_.length()) {
- return CE_NOT_FOUND_;
- }
- }
- if (codePoint == tbl.m_codePoints_.charAt(position)) {
- tbl.m_CEs_.set(position, Integer.valueOf(newCE));
- return element & 0xFFFFFF;
- } else {
- return CE_NOT_FOUND_;
- }
- }
-
- /**
- * Sets a part of contraction sequence in table. If element is non existant,
- * it creates on. Returns element handle.
- *
- * @param table
- * contraction table
- * @param element
- * offset to the contraction table
- * @param offset
- * @param codePoint
- * contraction character
- * @param value
- * ce value
- * @return new contraction ce
- */
- private static final int setContraction(ContractionTable table,
- int element, int offset, char codePoint, int value) {
- element &= 0xFFFFFF;
- BasicContractionTable tbl = getBasicContractionTable(table, element);
- if (tbl == null) {
- tbl = addAContractionElement(table);
- element = table.m_elements_.size() - 1;
- }
-
- tbl.m_CEs_.set(offset, Integer.valueOf(value));
- tbl.m_codePoints_.setCharAt(offset, codePoint);
- return constructSpecialCE(table.m_currentTag_, element);
- }
-
- /**
- * Inserts a part of contraction sequence in table. Sequences behind the
- * offset are moved back. If element is non existent, it creates on.
- *
- * @param table
- * contraction
- * @param element
- * offset to the table contraction
- * @param codePoint
- * code point
- * @param value
- * collation element value
- * @return contraction collation element
- */
- private static final int insertContraction(ContractionTable table,
- int element, char codePoint, int value) {
- element &= 0xFFFFFF;
- BasicContractionTable tbl = getBasicContractionTable(table, element);
- if (tbl == null) {
- tbl = addAContractionElement(table);
- element = table.m_elements_.size() - 1;
- }
-
- int offset = 0;
- while (tbl.m_codePoints_.charAt(offset) < codePoint
- && offset < tbl.m_codePoints_.length()) {
- offset++;
- }
-
- tbl.m_CEs_.add(offset, Integer.valueOf(value));
- tbl.m_codePoints_.insert(offset, codePoint);
-
- return constructSpecialCE(table.m_currentTag_, element);
- }
-
- /**
- * Finalize addition
- *
- * @param t
- * build table
- * @param element
- * to add
- */
- private final static int finalizeAddition(BuildTable t, Elements element) {
- int CE = CE_NOT_FOUND_;
- // This should add a completely ignorable element to the
- // unsafe table, so that backward iteration will skip
- // over it when treating contractions.
- if (element.m_mapCE_ == 0) {
- for (int i = 0; i < element.m_cPoints_.length(); i++) {
- char ch = element.m_cPoints_.charAt(i);
- if (!UTF16.isTrailSurrogate(ch)) {
- unsafeCPSet(t.m_unsafeCP_, ch);
- }
- }
- }
-
- if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1) {
- // we're adding a contraction
- int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);
- CE = t.m_mapping_.getValue(cp);
- CE = addContraction(t, CE, element);
- } else {
- // easy case
- CE = t.m_mapping_.getValue(element.m_cPoints_
- .charAt(element.m_cPointsOffset_));
-
- if (CE != CE_NOT_FOUND_) {
- if (isContractionTableElement(CE)) {
- // adding a non contraction element (thai, expansion,
- // single) to already existing contraction
- if (!isPrefix(element.m_mapCE_)) {
- // we cannot reenter prefix elements - as we are going
- // to create a dead loop
- // Only expansions and regular CEs can go here...
- // Contractions will never happen in this place
- setContraction(t.m_contractions_, CE, 0, (char) 0,
- element.m_mapCE_);
- // This loop has to change the CE at the end of
- // contraction REDO!
- changeLastCE(t.m_contractions_, CE, element.m_mapCE_);
- }
- } else {
- t.m_mapping_
- .setValue(element.m_cPoints_
- .charAt(element.m_cPointsOffset_),
- element.m_mapCE_);
- if (element.m_prefixChars_ != null
- && element.m_prefixChars_.length() > 0
- && getCETag(CE) != CE_IMPLICIT_TAG_) {
- // Add CE for standalone precontext char.
- Elements origElem = new Elements();
- origElem.m_prefixChars_ = null;
- origElem.m_uchars_ = element.m_cPoints_;
- origElem.m_cPoints_ = origElem.m_uchars_;
- origElem.m_CEs_[0] = CE;
- origElem.m_mapCE_ = CE;
- origElem.m_CELength_ = 1;
- finalizeAddition(t, origElem);
- }
- }
- } else {
- t.m_mapping_.setValue(element.m_cPoints_
- .charAt(element.m_cPointsOffset_), element.m_mapCE_);
- }
- }
- return CE;
- }
-
- /**
- * Note regarding surrogate handling: We are interested only in the single
- * or leading surrogates in a contraction. If a surrogate is somewhere else
- * in the contraction, it is going to be handled as a pair of code units, as
- * it doesn't affect the performance AND handling surrogates specially would
- * complicate code way too much.
- */
- private static int addContraction(BuildTable t, int CE, Elements element) {
- ContractionTable contractions = t.m_contractions_;
- contractions.m_currentTag_ = CE_CONTRACTION_TAG_;
-
- // First we need to check if contractions starts with a surrogate
- int cp = UTF16.charAt(element.m_cPoints_, 0);
- int cpsize = 1;
- if (UCharacter.isSupplementary(cp)) {
- cpsize = 2;
- }
- if (cpsize < element.m_cPoints_.length()) {
- // This is a real contraction, if there are other characters after
- // the first
- int size = element.m_cPoints_.length() - element.m_cPointsOffset_;
- for (int j = 1; j < size; j++) {
- // First add contraction chars to unsafe CP hash table
- // Unless it is a trail surrogate, which is handled
- // algoritmically and shouldn't take up space in the table.
- if (!UTF16.isTrailSurrogate(element.m_cPoints_
- .charAt(element.m_cPointsOffset_ + j))) {
- unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_
- .charAt(element.m_cPointsOffset_ + j));
- }
- }
- // Add the last char of the contraction to the contraction-end
- // hash table. unless it is a trail surrogate, which is handled
- // algorithmically and shouldn't be in the table
- if (!UTF16.isTrailSurrogate(element.m_cPoints_
- .charAt(element.m_cPoints_.length() - 1))) {
- ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_
- .charAt(element.m_cPoints_.length() - 1));
- }
-
- // If there are any Jamos in the contraction, we should turn on
- // special processing for Jamos
- if (isJamo(element.m_cPoints_.charAt(element.m_cPointsOffset_))) {
- t.m_collator_.m_isJamoSpecial_ = true;
- }
- // then we need to deal with it
- // we could aready have something in table - or we might not
- element.m_cPointsOffset_ += cpsize;
- if (!isContraction(CE)) {
- // if it wasn't contraction, we wouldn't end up here
- int firstContractionOffset = addContraction(contractions,
- CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE);
- int newCE = processContraction(contractions, element,
- CE_NOT_FOUND_);
- addContraction(contractions, firstContractionOffset,
- element.m_cPoints_.charAt(element.m_cPointsOffset_),
- newCE);
- addContraction(contractions, firstContractionOffset,
- (char) 0xFFFF, CE);
- CE = constructSpecialCE(CE_CONTRACTION_TAG_,
- firstContractionOffset);
- } else {
- // we are adding to existing contraction
- // there were already some elements in the table, so we need to
- // add a new contraction
- // Two things can happen here: either the codepoint is already
- // in the table, or it is not
- int position = findCP(contractions, CE, element.m_cPoints_
- .charAt(element.m_cPointsOffset_));
- if (position > 0) {
- // if it is we just continue down the chain
- int eCE = getCE(contractions, CE, position);
- int newCE = processContraction(contractions, element, eCE);
- setContraction(
- contractions,
- CE,
- position,
- element.m_cPoints_.charAt(element.m_cPointsOffset_),
- newCE);
- } else {
- // if it isn't, we will have to create a new sequence
- int newCE = processContraction(contractions, element,
- CE_NOT_FOUND_);
- insertContraction(contractions, CE, element.m_cPoints_
- .charAt(element.m_cPointsOffset_), newCE);
- }
- }
- element.m_cPointsOffset_ -= cpsize;
- t.m_mapping_.setValue(cp, CE);
- } else if (!isContraction(CE)) {
- // this is just a surrogate, and there is no contraction
- t.m_mapping_.setValue(cp, element.m_mapCE_);
- } else {
- // fill out the first stage of the contraction with the surrogate
- // CE
- changeContraction(contractions, CE, (char) 0, element.m_mapCE_);
- changeContraction(contractions, CE, (char) 0xFFFF, element.m_mapCE_);
- }
- return CE;
- }
-
- /**
- * this is for adding non contractions
- *
- * @param table
- * contraction table
- * @param element
- * offset to the contraction table
- * @param value
- * collation element value
- * @return new collation element
- */
- private static final int changeLastCE(ContractionTable table, int element,
- int value) {
- BasicContractionTable tbl = getBasicContractionTable(table, element);
- if (tbl == null) {
- return 0;
- }
-
- tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, Integer.valueOf(value));
- return constructSpecialCE(table.m_currentTag_, element & 0xFFFFFF);
- }
-
- /**
- * Given a set of ranges calculated by allocWeights(), iterate through the
- * weights. Sets the next weight in cegenerator.m_current_.
- *
- * @param cegenerator
- * object that contains ranges weight range array and its
- * rangeCount
- * @return the next weight
- */
- private static int nextWeight(CEGenerator cegenerator) {
- if (cegenerator.m_rangesLength_ > 0) {
- // get maxByte from the .count field
- int maxByte = cegenerator.m_ranges_[0].m_count_;
- // get the next weight
- int weight = cegenerator.m_ranges_[0].m_start_;
- if (weight == cegenerator.m_ranges_[0].m_end_) {
- // this range is finished, remove it and move the following
- // ones up
- cegenerator.m_rangesLength_--;
- if (cegenerator.m_rangesLength_ > 0) {
- System.arraycopy(cegenerator.m_ranges_, 1,
- cegenerator.m_ranges_, 0,
- cegenerator.m_rangesLength_);
- cegenerator.m_ranges_[0].m_count_ = maxByte;
- // keep maxByte in ranges[0]
- }
- } else {
- // increment the weight for the next value
- cegenerator.m_ranges_[0].m_start_ = incWeight(weight,
- cegenerator.m_ranges_[0].m_length2_, maxByte);
- }
- return weight;
- }
- return -1;
- }
-
- /**
- * Increment the collation weight
- *
- * @param weight
- * to increment
- * @param length
- * @param maxByte
- * @return new incremented weight
- */
- private static final int incWeight(int weight, int length, int maxByte) {
- while (true) {
- int b = getWeightByte(weight, length);
- if (b < maxByte) {
- return setWeightByte(weight, length, b + 1);
- } else {
- // roll over, set this byte to BYTE_FIRST_TAILORED_ and
- // increment the previous one
- weight = setWeightByte(weight, length,
- RuleBasedCollator.BYTE_FIRST_TAILORED_);
- --length;
- }
- }
- }
-
- /**
- * Gets the weight byte
- *
- * @param weight
- * @param index
- * @return byte
- */
- private static final int getWeightByte(int weight, int index) {
- return (weight >> ((4 - index) << 3)) & 0xff;
- }
-
- /**
- * Set the weight byte in table
- *
- * @param weight
- * @param index
- * @param b
- * byte
- */
- private static final int setWeightByte(int weight, int index, int b) {
- index <<= 3;
- // 0xffffffff except a 00 "hole" for the index-th byte
- int mask;
- if (index < 32) {
- mask = 0xffffffff >>> index;
- } else {
- // Do not use int>>>32 because that does not shift at all
- // while we need it to become 0.
- //
- // Java Language Specification (Third Edition) 15.19 Shift Operators:
- // "If the promoted type of the left-hand operand is int,
- // only the five lowest-order bits of the right-hand operand
- // are used as the shift distance.
- // It is as if the right-hand operand were subjected to
- // a bitwise logical AND operator & (§15.22.1) with the mask value 0x1f.
- // The shift distance actually used is therefore
- // always in the range 0 to 31, inclusive."
- mask = 0;
- }
- index = 32 - index;
- mask |= 0xffffff00 << index;
- return (weight & mask) | (b << index);
- }
-
- /**
- * Call getWeightRanges and then determine heuristically which ranges to use
- * for a given number of weights between (excluding) two limits
- *
- * @param lowerLimit
- * @param upperLimit
- * @param n
- * @param maxByte
- * @param ranges
- * @return
- */
- private int allocateWeights(int lowerLimit, int upperLimit, int n,
- int maxByte, WeightRange ranges[]) {
- // number of usable byte values 3..maxByte
- int countBytes = maxByte - RuleBasedCollator.BYTE_FIRST_TAILORED_ + 1;
- // [0] unused, [5] to make index checks unnecessary, m_utilCountBuffer_
- // countBytes to the power of index, m_utilLongBuffer_ for unsignedness
- // gcc requires explicit initialization
- m_utilLongBuffer_[0] = 1;
- m_utilLongBuffer_[1] = countBytes;
- m_utilLongBuffer_[2] = m_utilLongBuffer_[1] * countBytes;
- m_utilLongBuffer_[3] = m_utilLongBuffer_[2] * countBytes;
- m_utilLongBuffer_[4] = m_utilLongBuffer_[3] * countBytes;
- int rangeCount = getWeightRanges(lowerLimit, upperLimit, maxByte,
- countBytes, ranges);
- if (rangeCount <= 0) {
- return 0;
- }
- // what is the maximum number of weights with these ranges?
- long maxCount = 0;
- for (int i = 0; i < rangeCount; ++i) {
- maxCount += (long) ranges[i].m_count_
- * m_utilLongBuffer_[4 - ranges[i].m_length_];
- }
- if (maxCount < n) {
- return 0;
- }
- // set the length2 and count2 fields
- for (int i = 0; i < rangeCount; ++i) {
- ranges[i].m_length2_ = ranges[i].m_length_;
- ranges[i].m_count2_ = ranges[i].m_count_;
- }
- // try until we find suitably large ranges
- while (true) {
- // get the smallest number of bytes in a range
- int minLength = ranges[0].m_length2_;
- // sum up the number of elements that fit into ranges of each byte
- // length
- Arrays.fill(m_utilCountBuffer_, 0);
- for (int i = 0; i < rangeCount; ++i) {
- m_utilCountBuffer_[ranges[i].m_length2_] += ranges[i].m_count2_;
- }
- // now try to allocate n elements in the available short ranges
- if (n <= m_utilCountBuffer_[minLength]
- + m_utilCountBuffer_[minLength + 1]) {
- // trivial cases, use the first few ranges
- maxCount = 0;
- rangeCount = 0;
- do {
- maxCount += ranges[rangeCount].m_count2_;
- ++rangeCount;
- } while (n > maxCount);
- break;
- } else if (n <= ranges[0].m_count2_ * countBytes) {
- // easy case, just make this one range large enough by
- // lengthening it once more, possibly split it
- rangeCount = 1;
- // calculate how to split the range between maxLength-1
- // (count1) and maxLength (count2)
- long power_1 = m_utilLongBuffer_[minLength
- - ranges[0].m_length_];
- long power = power_1 * countBytes;
- int count2 = (int) ((n + power - 1) / power);
- int count1 = ranges[0].m_count_ - count2;
- // split the range
- if (count1 < 1) {
- // lengthen the entire range to maxLength
- lengthenRange(ranges, 0, maxByte, countBytes);
- } else {
- // really split the range
- // create a new range with the end and initial and current
- // length of the old one
- rangeCount = 2;
- ranges[1].m_end_ = ranges[0].m_end_;
- ranges[1].m_length_ = ranges[0].m_length_;
- ranges[1].m_length2_ = minLength;
- // set the end of the first range according to count1
- int i = ranges[0].m_length_;
- int b = getWeightByte(ranges[0].m_start_, i) + count1 - 1;
- // ranges[0].count and count1 may be >countBytes from
- // merging adjacent ranges; b > maxByte is possible
- if (b <= maxByte) {
- ranges[0].m_end_ = setWeightByte(ranges[0].m_start_, i,
- b);
- } else {
- ranges[0].m_end_ = setWeightByte(incWeight(
- ranges[0].m_start_, i - 1, maxByte), i, b
- - countBytes);
- }
- // set the bytes in the end weight at length + 1..length2
- // to maxByte
- b = (maxByte << 24) | (maxByte << 16) | (maxByte << 8)
- | maxByte; // this used to be 0xffffffff
- ranges[0].m_end_ = truncateWeight(ranges[0].m_end_, i)
- | (b >>> (i << 3)) & (b << ((4 - minLength) << 3));
- // set the start of the second range to immediately follow
- // the end of the first one
- ranges[1].m_start_ = incWeight(ranges[0].m_end_, minLength,
- maxByte);
- // set the count values (informational)
- ranges[0].m_count_ = count1;
- ranges[1].m_count_ = count2;
-
- ranges[0].m_count2_ = (int) (count1 * power_1);
- // will be *countBytes when lengthened
- ranges[1].m_count2_ = (int) (count2 * power_1);
-
- // lengthen the second range to maxLength
- lengthenRange(ranges, 1, maxByte, countBytes);
- }
- break;
- }
- // no good match, lengthen all minLength ranges and iterate
- for (int i = 0; ranges[i].m_length2_ == minLength; ++i) {
- lengthenRange(ranges, i, maxByte, countBytes);
- }
- }
-
- if (rangeCount > 1) {
- // sort the ranges by weight values
- Arrays.sort(ranges, 0, rangeCount);
- }
-
- // set maxByte in ranges[0] for ucol_nextWeight()
- ranges[0].m_count_ = maxByte;
-
- return rangeCount;
- }
-
- /**
- * Updates the range length
- *
- * @param range
- * weight range array
- * @param offset
- * to weight range array
- * @param maxByte
- * @param countBytes
- * @return new length
- */
- private static final int lengthenRange(WeightRange range[], int offset,
- int maxByte, int countBytes) {
- int length = range[offset].m_length2_ + 1;
- range[offset].m_start_ = setWeightTrail(range[offset].m_start_, length,
- RuleBasedCollator.BYTE_FIRST_TAILORED_);
- range[offset].m_end_ = setWeightTrail(range[offset].m_end_, length,
- maxByte);
- range[offset].m_count2_ *= countBytes;
- range[offset].m_length2_ = length;
- return length;
- }
-
- /**
- * Gets the weight
- *
- * @param weight
- * @param length
- * @param trail
- * @return new weight
- */
- private static final int setWeightTrail(int weight, int length, int trail) {
- length = (4 - length) << 3;
- return (weight & (0xffffff00 << length)) | (trail << length);
- }
-
- /**
- * take two CE weights and calculate the possible ranges of weights between
- * the two limits, excluding them for weights with up to 4 bytes there are
- * up to 2*4-1=7 ranges
- *
- * @param lowerLimit
- * @param upperLimit
- * @param maxByte
- * @param countBytes
- * @param ranges
- * @return weight ranges
- */
- private int getWeightRanges(int lowerLimit, int upperLimit, int maxByte,
- int countBytes, WeightRange ranges[]) {
- // assume that both lowerLimit & upperLimit are not 0
- // get the lengths of the limits
- int lowerLength = lengthOfWeight(lowerLimit);
- int upperLength = lengthOfWeight(upperLimit);
- if (Utility.compareUnsigned(lowerLimit, upperLimit) >= 0) {
- return 0;
- }
- // check that neither is a prefix of the other
- if (lowerLength < upperLength) {
- if (lowerLimit == truncateWeight(upperLimit, lowerLength)) {
- return 0;
- }
- }
- // if the upper limit is a prefix of the lower limit then the earlier
- // test lowerLimit >= upperLimit has caught it
- // reset local variables
- // With the limit lengths of 1..4, there are up to 7 ranges for
- // allocation:
- // range minimum length
- // lower[4] 4
- // lower[3] 3
- // lower[2] 2
- // middle 1
- // upper[2] 2
- // upper[3] 3
- // upper[4] 4
- // We are now going to calculate up to 7 ranges.
- // Some of them will typically overlap, so we will then have to merge
- // and eliminate ranges.
-
- // We have to clean cruft from previous invocations
- // before doing anything. C++ already does that
- for (int length = 0; length < 5; length++) {
- m_utilLowerWeightRange_[length].clear();
- m_utilUpperWeightRange_[length].clear();
- }
- m_utilWeightRange_.clear();
-
- int weight = lowerLimit;
- for (int length = lowerLength; length >= 2; --length) {
- m_utilLowerWeightRange_[length].clear();
- int trail = getWeightByte(weight, length);
- if (trail < maxByte) {
- m_utilLowerWeightRange_[length].m_start_ = incWeightTrail(
- weight, length);
- m_utilLowerWeightRange_[length].m_end_ = setWeightTrail(weight,
- length, maxByte);
- m_utilLowerWeightRange_[length].m_length_ = length;
- m_utilLowerWeightRange_[length].m_count_ = maxByte - trail;
- }
- weight = truncateWeight(weight, length - 1);
- }
- m_utilWeightRange_.m_start_ = incWeightTrail(weight, 1);
-
- weight = upperLimit;
- // [0] and [1] are not used - this simplifies indexing,
- // m_utilUpperWeightRange_
-
- for (int length = upperLength; length >= 2; length--) {
- int trail = getWeightByte(weight, length);
- if (trail > RuleBasedCollator.BYTE_FIRST_TAILORED_) {
- m_utilUpperWeightRange_[length].m_start_ = setWeightTrail(
- weight, length, RuleBasedCollator.BYTE_FIRST_TAILORED_);
- m_utilUpperWeightRange_[length].m_end_ = decWeightTrail(weight,
- length);
- m_utilUpperWeightRange_[length].m_length_ = length;
- m_utilUpperWeightRange_[length].m_count_ = trail
- - RuleBasedCollator.BYTE_FIRST_TAILORED_;
- }
- weight = truncateWeight(weight, length - 1);
- }
- m_utilWeightRange_.m_end_ = decWeightTrail(weight, 1);
-
- // set the middle range
- m_utilWeightRange_.m_length_ = 1;
- if (Utility.compareUnsigned(m_utilWeightRange_.m_end_,
- m_utilWeightRange_.m_start_) >= 0) {
- // if (m_utilWeightRange_.m_end_ >= m_utilWeightRange_.m_start_) {
- m_utilWeightRange_.m_count_ = ((m_utilWeightRange_.m_end_ - m_utilWeightRange_.m_start_) >>> 24) + 1;
- } else {
- // eliminate overlaps
- // remove the middle range
- m_utilWeightRange_.m_count_ = 0;
- // reduce or remove the lower ranges that go beyond upperLimit
- for (int length = 4; length >= 2; --length) {
- if (m_utilLowerWeightRange_[length].m_count_ > 0
- && m_utilUpperWeightRange_[length].m_count_ > 0) {
- int start = m_utilUpperWeightRange_[length].m_start_;
- int end = m_utilLowerWeightRange_[length].m_end_;
- if (end >= start
- || incWeight(end, length, maxByte) == start) {
- // lower and upper ranges collide or are directly
- // adjacent: merge these two and remove all shorter
- // ranges
- start = m_utilLowerWeightRange_[length].m_start_;
- end = m_utilLowerWeightRange_[length].m_end_ = m_utilUpperWeightRange_[length].m_end_;
- // merging directly adjacent ranges needs to subtract
- // the 0/1 gaps in between;
- // it may result in a range with count>countBytes
- m_utilLowerWeightRange_[length].m_count_ = getWeightByte(
- end, length)
- - getWeightByte(start, length)
- + 1
- + countBytes
- * (getWeightByte(end, length - 1) - getWeightByte(
- start, length - 1));
- m_utilUpperWeightRange_[length].m_count_ = 0;
- while (--length >= 2) {
- m_utilLowerWeightRange_[length].m_count_ = m_utilUpperWeightRange_[length].m_count_ = 0;
- }
- break;
- }
- }
- }
- }
-
- // copy the ranges, shortest first, into the result array
- int rangeCount = 0;
- if (m_utilWeightRange_.m_count_ > 0) {
- ranges[0] = new WeightRange(m_utilWeightRange_);
- rangeCount = 1;
- }
- for (int length = 2; length <= 4; ++length) {
- // copy upper first so that later the middle range is more likely
- // the first one to use
- if (m_utilUpperWeightRange_[length].m_count_ > 0) {
- ranges[rangeCount] = new WeightRange(
- m_utilUpperWeightRange_[length]);
- ++rangeCount;
- }
- if (m_utilLowerWeightRange_[length].m_count_ > 0) {
- ranges[rangeCount] = new WeightRange(
- m_utilLowerWeightRange_[length]);
- ++rangeCount;
- }
- }
- return rangeCount;
- }
-
- /**
- * Truncates the weight with length
- *
- * @param weight
- * @param length
- * @return truncated weight
- */
- private static final int truncateWeight(int weight, int length) {
- return weight & (0xffffffff << ((4 - length) << 3));
- }
-
- /**
- * Length of the weight
- *
- * @param weight
- * @return length of the weight
- */
- private static final int lengthOfWeight(int weight) {
- if ((weight & 0xffffff) == 0) {
- return 1;
- } else if ((weight & 0xffff) == 0) {
- return 2;
- } else if ((weight & 0xff) == 0) {
- return 3;
- }
- return 4;
- }
-
- /**
- * Increment the weight trail
- *
- * @param weight
- * @param length
- * @return new weight
- */
- private static final int incWeightTrail(int weight, int length) {
- return weight + (1 << ((4 - length) << 3));
- }
-
- /**
- * Decrement the weight trail
- *
- * @param weight
- * @param length
- * @return new weight
- */
- private static int decWeightTrail(int weight, int length) {
- return weight - (1 << ((4 - length) << 3));
- }
-
- /**
- * Gets the codepoint
- *
- * @param tbl
- * contraction table
- * @param codePoint
- * code point to look for
- * @return the offset to the code point
- */
- private static int findCP(BasicContractionTable tbl, char codePoint) {
- int position = 0;
- while (codePoint > tbl.m_codePoints_.charAt(position)) {
- position++;
- if (position > tbl.m_codePoints_.length()) {
- return -1;
- }
- }
- if (codePoint == tbl.m_codePoints_.charAt(position)) {
- return position;
- } else {
- return -1;
- }
- }
-
- /**
- * Finds a contraction ce
- *
- * @param table
- * @param element
- * @param ch
- * @return ce
- */
- private static int findCE(ContractionTable table, int element, char ch) {
- if (table == null) {
- return CE_NOT_FOUND_;
- }
- BasicContractionTable tbl = getBasicContractionTable(table, element);
- if (tbl == null) {
- return CE_NOT_FOUND_;
- }
- int position = findCP(tbl, ch);
- if (position > tbl.m_CEs_.size() || position < 0) {
- return CE_NOT_FOUND_;
- }
- return tbl.m_CEs_.get(position).intValue();
- }
-
- /**
- * Checks if the string is tailored in the contraction
- *
- * @param table
- * contraction table
- * @param element
- * @param array
- * character array to check
- * @param offset
- * array offset
- * @return true if it is tailored
- */
- private static boolean isTailored(ContractionTable table, int element,
- char array[], int offset) {
- while (array[offset] != 0) {
- element = findCE(table, element, array[offset]);
- if (element == CE_NOT_FOUND_) {
- return false;
- }
- if (!isContractionTableElement(element)) {
- return true;
- }
- offset++;
- }
- if (getCE(table, element, 0) != CE_NOT_FOUND_) {
- return true;
- } else {
- return false;
- }
- }
-
- /**
- * Assemble RuleBasedCollator
- *
- * @param t
- * build table
- * @param collator
- * to update
- */
- private void assembleTable(BuildTable t, RuleBasedCollator collator) {
- IntTrieBuilder mapping = t.m_mapping_;
- List<Integer> expansions = t.m_expansions_;
- ContractionTable contractions = t.m_contractions_;
- MaxExpansionTable maxexpansion = t.m_maxExpansions_;
-
- // contraction offset has to be in since we are building on the
- // UCA contractions
- // int beforeContractions = (HEADER_SIZE_
- // + paddedsize(expansions.size() << 2)) >>> 1;
- collator.m_contractionOffset_ = 0;
- int contractionsSize = constructTable(contractions);
-
- // the following operation depends on the trie data. Therefore, we have
- // to do it before the trie is compacted
- // sets jamo expansions
- getMaxExpansionJamo(mapping, maxexpansion, t.m_maxJamoExpansions_,
- collator.m_isJamoSpecial_);
-
- // TODO: LATIN1 array is now in the utrie - it should be removed from
- // the calculation
- setAttributes(collator, t.m_options_);
- // copy expansions
- int size = expansions.size();
- collator.m_expansion_ = new int[size];
- for (int i = 0; i < size; i++) {
- collator.m_expansion_[i] = expansions.get(i).intValue();
- }
- // contractions block
- if (contractionsSize != 0) {
- // copy contraction index
- collator.m_contractionIndex_ = new char[contractionsSize];
- contractions.m_codePoints_.getChars(0, contractionsSize,
- collator.m_contractionIndex_, 0);
- // copy contraction collation elements
- collator.m_contractionCE_ = new int[contractionsSize];
- for (int i = 0; i < contractionsSize; i++) {
- collator.m_contractionCE_[i] = contractions.m_CEs_.get(i).intValue();
- }
- }
- // copy mapping table
- collator.m_trie_ = mapping.serialize(t,
- RuleBasedCollator.DataManipulate.getInstance());
- // copy max expansion table
- // not copying the first element which is a dummy
- // to be in synch with icu4c's builder, we continue to use the
- // expansion offset
- // omitting expansion offset in builder
- collator.m_expansionOffset_ = 0;
- size = maxexpansion.m_endExpansionCE_.size();
- collator.m_expansionEndCE_ = new int[size - 1];
- for (int i = 1; i < size; i++) {
- collator.m_expansionEndCE_[i - 1] = maxexpansion.m_endExpansionCE_
- .get(i).intValue();
- }
- collator.m_expansionEndCEMaxSize_ = new byte[size - 1];
- for (int i = 1; i < size; i++) {
- collator.m_expansionEndCEMaxSize_[i - 1] = maxexpansion.m_expansionCESize_
- .get(i).byteValue();
- }
- // Unsafe chars table. Finish it off, then copy it.
- unsafeCPAddCCNZ(t);
- // Or in unsafebits from UCA, making a combined table.
- for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) {
- t.m_unsafeCP_[i] |= RuleBasedCollator.UCA_.m_unsafe_[i];
- }
- collator.m_unsafe_ = t.m_unsafeCP_;
-
- // Finish building Contraction Ending chars hash table and then copy it
- // out.
- // Or in unsafebits from UCA, making a combined table
- for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) {
- t.m_contrEndCP_[i] |= RuleBasedCollator.UCA_.m_contractionEnd_[i];
- }
- collator.m_contractionEnd_ = t.m_contrEndCP_;
- }
-
- /**
- * Sets this collator to use the all options and tables in UCA.
- *
- * @param collator
- * which attribute is to be set
- * @param option
- * to set with
- */
- private static final void setAttributes(RuleBasedCollator collator,
- CollationRuleParser.OptionSet option) {
- collator.latinOneFailed_ = true;
- collator.m_caseFirst_ = option.m_caseFirst_;
- collator.setDecomposition(option.m_decomposition_);
- collator
- .setAlternateHandlingShifted(option.m_isAlternateHandlingShifted_);
- collator.setCaseLevel(option.m_isCaseLevel_);
- collator.setFrenchCollation(option.m_isFrenchCollation_);
- collator.m_isHiragana4_ = option.m_isHiragana4_;
- collator.setStrength(option.m_strength_);
- collator.m_variableTopValue_ = option.m_variableTopValue_;
- collator.m_reorderCodes_ = option.m_scriptOrder_;
- collator.latinOneFailed_ = false;
- }
-
- /**
- * Constructing the contraction table
- *
- * @param table
- * contraction table
- * @return
- */
- private int constructTable(ContractionTable table) {
- // See how much memory we need
- int tsize = table.m_elements_.size();
- if (tsize == 0) {
- return 0;
- }
- table.m_offsets_.clear();
- int position = 0;
- for (int i = 0; i < tsize; i++) {
- table.m_offsets_.add(Integer.valueOf(position));
- position += table.m_elements_.get(i).m_CEs_
- .size();
- }
- table.m_CEs_.clear();
- table.m_codePoints_.delete(0, table.m_codePoints_.length());
- // Now stuff the things in
- StringBuilder cpPointer = table.m_codePoints_;
- List<Integer> CEPointer = table.m_CEs_;
- for (int i = 0; i < tsize; i++) {
- BasicContractionTable bct = table.m_elements_.get(i);
- int size = bct.m_CEs_.size();
- char ccMax = 0;
- char ccMin = 255;
- int offset = CEPointer.size();
- CEPointer.add(bct.m_CEs_.get(0));
- for (int j = 1; j < size; j++) {
- char ch = bct.m_codePoints_.charAt(j);
- char cc = (char) (UCharacter.getCombiningClass(ch) & 0xFF);
- if (cc > ccMax) {
- ccMax = cc;
- }
- if (cc < ccMin) {
- ccMin = cc;
- }
- cpPointer.append(ch);
- CEPointer.add(bct.m_CEs_.get(j));
- }
- cpPointer.insert(offset,
- (char) (((ccMin == ccMax) ? 1 : 0 << 8) | ccMax));
- for (int j = 0; j < size; j++) {
- if (isContractionTableElement(CEPointer.get(offset + j).intValue())) {
- int ce = CEPointer.get(offset + j).intValue();
- CEPointer.set(offset + j,
- Integer.valueOf(constructSpecialCE(getCETag(ce),
- table.m_offsets_.get(getContractionOffset(ce))
- .intValue())));
- }
- }
- }
-
- for (int i = 0; i <= 0x10FFFF; i++) {
- int CE = table.m_mapping_.getValue(i);
- if (isContractionTableElement(CE)) {
- CE = constructSpecialCE(getCETag(CE),
- table.m_offsets_.get(getContractionOffset(CE)).intValue());
- table.m_mapping_.setValue(i, CE);
- }
- }
- return position;
- }
-
- /**
- * Get contraction offset
- *
- * @param ce
- * collation element
- * @return contraction offset
- */
- private static final int getContractionOffset(int ce) {
- return ce & 0xFFFFFF;
- }
-
- /**
- * Gets the maximum Jamo expansion
- *
- * @param mapping
- * trie table
- * @param maxexpansion
- * maximum expansion table
- * @param maxjamoexpansion
- * maximum jamo expansion table
- * @param jamospecial
- * is jamo special?
- */
- private static void getMaxExpansionJamo(IntTrieBuilder mapping,
- MaxExpansionTable maxexpansion,
- MaxJamoExpansionTable maxjamoexpansion, boolean jamospecial) {
- int VBASE = 0x1161;
- int TBASE = 0x11A8;
- int VCOUNT = 21;
- int TCOUNT = 28;
- int v = VBASE + VCOUNT - 1;
- int t = TBASE + TCOUNT - 1;
-
- while (v >= VBASE) {
- int ce = mapping.getValue(v);
- if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) {
- setMaxExpansion(ce, (byte) 2, maxexpansion);
- }
- v--;
- }
-
- while (t >= TBASE) {
- int ce = mapping.getValue(t);
- if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) {
- setMaxExpansion(ce, (byte) 3, maxexpansion);
- }
- t--;
- }
- // According to the docs, 99% of the time, the Jamo will not be special
- if (jamospecial) {
- // gets the max expansion in all unicode characters
- int count = maxjamoexpansion.m_endExpansionCE_.size();
- byte maxTSize = (byte) (maxjamoexpansion.m_maxLSize_
- + maxjamoexpansion.m_maxVSize_ + maxjamoexpansion.m_maxTSize_);
- byte maxVSize = (byte) (maxjamoexpansion.m_maxLSize_ + maxjamoexpansion.m_maxVSize_);
-
- while (count > 0) {
- count--;
- if ((maxjamoexpansion.m_isV_.get(count))
- .booleanValue() == true) {
- setMaxExpansion(
- (maxjamoexpansion.m_endExpansionCE_
- .get(count)).intValue(), maxVSize,
- maxexpansion);
- } else {
- setMaxExpansion(
- (maxjamoexpansion.m_endExpansionCE_
- .get(count)).intValue(), maxTSize,
- maxexpansion);
- }
- }
- }
- }
-
- /**
- * To the UnsafeCP hash table, add all chars with combining class != 0
- *
- * @param t
- * build table
- */
- private final void unsafeCPAddCCNZ(BuildTable t) {
- boolean buildCMTable = (buildCMTabFlag & (t.cmLookup == null));
- char[] cm = null; // combining mark array
- int[] index = new int[256];
- int count = 0;
-
- if (buildCMTable) {
- cm = new char[0x10000];
- }
- for (char c = 0; c < 0xffff; c++) {
- int fcd;
- if (UTF16.isLeadSurrogate(c)) {
- fcd = 0;
- if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(c)) {
- int supp = Character.toCodePoint(c, (char)0xdc00);
- int suppLimit = supp + 0x400;
- while (supp < suppLimit) {
- fcd |= m_nfcImpl_.getFCD16FromNormData(supp++);
- }
- }
- } else {
- fcd = m_nfcImpl_.getFCD16(c);
- }
- // TODO: review for handling supplementary characters
- if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
- (UTF16.isLeadSurrogate(c) && fcd != 0)) {
- // c is a leading surrogate with some FCD data
- unsafeCPSet(t.m_unsafeCP_, c);
- if (buildCMTable) {
- int cc = (fcd & 0xff);
- int pos = (cc << 8) + index[cc];
- cm[pos] = c;
- index[cc]++;
- count++;
- }
- }
- }
-
- if (t.m_prefixLookup_ != null) {
- Enumeration<Elements> els = Collections.enumeration(t.m_prefixLookup_.values());
- while (els.hasMoreElements()) {
- Elements e = els.nextElement();
- // codepoints here are in the NFD form. We need to add the
- // first code point of the NFC form to unsafe, because
- // strcoll needs to backup over them.
- // weiv: This is wrong! See the comment above.
- // String decomp = Normalizer.decompose(e.m_cPoints_, true);
- // unsafeCPSet(t.m_unsafeCP_, decomp.charAt(0));
- // it should be:
- String comp = Normalizer.compose(e.m_cPoints_, false);
- unsafeCPSet(t.m_unsafeCP_, comp.charAt(0));
- }
- }
-
- if (buildCMTable) {
- t.cmLookup = new CombinClassTable();
- t.cmLookup.generate(cm, count, index);
- }
- }
-
- /**
- * Create closure
- *
- * @param t
- * build table
- * @param collator
- * RuleBasedCollator
- * @param colEl
- * collation element iterator
- * @param start
- * @param limit
- * @param type
- * character type
- * @return
- */
- private boolean enumCategoryRangeClosureCategory(BuildTable t,
- RuleBasedCollator collator, CollationElementIterator colEl,
- int start, int limit, int type) {
- if (type != UCharacterCategory.UNASSIGNED
- && type != UCharacterCategory.PRIVATE_USE) {
- // if the range is assigned - we might ommit more categories later
-
- for (int u32 = start; u32 < limit; u32++) {
- String decomp = m_nfcImpl_.getDecomposition(u32);
- if (decomp != null) {
- String comp = UCharacter.toString(u32);
- if (!collator.equals(comp, decomp)) {
- m_utilElement_.m_cPoints_ = decomp;
- m_utilElement_.m_prefix_ = 0;
- Elements prefix = t.m_prefixLookup_.get(m_utilElement_);
- if (prefix == null) {
- m_utilElement_.m_cPoints_ = comp;
- m_utilElement_.m_prefix_ = 0;
- m_utilElement_.m_prefixChars_ = null;
- colEl.setText(decomp);
- int ce = colEl.next();
- m_utilElement_.m_CELength_ = 0;
- while (ce != CollationElementIterator.NULLORDER) {
- m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = ce;
- ce = colEl.next();
- }
- } else {
- m_utilElement_.m_cPoints_ = comp;
- m_utilElement_.m_prefix_ = 0;
- m_utilElement_.m_prefixChars_ = null;
- m_utilElement_.m_CELength_ = 1;
- m_utilElement_.m_CEs_[0] = prefix.m_mapCE_;
- // This character uses a prefix. We have to add it
- // to the unsafe table, as it decomposed form is
- // already in. In Japanese, this happens for \u309e
- // & \u30fe
- // Since unsafeCPSet is static in ucol_elm, we are
- // going to wrap it up in the unsafeCPAddCCNZ
- // function
- }
- addAnElement(t, m_utilElement_);
- }
- }
- }
- }
- return true;
- }
-
- /**
- * Determine if a character is a Jamo
- *
- * @param ch
- * character to test
- * @return true if ch is a Jamo, false otherwise
- */
- private static final boolean isJamo(char ch) {
- return (ch >= 0x1100 && ch <= 0x1112) || (ch >= 0x1175 && ch <= 0x1161)
- || (ch >= 0x11A8 && ch <= 0x11C2);
- }
-
- /**
- * Produces canonical closure
- */
- private void canonicalClosure(BuildTable t) {
- BuildTable temp = new BuildTable(t);
- assembleTable(temp, temp.m_collator_);
- // produce canonical closure
- CollationElementIterator coleiter = temp.m_collator_
- .getCollationElementIterator("");
- RangeValueIterator typeiter = UCharacter.getTypeIterator();
- RangeValueIterator.Element element = new RangeValueIterator.Element();
- while (typeiter.next(element)) {
- enumCategoryRangeClosureCategory(t, temp.m_collator_, coleiter,
- element.start, element.limit, element.value);
- }
-
- t.cmLookup = temp.cmLookup;
- temp.cmLookup = null;
-
- for (int i = 0; i < m_parser_.m_resultLength_; i++) {
- char baseChar, firstCM;
- // now we need to generate the CEs
- // We stuff the initial value in the buffers, and increase the
- // appropriate buffer according to strength */
- // createElements(t, m_parser_.m_listHeader_[i]);
- CollationRuleParser.Token tok = m_parser_.m_listHeader_[i].m_first_;
- m_utilElement_.clear();
- while (tok != null) {
- m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
- m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_;
- if (tok.m_prefix_ != 0) {
- // we will just copy the prefix here, and adjust accordingly
- // in
- // the addPrefix function in ucol_elm. The reason is that we
- // need to add both composed AND decomposed elements to the
- // unsafe table.
- int size = tok.m_prefix_ >> 24;
- int offset = tok.m_prefix_ & 0x00FFFFFF;
- m_utilElement_.m_prefixChars_ = m_parser_.m_source_
- .substring(offset, offset + size);
- size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24);
- offset = (tok.m_source_ & 0x00FFFFFF)
- + (tok.m_prefix_ >> 24);
- m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
- offset, offset + size);
- } else {
- m_utilElement_.m_prefixChars_ = null;
- int offset = tok.m_source_ & 0x00FFFFFF;
- int size = tok.m_source_ >>> 24;
- m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
- offset, offset + size);
- }
- m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
-
- baseChar = firstCM = 0; // reset
- for (int j = 0; j < m_utilElement_.m_cPoints_.length()
- - m_utilElement_.m_cPointsOffset_; j++) {
-
- int fcd = m_nfcImpl_.getFCD16(m_utilElement_.m_cPoints_.charAt(j)); // TODO: review for handling supplementary characters
- if ((fcd & 0xff) == 0) {
- baseChar = m_utilElement_.m_cPoints_.charAt(j);
- } else {
- if ((baseChar != 0) && (firstCM == 0)) {
- firstCM = m_utilElement_.m_cPoints_.charAt(j); // first
- // combining
- // mark
- }
- }
- }
-
- if ((baseChar != 0) && (firstCM != 0)) {
- addTailCanonicalClosures(t, temp.m_collator_, coleiter,
- baseChar, firstCM);
- }
- tok = tok.m_next_;
- }
- }
- }
-
- private void addTailCanonicalClosures(BuildTable t,
- RuleBasedCollator m_collator, CollationElementIterator colEl,
- char baseChar, char cMark) {
- if (t.cmLookup == null) {
- return;
- }
- CombinClassTable cmLookup = t.cmLookup;
- int[] index = cmLookup.index;
- int cClass = m_nfcImpl_.getFCD16(cMark) & 0xff; // TODO: review for handling supplementary characters
- int maxIndex = 0;
- char[] precompCh = new char[256];
- int[] precompClass = new int[256];
- int precompLen = 0;
- Elements element = new Elements();
-
- if (cClass > 0) {
- maxIndex = index[cClass - 1];
- }
- for (int i = 0; i < maxIndex; i++) {
- StringBuilder decompBuf = new StringBuilder();
- decompBuf.append(baseChar).append(cmLookup.cPoints[i]);
- String comp = Normalizer.compose(decompBuf.toString(), false);
- if (comp.length() == 1) {
- precompCh[precompLen] = comp.charAt(0);
- precompClass[precompLen] = m_nfcImpl_.getFCD16(cmLookup.cPoints[i]) & 0xff; // TODO: review for handling supplementary characters
- precompLen++;
- StringBuilder decomp = new StringBuilder();
- for (int j = 0; j < m_utilElement_.m_cPoints_.length(); j++) {
- if (m_utilElement_.m_cPoints_.charAt(j) == cMark) {
- decomp.append(cmLookup.cPoints[i]);
- } else {
- decomp.append(m_utilElement_.m_cPoints_.charAt(j));
- }
- }
- comp = Normalizer.compose(decomp.toString(), false);
- StringBuilder buf = new StringBuilder(comp);
- buf.append(cMark);
- decomp.append(cMark);
- comp = buf.toString();
-
- element.m_cPoints_ = decomp.toString();
- element.m_CELength_ = 0;
- element.m_prefix_ = 0;
- Elements prefix = t.m_prefixLookup_.get(element);
- element.m_cPoints_ = comp;
- element.m_uchars_ = comp;
-
- if (prefix == null) {
- element.m_prefix_ = 0;
- element.m_prefixChars_ = null;
- colEl.setText(decomp.toString());
- int ce = colEl.next();
- element.m_CELength_ = 0;
- while (ce != CollationElementIterator.NULLORDER) {
- element.m_CEs_[element.m_CELength_++] = ce;
- ce = colEl.next();
- }
- } else {
- element.m_cPoints_ = comp;
- element.m_prefix_ = 0;
- element.m_prefixChars_ = null;
- element.m_CELength_ = 1;
- element.m_CEs_[0] = prefix.m_mapCE_;
- }
- setMapCE(t, element);
- finalizeAddition(t, element);
-
- if (comp.length() > 2) {
- // This is a fix for tailoring contractions with accented
- // character at the end of contraction string.
- addFCD4AccentedContractions(t, colEl, comp, element);
- }
- if (precompLen > 1) {
- precompLen = addMultiCMontractions(t, colEl, element,
- precompCh, precompClass, precompLen, cMark, i,
- decomp.toString());
- }
- }
- }
-
- }
-
- private void setMapCE(BuildTable t, Elements element) {
- List<Integer> expansions = t.m_expansions_;
- element.m_mapCE_ = 0;
-
- if (element.m_CELength_ == 2 // a two CE expansion
- && RuleBasedCollator.isContinuation(element.m_CEs_[1])
- && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0 // that
- // has
- // only
- // primaries
- // in
- // continuation
- && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
- // a common secondary
- && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_)) { // and
- // a
- // common
- // tertiary
-
- element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_
- // a long primary special
- | (CE_LONG_PRIMARY_TAG_ << 24)
- // first and second byte of primary
- | ((element.m_CEs_[0] >> 8) & 0xFFFF00)
- // third byte of primary
- | ((element.m_CEs_[1] >> 24) & 0xFF);
- } else {
- // omitting expansion offset in builder
- // (HEADER_SIZE_ >> 2)
- int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
- | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
- | (addExpansion(expansions, element.m_CEs_[0]) << 4)
- & 0xFFFFF0;
-
- for (int i = 1; i < element.m_CELength_; i++) {
- addExpansion(expansions, element.m_CEs_[i]);
- }
- if (element.m_CELength_ <= 0xF) {
- expansion |= element.m_CELength_;
- } else {
- addExpansion(expansions, 0);
- }
- element.m_mapCE_ = expansion;
- setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1],
- (byte) element.m_CELength_, t.m_maxExpansions_);
- }
- }
-
- private int addMultiCMontractions(BuildTable t,
- CollationElementIterator colEl, Elements element, char[] precompCh,
- int[] precompClass, int maxComp, char cMark, int cmPos,
- String decomp) {
-
- CombinClassTable cmLookup = t.cmLookup;
- char[] combiningMarks = { cMark };
- int cMarkClass = UCharacter.getCombiningClass(cMark) & 0xFF;
- String comMark = new String(combiningMarks);
- int noOfPrecomposedChs = maxComp;
-
- for (int j = 0; j < maxComp; j++) {
- int count = 0;
- StringBuilder temp;
-
- do {
- String newDecomp, comp;
-
- if (count == 0) { // Decompose the saved precomposed char.
- newDecomp = Normalizer.decompose(
- new String(precompCh, j, 1), false);
- temp = new StringBuilder(newDecomp);
- temp.append(cmLookup.cPoints[cmPos]);
- newDecomp = temp.toString();
- } else {
- temp = new StringBuilder(decomp);
- temp.append(precompCh[j]);
- newDecomp = temp.toString();
- }
- comp = Normalizer.compose(newDecomp, false);
- if (comp.length() == 1) {
- temp.append(cMark);
- element.m_cPoints_ = temp.toString();
- element.m_CELength_ = 0;
- element.m_prefix_ = 0;
- Elements prefix = t.m_prefixLookup_.get(element);
- element.m_cPoints_ = comp + comMark;
- if (prefix == null) {
- element.m_prefix_ = 0;
- element.m_prefixChars_ = null;
- colEl.setText(temp.toString());
- int ce = colEl.next();
- element.m_CELength_ = 0;
- while (ce != CollationElementIterator.NULLORDER) {
- element.m_CEs_[element.m_CELength_++] = ce;
- ce = colEl.next();
- }
- } else {
- element.m_cPoints_ = comp;
- element.m_prefix_ = 0;
- element.m_prefixChars_ = null;
- element.m_CELength_ = 1;
- element.m_CEs_[0] = prefix.m_mapCE_;
- }
- setMapCE(t, element);
- finalizeAddition(t, element);
- precompCh[noOfPrecomposedChs] = comp.charAt(0);
- precompClass[noOfPrecomposedChs] = cMarkClass;
- noOfPrecomposedChs++;
- }
- } while (++count < 2 && (precompClass[j] == cMarkClass));
- }
- return noOfPrecomposedChs;
- }
-
- private void addFCD4AccentedContractions(BuildTable t,
- CollationElementIterator colEl, String data, Elements element) {
- String decomp = Normalizer.decompose(data, false);
- String comp = Normalizer.compose(data, false);
-
- element.m_cPoints_ = decomp;
- element.m_CELength_ = 0;
- element.m_prefix_ = 0;
- Elements prefix = t.m_prefixLookup_.get(element);
- if (prefix == null) {
- element.m_cPoints_ = comp;
- element.m_prefix_ = 0;
- element.m_prefixChars_ = null;
- element.m_CELength_ = 0;
- colEl.setText(decomp);
- int ce = colEl.next();
- element.m_CELength_ = 0;
- while (ce != CollationElementIterator.NULLORDER) {
- element.m_CEs_[element.m_CELength_++] = ce;
- ce = colEl.next();
- }
- addAnElement(t, element);
- }
- }
-
- private void processUCACompleteIgnorables(BuildTable t) {
- TrieIterator trieiterator = new TrieIterator(
- RuleBasedCollator.UCA_.m_trie_);
- RangeValueIterator.Element element = new RangeValueIterator.Element();
- while (trieiterator.next(element)) {
- int start = element.start;
- int limit = element.limit;
- if (element.value == 0) {
- while (start < limit) {
- int CE = t.m_mapping_.getValue(start);
- if (CE == CE_NOT_FOUND_) {
- m_utilElement_.m_prefix_ = 0;
- m_utilElement_.m_uchars_ = UCharacter.toString(start);
- m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
- m_utilElement_.m_cPointsOffset_ = 0;
- m_utilElement_.m_CELength_ = 1;
- m_utilElement_.m_CEs_[0] = 0;
- addAnElement(t, m_utilElement_);
- }
- start++;
- }
- }
- }
- }
-}
+++ /dev/null
-/**
-*******************************************************************************
-* Copyright (C) 1996-2011, International Business Machines Corporation and *
-* others. All Rights Reserved. *
-*******************************************************************************
-*/
-package com.ibm.icu.text;
-
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.PatternProps;
-import com.ibm.icu.lang.UCharacter;
-import com.ibm.icu.lang.UProperty;
-import com.ibm.icu.lang.UScript;
-import com.ibm.icu.text.Collator.ReorderCodes;
-import com.ibm.icu.util.ULocale;
-import com.ibm.icu.util.UResourceBundle;
-
-/**
-* Class for parsing collation rules, produces a list of tokens that will be
-* turned into collation elements
-* @author Syn Wee Quek
-* @since release 2.2, June 7 2002
-*/
-final class CollationRuleParser
-{
- // public data members ---------------------------------------------------
-
- // package private constructors ------------------------------------------
-
- /**
- * <p>RuleBasedCollator constructor that takes the rules.
- * Please see RuleBasedCollator class description for more details on the
- * collation rule syntax.</p>
- * @see java.util.Locale
- * @param rules the collation rules to build the collation table from.
- * @exception ParseException thrown when argument rules have an invalid
- * syntax.
- */
- CollationRuleParser(String rules) throws ParseException
- {
- // Prepares m_copySet_ and m_removeSet_.
- rules = preprocessRules(rules);
-
- // Save the rules as a long string. The StringBuilder object is
- // used to store the result of token parsing as well.
- m_source_ = new StringBuilder(Normalizer.decompose(rules, false).trim());
- m_rules_ = m_source_.toString();
-
- // Index of the next unparsed character.
- m_current_ = 0;
-
- // Index of the next unwritten character in the parsed result.
- m_extraCurrent_ = m_source_.length();
-
- m_variableTop_ = null;
- m_parsedToken_ = new ParsedToken();
- m_hashTable_ = new HashMap<Token, Token>();
- m_options_ = new OptionSet(RuleBasedCollator.UCA_);
- m_listHeader_ = new TokenListHeader[512];
- m_resultLength_ = 0;
- // call assembleTokenList() manually, so that we can
- // init a parser and manually parse tokens
- //assembleTokenList();
- }
-
- // package private inner classes -----------------------------------------
-
- /**
- * Collation options set
- */
- static class OptionSet
- {
- // package private constructor ---------------------------------------
-
- /**
- * Initializes the option set with the argument collators
- * @param collator option to use
- */
- OptionSet(RuleBasedCollator collator)
- {
- m_variableTopValue_ = collator.m_variableTopValue_;
- m_isFrenchCollation_ = collator.isFrenchCollation();
- m_isAlternateHandlingShifted_
- = collator.isAlternateHandlingShifted();
- m_caseFirst_ = collator.m_caseFirst_;
- m_isCaseLevel_ = collator.isCaseLevel();
- m_decomposition_ = collator.getDecomposition();
- m_strength_ = collator.getStrength();
- m_isHiragana4_ = collator.m_isHiragana4_;
-
- if(collator.m_reorderCodes_ != null){
- m_scriptOrder_ = new int[collator.m_reorderCodes_.length];
- for(int i = 0; i < m_scriptOrder_.length; i++){
- m_scriptOrder_[i] = collator.m_reorderCodes_[i];
- }
- }
-
- }
-
- // package private data members --------------------------------------
-
- int m_variableTopValue_;
- boolean m_isFrenchCollation_;
- /**
- * Attribute for handling variable elements
- */
- boolean m_isAlternateHandlingShifted_;
- /**
- * who goes first, lower case or uppercase
- */
- int m_caseFirst_;
- /**
- * do we have an extra case level
- */
- boolean m_isCaseLevel_;
- /**
- * attribute for normalization
- */
- int m_decomposition_;
- /**
- * attribute for strength
- */
- int m_strength_;
- /**
- * attribute for special Hiragana
- */
- boolean m_isHiragana4_;
-
- /**
- * the ordering of the scripts
- */
- int[] m_scriptOrder_;
- }
-
- /**
- * List of tokens used by the collation rules
- */
- static class TokenListHeader
- {
- Token m_first_;
- Token m_last_;
- Token m_reset_;
- boolean m_indirect_;
- int m_baseCE_;
- int m_baseContCE_;
- int m_nextCE_;
- int m_nextContCE_;
- int m_previousCE_;
- int m_previousContCE_;
- int m_pos_[] = new int[Collator.IDENTICAL + 1];
- int m_gapsLo_[] = new int[3 * (Collator.TERTIARY + 1)];
- int m_gapsHi_[] = new int[3 * (Collator.TERTIARY + 1)];
- int m_numStr_[] = new int[3 * (Collator.TERTIARY + 1)];
- Token m_fStrToken_[] = new Token[Collator.TERTIARY + 1];
- Token m_lStrToken_[] = new Token[Collator.TERTIARY + 1];
- }
-
- /**
- * Token wrapper for collation rules
- */
- static class Token
- {
- // package private data members ---------------------------------------
-
- int m_CE_[];
- int m_CELength_;
- int m_expCE_[];
- int m_expCELength_;
- int m_source_;
- int m_expansion_;
- int m_prefix_;
- int m_strength_;
- int m_toInsert_;
- int m_polarity_; // 1 for <, <<, <<<, , ; and 0 for >, >>, >>>
- TokenListHeader m_listHeader_;
- Token m_previous_;
- Token m_next_;
- StringBuilder m_rules_;
- char m_flags_;
-
- // package private constructors ---------------------------------------
-
- Token()
- {
- m_CE_ = new int[128];
- m_expCE_ = new int[128];
- // TODO: this should also handle reverse
- m_polarity_ = TOKEN_POLARITY_POSITIVE_;
- m_next_ = null;
- m_previous_ = null;
- m_CELength_ = 0;
- m_expCELength_ = 0;
- }
-
- // package private methods --------------------------------------------
-
- /**
- * Hashcode calculation for token
- * @return the hashcode
- */
- public int hashCode()
- {
- int result = 0;
- int len = (m_source_ & 0xFF000000) >>> 24;
- int inc = ((len - 32) / 32) + 1;
-
- int start = m_source_ & 0x00FFFFFF;
- int limit = start + len;
-
- while (start < limit) {
- result = (result * 37) + m_rules_.charAt(start);
- start += inc;
- }
- return result;
- }
-
- /**
- * Equals calculation
- * @param target object to compare
- * @return true if target is the same as this object
- */
- public boolean equals(Object target)
- {
- if (target == this) {
- return true;
- }
- if (target instanceof Token) {
- Token t = (Token)target;
- int sstart = m_source_ & 0x00FFFFFF;
- int tstart = t.m_source_ & 0x00FFFFFF;
- int slimit = (m_source_ & 0xFF000000) >> 24;
- int tlimit = (m_source_ & 0xFF000000) >> 24;
-
- int end = sstart + slimit - 1;
-
- if (m_source_ == 0 || t.m_source_ == 0) {
- return false;
- }
- if (slimit != tlimit) {
- return false;
- }
- if (m_source_ == t.m_source_) {
- return true;
- }
-
- while (sstart < end
- && m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart))
- {
- ++ sstart;
- ++ tstart;
- }
- if (m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart)) {
- return true;
- }
- }
- return false;
- }
- }
-
- // package private data member -------------------------------------------
-
- /**
- * Indicator that the token is resetted yet, ie & in the rules
- */
- static final int TOKEN_RESET_ = 0xDEADBEEF;
-
- /**
- * Size of the number of tokens
- */
- int m_resultLength_;
- /**
- * List of parsed tokens
- */
- TokenListHeader m_listHeader_[];
- /**
- * Variable top token
- */
- Token m_variableTop_;
- /**
- * Collation options
- */
- OptionSet m_options_;
- /**
- * Normalized collation rules with some extra characters
- */
- StringBuilder m_source_;
- /**
- * Hash table to keep all tokens
- */
- Map<Token, Token> m_hashTable_;
-
- // package private method ------------------------------------------------
-
- void setDefaultOptionsInCollator(RuleBasedCollator collator)
- {
- collator.m_defaultStrength_ = m_options_.m_strength_;
- collator.m_defaultDecomposition_ = m_options_.m_decomposition_;
- collator.m_defaultIsFrenchCollation_ = m_options_.m_isFrenchCollation_;
- collator.m_defaultIsAlternateHandlingShifted_
- = m_options_.m_isAlternateHandlingShifted_;
- collator.m_defaultIsCaseLevel_ = m_options_.m_isCaseLevel_;
- collator.m_defaultCaseFirst_ = m_options_.m_caseFirst_;
- collator.m_defaultIsHiragana4_ = m_options_.m_isHiragana4_;
- collator.m_defaultVariableTopValue_ = m_options_.m_variableTopValue_;
- if(m_options_.m_scriptOrder_ != null) {
- collator.m_defaultReorderCodes_ = m_options_.m_scriptOrder_.clone();
- } else {
- collator.m_defaultReorderCodes_ = null;
- }
- }
-
- // private inner classes -------------------------------------------------
-
- /**
- * This is a token that has been parsed but not yet processed. Used to
- * reduce the number of arguments in the parser
- */
- private static class ParsedToken
- {
- // private constructor ----------------------------------------------
-
- /**
- * Empty constructor
- */
- ParsedToken()
- {
- m_charsLen_ = 0;
- m_charsOffset_ = 0;
- m_extensionLen_ = 0;
- m_extensionOffset_ = 0;
- m_prefixLen_ = 0;
- m_prefixOffset_ = 0;
- m_flags_ = 0;
- m_strength_ = TOKEN_UNSET_;
- }
-
- // private data members ---------------------------------------------
-
- int m_strength_;
- int m_charsOffset_;
- int m_charsLen_;
- int m_extensionOffset_;
- int m_extensionLen_;
- int m_prefixOffset_;
- int m_prefixLen_;
- char m_flags_;
- char m_indirectIndex_;
- }
-
- /**
- * Boundary wrappers
- */
- private static class IndirectBoundaries
- {
- // package private constructor ---------------------------------------
-
- IndirectBoundaries(int startce[], int limitce[])
- {
- // Set values for the top - TODO: once we have values for all the
- // indirects, we are going to initalize here.
- m_startCE_ = startce[0];
- m_startContCE_ = startce[1];
- if (limitce != null) {
- m_limitCE_ = limitce[0];
- m_limitContCE_ = limitce[1];
- }
- else {
- m_limitCE_ = 0;
- m_limitContCE_ = 0;
- }
- }
-
- // package private data members --------------------------------------
-
- int m_startCE_;
- int m_startContCE_;
- int m_limitCE_;
- int m_limitContCE_;
- }
-
- /**
- * Collation option rule tag
- */
- private static class TokenOption
- {
- // package private constructor ---------------------------------------
-
- TokenOption(String name, int attribute, String suboptions[],
- int suboptionattributevalue[])
- {
- m_name_ = name;
- m_attribute_ = attribute;
- m_subOptions_ = suboptions;
- m_subOptionAttributeValues_ = suboptionattributevalue;
- }
-
- // package private data member ---------------------------------------
-
- private String m_name_;
- private int m_attribute_;
- private String m_subOptions_[];
- private int m_subOptionAttributeValues_[];
- }
-
- // private variables -----------------------------------------------------
-
- /**
- * Current parsed token
- */
- private ParsedToken m_parsedToken_;
- /**
- * Collation rule
- */
- private String m_rules_;
- private int m_current_;
- /**
- * End of the option while reading.
- * Need it for UnicodeSet reading support.
- */
- private int m_optionEnd_;
- /*
- * Current offset in m_source
- */
- //private int m_sourceLimit_;
- /**
- * Offset to m_source_ ofr the extra expansion characters
- */
- private int m_extraCurrent_;
-
- /**
- * UnicodeSet that contains code points to be copied from the UCA
- */
- UnicodeSet m_copySet_;
-
- /**
- * UnicodeSet that contains code points for which we want to remove
- * UCA contractions. It implies copying of these code points from
- * the UCA.
- */
- UnicodeSet m_removeSet_;
-
- /*
- * This is space for the extra strings that need to be unquoted during the
- * parsing of the rules
- */
- //private static final int TOKEN_EXTRA_RULE_SPACE_SIZE_ = 2048;
- /**
- * Indicator that the token is not set yet
- */
- private static final int TOKEN_UNSET_ = 0xFFFFFFFF;
- /*
- * Indicator that the rule is in the > polarity, ie everything on the
- * right of the rule is less than
- */
- //private static final int TOKEN_POLARITY_NEGATIVE_ = 0;
- /**
- * Indicator that the rule is in the < polarity, ie everything on the
- * right of the rule is greater than
- */
- private static final int TOKEN_POLARITY_POSITIVE_ = 1;
- /**
- * Flag mask to determine if top is set
- */
- private static final int TOKEN_TOP_MASK_ = 0x04;
- /**
- * Flag mask to determine if variable top is set
- */
- private static final int TOKEN_VARIABLE_TOP_MASK_ = 0x08;
- /**
- * Flag mask to determine if a before attribute is set
- */
- private static final int TOKEN_BEFORE_ = 0x03;
- /**
- * For use in parsing token options
- */
- private static final int TOKEN_SUCCESS_MASK_ = 0x10;
-
- /**
- * These values are used for finding CE values for indirect positioning.
- * Indirect positioning is a mechanism for allowing resets on symbolic
- * values. It only works for resets and you cannot tailor indirect names.
- * An indirect name can define either an anchor point or a range. An anchor
- * point behaves in exactly the same way as a code point in reset would,
- * except that it cannot be tailored. A range (we currently only know for
- * the [top] range will explicitly set the upper bound for generated CEs,
- * thus allowing for better control over how many CEs can be squeezed
- * between in the range without performance penalty. In that respect, we use
- * [top] for tailoring of locales that use CJK characters. Other indirect
- * values are currently a pure convenience, they can be used to assure that
- * the CEs will be always positioned in the same place relative to a point
- * with known properties (e.g. first primary ignorable).
- */
- private static final IndirectBoundaries INDIRECT_BOUNDARIES_[];
-
-// /**
-// * Inverse UCA constants
-// */
-// private static final int INVERSE_SIZE_MASK_ = 0xFFF00000;
-// private static final int INVERSE_OFFSET_MASK_ = 0x000FFFFF;
-// private static final int INVERSE_SHIFT_VALUE_ = 20;
-
- /**
- * Collation option tags
- * [last variable] last variable value
- * [last primary ignorable] largest CE for primary ignorable
- * [last secondary ignorable] largest CE for secondary ignorable
- * [last tertiary ignorable] largest CE for tertiary ignorable
- * [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
- */
- private static final TokenOption RULES_OPTIONS_[];
-
- // Static initializer: builds the 15-entry indirect-boundary table and the
- // 20-entry rule-option table used when parsing "[...]" settings.
- static
- {
- INDIRECT_BOUNDARIES_ = new IndirectBoundaries[15];
- // UCOL_RESET_TOP_VALUE
- INDIRECT_BOUNDARIES_[0] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_,
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_);
- // UCOL_FIRST_PRIMARY_IGNORABLE
- INDIRECT_BOUNDARIES_[1] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_PRIMARY_IGNORABLE_,
- null);
- // UCOL_LAST_PRIMARY_IGNORABLE
- INDIRECT_BOUNDARIES_[2] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.LAST_PRIMARY_IGNORABLE_,
- null);
-
- // UCOL_FIRST_SECONDARY_IGNORABLE
- INDIRECT_BOUNDARIES_[3] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_SECONDARY_IGNORABLE_,
- null);
- // UCOL_LAST_SECONDARY_IGNORABLE
- INDIRECT_BOUNDARIES_[4] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.LAST_SECONDARY_IGNORABLE_,
- null);
- // UCOL_FIRST_TERTIARY_IGNORABLE
- INDIRECT_BOUNDARIES_[5] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_TERTIARY_IGNORABLE_,
- null);
- // UCOL_LAST_TERTIARY_IGNORABLE
- INDIRECT_BOUNDARIES_[6] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.LAST_TERTIARY_IGNORABLE_,
- null);
- // UCOL_FIRST_VARIABLE;
- INDIRECT_BOUNDARIES_[7] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_VARIABLE_,
- null);
- // UCOL_LAST_VARIABLE
- INDIRECT_BOUNDARIES_[8] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.LAST_VARIABLE_,
- null);
- // UCOL_FIRST_NON_VARIABLE
- INDIRECT_BOUNDARIES_[9] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_NON_VARIABLE_,
- null);
- // UCOL_LAST_NON_VARIABLE
- INDIRECT_BOUNDARIES_[10] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_,
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_);
- // UCOL_FIRST_IMPLICIT
- INDIRECT_BOUNDARIES_[11] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_,
- null);
- // UCOL_LAST_IMPLICIT
- INDIRECT_BOUNDARIES_[12] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.LAST_IMPLICIT_,
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_);
- // UCOL_FIRST_TRAILING
- INDIRECT_BOUNDARIES_[13] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_,
- null);
- // UCOL_LAST_TRAILING
- INDIRECT_BOUNDARIES_[14] = new IndirectBoundaries(
- RuleBasedCollator.UCA_CONSTANTS_.LAST_TRAILING_,
- null);
- // [last trailing] also has an explicit upper limit CE (special primaries).
- INDIRECT_BOUNDARIES_[14].m_limitCE_
- = RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_SPECIAL_MIN_ << 24;
-
- RULES_OPTIONS_ = new TokenOption[20];
- String option[] = {"non-ignorable", "shifted"};
- int value[] = {RuleBasedCollator.AttributeValue.NON_IGNORABLE_,
- RuleBasedCollator.AttributeValue.SHIFTED_};
- RULES_OPTIONS_[0] = new TokenOption("alternate",
- RuleBasedCollator.Attribute.ALTERNATE_HANDLING_,
- option, value);
- option = new String[1];
- option[0] = "2";
- value = new int[1];
- value[0] = RuleBasedCollator.AttributeValue.ON_;
- RULES_OPTIONS_[1] = new TokenOption("backwards",
- RuleBasedCollator.Attribute.FRENCH_COLLATION_,
- option, value);
- String offonoption[] = new String[2];
- offonoption[0] = "off";
- offonoption[1] = "on";
- int offonvalue[] = new int[2];
- offonvalue[0] = RuleBasedCollator.AttributeValue.OFF_;
- offonvalue[1] = RuleBasedCollator.AttributeValue.ON_;
- RULES_OPTIONS_[2] = new TokenOption("caseLevel",
- RuleBasedCollator.Attribute.CASE_LEVEL_,
- offonoption, offonvalue);
- option = new String[3];
- option[0] = "lower";
- option[1] = "upper";
- option[2] = "off";
- value = new int[3];
- value[0] = RuleBasedCollator.AttributeValue.LOWER_FIRST_;
- value[1] = RuleBasedCollator.AttributeValue.UPPER_FIRST_;
- value[2] = RuleBasedCollator.AttributeValue.OFF_;
- RULES_OPTIONS_[3] = new TokenOption("caseFirst",
- RuleBasedCollator.Attribute.CASE_FIRST_,
- option, value);
- RULES_OPTIONS_[4] = new TokenOption("normalization",
- RuleBasedCollator.Attribute.NORMALIZATION_MODE_,
- offonoption, offonvalue);
- RULES_OPTIONS_[5] = new TokenOption("hiraganaQ",
- RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_,
- offonoption, offonvalue);
- option = new String[5];
- option[0] = "1";
- option[1] = "2";
- option[2] = "3";
- option[3] = "4";
- option[4] = "I";
- value = new int[5];
- value[0] = RuleBasedCollator.AttributeValue.PRIMARY_;
- value[1] = RuleBasedCollator.AttributeValue.SECONDARY_;
- value[2] = RuleBasedCollator.AttributeValue.TERTIARY_;
- value[3] = RuleBasedCollator.AttributeValue.QUATERNARY_;
- value[4] = RuleBasedCollator.AttributeValue.IDENTICAL_;
- RULES_OPTIONS_[6] = new TokenOption("strength",
- RuleBasedCollator.Attribute.STRENGTH_,
- option, value);
- // Options below pass Attribute.LIMIT_ — apparently a placeholder for
- // options that are handled specially rather than mapped to an attribute.
- RULES_OPTIONS_[7] = new TokenOption("variable top",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- RULES_OPTIONS_[8] = new TokenOption("rearrange",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- option = new String[3];
- option[0] = "1";
- option[1] = "2";
- option[2] = "3";
- value = new int[3];
- value[0] = RuleBasedCollator.AttributeValue.PRIMARY_;
- value[1] = RuleBasedCollator.AttributeValue.SECONDARY_;
- value[2] = RuleBasedCollator.AttributeValue.TERTIARY_;
- RULES_OPTIONS_[9] = new TokenOption("before",
- RuleBasedCollator.Attribute.LIMIT_,
- option, value);
- RULES_OPTIONS_[10] = new TokenOption("top",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- String firstlastoption[] = new String[7];
- firstlastoption[0] = "primary";
- firstlastoption[1] = "secondary";
- firstlastoption[2] = "tertiary";
- firstlastoption[3] = "variable";
- firstlastoption[4] = "regular";
- firstlastoption[5] = "implicit";
- firstlastoption[6] = "trailing";
-
- int firstlastvalue[] = new int[7];
- // every "first"/"last" variant shares PRIMARY_ as its stored value
- Arrays.fill(firstlastvalue, RuleBasedCollator.AttributeValue.PRIMARY_);
-
- RULES_OPTIONS_[11] = new TokenOption("first",
- RuleBasedCollator.Attribute.LIMIT_,
- firstlastoption, firstlastvalue);
- RULES_OPTIONS_[12] = new TokenOption("last",
- RuleBasedCollator.Attribute.LIMIT_,
- firstlastoption, firstlastvalue);
- RULES_OPTIONS_[13] = new TokenOption("optimize",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- RULES_OPTIONS_[14] = new TokenOption("suppressContractions",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- RULES_OPTIONS_[15] = new TokenOption("undefined",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- RULES_OPTIONS_[16] = new TokenOption("reorder",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- RULES_OPTIONS_[17] = new TokenOption("charsetname",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- RULES_OPTIONS_[18] = new TokenOption("charset",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- RULES_OPTIONS_[19] = new TokenOption("import",
- RuleBasedCollator.Attribute.LIMIT_,
- null, null);
- }
-
- /**
- * Utility data members
- */
- private Token m_utilToken_ = new Token();
- private CollationElementIterator m_UCAColEIter_
- = RuleBasedCollator.UCA_.getCollationElementIterator("");
- private int m_utilCEBuffer_[] = new int[2];
-
- private boolean m_isStarred_;
-
- private int m_currentStarredCharIndex_;
-
-
- private int m_lastStarredCharIndex_;
-
- private int m_currentRangeCp_;
-
- private int m_lastRangeCp_;
-
- private boolean m_inRange_;
-
- private int m_previousCp_;
-
- private boolean m_savedIsStarred_;
-
-
- // private methods -------------------------------------------------------
-
- /**
- * Assembles the token list
- * @exception ParseException thrown when rules syntax fails
- */
- int assembleTokenList() throws ParseException
- {
- Token lastToken = null;
- m_parsedToken_.m_strength_ = TOKEN_UNSET_;
- int sourcelimit = m_source_.length();
- int expandNext = 0;
-
- m_isStarred_ = false;
-
- while (m_current_ < sourcelimit || m_isStarred_) {
- m_parsedToken_.m_prefixOffset_ = 0;
- if (parseNextToken(lastToken == null) < 0) {
- // we have reached the end
- continue;
- }
- char specs = m_parsedToken_.m_flags_;
- boolean variableTop = ((specs & TOKEN_VARIABLE_TOP_MASK_) != 0);
- boolean top = ((specs & TOKEN_TOP_MASK_) != 0);
- int lastStrength = TOKEN_UNSET_;
- if (lastToken != null) {
- lastStrength = lastToken.m_strength_;
- }
- m_utilToken_.m_source_ = m_parsedToken_.m_charsLen_ << 24
- | m_parsedToken_.m_charsOffset_;
- m_utilToken_.m_rules_ = m_source_;
- // 4 Lookup each source in the CharsToToken map, and find a
- // sourcetoken
- Token sourceToken = m_hashTable_.get(m_utilToken_);
- if (m_parsedToken_.m_strength_ != TOKEN_RESET_) {
- if (lastToken == null) {
- // this means that rules haven't started properly
- throwParseException(m_source_.toString(), 0);
- }
- // 6 Otherwise (when relation != reset)
- if (sourceToken == null) {
- // If sourceToken is null, create new one
- sourceToken = new Token();
- sourceToken.m_rules_ = m_source_;
- sourceToken.m_source_ = m_parsedToken_.m_charsLen_ << 24
- | m_parsedToken_.m_charsOffset_;
- sourceToken.m_prefix_ = m_parsedToken_.m_prefixLen_ << 24
- | m_parsedToken_.m_prefixOffset_;
- // TODO: this should also handle reverse
- sourceToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_;
- sourceToken.m_next_ = null;
- sourceToken.m_previous_ = null;
- sourceToken.m_CELength_ = 0;
- sourceToken.m_expCELength_ = 0;
- m_hashTable_.put(sourceToken, sourceToken);
- }
- else {
- // we could have fished out a reset here
- if (sourceToken.m_strength_ != TOKEN_RESET_
- && lastToken != sourceToken) {
- // otherwise remove sourceToken from where it was.
-
- // Take care of the next node
- if (sourceToken.m_next_ != null) {
- if (sourceToken.m_next_.m_strength_
- > sourceToken.m_strength_) {
- sourceToken.m_next_.m_strength_
- = sourceToken.m_strength_;
- }
- sourceToken.m_next_.m_previous_
- = sourceToken.m_previous_;
- }
- else {
- // sourcetoken is the last token.
- // Redefine the tail token.
- sourceToken.m_listHeader_.m_last_
- = sourceToken.m_previous_;
- }
-
- // Take care of the previous node.
- if (sourceToken.m_previous_ != null) {
- sourceToken.m_previous_.m_next_
- = sourceToken.m_next_;
- }
- else {
- // sourcetoken is the first token.
- // Redefine the head node.
- sourceToken.m_listHeader_.m_first_
- = sourceToken.m_next_;
- }
- sourceToken.m_next_ = null;
- sourceToken.m_previous_ = null;
- }
- }
- sourceToken.m_strength_ = m_parsedToken_.m_strength_;
- sourceToken.m_listHeader_ = lastToken.m_listHeader_;
-
- // 1. Find the strongest strength in each list, and set
- // strongestP and strongestN accordingly in the headers.
- if (lastStrength == TOKEN_RESET_
- || sourceToken.m_listHeader_.m_first_ == null) {
- // If LAST is a reset insert sourceToken in the list.
- if (sourceToken.m_listHeader_.m_first_ == null) {
- sourceToken.m_listHeader_.m_first_ = sourceToken;
- sourceToken.m_listHeader_.m_last_ = sourceToken;
- }
- else { // we need to find a place for us
- // and we'll get in front of the same strength
- if (sourceToken.m_listHeader_.m_first_.m_strength_
- <= sourceToken.m_strength_) {
- sourceToken.m_next_
- = sourceToken.m_listHeader_.m_first_;
- sourceToken.m_next_.m_previous_ = sourceToken;
- sourceToken.m_listHeader_.m_first_ = sourceToken;
- sourceToken.m_previous_ = null;
- }
- else {
- lastToken = sourceToken.m_listHeader_.m_first_;
- while (lastToken.m_next_ != null
- && lastToken.m_next_.m_strength_
- > sourceToken.m_strength_) {
- lastToken = lastToken.m_next_;
- }
- if (lastToken.m_next_ != null) {
- lastToken.m_next_.m_previous_ = sourceToken;
- }
- else {
- sourceToken.m_listHeader_.m_last_
- = sourceToken;
- }
- sourceToken.m_previous_ = lastToken;
- sourceToken.m_next_ = lastToken.m_next_;
- lastToken.m_next_ = sourceToken;
- }
- }
- }
- else {
- // Otherwise (when LAST is not a reset)
- // if polarity (LAST) == polarity(relation), insert
- // sourceToken after LAST, otherwise insert before.
- // when inserting after or before, search to the next
- // position with the same strength in that direction.
- // (This is called postpone insertion).
- if (sourceToken != lastToken) {
- if (lastToken.m_polarity_ == sourceToken.m_polarity_) {
- while (lastToken.m_next_ != null
- && lastToken.m_next_.m_strength_
- > sourceToken.m_strength_) {
- lastToken = lastToken.m_next_;
- }
- sourceToken.m_previous_ = lastToken;
- if (lastToken.m_next_ != null) {
- lastToken.m_next_.m_previous_ = sourceToken;
- }
- else {
- sourceToken.m_listHeader_.m_last_ = sourceToken;
- }
- sourceToken.m_next_ = lastToken.m_next_;
- lastToken.m_next_ = sourceToken;
- }
- else {
- while (lastToken.m_previous_ != null
- && lastToken.m_previous_.m_strength_
- > sourceToken.m_strength_) {
- lastToken = lastToken.m_previous_;
- }
- sourceToken.m_next_ = lastToken;
- if (lastToken.m_previous_ != null) {
- lastToken.m_previous_.m_next_ = sourceToken;
- }
- else {
- sourceToken.m_listHeader_.m_first_
- = sourceToken;
- }
- sourceToken.m_previous_ = lastToken.m_previous_;
- lastToken.m_previous_ = sourceToken;
- }
- }
- else { // repeated one thing twice in rules, stay with the
- // stronger strength
- if (lastStrength < sourceToken.m_strength_) {
- sourceToken.m_strength_ = lastStrength;
- }
- }
- }
- // if the token was a variable top, we're gonna put it in
- if (variableTop == true && m_variableTop_ == null) {
- variableTop = false;
- m_variableTop_ = sourceToken;
- }
- // Treat the expansions.
- // There are two types of expansions: explicit (x / y) and
- // reset based propagating expansions
- // (&abc * d * e <=> &ab * d / c * e / c)
- // if both of them are in effect for a token, they are combined.
- sourceToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24
- | m_parsedToken_.m_extensionOffset_;
- if (expandNext != 0) {
- if (sourceToken.m_strength_ == RuleBasedCollator.PRIMARY) {
- // primary strength kills off the implicit expansion
- expandNext = 0;
- }
- else if (sourceToken.m_expansion_ == 0) {
- // if there is no expansion, implicit is just added to
- // the token
- sourceToken.m_expansion_ = expandNext;
- }
- else {
- // there is both explicit and implicit expansion.
- // We need to make a combination
- int start = expandNext & 0xFFFFFF;
- int size = expandNext >>> 24;
- if (size > 0) {
- m_source_.append(m_source_.substring(start,
- start + size));
- }
- start = m_parsedToken_.m_extensionOffset_;
- m_source_.append(m_source_.substring(start,
- start + m_parsedToken_.m_extensionLen_));
- sourceToken.m_expansion_ = (size
- + m_parsedToken_.m_extensionLen_) << 24
- | m_extraCurrent_;
- m_extraCurrent_ += size + m_parsedToken_.m_extensionLen_;
- }
- }
- // if the previous token was a reset before, the strength of this
- // token must match the strength of before. Otherwise we have an
- // undefined situation.
- // In other words, we currently have a cludge which we use to
- // represent &a >> x. This is written as &[before 2]a << x.
- if((lastToken.m_flags_ & TOKEN_BEFORE_) != 0) {
- int beforeStrength = (lastToken.m_flags_ & TOKEN_BEFORE_) - 1;
- if(beforeStrength != sourceToken.m_strength_) {
- throwParseException(m_source_.toString(), m_current_);
- }
- }
-
- }
- else {
- if (lastToken != null && lastStrength == TOKEN_RESET_) {
- // if the previous token was also a reset, this means that
- // we have two consecutive resets and we want to remove the
- // previous one if empty
- if (m_resultLength_ > 0 && m_listHeader_[m_resultLength_ - 1].m_first_ == null) {
- m_resultLength_ --;
- }
- }
- if (sourceToken == null) {
- // this is a reset, but it might still be somewhere in the
- // tailoring, in shorter form
- int searchCharsLen = m_parsedToken_.m_charsLen_;
- while (searchCharsLen > 1 && sourceToken == null) {
- searchCharsLen --;
- // key = searchCharsLen << 24 | charsOffset;
- m_utilToken_.m_source_ = searchCharsLen << 24
- | m_parsedToken_.m_charsOffset_;
- m_utilToken_.m_rules_ = m_source_;
- sourceToken = m_hashTable_.get(m_utilToken_);
- }
- if (sourceToken != null) {
- expandNext = (m_parsedToken_.m_charsLen_
- - searchCharsLen) << 24
- | (m_parsedToken_.m_charsOffset_
- + searchCharsLen);
- }
- }
- if ((specs & TOKEN_BEFORE_) != 0) {
- if (top == false) {
- // we're doing before & there is no indirection
- int strength = (specs & TOKEN_BEFORE_) - 1;
- if (sourceToken != null
- && sourceToken.m_strength_ != TOKEN_RESET_) {
- // this is a before that is already ordered in the UCA
- // - so we need to get the previous with good strength
- while (sourceToken.m_strength_ > strength
- && sourceToken.m_previous_ != null) {
- sourceToken = sourceToken.m_previous_;
- }
- // here, either we hit the strength or NULL
- if (sourceToken.m_strength_ == strength) {
- if (sourceToken.m_previous_ != null) {
- sourceToken = sourceToken.m_previous_;
- }
- else { // start of list
- sourceToken
- = sourceToken.m_listHeader_.m_reset_;
- }
- }
- else { // we hit NULL, we should be doing the else part
- sourceToken
- = sourceToken.m_listHeader_.m_reset_;
- sourceToken = getVirginBefore(sourceToken,
- strength);
- }
- }
- else {
- sourceToken
- = getVirginBefore(sourceToken, strength);
- }
- }
- else {
- // this is both before and indirection
- top = false;
- m_listHeader_[m_resultLength_] = new TokenListHeader();
- m_listHeader_[m_resultLength_].m_previousCE_ = 0;
- m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
- m_listHeader_[m_resultLength_].m_indirect_ = true;
- // we need to do slightly more work. we need to get the
- // baseCE using the inverse UCA & getPrevious. The next
- // bound is not set, and will be decided in ucol_bld
- int strength = (specs & TOKEN_BEFORE_) - 1;
- int baseCE = INDIRECT_BOUNDARIES_[
- m_parsedToken_.m_indirectIndex_].m_startCE_;
- int baseContCE = INDIRECT_BOUNDARIES_[
- m_parsedToken_.m_indirectIndex_].m_startContCE_;
- int ce[] = new int[2];
- if((baseCE >>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_)
- && (baseCE >>> 24 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { /* implicits - */
- int primary = baseCE & RuleBasedCollator.CE_PRIMARY_MASK_ | (baseContCE & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16;
- int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary);
- int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1);
- ce[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
- ce[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
- } else {
- CollationParsedRuleBuilder.InverseUCA invuca
- = CollationParsedRuleBuilder.INVERSE_UCA_;
- invuca.getInversePrevCE(baseCE, baseContCE, strength,
- ce);
- }
- m_listHeader_[m_resultLength_].m_baseCE_ = ce[0];
- m_listHeader_[m_resultLength_].m_baseContCE_ = ce[1];
- m_listHeader_[m_resultLength_].m_nextCE_ = 0;
- m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
-
- sourceToken = new Token();
- expandNext = initAReset(0, sourceToken);
- }
- }
- // 5 If the relation is a reset:
- // If sourceToken is null
- // Create new list, create new sourceToken, make the baseCE
- // from source, put the sourceToken in ListHeader of the new
- // list
- if (sourceToken == null) {
- if (m_listHeader_[m_resultLength_] == null) {
- m_listHeader_[m_resultLength_] = new TokenListHeader();
- }
- // 3 Consider each item: relation, source, and expansion:
- // e.g. ...< x / y ...
- // First convert all expansions into normal form.
- // Examples:
- // If "xy" doesn't occur earlier in the list or in the UCA,
- // convert &xy * c * d * ... into &x * c/y * d * ...
- // Note: reset values can never have expansions, although
- // they can cause the very next item to have one. They may
- // be contractions, if they are found earlier in the list.
- if (top == false) {
- CollationElementIterator coleiter
- = RuleBasedCollator.UCA_.getCollationElementIterator(
- m_source_.substring(m_parsedToken_.m_charsOffset_,
- m_parsedToken_.m_charsOffset_
- + m_parsedToken_.m_charsLen_));
-
- int CE = coleiter.next();
- // offset to the character in the full rule string
- int expand = coleiter.getOffset()
- + m_parsedToken_.m_charsOffset_;
- int SecondCE = coleiter.next();
-
- m_listHeader_[m_resultLength_].m_baseCE_
- = CE & 0xFFFFFF3F;
- if (RuleBasedCollator.isContinuation(SecondCE)) {
- m_listHeader_[m_resultLength_].m_baseContCE_
- = SecondCE;
- }
- else {
- m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
- }
- m_listHeader_[m_resultLength_].m_nextCE_ = 0;
- m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
- m_listHeader_[m_resultLength_].m_previousCE_ = 0;
- m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
- m_listHeader_[m_resultLength_].m_indirect_ = false;
- sourceToken = new Token();
- expandNext = initAReset(expand, sourceToken);
- }
- else { // top == TRUE
- top = false;
- m_listHeader_[m_resultLength_].m_previousCE_ = 0;
- m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
- m_listHeader_[m_resultLength_].m_indirect_ = true;
- IndirectBoundaries ib = INDIRECT_BOUNDARIES_[
- m_parsedToken_.m_indirectIndex_];
- m_listHeader_[m_resultLength_].m_baseCE_
- = ib.m_startCE_;
- m_listHeader_[m_resultLength_].m_baseContCE_
- = ib.m_startContCE_;
- m_listHeader_[m_resultLength_].m_nextCE_
- = ib.m_limitCE_;
- m_listHeader_[m_resultLength_].m_nextContCE_
- = ib.m_limitContCE_;
- sourceToken = new Token();
- expandNext = initAReset(0, sourceToken);
- }
- }
- else { // reset to something already in rules
- top = false;
- }
- }
- // 7 After all this, set LAST to point to sourceToken, and goto
- // step 3.
- lastToken = sourceToken;
- }
-
- if (m_resultLength_ > 0
- && m_listHeader_[m_resultLength_ - 1].m_first_ == null) {
- m_resultLength_ --;
- }
- return m_resultLength_;
- }
-
- /**
- * Formats and throws a ParseException
- * @param rules collation rule that failed
- * @param offset failed offset in rules
- * @throws ParseException with failure information
- */
- private static final void throwParseException(String rules, int offset)
- throws ParseException
- {
- // for pre-context
- String precontext = rules.substring(0, offset);
- String postcontext = rules.substring(offset, rules.length());
- StringBuilder error = new StringBuilder(
- "Parse error occurred in rule at offset ");
- error.append(offset);
- error.append("\n after the prefix \"");
- error.append(precontext);
- error.append("\" before the suffix \"");
- error.append(postcontext);
- throw new ParseException(error.toString(), offset);
- }
-
- /**
- * Materializes an indirect boundary as synthetic token text: appends a
- * 0xFFFE marker followed by the boundary's start CE (split into two
- * chars), and, when nonzero, its start continuation CE, to m_source_,
- * then points m_parsedToken_ at that appended text (length 3 or 5).
- * @return always true
- */
- private final boolean doSetTop() {
- m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
- m_source_.append((char)0xFFFE);
- IndirectBoundaries ib =
- INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_];
- // start CE stored as two UTF-16 code units: high half, then low half
- m_source_.append((char)(ib.m_startCE_ >> 16));
- m_source_.append((char)(ib.m_startCE_ & 0xFFFF));
- m_extraCurrent_ += 3;
- if (INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_
- ].m_startContCE_ == 0) {
- m_parsedToken_.m_charsLen_ = 3;
- }
- else {
- // continuation CE present: append its two halves as well
- m_source_.append((char)(INDIRECT_BOUNDARIES_[
- m_parsedToken_.m_indirectIndex_
- ].m_startContCE_ >> 16));
- m_source_.append((char)(INDIRECT_BOUNDARIES_[
- m_parsedToken_.m_indirectIndex_
- ].m_startContCE_ & 0xFFFF));
- m_extraCurrent_ += 2;
- m_parsedToken_.m_charsLen_ = 5;
- }
- return true;
- }
-
- /**
- * Tests whether a character is one of the six Unicode line terminators
- * (LF, CR, FF, NEL, LS, PS).
- * @param c character to test
- * @return true if c is a line terminator
- */
- private static boolean isCharNewLine(char c) {
- switch (c) {
- case 0x000A: /* LF */
- case 0x000D: /* CR */
- case 0x000C: /* FF */
- case 0x0085: /* NEL */
- case 0x2028: /* LS */
- case 0x2029: /* PS */
- return true;
- default:
- return false;
- }
- }
-
- /**
- * Parses the next token.
- *
- * It updates/accesses the following member variables:
- * m_current_: Index to the next unparsed character (not code point)
- * in the character array (a StringBuilder object) m_source_.
- * m_parsedToken_: The parsed token. The following of the token are updated.
- * .m_strength: The strength of the token.
- * .m_charsOffset, m_charsLen_: Index to the first character (after operators),
- * and number of characters in the token.
- * This may be in the main string, or in the appended string.
- * .m_extensionOffset_, .m_extensionLen_:
- * .m_flags:
- * .m_prefixOffset, .m_prefixLen: Used when "|" is used to specify "context before".
- * .m_indirectIndex:
- * @param startofrules
- * flag indicating if we are at the start of rules
- * @return the offset of the next unparsed char
- * @exception ParseException
- * thrown when rule parsing fails
- */
- private int parseNextToken(boolean startofrules) throws ParseException
- {
-
- if (m_inRange_) {
- // We are not done processing a range. Continue it.
- return processNextCodePointInRange();
- } else if (m_isStarred_) {
- // We are not done processing a starred token. Continue it.
- return processNextTokenInTheStarredList();
- }
-
- // Get the next token.
- int nextOffset = parseNextTokenInternal(startofrules);
-
- // If the next token is starred and/or in range, we need to handle it here.
- if (m_inRange_) {
- // A new range has started.
- // Check whether it is a chain of ranges with more than one hyphen.
- if (m_lastRangeCp_ > 0 && m_lastRangeCp_ == m_previousCp_) {
- throw new ParseException("Chained range syntax", m_current_);
- }
-
- // The current token is the first character of the second code point of the range.
- // Process just that, and then proceed with the star.
- m_lastRangeCp_ = m_source_.codePointAt(this.m_parsedToken_.m_charsOffset_);
- if (m_lastRangeCp_ <= m_previousCp_) {
- throw new ParseException("Invalid range", m_current_);
- }
-
- // Set current range code point to process the range loop
- m_currentRangeCp_ = m_previousCp_ + 1;
-
- // Set current starred char index to continue processing the starred
- // expression after the range is done.
- m_currentStarredCharIndex_ = m_parsedToken_.m_charsOffset_
- + Character.charCount(m_lastRangeCp_);
- m_lastStarredCharIndex_ = m_parsedToken_.m_charsOffset_ + m_parsedToken_.m_charsLen_ - 1;
-
- return processNextCodePointInRange();
- } else if (m_isStarred_) {
- // We define two indices m_currentStarredCharIndex_ and m_lastStarredCharIndex_ so that
- // [m_currentStarredCharIndex_ .. m_lastStarredCharIndex_], both inclusive, need to be
- // separated into several tokens and returned.
- m_currentStarredCharIndex_ = m_parsedToken_.m_charsOffset_;
- m_lastStarredCharIndex_ = m_parsedToken_.m_charsOffset_ + m_parsedToken_.m_charsLen_ - 1;
-
- return processNextTokenInTheStarredList();
- }
- return nextOffset;
- }
-
- /**
- * Emits the next code point of an active range ("a-e") as a token by
- * appending it to m_source_ and pointing m_parsedToken_ at it, then
- * advances the range state. When the range is exhausted, the range flag
- * is cleared and any remaining starred processing is resumed.
- * Note: m_previousCp_ is advanced to the code point that will be emitted
- * next; on the final iteration it is left equal to m_lastRangeCp_, which
- * is what the chained-range check in parseNextToken relies on.
- * @return the offset of the next unparsed char (m_current_)
- * @throws ParseException declared for caller consistency; no throw here
- */
- private int processNextCodePointInRange() throws ParseException {
- int nChars = Character.charCount(m_currentRangeCp_);
- m_source_.appendCodePoint(m_currentRangeCp_);
-
- // token text lives in the appended portion of m_source_
- m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
- m_parsedToken_.m_charsLen_ = nChars;
-
- m_extraCurrent_ += nChars;
- ++m_currentRangeCp_;
- if (m_currentRangeCp_ > m_lastRangeCp_) {
- // All the code points in the range are processed.
- // Turn the range flag off.
- m_inRange_ = false;
-
- // If there is a starred portion remaining in the current
- // parsed token, resume the starred operation.
- if (m_currentStarredCharIndex_ <= m_lastStarredCharIndex_) {
- m_isStarred_ = true;
- } else {
- m_isStarred_ = false;
- }
- } else {
- m_previousCp_ = m_currentRangeCp_;
- }
- return m_current_;
- }
-
-
- /**
- * Extracts the next token from the starred token from
- * m_currentStarredCharIndex_ and returns it.
- * @return the offset of the next unparsed char
- * @throws ParseException
- */
- private int processNextTokenInTheStarredList() throws ParseException {
- // Extract the characters corresponding to the next code point.
- int cp = m_source_.codePointAt(m_currentStarredCharIndex_);
- int nChars = Character.charCount(cp);
-
- m_parsedToken_.m_charsLen_ = nChars;
- m_parsedToken_.m_charsOffset_ = m_currentStarredCharIndex_;
- m_currentStarredCharIndex_ += nChars;
-
- // When we are done parsing the starred string, turn the flag off so that
- // the normal processing is restored.
- if (m_currentStarredCharIndex_ > m_lastStarredCharIndex_) {
- m_isStarred_ = false;
- }
- m_previousCp_ = cp;
- return m_current_;
- }
-
- /**
- * Handles a strength operator that appears at the very start of the
- * rules by treating it as a reset to an indirect boundary: sets the
- * parsed token's indirect index to 5, materializes the boundary text
- * via doSetTop(), and finishes the token as a TOKEN_RESET_.
- * Note: the incoming 'top' argument is overwritten — doSetTop() always
- * returns true, so true is what is passed on.
- * @return the offset of the next unparsed char
- * @throws ParseException propagated from doEndParseNextToken
- */
- private int resetToTop(boolean top, boolean variableTop,
- int extensionOffset, int newExtensionLen,
- byte byteBefore) throws ParseException {
- m_parsedToken_.m_indirectIndex_ = 5;
- top = doSetTop();
- return doEndParseNextToken(TOKEN_RESET_,
- top,
- extensionOffset,
- newExtensionLen,
- variableTop, byteBefore);
- }
-
- /**
- * Gets the next token and sets the necessary internal variables.
- * This function parses a starred string as a single token, which will be separated
- * in the calling function.
- * @param startofrules Boolean value indicating whether this is the first rule
- * @return the offset of the next unparsed char
- * @throws ParseException
- */
- @SuppressWarnings("fallthrough")
- private int parseNextTokenInternal(boolean startofrules) throws ParseException {
- boolean variabletop = false;
- boolean top = false;
- boolean inchars = true;
- boolean inquote = false;
- boolean wasinquote = false;
- byte before = 0;
- boolean isescaped = false;
- int /*newcharslen = 0,*/ newextensionlen = 0;
- int /*charsoffset = 0,*/ extensionoffset = 0;
- int newstrength = TOKEN_UNSET_;
-
- initializeParsedToken();
-
- int limit = m_rules_.length();
- while (m_current_ < limit) {
- char ch = m_source_.charAt(m_current_);
- if (inquote) {
- if (ch == 0x0027) { // '\''
- inquote = false;
- }
- else {
- if ((m_parsedToken_.m_charsLen_ == 0) || inchars) {
- if (m_parsedToken_.m_charsLen_ == 0) {
- m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
- }
- m_parsedToken_.m_charsLen_ ++;
- }
- else {
- if (newextensionlen == 0) {
- extensionoffset = m_extraCurrent_;
- }
- newextensionlen ++;
- }
- }
- }
- else if (isescaped) {
- isescaped = false;
- if (newstrength == TOKEN_UNSET_) {
- throwParseException(m_rules_, m_current_);
- }
- if (ch != 0 && m_current_ != limit) {
- if (inchars) {
- if (m_parsedToken_.m_charsLen_ == 0) {
- m_parsedToken_.m_charsOffset_ = m_current_;
- }
- m_parsedToken_.m_charsLen_ ++;
- }
- else {
- if (newextensionlen == 0) {
- extensionoffset = m_current_;
- }
- newextensionlen ++;
- }
- }
- }
- else {
- if (!PatternProps.isWhiteSpace(ch)) {
- // Sets the strength for this entry
- switch (ch) {
- case 0x003D : // '='
- if (newstrength != TOKEN_UNSET_) {
- return doEndParseNextToken(newstrength,
- top,
- extensionoffset,
- newextensionlen,
- variabletop, before);
- }
- // if we start with strength, we'll reset to top
- if (startofrules == true) {
- return resetToTop(top, variabletop, extensionoffset,
- newextensionlen, before);
- }
- newstrength = Collator.IDENTICAL;
- if (m_source_.charAt(m_current_ + 1) == 0x002A) { // '*'
- m_current_++;
- m_isStarred_ = true;
- }
- break;
- case 0x002C : // ','
- if (newstrength != TOKEN_UNSET_) {
- return doEndParseNextToken(newstrength,
- top,
- extensionoffset,
- newextensionlen,
- variabletop, before);
- }
- // if we start with strength, we'll reset to top
- if (startofrules == true) {
- return resetToTop(top, variabletop, extensionoffset,
- newextensionlen, before);
- }
- newstrength = Collator.TERTIARY;
- break;
- case 0x003B : // ';'
- if (newstrength != TOKEN_UNSET_) {
- return doEndParseNextToken(newstrength,
- top,
- extensionoffset,
- newextensionlen,
- variabletop, before);
- }
- //if we start with strength, we'll reset to top
- if(startofrules == true) {
- return resetToTop(top, variabletop, extensionoffset,
- newextensionlen, before);
- }
- newstrength = Collator.SECONDARY;
- break;
- case 0x003C : // '<'
- if (newstrength != TOKEN_UNSET_) {
- return doEndParseNextToken(newstrength,
- top,
- extensionoffset,
- newextensionlen,
- variabletop, before);
- }
- // if we start with strength, we'll reset to top
- if (startofrules == true) {
- return resetToTop(top, variabletop, extensionoffset,
- newextensionlen, before);
- }
- // before this, do a scan to verify whether this is
- // another strength
- if (m_source_.charAt(m_current_ + 1) == 0x003C) {
- m_current_ ++;
- if (m_source_.charAt(m_current_ + 1) == 0x003C) {
- m_current_ ++; // three in a row!
- newstrength = Collator.TERTIARY;
- }
- else { // two in a row
- newstrength = Collator.SECONDARY;
- }
- }
- else { // just one
- newstrength = Collator.PRIMARY;
- }
- if (m_source_.charAt(m_current_ + 1) == 0x002A) { // '*'
- m_current_++;
- m_isStarred_ = true;
- }
- break;
-
- case 0x0026 : // '&'
- if (newstrength != TOKEN_UNSET_) {
- return doEndParseNextToken(newstrength,
- top,
- extensionoffset,
- newextensionlen,
- variabletop, before);
- }
- newstrength = TOKEN_RESET_; // PatternEntry::RESET = 0
- break;
- case 0x005b : // '['
- // options - read an option, analyze it
- m_optionEnd_ = m_rules_.indexOf(0x005d, m_current_);
- if (m_optionEnd_ != -1) { // ']'
- byte result = readAndSetOption();
- m_current_ = m_optionEnd_;
- if ((result & TOKEN_TOP_MASK_) != 0) {
- if (newstrength == TOKEN_RESET_) {
- doSetTop();
- if (before != 0) {
- // This is a combination of before and
- // indirection like
- // '&[before 2][first regular]<b'
- m_source_.append((char)0x002d);
- m_source_.append((char)before);
- m_extraCurrent_ += 2;
- m_parsedToken_.m_charsLen_ += 2;
- }
- m_current_ ++;
- return doEndParseNextToken(newstrength,
- true,
- extensionoffset,
- newextensionlen,
- variabletop, before);
- }
- else {
- throwParseException(m_rules_, m_current_);
- }
- }
- else if ((result & TOKEN_VARIABLE_TOP_MASK_) != 0) {
- if (newstrength != TOKEN_RESET_
- && newstrength != TOKEN_UNSET_) {
- variabletop = true;
- m_parsedToken_.m_charsOffset_
- = m_extraCurrent_;
- m_source_.append((char)0xFFFF);
- m_extraCurrent_ ++;
- m_current_ ++;
- m_parsedToken_.m_charsLen_ = 1;
- return doEndParseNextToken(newstrength,
- top,
- extensionoffset,
- newextensionlen,
- variabletop, before);
- }
- else {
- throwParseException(m_rules_, m_current_);
- }
- }
- else if ((result & TOKEN_BEFORE_) != 0){
- if (newstrength == TOKEN_RESET_) {
- before = (byte)(result & TOKEN_BEFORE_);
- }
- else {
- throwParseException(m_rules_, m_current_);
- }
- }
- }
- break;
- case 0x002F : // '/'
- wasinquote = false; // if we were copying source
- // characters, we want to stop now
- inchars = false; // we're now processing expansion
- break;
- case 0x005C : // back slash for escaped chars
- isescaped = true;
- break;
- // found a quote, we're gonna start copying
- case 0x0027 : //'\''
- if (newstrength == TOKEN_UNSET_) {
- // quote is illegal until we have a strength
- throwParseException(m_rules_, m_current_);
- }
- inquote = true;
- if (inchars) { // we're doing characters
- if (wasinquote == false) {
- m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
- }
- if (m_parsedToken_.m_charsLen_ != 0) {
- // We are processing characters in quote together.
- // Copy whatever is in the current token, so that
- // the unquoted string can be appended to that.
- m_source_.append(m_source_.substring(
- m_current_ - m_parsedToken_.m_charsLen_,
- m_current_));
- m_extraCurrent_ += m_parsedToken_.m_charsLen_;
- }
- m_parsedToken_.m_charsLen_ ++;
- }
- else { // we're doing an expansion
- if (wasinquote == false) {
- extensionoffset = m_extraCurrent_;
- }
- if (newextensionlen != 0) {
- m_source_.append(m_source_.substring(
- m_current_ - newextensionlen,
- m_current_));
- m_extraCurrent_ += newextensionlen;
- }
- newextensionlen ++;
- }
- wasinquote = true;
- m_current_ ++;
- ch = m_source_.charAt(m_current_);
- if (ch == 0x0027) { // copy the double quote
- m_source_.append(ch);
- m_extraCurrent_ ++;
- inquote = false;
- }
- break;
- // '@' is french only if the strength is not currently set
- // if it is, it's just a regular character in collation
- case 0x0040 : // '@'
- if (newstrength == TOKEN_UNSET_) {
- m_options_.m_isFrenchCollation_ = true;
- break;
- }
- // fall through
- case 0x007C : //|
- // this means we have actually been reading prefix part
- // we want to store read characters to the prefix part
- // and continue reading the characters (proper way
- // would be to restart reading the chars, but in that
- // case we would have to complicate the token hasher,
- // which I do not intend to play with. Instead, we will
- // do prefixes when prefixes are due (before adding the
- // elements).
- m_parsedToken_.m_prefixOffset_
- = m_parsedToken_.m_charsOffset_;
- m_parsedToken_.m_prefixLen_
- = m_parsedToken_.m_charsLen_;
- if (inchars) { // we're doing characters
- if (wasinquote == false) {
- m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
- }
- if (m_parsedToken_.m_charsLen_ != 0) {
- String prefix = m_source_.substring(
- m_current_ - m_parsedToken_.m_charsLen_,
- m_current_);
- m_source_.append(prefix);
- m_extraCurrent_ += m_parsedToken_.m_charsLen_;
- }
- m_parsedToken_.m_charsLen_ ++;
- }
- wasinquote = true;
- do {
- m_current_ ++;
- ch = m_source_.charAt(m_current_);
- // skip whitespace between '|' and the character
- } while (PatternProps.isWhiteSpace(ch));
- break;
- case 0x002D : // '-', indicates a range.
- if (newstrength != TOKEN_UNSET_) {
- m_savedIsStarred_ = m_isStarred_;
- return doEndParseNextToken(newstrength,
- top,
- extensionoffset,
- newextensionlen,
- variabletop, before);
- }
-
- m_isStarred_ = m_savedIsStarred_;
- // Ranges are valid only in starred tokens.
- if (!m_isStarred_) {
- throwParseException(m_rules_, m_current_);
- }
-
- newstrength = m_parsedToken_.m_strength_;
- m_inRange_ = true;
- break;
-
- case 0x0023: // '#' // this is a comment, skip everything through the end of line
- do {
- m_current_ ++;
- ch = m_source_.charAt(m_current_);
- } while (!isCharNewLine(ch));
- break;
- case 0x0021: // '!' // ignoring java set thai reordering
- break;
- default :
- if (newstrength == TOKEN_UNSET_) {
- throwParseException(m_rules_, m_current_);
- }
- if (isSpecialChar(ch) && (inquote == false)) {
- throwParseException(m_rules_, m_current_);
- }
- if (ch == 0x0000 && m_current_ + 1 == limit) {
- break;
- }
- if (inchars) {
- if (m_parsedToken_.m_charsLen_ == 0) {
- m_parsedToken_.m_charsOffset_ = m_current_;
- }
- m_parsedToken_.m_charsLen_++;
- }
- else {
- if (newextensionlen == 0) {
- extensionoffset = m_current_;
- }
- newextensionlen ++;
- }
- break;
- }
- }
- }
- if (wasinquote) {
- if (ch != 0x27) {
- m_source_.append(ch);
- m_extraCurrent_ ++;
- }
- }
- m_current_ ++;
- }
- return doEndParseNextToken(newstrength, top,
- extensionoffset, newextensionlen,
- variabletop, before);
- }
-
-
- /**
- *
- */
- private void initializeParsedToken() {
- m_parsedToken_.m_charsLen_ = 0;
- m_parsedToken_.m_charsOffset_ = 0;
- m_parsedToken_.m_prefixOffset_ = 0;
- m_parsedToken_.m_prefixLen_ = 0;
- m_parsedToken_.m_indirectIndex_ = 0;
- }
-
- /**
- * End the next parse token
- * @param newstrength new strength
- * @return offset in rules, -1 for end of rules
- */
- private int doEndParseNextToken(int newstrength, /*int newcharslen,*/
- boolean top, /*int charsoffset,*/
- int extensionoffset, int newextensionlen,
- boolean variabletop, int before)
- throws ParseException
- {
- if (newstrength == TOKEN_UNSET_) {
- return -1;
- }
- if (m_parsedToken_.m_charsLen_ == 0 && top == false) {
- throwParseException(m_rules_, m_current_);
- }
-
- m_parsedToken_.m_strength_ = newstrength;
- //m_parsedToken_.m_charsOffset_ = charsoffset;
- //m_parsedToken_.m_charsLen_ = newcharslen;
- m_parsedToken_.m_extensionOffset_ = extensionoffset;
- m_parsedToken_.m_extensionLen_ = newextensionlen;
- m_parsedToken_.m_flags_ = (char)
- ((variabletop ? TOKEN_VARIABLE_TOP_MASK_ : 0)
- | (top ? TOKEN_TOP_MASK_ : 0) | before);
- return m_current_;
- }
-
- /**
- * Token before this element
- * @param sourcetoken
- * @param strength collation strength
- * @return the token before source token
- * @exception ParseException thrown when rules have the wrong syntax
- */
- private Token getVirginBefore(Token sourcetoken, int strength)
- throws ParseException
- {
- // this is a virgin before - we need to fish the anchor from the UCA
- if (sourcetoken != null) {
- int offset = sourcetoken.m_source_ & 0xFFFFFF;
- m_UCAColEIter_.setText(m_source_.substring(offset, offset + 1));
- }
- else {
- m_UCAColEIter_.setText(
- m_source_.substring(m_parsedToken_.m_charsOffset_,
- m_parsedToken_.m_charsOffset_ + 1));
- }
-
- int basece = m_UCAColEIter_.next() & 0xFFFFFF3F;
- int basecontce = m_UCAColEIter_.next();
- if (basecontce == CollationElementIterator.NULLORDER) {
- basecontce = 0;
- }
-
- int ch = 0;
-
-
- if((basece >>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_)
- && (basece >>> 24 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { /* implicits - */
-
- int primary = basece & RuleBasedCollator.CE_PRIMARY_MASK_ | (basecontce & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16;
- int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary);
- ch = RuleBasedCollator.impCEGen_.getCodePointFromRaw(raw-1);
- int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1);
- m_utilCEBuffer_[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
- m_utilCEBuffer_[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
-
- m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
- m_source_.append('\uFFFE');
- m_source_.append((char)ch);
- m_extraCurrent_ += 2;
- m_parsedToken_.m_charsLen_++;
-
- m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24)
- | m_parsedToken_.m_charsOffset_;
- m_utilToken_.m_rules_ = m_source_;
- sourcetoken = m_hashTable_.get(m_utilToken_);
-
- if(sourcetoken == null) {
- m_listHeader_[m_resultLength_] = new TokenListHeader();
- m_listHeader_[m_resultLength_].m_baseCE_
- = m_utilCEBuffer_[0] & 0xFFFFFF3F;
- if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) {
- m_listHeader_[m_resultLength_].m_baseContCE_
- = m_utilCEBuffer_[1];
- }
- else {
- m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
- }
- m_listHeader_[m_resultLength_].m_nextCE_ = 0;
- m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
- m_listHeader_[m_resultLength_].m_previousCE_ = 0;
- m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
- m_listHeader_[m_resultLength_].m_indirect_ = false;
-
- sourcetoken = new Token();
- initAReset(-1, sourcetoken);
- }
-
- } else {
-
- // first ce and second ce m_utilCEBuffer_
- /*int invpos = */CollationParsedRuleBuilder.INVERSE_UCA_.getInversePrevCE(
- basece, basecontce,
- strength, m_utilCEBuffer_);
- // we got the previous CE. Now we need to see if the difference between
- // the two CEs is really of the requested strength.
- // if it's a bigger difference (we asked for secondary and got primary), we
- // need to modify the CE.
- if(CollationParsedRuleBuilder.INVERSE_UCA_.getCEStrengthDifference(basece, basecontce, m_utilCEBuffer_[0], m_utilCEBuffer_[1]) < strength) {
- // adjust the strength
- // now we are in the situation where our baseCE should actually be modified in
- // order to get the CE in the right position.
- if(strength == Collator.SECONDARY) {
- m_utilCEBuffer_[0] = basece - 0x0200;
- } else { // strength == UCOL_TERTIARY
- m_utilCEBuffer_[0] = basece - 0x02;
- }
- if(RuleBasedCollator.isContinuation(basecontce)) {
- if(strength == Collator.SECONDARY) {
- m_utilCEBuffer_[1] = basecontce - 0x0200;
- } else { // strength == UCOL_TERTIARY
- m_utilCEBuffer_[1] = basecontce - 0x02;
- }
- }
- }
-
-/*
- // the code below relies on getting a code point from the inverse table, in order to be
- // able to merge the situations like &x < 9 &[before 1]a < d. This won't work:
- // 1. There are many code points that have the same CE
- // 2. The CE to codepoint table (things pointed to by CETable[3*invPos+2] are broken.
- // Also, in case when there is no equivalent strength before an element, we have to actually
- // construct one. For example, &[before 2]a << x won't result in x << a, because the element
- // before a is a primary difference.
- ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_table_[3 * invpos
- + 2];
- if ((ch & INVERSE_SIZE_MASK_) != 0) {
- int offset = ch & INVERSE_OFFSET_MASK_;
- ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_continuations_[
- offset];
- }
- m_source_.append((char)ch);
- m_extraCurrent_ ++;
- m_parsedToken_.m_charsOffset_ = m_extraCurrent_ - 1;
- m_parsedToken_.m_charsLen_ = 1;
-
- // We got an UCA before. However, this might have been tailored.
- // example:
- // &\u30ca = \u306a
- // &[before 3]\u306a<<<\u306a|\u309d
-
- m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24)
- | m_parsedToken_.m_charsOffset_;
- m_utilToken_.m_rules_ = m_source_;
- sourcetoken = (Token)m_hashTable_.get(m_utilToken_);
-*/
-
- // here is how it should be. The situation such as &[before 1]a < x, should be
- // resolved exactly as if we wrote &a > x.
- // therefore, I don't really care if the UCA value before a has been changed.
- // However, I do care if the strength between my element and the previous element
- // is bigger then I wanted. So, if CE < baseCE and I wanted &[before 2], then i'll
- // have to construct the base CE.
-
- // if we found a tailored thing, we have to use the UCA value and
- // construct a new reset token with constructed name
- //if (sourcetoken != null && sourcetoken.m_strength_ != TOKEN_RESET_) {
- // character to which we want to anchor is already tailored.
- // We need to construct a new token which will be the anchor point
- //m_source_.setCharAt(m_extraCurrent_ - 1, '\uFFFE');
- //m_source_.append(ch);
- //m_extraCurrent_ ++;
- //m_parsedToken_.m_charsLen_ ++;
- // grab before
- m_parsedToken_.m_charsOffset_ -= 10;
- m_parsedToken_.m_charsLen_ += 10;
- m_listHeader_[m_resultLength_] = new TokenListHeader();
- m_listHeader_[m_resultLength_].m_baseCE_
- = m_utilCEBuffer_[0] & 0xFFFFFF3F;
- if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) {
- m_listHeader_[m_resultLength_].m_baseContCE_
- = m_utilCEBuffer_[1];
- }
- else {
- m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
- }
- m_listHeader_[m_resultLength_].m_nextCE_ = 0;
- m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
- m_listHeader_[m_resultLength_].m_previousCE_ = 0;
- m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
- m_listHeader_[m_resultLength_].m_indirect_ = false;
- sourcetoken = new Token();
- initAReset(-1, sourcetoken);
- //}
- }
- return sourcetoken;
- }
-
- /**
- * Processing Description.
- * 1. Build a m_listHeader_. Each list has a header, which contains two lists
- * (positive and negative), a reset token, a baseCE, nextCE, and
- * previousCE. The lists and reset may be null.
- * 2. As you process, you keep a LAST pointer that points to the last token
- * you handled.
- * @param expand string offset, -1 for null strings
- * @param targetToken token to update
- * @return expandnext offset
- * @throws ParseException thrown when rules syntax failed
- */
- private int initAReset(int expand, Token targetToken) throws ParseException
- {
- if (m_resultLength_ == m_listHeader_.length - 1) {
- // Unfortunately, this won't work, as we store addresses of lhs in
- // token
- TokenListHeader temp[] = new TokenListHeader[m_resultLength_ << 1];
- System.arraycopy(m_listHeader_, 0, temp, 0, m_resultLength_ + 1);
- m_listHeader_ = temp;
- }
- // do the reset thing
- targetToken.m_rules_ = m_source_;
- targetToken.m_source_ = m_parsedToken_.m_charsLen_ << 24
- | m_parsedToken_.m_charsOffset_;
- targetToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24
- | m_parsedToken_.m_extensionOffset_;
- // keep the flags around so that we know about before
- targetToken.m_flags_ = m_parsedToken_.m_flags_;
-
- if (m_parsedToken_.m_prefixOffset_ != 0) {
- throwParseException(m_rules_, m_parsedToken_.m_charsOffset_ - 1);
- }
-
- targetToken.m_prefix_ = 0;
- // TODO: this should also handle reverse
- targetToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_;
- targetToken.m_strength_ = TOKEN_RESET_;
- targetToken.m_next_ = null;
- targetToken.m_previous_ = null;
- targetToken.m_CELength_ = 0;
- targetToken.m_expCELength_ = 0;
- targetToken.m_listHeader_ = m_listHeader_[m_resultLength_];
- m_listHeader_[m_resultLength_].m_first_ = null;
- m_listHeader_[m_resultLength_].m_last_ = null;
- m_listHeader_[m_resultLength_].m_first_ = null;
- m_listHeader_[m_resultLength_].m_last_ = null;
- m_listHeader_[m_resultLength_].m_reset_ = targetToken;
-
- /* 3 Consider each item: relation, source, and expansion:
- * e.g. ...< x / y ...
- * First convert all expansions into normal form. Examples:
- * If "xy" doesn't occur earlier in the list or in the UCA, convert
- * &xy * c * d * ... into &x * c/y * d * ...
- * Note: reset values can never have expansions, although they can
- * cause the very next item to have one. They may be contractions, if
- * they are found earlier in the list.
- */
- int result = 0;
- if (expand > 0) {
- // check to see if there is an expansion
- if (m_parsedToken_.m_charsLen_ > 1) {
- targetToken.m_source_ = ((expand
- - m_parsedToken_.m_charsOffset_ )
- << 24)
- | m_parsedToken_.m_charsOffset_;
- result = ((m_parsedToken_.m_charsLen_
- + m_parsedToken_.m_charsOffset_ - expand) << 24)
- | expand;
- }
- }
-
- m_resultLength_ ++;
- m_hashTable_.put(targetToken, targetToken);
- return result;
- }
-
- /**
- * Checks if an character is special
- * @param ch character to test
- * @return true if the character is special
- */
- private static final boolean isSpecialChar(char ch)
- {
- return (ch <= 0x002F && ch >= 0x0020) || (ch <= 0x003F && ch >= 0x003A)
- || (ch <= 0x0060 && ch >= 0x005B)
- || (ch <= 0x007E && ch >= 0x007D) || ch == 0x007B;
- }
-
- private
- UnicodeSet readAndSetUnicodeSet(String source, int start) throws ParseException
- {
- while(source.charAt(start) != '[') { /* advance while we find the first '[' */
- start++;
- }
- // now we need to get a balanced set of '[]'. The problem is that a set can have
- // many, and *end point to the first closing '['
- int noOpenBraces = 1;
- int current = 1; // skip the opening brace
- while(start+current < source.length() && noOpenBraces != 0) {
- if(source.charAt(start+current) == '[') {
- noOpenBraces++;
- } else if(source.charAt(start+current) == ']') { // closing brace
- noOpenBraces--;
- }
- current++;
- }
- //int nextBrace = -1;
-
- if(noOpenBraces != 0 || (/*nextBrace =*/ source.indexOf("]", start+current) /*']'*/) == -1) {
- throwParseException(m_rules_, start);
- }
- return new UnicodeSet(source.substring(start, start+current)); //uset_openPattern(start, current);
- }
-
- /** in C, optionarg is passed by reference to function.
- * We use a private int to simulate this.
- */
- private int m_optionarg_ = 0;
-
- private int readOption(String rules, int start, int optionend)
- {
- m_optionarg_ = 0;
- int i = 0;
- while (i < RULES_OPTIONS_.length) {
- String option = RULES_OPTIONS_[i].m_name_;
- int optionlength = option.length();
- if (rules.length() > start + optionlength
- && option.equalsIgnoreCase(rules.substring(start,
- start + optionlength))) {
- if (optionend - start > optionlength) {
- m_optionarg_ = start + optionlength;
- // start of the options, skip space
- while (m_optionarg_ < optionend && PatternProps.isWhiteSpace(rules.charAt(m_optionarg_)))
- { // eat whitespace
- m_optionarg_ ++;
- }
- }
- break;
- }
- i ++;
- }
- if(i == RULES_OPTIONS_.length) {
- i = -1;
- }
- return i;
- }
-
- /**
- * Reads and set collation options
- * @return TOKEN_SUCCESS if option is set correct, 0 otherwise
- * @exception ParseException thrown when options in rules are wrong
- */
- private byte readAndSetOption() throws ParseException
- {
- int start = m_current_ + 1; // skip opening '['
- int i = readOption(m_rules_, start, m_optionEnd_);
-
- int optionarg = m_optionarg_;
-
- if (i < 0) {
- throwParseException(m_rules_, start);
- }
-
- if (i < 7) {
- if (optionarg != 0) {
- for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length;
- j ++) {
- String subname = RULES_OPTIONS_[i].m_subOptions_[j];
- int size = optionarg + subname.length();
- if (m_rules_.length() > size
- && subname.equalsIgnoreCase(m_rules_.substring(
- optionarg, size))) {
- setOptions(m_options_, RULES_OPTIONS_[i].m_attribute_,
- RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]);
- return TOKEN_SUCCESS_MASK_;
- }
- }
- }
- throwParseException(m_rules_, optionarg);
- }
- else if (i == 7) { // variable top
- return TOKEN_SUCCESS_MASK_ | TOKEN_VARIABLE_TOP_MASK_;
- }
- else if (i == 8) { // rearrange
- return TOKEN_SUCCESS_MASK_;
- }
- else if (i == 9) { // before
- if (optionarg != 0) {
- for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length;
- j ++) {
- String subname = RULES_OPTIONS_[i].m_subOptions_[j];
- int size = optionarg + subname.length();
- if (m_rules_.length() > size
- && subname.equalsIgnoreCase(
- m_rules_.substring(optionarg,
- optionarg + subname.length()))) {
- return (byte)(TOKEN_SUCCESS_MASK_
- | RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]
- + 1);
- }
- }
- }
- throwParseException(m_rules_, optionarg);
- }
- else if (i == 10) { // top, we are going to have an array with
- // structures of limit CEs index to this array will be
- // src->parsedToken.indirectIndex
- m_parsedToken_.m_indirectIndex_ = 0;
- return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_;
- }
- else if (i < 13) { // first, last
- for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; j ++) {
- String subname = RULES_OPTIONS_[i].m_subOptions_[j];
- int size = optionarg + subname.length();
- if (m_rules_.length() > size
- && subname.equalsIgnoreCase(m_rules_.substring(optionarg,
- size))) {
- m_parsedToken_.m_indirectIndex_ = (char)(i - 10 + (j << 1));
- return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_;
- }
- }
- throwParseException(m_rules_, optionarg);
- }
- else if(i == 13 || i == 14) { // copy and remove are handled before normalization
- // we need to move end here
- int noOpenBraces = 1;
- m_current_++; // skip opening brace
- while(m_current_ < m_source_.length() && noOpenBraces != 0) {
- if(m_source_.charAt(m_current_) == '[') {
- noOpenBraces++;
- } else if(m_source_.charAt(m_current_) == ']') { // closing brace
- noOpenBraces--;
- }
- m_current_++;
- }
- m_optionEnd_ = m_current_-1;
- return TOKEN_SUCCESS_MASK_;
- }
- else if(i == 16) {
- m_current_ = m_optionarg_; // skip opening brace and name
- parseScriptReorder();
- return TOKEN_SUCCESS_MASK_;
- }
- else {
- throwParseException(m_rules_, optionarg);
- }
- return TOKEN_SUCCESS_MASK_; // we will never reach here.
- }
-
- /**
- * Set collation option
- * @param optionset option set to set
- * @param attribute type to set
- * @param value attribute value
- */
- private void setOptions(OptionSet optionset, int attribute, int value)
- {
- switch (attribute) {
- case RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_ :
- optionset.m_isHiragana4_
- = (value == RuleBasedCollator.AttributeValue.ON_);
- break;
- case RuleBasedCollator.Attribute.FRENCH_COLLATION_ :
- optionset.m_isFrenchCollation_
- = (value == RuleBasedCollator.AttributeValue.ON_);
- break;
- case RuleBasedCollator.Attribute.ALTERNATE_HANDLING_ :
- optionset.m_isAlternateHandlingShifted_
- = (value
- == RuleBasedCollator.AttributeValue.SHIFTED_);
- break;
- case RuleBasedCollator.Attribute.CASE_FIRST_ :
- optionset.m_caseFirst_ = value;
- break;
- case RuleBasedCollator.Attribute.CASE_LEVEL_ :
- optionset.m_isCaseLevel_
- = (value == RuleBasedCollator.AttributeValue.ON_);
- break;
- case RuleBasedCollator.Attribute.NORMALIZATION_MODE_ :
- if (value == RuleBasedCollator.AttributeValue.ON_) {
- value = Collator.CANONICAL_DECOMPOSITION;
- }
- optionset.m_decomposition_ = value;
- break;
- case RuleBasedCollator.Attribute.STRENGTH_ :
- optionset.m_strength_ = value;
- break;
- default :
- break;
- }
- }
-
- UnicodeSet getTailoredSet() throws ParseException
- {
- boolean startOfRules = true;
- UnicodeSet tailored = new UnicodeSet();
- String pattern;
- CanonicalIterator it = new CanonicalIterator("");
-
- m_parsedToken_.m_strength_ = TOKEN_UNSET_;
- int sourcelimit = m_source_.length();
- //int expandNext = 0;
-
- while (m_current_ < sourcelimit) {
- m_parsedToken_.m_prefixOffset_ = 0;
- if (parseNextToken(startOfRules) < 0) {
- // we have reached the end
- continue;
- }
- startOfRules = false;
- // The idea is to tokenize the rule set. For each non-reset token,
- // we add all the canonicaly equivalent FCD sequences
- if(m_parsedToken_.m_strength_ != TOKEN_RESET_) {
- it.setSource(m_source_.substring(
- m_parsedToken_.m_charsOffset_,
- m_parsedToken_.m_charsOffset_+m_parsedToken_.m_charsLen_));
- pattern = it.next();
- while(pattern != null) {
- if(Normalizer.quickCheck(pattern, Normalizer.FCD,0) != Normalizer.NO) {
- tailored.add(pattern);
- }
- pattern = it.next();
- }
- }
- }
- return tailored;
- }
-
- final private String preprocessRules(String rules) throws ParseException {
- int optionNumber = -1;
- int setStart = 0;
- int i = 0;
- while(i < rules.length()) {
- if(rules.charAt(i) == 0x005B) { // [
- optionNumber = readOption(rules, i+1, rules.length());
- setStart = m_optionarg_;
- if(optionNumber == 13) { /* copy - parts of UCA to tailoring */
- UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart);
- if(m_copySet_ == null) {
- m_copySet_ = newSet;
- } else {
- m_copySet_.addAll(newSet);
- }
- } else if(optionNumber == 14) {
- UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart);
- if(m_removeSet_ == null) {
- m_removeSet_ = newSet;
- } else {
- m_removeSet_.addAll(newSet);
- }
- } else if(optionNumber == 19) {
- int optionEndOffset = rules.indexOf(']', i) + 1;
- ULocale locale = ULocale.forLanguageTag(rules.substring(setStart, optionEndOffset-1));
- UResourceBundle bundle = UResourceBundle.getBundleInstance(
- ICUResourceBundle.ICU_BASE_NAME + "/coll", locale.getBaseName());
-
- String type = locale.getKeywordValue("collation");
- if(type == null){
- type = "standard";
- }
-
- String importRules = bundle.get("collations")
- .get(type)
- .get("Sequence")
- .getString();
-
- rules = rules.substring(0, i) + importRules + rules.substring(optionEndOffset);
- }
- }
- i++;
- }
- return rules;
- }
-
- /* This is the data that is used for non-script reordering codes. These _must_ be kept
- * in order that they are to be applied as defaults and in synch with the Collator.ReorderCodes statics.
- */
- static final String ReorderingTokensArray[] = {
- "SPACE",
- "PUNCT",
- "SYMBOL",
- "CURRENCY",
- "DIGIT",
- };
-
- int findReorderingEntry(String name) {
- for (int tokenIndex = 0; tokenIndex < ReorderingTokensArray.length; tokenIndex++) {
- if (name.equalsIgnoreCase(ReorderingTokensArray[tokenIndex])) {
- return tokenIndex + ReorderCodes.FIRST;
- }
- }
- return UScript.INVALID_CODE;
- }
-
- private void parseScriptReorder() throws ParseException {
- ArrayList<Integer> tempOrder = new ArrayList<Integer>();
- int end = m_rules_.indexOf(']', m_current_);
- if (end == -1) {
- return;
- }
- String tokenString = m_rules_.substring(m_current_, end);
- String[] tokens = tokenString.split("\\s+", 0);
- String token;
- for (int tokenIndex = 0; tokenIndex < tokens.length; tokenIndex++) {
- token = tokens[tokenIndex];
- int reorderCode = findReorderingEntry(token);
- if (reorderCode == UScript.INVALID_CODE) {
- reorderCode = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, token);
- if (reorderCode < 0) {
- throw new ParseException(m_rules_, tokenIndex);
- }
- }
- tempOrder.add(reorderCode);
- }
- m_options_.m_scriptOrder_ = new int[tempOrder.size()];
- for(int i = 0; i < tempOrder.size(); i++) {
- m_options_.m_scriptOrder_[i] = tempOrder.get(i);
- }
- }
-}
/**
*******************************************************************************
-* Copyright (C) 1996-2014, International Business Machines Corporation and *
-* others. All Rights Reserved. *
+* Copyright (C) 1996-2014, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;
import com.ibm.icu.impl.ICUDebug;
import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.coll.CollationData;
+import com.ibm.icu.impl.coll.CollationRoot;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.util.Freezable;
import com.ibm.icu.util.ULocale;
* difference between large and small Kana. A tertiary difference is ignored
* when there is a primary or secondary difference anywhere in the strings.
* <li>QUATERNARY strength: When punctuation is ignored
-* <a href="http://www.icu-project.org/userguide/Collate_Concepts.html#Ignoring_Punctuation">
-* (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
+* (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
+* Ignoring Punctuations in the User Guide</a>) at PRIMARY to TERTIARY
* strength, an additional strength level can
* be used to distinguish words with and without punctuation (for example,
* "ab" < "a-b" < "aB").
* a comparison or before getting a CollationKey.</p>
*
* <p>For more information about the collation service see the
-* <a href="http://www.icu-project.org/userguide/Collate_Intro.html">users
-* guide</a>.</p>
+* <a href="http://userguide.icu-project.org/collation">User Guide</a>.</p>
*
* <p>Examples of use
* <pre>
* @author Syn Wee Quek
* @stable ICU 2.8
*/
-public abstract class Collator implements Comparator<Object>, Freezable<Collator>
+public abstract class Collator implements Comparator<Object>, Freezable<Collator>, Cloneable
{
// public data members ---------------------------------------------------
/**
* {@icu} Fourth level collator strength value.
* When punctuation is ignored
- * <a href="http://www.icu-project.org/userguide/Collate_Concepts.html#Ignoring_Punctuation">
- * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
+ * (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
+ * Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
* strength, an additional strength level can
* be used to distinguish words with and without punctuation.
* See class documentation for more explanation.
// public methods --------------------------------------------------------
+ /**
+ * Compares the equality of two Collator objects. Collator objects are equal if they have the same
+ * collation (sorting & searching) behavior.
+ *
+ * <p>The base class checks for null and for equal types.
+ * Subclasses should override.
+ *
+ * @param obj the Collator to compare to.
+ * @return true if this Collator has exactly the same collation behavior as obj, false otherwise.
+ * @stable ICU 2.8
+ */
+ @Override
+ public boolean equals(Object obj) {
+ // Subclasses: Call this method and then add more specific checks.
+ return this == obj || (obj != null && getClass() == obj.getClass());
+ }
+
// public setters --------------------------------------------------------
+ private void checkNotFrozen() {
+ if (isFrozen()) {
+ throw new UnsupportedOperationException("Attempt to modify frozen Collator");
+ }
+ }
+
/**
- * Sets this Collator's strength property. The strength property
+ * Sets this Collator's strength attribute. The strength attribute
* determines the minimum level of difference considered significant
* during comparison.</p>
*
- * <p>The default strength for the Collator is TERTIARY, unless specified
- * otherwise by the locale used to create the Collator.</p>
+ * <p>The base class method does nothing. Subclasses should override it if appropriate.
*
* <p>See the Collator class description for an example of use.</p>
* @param newStrength the new strength value.
* @see #TERTIARY
* @see #QUATERNARY
* @see #IDENTICAL
- * @throws IllegalArgumentException if the new strength value is not one
- * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
+ * @throws IllegalArgumentException if the new strength value is not valid.
* @stable ICU 2.8
*/
public void setStrength(int newStrength)
{
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- if ((newStrength != PRIMARY) &&
- (newStrength != SECONDARY) &&
- (newStrength != TERTIARY) &&
- (newStrength != QUATERNARY) &&
- (newStrength != IDENTICAL)) {
- throw new IllegalArgumentException("Incorrect comparison level.");
- }
- m_strength_ = newStrength;
+ checkNotFrozen();
}
-
+
/**
- * @internal
+ * @return this, for chaining
+ * @internal Used in UnicodeTools
* @deprecated This API is ICU internal only.
*/
@Deprecated
/**
* Sets the decomposition mode of this Collator. Setting this
- * decomposition property with CANONICAL_DECOMPOSITION allows the
+ * decomposition attribute with CANONICAL_DECOMPOSITION allows the
* Collator to handle un-normalized text properly, producing the
* same results as if the text were normalized. If
* NO_DECOMPOSITION is set, it is the user's responsibility to
* text normalization, most locales set NO_DECOMPOSITION as the
* default decomposition mode.</p>
*
- * The default decompositon mode for the Collator is
- * NO_DECOMPOSITON, unless specified otherwise by the locale used
- * to create the Collator.</p>
+ * <p>The base class method does nothing. Subclasses should override it if appropriate.
*
* <p>See getDecomposition for a description of decomposition
* mode.</p>
*/
public void setDecomposition(int decomposition)
{
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
- internalSetDecomposition(decomposition);
- }
-
- /**
- * Internal set decompostion call to workaround frozen state because of self-modification
- * in the RuleBasedCollator. This method <b>must</b> only be called by code that has
- * passed the frozen check already <b>and</b> has the lock if the Collator is frozen.
- * Better still this method should go away and RuleBasedCollator.getSortKeyBytes()
- * should be fixed to not self-modify.
- * @param decomposition
- * @internal
- */
- protected void internalSetDecomposition(int decomposition)
- {
- if ((decomposition != NO_DECOMPOSITION) &&
- (decomposition != CANONICAL_DECOMPOSITION)) {
- throw new IllegalArgumentException("Wrong decomposition mode.");
- }
- m_decomposition_ = decomposition;
- if (decomposition != NO_DECOMPOSITION) {
- // ensure the FCD data is initialized
- Norm2AllModes.getFCDNormalizer2();
- }
+ checkNotFrozen();
}
/**
*/
public void setReorderCodes(int... order)
{
- throw new UnsupportedOperationException();
+ throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
}
// public getters --------------------------------------------------------
* The default locale is determined by java.util.Locale.getDefault().
* @return the Collator for the default locale (for example, en_US) if it
* is created successfully. Otherwise if there is no Collator
- * associated with the current locale, the default UCA collator
+ * associated with the current locale, the root collator
* will be returned.
* @see java.util.Locale#getDefault()
* @see #getInstance(Locale)
* @param locale the desired locale.
* @return Collator for the desired locale if it is created successfully.
* Otherwise if there is no Collator
- * associated with the current locale, a default UCA collator will
+ * associated with the current locale, the root collator will
* be returned.
* @see java.util.Locale
* @see java.util.ResourceBundle
* @param locale the desired locale.
* @return Collator for the desired locale if it is created successfully.
* Otherwise if there is no Collator
- * associated with the current locale, a default UCA collator will
+ * associated with the current locale, the root collator will
* be returned.
* @see java.util.Locale
* @see java.util.ResourceBundle
LinkedList<String> values = new LinkedList<String>();
UResourceBundle bundle = UResourceBundle.getBundleInstance(
- ICUResourceBundle.ICU_BASE_NAME + "/coll", baseLoc);
+ ICUResourceBundle.ICU_COLLATION_BASE_NAME, baseLoc);
String defcoll = null;
while (bundle != null) {
* applications who wish to cache collators, or otherwise reuse
* collators when possible. The functional equivalent may change
* over time. For more information, please see the <a
- * href="http://www.icu-project.org/userguide/locale.html#services">
+ * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
* Locales and Services</a> section of the ICU User Guide.
* @param keyword a particular keyword as enumerated by
* getKeywords.
}
/**
- * Returns this Collator's strength property. The strength property
+ * Returns this Collator's strength attribute. The strength attribute
* determines the minimum level of difference considered significant.
* </p>
* {@icunote} This can return QUATERNARY strength, which is not supported by the
* <p>
* See the Collator class description for more details.
* </p>
- * @return this Collator's current strength property.
+ * <p>The base class method always returns {@link #TERTIARY}.
+ * Subclasses should override it if appropriate.
+ *
+ * @return this Collator's current strength attribute.
* @see #setStrength
* @see #PRIMARY
* @see #SECONDARY
*/
public int getStrength()
{
- return m_strength_;
+ return TERTIARY;
}
/**
* <p>
* See the Collator class description for more details.
* </p>
+ * <p>The base class method always returns {@link #NO_DECOMPOSITION}.
+ * Subclasses should override it if appropriate.
+ *
* @return the decomposition mode
* @see #setDecomposition
* @see #NO_DECOMPOSITION
*/
public int getDecomposition()
{
- return m_decomposition_;
+ return NO_DECOMPOSITION;
}
// public other methods -------------------------------------------------
* in this collator.
* @return a pointer to a UnicodeSet object containing all the
* code points and sequences that may sort differently than
- * in the UCA.
+ * in the root collator.
* @stable ICU 2.4
*/
public UnicodeSet getTailoredSet()
* @return Returns an integer value. Value is less than zero if source is
* less than target, value is zero if source and target are equal,
* value is greater than zero if source is greater than target.
- * @throws ClassCastException thrown if either arguments cannot be cast to String.
+ * @throws ClassCastException thrown if either arguments cannot be cast to CharSequence.
* @stable ICU 4.2
*/
public int compare(Object source, Object target) {
- return compare((String)source, (String)target);
+ return doCompare((CharSequence)source, (CharSequence)target);
+ }
+
+ /**
+ * Compares two CharSequences.
+ * The base class just calls compare(left.toString(), right.toString()).
+ * Subclasses should instead implement this method and have the String API call this method.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ protected int doCompare(CharSequence left, CharSequence right) {
+ return compare(left.toString(), right.toString());
}
/**
RawCollationKey key);
/**
- * {@icu} Variable top is a two byte primary value which causes all the codepoints
- * with primary values that are less or equal than the variable top to be
- * shifted when alternate handling is set to SHIFTED.
- * </p>
- * <p>
- * Sets the variable top to a collation element value of a string supplied.
- * </p>
+ * {@icu} Sets the variable top to the top of the specified reordering group.
+ * The variable top determines the highest-sorting character
+ * which is affected by the alternate handling behavior.
+ * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
+ *
+ * <p>The base class implementation throws an UnsupportedOperationException.
+ * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
+ * Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
+ * or Collator.ReorderCodes.DEFAULT to restore the default max variable group
+ * @return this
+ * @see #getMaxVariable
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Collator setMaxVariable(int group) {
+ throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
+ }
+
+ /**
+ * {@icu} Returns the maximum reordering group whose characters are affected by
+ * the alternate handling behavior.
+ *
+ * <p>The base class implementation returns Collator.ReorderCodes.PUNCTUATION.
+ * @return the maximum variable reordering group.
+ * @see #setMaxVariable
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getMaxVariable() {
+ return Collator.ReorderCodes.PUNCTUATION;
+ }
+
+ /**
+ * {@icu} Sets the variable top to the primary weight of the specified string.
+ *
+ * <p>Beginning with ICU 53, the variable top is pinned to
+ * the top of one of the supported reordering groups,
+ * and it must not be beyond the last of those groups.
+ * See {@link #setMaxVariable(int)}.
+ *
* @param varTop one or more (if contraction) characters to which the
* variable top should be set
- * @return a int value containing the value of the variable top in upper 16
- * bits. Lower 16 bits are undefined.
- * @throws IllegalArgumentException is thrown if varTop argument is not
- * a valid variable top element. A variable top element is
- * invalid when it is a contraction that does not exist in the
- * Collation order or when the PRIMARY strength collation
- * element for the variable top has more than two bytes
+ * @return variable top primary weight
+ * @exception IllegalArgumentException
+ * is thrown if varTop argument is not a valid variable top element. A variable top element is
+ * invalid when
+ * <ul>
+ * <li>it is a contraction that does not exist in the Collation order
+ * <li>the variable top is beyond
+ * the last reordering group supported by setMaxVariable()
+ * <li>when the varTop argument is null or zero in length.
+ * </ul>
* @see #getVariableTop
* @see RuleBasedCollator#setAlternateHandlingShifted
- * @stable ICU 2.6
+ * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
*/
public abstract int setVariableTop(String varTop);
/**
- * {@icu} Returns the variable top value of a Collator.
- * Lower 16 bits are undefined and should be ignored.
- * @return the variable top value of a Collator.
- * @see #setVariableTop
+ * {@icu} Gets the variable top value of a Collator.
+ *
+ * @return the variable top primary weight
+ * @see #getMaxVariable
* @stable ICU 2.6
*/
public abstract int getVariableTop();
/**
- * {@icu} Sets the variable top to a collation element value supplied.
- * Variable top is set to the upper 16 bits.
- * Lower 16 bits are ignored.
- * @param varTop Collation element value, as returned by setVariableTop or
- * getVariableTop
+ * {@icu} Sets the variable top to the specified primary weight.
+ *
+ * <p>Beginning with ICU 53, the variable top is pinned to
+ * the top of one of the supported reordering groups,
+ * and it must not be beyond the last of those groups.
+ * See {@link #setMaxVariable(int)}.
+ *
+ * @param varTop primary weight, as returned by setVariableTop or getVariableTop
* @see #getVariableTop
- * @see #setVariableTop
- * @stable ICU 2.6
+ * @see #setVariableTop(String)
+ * @deprecated ICU 53 Call setMaxVariable() instead.
*/
public abstract void setVariableTop(int varTop);
*/
public int[] getReorderCodes()
{
- throw new UnsupportedOperationException();
+ throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
}
/**
* Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
* codes are grouped and must reorder together.
*
- * @param reorderCode code for which equivalents to be retrieved
+ * @param reorderCode The reorder code to determine equivalence for.
* @return the set of all reorder codes in the same group as the given reorder code.
* @see #setReorderCodes
* @see #getReorderCodes
* @see UScript
* @stable ICU 4.8
*/
- public static int[] getEquivalentReorderCodes(int reorderCode)
- {
- throw new UnsupportedOperationException();
+ public static int[] getEquivalentReorderCodes(int reorderCode) {
+ CollationData baseData = CollationRoot.getData();
+ return baseData.getEquivalentScripts(reorderCode);
}
throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
}
- // protected constructor -------------------------------------------------
-
/**
* Empty default constructor to make javadocs happy
* @stable ICU 2.4
{
}
- // package private methods -----------------------------------------------
-
- // private data members --------------------------------------------------
-
- /**
- * Collation strength
- */
- private int m_strength_ = TERTIARY;
-
- /**
- * Decomposition mode
- */
- private int m_decomposition_ = CANONICAL_DECOMPOSITION;
-
private static final boolean DEBUG = ICUDebug.enabled("collator");
- // private methods -------------------------------------------------------
-
- // end registry stuff
-
// -------- BEGIN ULocale boilerplate --------
/**
* contains a partial preview implementation. The * <i>actual</i>
* locale is returned correctly, but the <i>valid</i> locale is
* not, in most cases.
+ *
+ * <p>The base class method always returns {@link ULocale#ROOT}.
+ * Subclasses should override it if appropriate.
+ *
* @param type type of information requested, either {@link
* com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
* com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
* @draft ICU 2.8 (retain)
* @provisional This API might change or be removed in a future release.
*/
- public final ULocale getLocale(ULocale.Type type) {
- return type == ULocale.ACTUAL_LOCALE ?
- this.actualLocale : this.validLocale;
+ public ULocale getLocale(ULocale.Type type) {
+ return ULocale.ROOT;
}
- /*
+ /**
* Set information about the locales that were used to create this
* object. If the object was not constructed from locale data,
* both arguments should be set to null. Otherwise, neither
* less specific than the valid locale. This method is intended
* for use by factories or other entities that create objects of
* this class.
+ *
+ * <p>The base class method does nothing. Subclasses should override it if appropriate.
+ *
* @param valid the most specific locale containing any resource
* data, or null
* @param actual the locale containing data used to construct this
* @see com.ibm.icu.util.ULocale#VALID_LOCALE
* @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
*/
- final void setLocale(ULocale valid, ULocale actual) {
- // Change the following to an assertion later
- ///CLOVER:OFF
- // The following would not happen since the method is called
- // by other protected functions that checks and makes sure that
- // valid and actual are not null before passing
- if ((valid == null) != (actual == null)) {
- throw new IllegalArgumentException();
- }
- ///CLOVER:ON
- // Another check we could do is that the actual locale is at
- // the same level or less specific than the valid locale.
- this.validLocale = valid;
- this.actualLocale = actual;
- }
-
- /*
- * The most specific locale containing any resource data, or null.
- * @see com.ibm.icu.util.ULocale
- */
- private ULocale validLocale;
-
- /*
- * The locale containing data used to construct this object, or
- * null.
- * @see com.ibm.icu.util.ULocale
- */
- private ULocale actualLocale;
+ void setLocale(ULocale valid, ULocale actual) {}
// -------- END ULocale boilerplate --------
}
+++ /dev/null
-/**
- *******************************************************************************
- * Copyright (C) 1996-2013, International Business Machines Corporation and
- * others. All Rights Reserved.
- *******************************************************************************
- */
-package com.ibm.icu.text;
-
-import java.io.BufferedInputStream;
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-
-import com.ibm.icu.impl.ICUBinary;
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.IntTrie;
-import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
-import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
-import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
-import com.ibm.icu.util.Output;
-import com.ibm.icu.util.VersionInfo;
-
-/**
- * <p>
- * Internal reader class for ICU data file uca.icu containing Unicode Collation Algorithm data.
- * </p>
- * <p>
- * This class simply reads uca.icu, authenticates that it is a valid ICU data file and split its contents up into blocks
- * of data for use in <a href=Collator.html>com.ibm.icu.text.Collator</a>.
- * </p>
- * <p>
- * uca.icu which is in big-endian format is jared together with this package.
- * </p>
- *
- * @author Syn Wee Quek
- * @since release 2.2, April 18 2002
- */
-
-final class CollatorReader {
- static char[] read(RuleBasedCollator rbc, UCAConstants ucac,
- LeadByteConstants leadByteConstants, Output<Integer> maxUCAContractionLength)
- throws IOException {
- InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu");
- BufferedInputStream b = new BufferedInputStream(i, 90000);
- CollatorReader reader = new CollatorReader(b);
- char[] ucaContractions = reader.readImp(rbc, ucac, leadByteConstants, maxUCAContractionLength);
- b.close();
- return ucaContractions;
- }
-
- public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {
- return new InputStream() {
- public int read() throws IOException {
- if (!buf.hasRemaining()) {
- return -1;
- }
- return buf.get() & 0xff;
- }
-
- public int read(byte[] bytes, int off, int len) throws IOException {
- len = Math.min(len, buf.remaining());
- buf.get(bytes, off, len);
- return len;
- }
- };
- }
-
- static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
- final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
- int dataLength = data.remaining();
- // TODO: Change the rest of this class to use the ByteBuffer directly, rather than
- // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
- // Consider changing ICUBinary to also work with a ByteBuffer.
- CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
- if (dataLength > MIN_BINARY_DATA_SIZE_) {
- reader.readImp(rbc, null, null, null);
- } else {
- reader.readHeader(rbc, null);
- reader.readOptions(rbc);
- // duplicating UCA_'s data
- rbc.setWithUCATables();
- }
- }
-
- static InverseUCA getInverseUCA() throws IOException {
- InverseUCA result = null;
- InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu");
- // try {
- // String invdat = "/com/ibm/icu/impl/data/invuca.icu";
- // InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
- BufferedInputStream b = new BufferedInputStream(i, 110000);
- result = CollatorReader.readInverseUCA(b);
- b.close();
- i.close();
- return result;
- // } catch (Exception e) {
- // throw new RuntimeException(e.getMessage());
- // }
- }
-
- // protected constructor ---------------------------------------------
-
- /**
- * <p>
- * Protected constructor.
- * </p>
- *
- * @param inputStream
- * ICU collator file input stream
- * @exception IOException
- * throw if data file fails authentication
- */
- private CollatorReader(InputStream inputStream) throws IOException {
- this(inputStream, true);
- /*
- * byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_); // weiv: check
- * that we have the correct Unicode version in // binary files VersionInfo UCDVersion =
- * UCharacter.getUnicodeVersion(); if(UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] !=
- * UCDVersion.getMinor()) { throw new IOException(WRONG_UNICODE_VERSION_ERROR_); } m_dataInputStream_ = new
- * DataInputStream(inputStream);
- */
- }
-
- /**
- * <p>
- * Protected constructor.
- * </p>
- *
- * @param inputStream
- * ICU uprops.icu file input stream
- * @param readICUHeader
- * flag to indicate if the ICU header has to be read
- * @exception IOException
- * throw if data file fails authentication
- */
- private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
- if (readICUHeader) {
- ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
- // Note: In ICU 51 and earlier,
- // we used to check that the UCA data version (readHeader() return value)
- // matches the UCD version (UCharacter.getUnicodeVersion())
- // but that complicated version updates, and
- // a mismatch is "only" a problem for handling canonical equivalence.
- // It need not be a fatal error.
- // throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
- }
- m_dataInputStream_ = new DataInputStream(inputStream);
- }
-
- // protected methods -------------------------------------------------
-
- /**
- * Read and break up the header stream of data passed in as arguments into meaningful Collator data.
- *
- * @param rbc
- * RuleBasedCollator to populate with header information
- * @exception IOException
- * thrown when there's a data error.
- */
- private void readHeader(RuleBasedCollator rbc, Output<Integer> maxUCAContractionLength) throws IOException {
- m_size_ = m_dataInputStream_.readInt();
- // all the offsets are in bytes
- // to get the address add to the header address and cast properly
- // Default options int options
- m_headerSize_ = m_dataInputStream_.readInt(); // start of options
- int readcount = 8; // for size and headersize
- // structure which holds values for indirect positioning and implicit
- // ranges
- m_UCAConstOffset_ = m_dataInputStream_.readInt();
- readcount += 4;
- // this one is needed only for UCA, to copy the appropriate
- // contractions
- /*int contractionUCACombos =*/ m_dataInputStream_.readInt();
- readcount += 4;
- // reserved for future use
- m_dataInputStream_.skipBytes(4);
- readcount += 4;
- // const uint8_t *mappingPosition;
- int mapping = m_dataInputStream_.readInt();
- readcount += 4;
- // uint32_t *expansion;
- rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
- readcount += 4;
- // UChar *contractionIndex;
- rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
- readcount += 4;
- // uint32_t *contractionCEs;
- int contractionCE = m_dataInputStream_.readInt();
- readcount += 4;
- // needed for various closures int contractionSize
- int contractionSize = m_dataInputStream_.readInt();
- readcount += 4;
- // array of last collation element in expansion
- int expansionEndCE = m_dataInputStream_.readInt();
- readcount += 4;
- // array of maximum expansion size corresponding to the expansion
- // collation elements with last element in expansionEndCE
- int expansionEndCEMaxSize = m_dataInputStream_.readInt();
- readcount += 4;
- // size of endExpansionCE int expansionEndCESize
- /* int endExpansionCECount = */m_dataInputStream_.readInt();
- readcount += 4;
- // hash table of unsafe code points
- int unsafe = m_dataInputStream_.readInt();
- readcount += 4;
- // hash table of final code points in contractions.
- int contractionEnd = m_dataInputStream_.readInt();
- readcount += 4;
- // int CEcount = m_dataInputStream_.readInt();
- int contractionUCACombosSize = m_dataInputStream_.readInt();
- readcount += 4;
- // is jamoSpecial
- rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
- readcount++;
- // isBigEndian and charSetFamily
- m_dataInputStream_.skipBytes(2);
- readcount += 2;
- int contractionUCACombosWidth = m_dataInputStream_.readByte();
- if (maxUCAContractionLength != null) {
- maxUCAContractionLength.value = contractionUCACombosWidth;
- }
- // We want to be able to output this value if it's not 0.
- assert contractionUCACombosWidth == 0 || maxUCAContractionLength != null;
- readcount += 1;
- rbc.m_version_ = readVersion(m_dataInputStream_);
- readcount += 4;
- rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
- readcount += 4;
- rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
- readcount += 4;
- /*VersionInfo formatVersion =*/ readVersion(m_dataInputStream_);
- readcount += 4;
- rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
- readcount += 4;
- rbc.m_leadByteToScripts = m_dataInputStream_.readInt();
- readcount += 4;
-
- // byte charsetName[] = new byte[32]; // for charset CEs
- m_dataInputStream_.skipBytes(32);
- readcount += 32;
-
- m_dataInputStream_.skipBytes(44); // for future use
- readcount += 44;
- if (m_headerSize_ < readcount) {
- // /CLOVER:OFF
- throw new IOException("Internal Error: Header size error");
- // /CLOVER:ON
- }
- m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
-
- if (rbc.m_contractionOffset_ == 0) { // contraction can be null
- rbc.m_contractionOffset_ = mapping;
- contractionCE = mapping;
- }
- m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
- m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
- m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
- m_contractionCESize_ = mapping - contractionCE;
- // m_trieSize_ = expansionEndCE - mapping;
- m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
- m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
- m_unsafeSize_ = contractionEnd - unsafe;
- // m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
- m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;
-
- // treat it as normal collator first
- // for normal collator there is no UCA contraction
- // contractions (UChar[contractionSize] + CE[contractionSize])
- m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
-
- rbc.m_contractionOffset_ >>= 1; // casting to ints
- rbc.m_expansionOffset_ >>= 2; // casting to chars
- }
-
- /**
- * Read and break up the collation options passed in the stream of data and update the argument Collator with the
- * results
- *
- * @param rbc
- * RuleBasedCollator to populate
- * @exception IOException
- * thrown when there's a data error.
- */
- private void readOptions(RuleBasedCollator rbc) throws IOException {
- int readcount = 0;
- rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
- readcount += 4;
- rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
- readcount += 4;
- rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
- readcount += 4;
- rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
- readcount += 4;
- // rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
- // == RuleBasedCollator.AttributeValue.ON_);
- int defaultIsCaseLevel = m_dataInputStream_.readInt();
- rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_);
- readcount += 4;
- int value = m_dataInputStream_.readInt();
- readcount += 4;
- if (value == RuleBasedCollator.AttributeValue.ON_) {
- value = Collator.CANONICAL_DECOMPOSITION;
- } else {
- value = Collator.NO_DECOMPOSITION;
- }
- rbc.m_defaultDecomposition_ = value;
- rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
- readcount += 4;
- rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
- readcount += 4;
- rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
- readcount += 4;
- m_dataInputStream_.skip(60); // reserved for future use
- readcount += 60;
- m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
- if (m_optionSize_ < readcount) {
- // /CLOVER:OFF
- throw new IOException("Internal Error: Option size error");
- // /CLOVER:ON
- }
- }
-
- /**
- * Read and break up the stream of data passed in as arguments into meaningful Collator data.
- *
- * @param rbc
- * RuleBasedCollator to populate
- * @param UCAConst
- * object to fill up with UCA constants if we are reading the UCA collator, if not use a null
- * @param leadByteConstants
- * @return UCAContractions array filled up with the UCA contractions if we are reading the UCA collator
- * @exception IOException
- * thrown when there's a data error.
- */
- private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst,
- RuleBasedCollator.LeadByteConstants leadByteConstants,
- Output<Integer> maxUCAContractionLength) throws IOException {
- char ucaContractions[] = null; // return result
-
- readHeader(rbc, maxUCAContractionLength);
- // header size has been checked by readHeader
- int readcount = m_headerSize_;
- // option size has been checked by readOptions
- readOptions(rbc);
- readcount += m_optionSize_;
- m_expansionSize_ >>= 2;
- rbc.m_expansion_ = new int[m_expansionSize_];
- for (int i = 0; i < m_expansionSize_; i++) {
- rbc.m_expansion_[i] = m_dataInputStream_.readInt();
- }
- readcount += (m_expansionSize_ << 2);
- if (m_contractionIndexSize_ > 0) {
- m_contractionIndexSize_ >>= 1;
- rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
- for (int i = 0; i < m_contractionIndexSize_; i++) {
- rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
- }
- readcount += (m_contractionIndexSize_ << 1);
- m_contractionCESize_ >>= 2;
- rbc.m_contractionCE_ = new int[m_contractionCESize_];
- for (int i = 0; i < m_contractionCESize_; i++) {
- rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
- }
- readcount += (m_contractionCESize_ << 2);
- }
- rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance());
- if (!rbc.m_trie_.isLatin1Linear()) {
- throw new IOException("Data corrupted, " + "Collator Tries expected to have linear "
- + "latin one data arrays");
- }
- readcount += rbc.m_trie_.getSerializedDataSize();
- m_expansionEndCESize_ >>= 2;
- rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
- for (int i = 0; i < m_expansionEndCESize_; i++) {
- rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
- }
- readcount += (m_expansionEndCESize_ << 2);
- rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
- for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
- rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
- }
- readcount += m_expansionEndCEMaxSizeSize_;
- rbc.m_unsafe_ = new byte[m_unsafeSize_];
- for (int i = 0; i < m_unsafeSize_; i++) {
- rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
- }
- readcount += m_unsafeSize_;
- if (UCAConst != null) {
- // we are reading the UCA
- // unfortunately the UCA offset in any collator data is not 0 and
- // only refers to the UCA data
- // m_contractionSize_ -= m_UCAValuesSize_;
- m_contractionSize_ = m_UCAConstOffset_ - readcount;
- } else {
- m_contractionSize_ = m_size_ - readcount;
- }
- rbc.m_contractionEnd_ = new byte[m_contractionSize_];
- for (int i = 0; i < m_contractionSize_; i++) {
- rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
- }
- readcount += m_contractionSize_;
- if (UCAConst != null) {
- UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
- int readUCAConstcount = 4;
- UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
- UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
- readUCAConstcount += 4;
-
- readcount += readUCAConstcount;
-
- int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
- assert resultsize == m_UCAcontractionSize_ / 2;
- ucaContractions = new char[resultsize];
- for (int i = 0; i < resultsize; i++) {
- ucaContractions[i] = m_dataInputStream_.readChar();
- }
- readcount += m_UCAcontractionSize_;
- }
-
- if (leadByteConstants != null) {
- readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
- leadByteConstants.read(m_dataInputStream_);
- readcount += leadByteConstants.getSerializedDataSize();
- }
-
- if (readcount != m_size_) {
- // /CLOVER:OFF
- throw new IOException("Internal Error: Data file size error");
- // /CLOVER:ON
- }
- return ucaContractions;
- }
-
- /**
- * Reads in the inverse uca data
- *
- * @param input
- * input stream with the inverse uca data
- * @return an object containing the inverse uca data
- * @exception IOException
- * thrown when error occurs while reading the inverse uca
- */
- private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
- ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, INVERSE_UCA_AUTHENTICATE_);
-
- // TODO: Check that the invuca data version (readHeader() return value)
- // matches the ucadata version.
- // throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
-
- CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
- DataInputStream input = new DataInputStream(inputStream);
- input.readInt(); // bytesize
- int tablesize = input.readInt(); // in int size
- int contsize = input.readInt(); // in char size
- input.readInt(); // table in bytes
- input.readInt(); // conts in bytes
- result.m_UCA_version_ = readVersion(input);
- input.skipBytes(8); // skip padding
-
- int size = tablesize * 3; // one column for each strength
- result.m_table_ = new int[size];
- result.m_continuations_ = new char[contsize];
-
- for (int i = 0; i < size; i++) {
- result.m_table_[i] = input.readInt();
- }
- for (int i = 0; i < contsize; i++) {
- result.m_continuations_[i] = input.readChar();
- }
- input.close();
- return result;
- }
-
- /**
- * Reads four bytes from the input and returns a VersionInfo object. Use it to read different collator versions.
- *
- * @param input
- * already instantiated DataInputStream, positioned at the start of four version bytes
- * @return a ready VersionInfo object
- * @throws IOException
- * thrown when error occurs while reading version bytes
- */
-
- protected static VersionInfo readVersion(DataInputStream input) throws IOException {
- byte[] version = new byte[4];
- version[0] = input.readByte();
- version[1] = input.readByte();
- version[2] = input.readByte();
- version[3] = input.readByte();
-
- VersionInfo result = VersionInfo.getInstance((int) version[0], (int) version[1], (int) version[2],
- (int) version[3]);
-
- return result;
- }
-
- // private inner class -----------------------------------------------
-
- // private variables -------------------------------------------------
-
- /**
- * Authenticate uca data format version
- */
- private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
- public boolean isDataVersionAcceptable(byte version[]) {
- return version[0] == DATA_FORMAT_VERSION_[0] && version[1] >= DATA_FORMAT_VERSION_[1];
- // Too harsh
- // && version[1] == DATA_FORMAT_VERSION_[1]
- // && version[2] == DATA_FORMAT_VERSION_[2]
- // && version[3] == DATA_FORMAT_VERSION_[3];
- }
- };
-
- /**
- * Authenticate uca data format version
- */
- private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
- public boolean isDataVersionAcceptable(byte version[]) {
- return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
- && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
- }
- };
-
- /**
- * Data input stream for uca.icu
- */
- private DataInputStream m_dataInputStream_;
-
- /**
- * File format version and id that this class understands. No guarantees are made if a older version is used
- */
- private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 };
- private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
- /**
- * Inverse UCA file format version and id that this class understands. No guarantees are made if a older version is
- * used
- */
- private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
- private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };
-
- /**
- * Wrong unicode version error string
- */
- // private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";
-
- /**
- * Size of expansion table in bytes
- */
- private int m_expansionSize_;
- /**
- * Size of contraction index table in bytes
- */
- private int m_contractionIndexSize_;
- /**
- * Size of contraction table in bytes
- */
- private int m_contractionCESize_;
- /*
- * Size of the Trie in bytes
- */
- // private int m_trieSize_;
- /**
- * Size of the table that contains information about collation elements that end with an expansion
- */
- private int m_expansionEndCESize_;
- /**
- * Size of the table that contains information about the maximum size of collation elements that end with a
- * particular expansion CE corresponding to the ones in expansionEndCE
- */
- private int m_expansionEndCEMaxSizeSize_;
- /**
- * Size of the option table that contains information about the collation options
- */
- private int m_optionSize_;
- /**
- * Size of the whole data file minusing the ICU header
- */
- private int m_size_;
- /**
- * Size of the collation data header
- */
- private int m_headerSize_;
- /**
- * Size of the table that contains information about the "Unsafe" codepoints
- */
- private int m_unsafeSize_;
- /**
- * Size in bytes of the table that contains information about codepoints that ends with a contraction
- */
- private int m_contractionSize_;
- /**
- * Size of the table that contains UCA contraction information in bytes
- */
- private int m_UCAcontractionSize_;
- /**
- * Offset of the UCA Const
- */
- private int m_UCAConstOffset_;
-
- // private methods ---------------------------------------------------
-
-}
/**
*******************************************************************************
-* Copyright (C) 2003-2010, International Business Machines Corporation and *
-* others. All Rights Reserved. *
+* Copyright (C) 2003-2014, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
*/
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.ICUService;
import com.ibm.icu.impl.ICUService.Factory;
+import com.ibm.icu.impl.coll.CollationLoader;
+import com.ibm.icu.impl.coll.CollationTailoring;
import com.ibm.icu.text.Collator.CollatorFactory;
+import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
final class CollatorServiceShim extends Collator.ServiceShim {
throw new MissingResourceException("Could not locate Collator data", "", "");
///CLOVER:ON
}
- coll = (Collator) coll.clone();
- coll.setLocale(actualLoc[0], actualLoc[0]); // services make no distinction between actual & valid
- return coll;
+ return (Collator) coll.clone();
}
catch (CloneNotSupportedException e) {
///CLOVER:OFF
}
Object registerInstance(Collator collator, ULocale locale) {
+ // Set the collator locales while registering so that getInstance()
+ // need not guess whether the collator's locales are already set properly
+ // (as they are by the data loader).
+ collator.setLocale(locale, locale);
return service.registerObject(collator, locale);
}
}
protected Object handleCreate(ULocale uloc, int kind, ICUService srvc) {
- return new RuleBasedCollator(uloc);
+ return makeInstance(uloc);
}
}
actualIDReturn[0] = "root";
}
try {
- return new RuleBasedCollator(ULocale.ROOT);
+ return makeInstance(ULocale.ROOT);
}
catch (MissingResourceException e) {
return null;
}
///CLOVER:ON
}
+
+ // Ported from C++ Collator::makeInstance().
+ private static final Collator makeInstance(ULocale desiredLocale) {
+ Output<ULocale> validLocale = new Output<ULocale>(ULocale.ROOT);
+ CollationTailoring t =
+ CollationLoader.loadTailoring(desiredLocale, validLocale);
+ return new RuleBasedCollator(t, validLocale.value);
+ }
+
private static ICULocaleService service = new CService();
}
*/
package com.ibm.icu.text;
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
import java.text.CharacterIterator;
import java.text.ParseException;
import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.MissingResourceException;
-import java.util.Set;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
-import com.ibm.icu.impl.BOCU;
-import com.ibm.icu.impl.ICUDebug;
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.ImplicitCEGenerator;
-import com.ibm.icu.impl.IntTrie;
-import com.ibm.icu.impl.StringUCharacterIterator;
-import com.ibm.icu.impl.Trie;
-import com.ibm.icu.impl.TrieIterator;
+import com.ibm.icu.impl.Normalizer2Impl;
import com.ibm.icu.impl.Utility;
-import com.ibm.icu.lang.UCharacter;
-import com.ibm.icu.lang.UScript;
-import com.ibm.icu.util.Output;
-import com.ibm.icu.util.RangeValueIterator;
+import com.ibm.icu.impl.Normalizer2Impl.ReorderingBuffer;
+import com.ibm.icu.impl.coll.BOCSU;
+import com.ibm.icu.impl.coll.Collation;
+import com.ibm.icu.impl.coll.CollationCompare;
+import com.ibm.icu.impl.coll.CollationData;
+import com.ibm.icu.impl.coll.CollationFastLatin;
+import com.ibm.icu.impl.coll.CollationIterator;
+import com.ibm.icu.impl.coll.CollationKeys;
+import com.ibm.icu.impl.coll.CollationKeys.SortKeyByteSink;
+import com.ibm.icu.impl.coll.CollationLoader;
+import com.ibm.icu.impl.coll.CollationRoot;
+import com.ibm.icu.impl.coll.CollationSettings;
+import com.ibm.icu.impl.coll.CollationTailoring;
+import com.ibm.icu.impl.coll.ContractionsAndExpansions;
+import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
+import com.ibm.icu.impl.coll.SharedObject;
+import com.ibm.icu.impl.coll.TailoredSet;
+import com.ibm.icu.impl.coll.UTF16CollationIterator;
import com.ibm.icu.util.ULocale;
-import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.VersionInfo;
/**
* </p>
*
* <p>
- * Users are strongly encouraged to read <a href="http://www.icu-project.org/userguide/Collate_Intro.html"> the users
- * guide</a> for more information about the collation service before using this class.
+ * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User
+ * Guide</a> for more information about the collation service before using this class.
* </p>
*
* <p>
* Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class
* Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the
- * argument locale. If a customized collation ordering ar attributes is required, use the RuleBasedCollator(String)
- * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on UCA, while
+ * argument locale. If a customized collation ordering or attributes is required, use the RuleBasedCollator(String)
+ * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on the CLDR root collation, while
* re-adjusting the attributes and orders of the characters in the specified rule accordingly.
* </p>
*
* <p>
* RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale
- * is not available, the orders eventually falls back to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA
- * collation order </a>.
+ * is not available, the orders eventually fall back to the
+ * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
* </p>
*
* <p>
* For information about the collation rule syntax and details about customization, please refer to the <a
- * href="http://www.icu-project.org/userguide/Collate_Customization.html"> Collation customization</a> section of the
- * user's guide.
+ * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the
+ * User Guide.
* </p>
*
* <p>
/**
* <p>
- * Constructor that takes the argument rules for customization. The collator will be based on UCA, with the
+ * Constructor that takes the argument rules for customization.
+ * The collator will be based on the CLDR root collation, with the
* attributes and re-ordering of the characters specified in the argument rules.
* </p>
* <p>
- * See the user guide's section on <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
+ * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization">
* Collation Customization</a> for details on the rule syntax.
* </p>
*
* the collation rules to build the collation table from.
* @exception ParseException
* and IOException thrown. ParseException thrown when argument rules have an invalid syntax.
- * IOException thrown when an error occured while reading internal data.
+ * IOException thrown when an error occurred while reading internal data.
* @stable ICU 2.8
*/
public RuleBasedCollator(String rules) throws Exception {
- checkUCA();
if (rules == null) {
throw new IllegalArgumentException("Collation rules can not be null");
}
- init(rules);
+ validLocale = ULocale.ROOT;
+ internalBuildTailoring(rules);
+ }
+
+ /**
+ * Implements from-rule constructors.
+ * @param rules rule string
+ * @throws Exception
+ */
+ private final void internalBuildTailoring(String rules) throws Exception {
+ CollationTailoring base = CollationRoot.getRoot();
+ // Most code using Collator does not need to build a Collator from rules.
+ // By using reflection, most code will not have a static dependency on the builder code.
+ // CollationBuilder builder = new CollationBuilder(base);
+ ClassLoader classLoader = getClass().getClassLoader();
+ CollationTailoring t;
+ try {
+ Class<?> builderClass = classLoader.loadClass("com.ibm.icu.impl.coll.CollationBuilder");
+ Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base);
+ // builder.parseAndBuild(rules);
+ Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class);
+ t = (CollationTailoring)parseAndBuild.invoke(builder, rules);
+ } catch(InvocationTargetException e) {
+ throw (Exception)e.getTargetException();
+ }
+ CollationSettings ts = t.settings.readOnly();
+ char[] fastLatinPrimaries = new char[CollationFastLatin.LATIN_LIMIT];
+ int fastLatinOptions = CollationFastLatin.getOptions(t.data, ts, fastLatinPrimaries);
+ if(fastLatinOptions != ts.fastLatinOptions ||
+ (fastLatinOptions >= 0 &&
+ !Arrays.equals(fastLatinPrimaries, ts.fastLatinPrimaries))) {
+ CollationSettings ownedSettings = t.settings.copyOnWrite();
+ ownedSettings.fastLatinOptions = CollationFastLatin.getOptions(
+ t.data, ownedSettings,
+ ownedSettings.fastLatinPrimaries);
+ }
+ t.actualLocale = null;
+ adoptTailoring(t);
}
// public methods --------------------------------------------------------
* @return a new instance of this RuleBasedCollator object
* @stable ICU 2.8
*/
+ @Override
public Object clone() throws CloneNotSupportedException {
- return clone(isFrozen());
+ if (isFrozen()) {
+ return this;
+ }
+ return cloneAsThawed();
}
- /**
- * Clones the RuleBasedCollator
- *
- * @param frozen should the clone be frozen or not
- * @return a new instance of this RuleBasedCollator object
- */
- private Object clone(boolean frozen) throws CloneNotSupportedException {
- //TODO: once buffer and threading issue is resolved have frozen clone just return itself
- RuleBasedCollator result = (RuleBasedCollator) super.clone();
- if (latinOneCEs_ != null) {
- result.m_reallocLatinOneCEs_ = true;
- result.m_ContInfo_ = new ContractionInfo();
+ private final void initMaxExpansions() {
+ synchronized(tailoring) {
+ if (tailoring.maxExpansions == null) {
+ tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data);
+ }
}
-
- // since all collation data in the RuleBasedCollator do not change
- // we can safely assign the result.fields to this collator
- // except in cases where we can't
- result.collationBuffer = null;
- result.frozenLock = frozen ? new ReentrantLock() : null;
- return result;
}
/**
* @stable ICU 2.8
*/
public CollationElementIterator getCollationElementIterator(String source) {
+ initMaxExpansions();
return new CollationElementIterator(source, this);
}
* @stable ICU 2.8
*/
public CollationElementIterator getCollationElementIterator(CharacterIterator source) {
+ initMaxExpansions();
CharacterIterator newsource = (CharacterIterator) source.clone();
return new CollationElementIterator(newsource, this);
}
* @stable ICU 2.8
*/
public CollationElementIterator getCollationElementIterator(UCharacterIterator source) {
+ initMaxExpansions();
return new CollationElementIterator(source, this);
}
* Determines whether the object has been frozen or not.
* @stable ICU 4.8
*/
+ @Override
public boolean isFrozen() {
return frozenLock != null;
}
* @return the collator itself.
* @stable ICU 4.8
*/
+ @Override
public Collator freeze() {
if (!isFrozen()) {
frozenLock = new ReentrantLock();
+ if (collationBuffer == null) {
+ collationBuffer = new CollationBuffer(data);
+ }
}
return this;
}
* Provides for the clone operation. Any clone is initially unfrozen.
* @stable ICU 4.8
*/
+ @Override
public RuleBasedCollator cloneAsThawed() {
- RuleBasedCollator clone = null;
try {
- clone = (RuleBasedCollator) clone(false);
+ RuleBasedCollator result = (RuleBasedCollator) super.clone();
+ // since all collation data in the RuleBasedCollator do not change
+ // we can safely assign the result.fields to this collator
+ // except in cases where we can't
+ result.settings = settings.clone();
+ result.collationBuffer = null;
+ result.frozenLock = null;
+ return result;
} catch (CloneNotSupportedException e) {
// Clone is implemented
+ return null;
}
- return clone;
}
// public setters --------------------------------------------------------
+ private void checkNotFrozen() {
+ if (isFrozen()) {
+ throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator");
+ }
+ }
+
+ private final CollationSettings getOwnedSettings() {
+ return settings.copyOnWrite();
+ }
+
+ private final CollationSettings getDefaultSettings() {
+ return tailoring.settings.readOnly();
+ }
+
/**
* Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator
* positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a
*/
@Deprecated
public void setHiraganaQuaternary(boolean flag) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
+ checkNotFrozen();
}
/**
*/
@Deprecated
public void setHiraganaQuaternaryDefault() {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
+ checkNotFrozen();
}
/**
* @stable ICU 2.8
*/
public void setUpperCaseFirst(boolean upperfirst) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- if (upperfirst) {
- if (m_caseFirst_ != AttributeValue.UPPER_FIRST_) {
- latinOneRegenTable_ = true;
- }
- m_caseFirst_ = AttributeValue.UPPER_FIRST_;
- } else {
- if (m_caseFirst_ != AttributeValue.OFF_) {
- latinOneRegenTable_ = true;
- }
- m_caseFirst_ = AttributeValue.OFF_;
- }
- updateInternalState();
+ checkNotFrozen();
+ if (upperfirst == isUpperCaseFirst()) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0);
+ setFastLatinOptions(ownedSettings);
}
/**
* @stable ICU 2.8
*/
public void setLowerCaseFirst(boolean lowerfirst) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- if (lowerfirst) {
- if (m_caseFirst_ != AttributeValue.LOWER_FIRST_) {
- latinOneRegenTable_ = true;
- }
- m_caseFirst_ = AttributeValue.LOWER_FIRST_;
- } else {
- if (m_caseFirst_ != AttributeValue.OFF_) {
- latinOneRegenTable_ = true;
- }
- m_caseFirst_ = AttributeValue.OFF_;
- }
- updateInternalState();
+ checkNotFrozen();
+ if (lowerfirst == isLowerCaseFirst()) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0);
+ setFastLatinOptions(ownedSettings);
}
/**
* @stable ICU 2.8
*/
public final void setCaseFirstDefault() {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- if (m_caseFirst_ != m_defaultCaseFirst_) {
- latinOneRegenTable_ = true;
- }
- m_caseFirst_ = m_defaultCaseFirst_;
- updateInternalState();
+ checkNotFrozen();
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(settings.readOnly() == defaultSettings) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setCaseFirstDefault(defaultSettings.options);
+ setFastLatinOptions(ownedSettings);
}
/**
* @stable ICU 2.8
*/
public void setAlternateHandlingDefault() {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;
- updateInternalState();
+ checkNotFrozen();
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(settings.readOnly() == defaultSettings) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setAlternateHandlingDefault(defaultSettings.options);
+ setFastLatinOptions(ownedSettings);
}
/**
* @stable ICU 2.8
*/
public void setCaseLevelDefault() {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- m_isCaseLevel_ = m_defaultIsCaseLevel_;
- updateInternalState();
+ checkNotFrozen();
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(settings.readOnly() == defaultSettings) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options);
+ setFastLatinOptions(ownedSettings);
}
/**
* @stable ICU 2.8
*/
public void setDecompositionDefault() {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- setDecomposition(m_defaultDecomposition_);
- updateInternalState();
+ checkNotFrozen();
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(settings.readOnly() == defaultSettings) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options);
+ setFastLatinOptions(ownedSettings);
}
/**
* @stable ICU 2.8
*/
public void setFrenchCollationDefault() {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- if (m_isFrenchCollation_ != m_defaultIsFrenchCollation_) {
- latinOneRegenTable_ = true;
- }
- m_isFrenchCollation_ = m_defaultIsFrenchCollation_;
- updateInternalState();
+ checkNotFrozen();
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(settings.readOnly() == defaultSettings) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options);
+ setFastLatinOptions(ownedSettings);
}
/**
* @stable ICU 2.8
*/
public void setStrengthDefault() {
- setStrength(m_defaultStrength_);
- updateInternalState();
+ checkNotFrozen();
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(settings.readOnly() == defaultSettings) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setStrengthDefault(defaultSettings.options);
+ setFastLatinOptions(ownedSettings);
}
/**
- * Method to set numeric collation to its default value. When numeric collation is turned on, this Collator
- * generates a collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER
- * '2'
- *
+ * Method to set numeric collation to its default value.
+ *
* @see #getNumericCollation
* @see #setNumericCollation
* @stable ICU 2.8
*/
public void setNumericCollationDefault() {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- setNumericCollation(m_defaultIsNumericCollation_);
- updateInternalState();
+ checkNotFrozen();
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(settings.readOnly() == defaultSettings) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options);
+ setFastLatinOptions(ownedSettings);
}
/**
* Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false,
* which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted
- * backwards. See the section on <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html">
+ * backwards. See the section on <a href="http://userguide.icu-project.org/collation/architecture">
* French collation</a> for more information.
*
* @param flag
* @see #setFrenchCollationDefault
*/
public void setFrenchCollation(boolean flag) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- if (m_isFrenchCollation_ != flag) {
- latinOneRegenTable_ = true;
- }
- m_isFrenchCollation_ = flag;
- updateInternalState();
+ checkNotFrozen();
+ if(flag == isFrenchCollation()) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag);
+ setFastLatinOptions(ownedSettings);
}
/**
* Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition
- * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting"> Alternate Weighting</a>. This
+ * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This
* attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false,
- * corresponding to the NON_IGNORABLE mode in UCA. In the NON-IGNORABLE mode, the RuleBasedCollator will treats all
- * the codepoints with non-ignorable primary weights in the same way. If the mode is set to true, the behaviour
- * corresponds to SHIFTED defined in UCA, this causes codepoints with PRIMARY orders that are equal or below the
+ * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all
+ * the code points with non-ignorable primary weights in the same way. If the mode is set to true, the behavior
+ * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the
* variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order.
*
* @param shifted
- * true if SHIFTED behaviour for alternate handling is desired, false for the NON_IGNORABLE behaviour.
+ * true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior.
* @see #isAlternateHandlingShifted
* @see #setAlternateHandlingDefault
* @stable ICU 2.8
*/
public void setAlternateHandlingShifted(boolean shifted) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- m_isAlternateHandlingShifted_ = shifted;
- updateInternalState();
+ checkNotFrozen();
+ if(shifted == isAlternateHandlingShifted()) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setAlternateHandlingShifted(shifted);
+ setFastLatinOptions(ownedSettings);
}
/**
* case level.
* </p>
* <p>
- * See the section on <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html"> case
+ * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case
* level</a> for more information.
* </p>
*
* @see #isCaseLevel
*/
public void setCaseLevel(boolean flag) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
- m_isCaseLevel_ = flag;
- updateInternalState();
- }
-
- /**
- * <p>
- * Sets this Collator's strength property. The strength property determines the minimum level of difference
+ checkNotFrozen();
+ if(flag == isCaseLevel()) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag);
+ setFastLatinOptions(ownedSettings);
+ }
+
+ /**
+ * Sets the decomposition mode of this Collator. Setting this
+ * decomposition attribute with CANONICAL_DECOMPOSITION allows the
+ * Collator to handle un-normalized text properly, producing the
+ * same results as if the text were normalized. If
+ * NO_DECOMPOSITION is set, it is the user's responsibility to
+ * insure that all text is already in the appropriate form before
+ * a comparison or before getting a CollationKey. Adjusting
+ * decomposition mode allows the user to select between faster and
+ * more complete collation behavior.</p>
+ *
+ * <p>Since a great many of the world's languages do not require
+ * text normalization, most locales set NO_DECOMPOSITION as the
+ * default decomposition mode.</p>
+ *
+ * <p>The default decomposition mode for the Collator is
+ * NO_DECOMPOSITION, unless specified otherwise by the locale used
+ * to create the Collator.</p>
+ *
+ * <p>See getDecomposition for a description of decomposition
+ * mode.</p>
+ *
+ * @param decomposition the new decomposition mode
+ * @see #getDecomposition
+ * @see #NO_DECOMPOSITION
+ * @see #CANONICAL_DECOMPOSITION
+ * @throws IllegalArgumentException If the given value is not a valid
+ * decomposition mode.
+ * @stable ICU 2.8
+ */
+ @Override
+ public void setDecomposition(int decomposition)
+ {
+ checkNotFrozen();
+ boolean flag;
+ switch(decomposition) {
+ case NO_DECOMPOSITION:
+ flag = false;
+ break;
+ case CANONICAL_DECOMPOSITION:
+ flag = true;
+ break;
+ default:
+ throw new IllegalArgumentException("Wrong decomposition mode.");
+ }
+ if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag);
+ setFastLatinOptions(ownedSettings);
+ }
+
+ /**
+ * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference
* considered significant during comparison.
- * </p>
- * <p>
- * See the Collator class description for an example of use.
- * </p>
+ *
+ * <p>See the Collator class description for an example of use.
*
* @param newStrength
* the new strength value.
* If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
* @stable ICU 2.8
*/
+ @Override
public void setStrength(int newStrength) {
- super.setStrength(newStrength);
- updateInternalState();
+ checkNotFrozen();
+ if(newStrength == getStrength()) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setStrength(newStrength);
+ setFastLatinOptions(ownedSettings);
+ }
+
+ /**
+ * {@icu} Sets the variable top to the top of the specified reordering group.
+ * The variable top determines the highest-sorting character
+ * which is affected by the alternate handling behavior.
+ * If that attribute is set to NON_IGNORABLE, then the variable top has no effect.
+ * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
+ * Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
+ * or Collator.ReorderCodes.DEFAULT to restore the default max variable group
+ * @return this
+ * @see #getMaxVariable
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public RuleBasedCollator setMaxVariable(int group) {
+ // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
+ int value;
+ if(group == Collator.ReorderCodes.DEFAULT) {
+ value = -1; // UCOL_DEFAULT
+ } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) {
+ value = group - Collator.ReorderCodes.FIRST;
+ } else {
+ throw new IllegalArgumentException("illegal max variable group " + group);
+ }
+ int oldValue = settings.readOnly().getMaxVariable();
+ if(value == oldValue) {
+ return this;
+ }
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(settings.readOnly() == defaultSettings) {
+ if(value < 0) { // UCOL_DEFAULT
+ return this;
+ }
+ }
+ CollationSettings ownedSettings = getOwnedSettings();
+
+ if(group == Collator.ReorderCodes.DEFAULT) {
+ group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable();
+ }
+ long varTop = data.getLastPrimaryForGroup(group);
+ assert(varTop != 0);
+ ownedSettings.setMaxVariable(value, defaultSettings.options);
+ ownedSettings.variableTop = varTop;
+ setFastLatinOptions(ownedSettings);
+ return this;
}
/**
- * <p>
- * Variable top is a two byte primary value which causes all the codepoints with primary values that are less or
- * equal than the variable top to be shifted when alternate handling is set to SHIFTED.
- * </p>
- * <p>
- * Sets the variable top to a collation element value of a string supplied.
- * </p>
+ * {@icu} Returns the maximum reordering group whose characters are affected by
+ * the alternate handling behavior.
+ * @return the maximum variable reordering group.
+ * @see #setMaxVariable
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public int getMaxVariable() {
+ return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable();
+ }
+
+ /**
+ * {@icu} Sets the variable top to the primary weight of the specified string.
+ *
+ * <p>Beginning with ICU 53, the variable top is pinned to
+ * the top of one of the supported reordering groups,
+ * and it must not be beyond the last of those groups.
+ * See {@link #setMaxVariable(int)}.
*
* @param varTop
* one or more (if contraction) characters to which the variable top should be set
- * @return a int value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined.
+ * @return variable top primary weight
* @exception IllegalArgumentException
* is thrown if varTop argument is not a valid variable top element. A variable top element is
* invalid when
* <ul>
* <li>it is a contraction that does not exist in the Collation order
- * <li>when the PRIMARY strength collation element for the variable top has more than two bytes
+ * <li>the variable top is beyond
+ * the last reordering group supported by setMaxVariable()
* <li>when the varTop argument is null or zero in length.
* </ul>
* @see #getVariableTop
* @see RuleBasedCollator#setAlternateHandlingShifted
- * @stable ICU 2.6
+ * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
*/
+ @Override
public int setVariableTop(String varTop) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
+ checkNotFrozen();
if (varTop == null || varTop.length() == 0) {
throw new IllegalArgumentException("Variable top argument string can not be null or zero in length.");
}
-
- CollationBuffer buffer = null;
- try {
- buffer = getCollationBuffer();
- return setVariableTop(varTop, buffer);
- } finally {
- releaseCollationBuffer(buffer);
- }
-
- }
-
- private int setVariableTop(String varTop, CollationBuffer buffer) {
- buffer.m_srcUtilColEIter_.setText(varTop);
- int ce = buffer.m_srcUtilColEIter_.next();
-
- // here we check if we have consumed all characters
- // you can put in either one character or a contraction
- // you shouldn't put more...
- if (buffer.m_srcUtilColEIter_.getOffset() != varTop.length() || ce == CollationElementIterator.NULLORDER) {
- throw new IllegalArgumentException("Variable top argument string is a contraction that does not exist "
- + "in the Collation order");
+ boolean numeric = settings.readOnly().isNumeric();
+ long ce1, ce2;
+ if(settings.readOnly().dontCheckFCD()) {
+ UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0);
+ ce1 = ci.nextCE();
+ ce2 = ci.nextCE();
+ } else {
+ FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0);
+ ce1 = ci.nextCE();
+ ce2 = ci.nextCE();
}
-
- int nextCE = buffer.m_srcUtilColEIter_.next();
-
- if ((nextCE != CollationElementIterator.NULLORDER)
- && (!isContinuation(nextCE) || (nextCE & CE_PRIMARY_MASK_) != 0)) {
- throw new IllegalArgumentException("Variable top argument string can only have a single collation "
- + "element that has less than or equal to two PRIMARY strength " + "bytes");
+ if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) {
+ throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element");
}
-
- m_variableTopValue_ = (ce & CE_PRIMARY_MASK_) >> 16;
-
- return ce & CE_PRIMARY_MASK_;
+ internalSetVariableTop(ce1 >>> 32);
+ return (int)settings.readOnly().variableTop;
}
/**
- * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. Lower 16
- * bits are ignored.
+ * {@icu} Sets the variable top to the specified primary weight.
+ *
+ * <p>Beginning with ICU 53, the variable top is pinned to
+ * the top of one of the supported reordering groups,
+ * and it must not be beyond the last of those groups.
+ * See {@link #setMaxVariable(int)}.
*
- * @param varTop
- * Collation element value, as returned by setVariableTop or getVariableTop
+ * @param varTop primary weight, as returned by setVariableTop or getVariableTop
* @see #getVariableTop
* @see #setVariableTop(String)
- * @stable ICU 2.6
+ * @deprecated ICU 53 Call setMaxVariable() instead.
*/
+ @Override
public void setVariableTop(int varTop) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
+ checkNotFrozen();
+ internalSetVariableTop(varTop & 0xffffffffL);
+ }
- m_variableTopValue_ = (varTop & CE_PRIMARY_MASK_) >> 16;
+ private void internalSetVariableTop(long varTop) {
+ if(varTop != settings.readOnly().variableTop) {
+ // Pin the variable top to the end of the reordering group which contains it.
+ // Only a few special groups are supported.
+ int group = data.getGroupForPrimary(varTop);
+ if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) {
+ throw new IllegalArgumentException("The variable top must be a primary weight in " +
+ "the space/punctuation/symbols/currency symbols range");
+ }
+ long v = data.getLastPrimaryForGroup(group);
+ assert(v != 0 && v >= varTop);
+ varTop = v;
+ if(varTop != settings.readOnly().variableTop) {
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST,
+ getDefaultSettings().options);
+ ownedSettings.variableTop = varTop;
+ setFastLatinOptions(ownedSettings);
+ }
+ }
}
/**
- * When numeric collation is turned on, this Collator generates a collation key for the numeric value of substrings
- * of digits. This is a way to get '100' to sort AFTER '2'
- *
+ * {@icu} When numeric collation is turned on, this Collator makes
+ * substrings of digits sort according to their numeric values.
+ *
+ * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest
+ * digit substring that can be treated as a single unit is
+ * 254 digits (not counting leading zeros). If a digit substring is
+ * longer than that, the digits beyond the limit will be treated as a
+ * separate digit substring.
+ *
+ * <p>A "digit" in this sense is a code point with General_Category=Nd,
+ * which does not include circled numbers, roman numerals, etc.
+ * Only a contiguous digit substring is considered, that is,
+ * non-negative integers without separators.
+ * There is no support for plus/minus signs, decimals, exponents, etc.
+ *
* @param flag
* true to turn numeric collation on and false to turn it off
* @see #getNumericCollation
* @stable ICU 2.8
*/
public void setNumericCollation(boolean flag) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
- }
-
+ checkNotFrozen();
// sort substrings of digits as numbers
- m_isNumericCollation_ = flag;
- updateInternalState();
+ if(flag == getNumericCollation()) { return; }
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setFlag(CollationSettings.NUMERIC, flag);
+ setFastLatinOptions(ownedSettings);
}
/**
* then this clears any existing reordering
* @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts)
* @see #getReorderCodes
- * @see #getEquivalentReorderCodes
+ * @see Collator#getEquivalentReorderCodes
* @stable ICU 4.8
*/
+ @Override
public void setReorderCodes(int... order) {
- if (isFrozen()) {
- throw new UnsupportedOperationException("Attempt to modify frozen object");
+ checkNotFrozen();
+ if(order == null ?
+ settings.readOnly().reorderCodes.length == 0 :
+ Arrays.equals(order, settings.readOnly().reorderCodes)) {
+ return;
}
-
- if (order != null && order.length > 0) {
- m_reorderCodes_ = order.clone();
+ int length = (order != null) ? order.length : 0;
+ CollationSettings defaultSettings = getDefaultSettings();
+ if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) {
+ if(settings.readOnly() != defaultSettings) {
+ CollationSettings ownedSettings = getOwnedSettings();
+ ownedSettings.setReordering(defaultSettings.reorderCodes,
+ defaultSettings.reorderTable);
+ setFastLatinOptions(ownedSettings);
+ }
+ return;
+ }
+ CollationSettings ownedSettings = getOwnedSettings();
+ if(length == 0) {
+ ownedSettings.resetReordering();
} else {
- m_reorderCodes_ = null;
+ byte[] reorderTable = new byte[256];
+ data.makeReorderTable(order, reorderTable);
+ ownedSettings.setReordering(order.clone(), reorderTable);
}
- buildPermutationTable();
+ setFastLatinOptions(ownedSettings);
+ }
+
+ private void setFastLatinOptions(CollationSettings ownedSettings) {
+ ownedSettings.fastLatinOptions = CollationFastLatin.getOptions(
+ data, ownedSettings, ownedSettings.fastLatinPrimaries);
}
// public getters --------------------------------------------------------
* @stable ICU 2.8
*/
public String getRules() {
- return m_rules_;
+ return tailoring.rules;
}
/**
- * Returns current rules. The argument defines whether full rules (UCA + tailored) rules are returned or just the
- * tailoring.
+ * Returns current rules.
+ * The argument defines whether full rules (root collation + tailored) rules are returned
+ * or just the tailoring.
*
- * <p>The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
+ * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order.
* They are almost never used or useful at runtime and can be removed from the data.
* See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide:
* Collation Customization, Building on Existing Locales</a>
*/
public String getRules(boolean fullrules) {
if (!fullrules) {
- return m_rules_;
+ return tailoring.rules;
}
- // take the UCA rules and append real rules at the end
- return UCA_.m_rules_.concat(m_rules_);
+ return CollationLoader.getRootRules() + tailoring.rules;
}
/**
- * Get an UnicodeSet that contains all the characters and sequences tailored in this collator.
+ * Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
*
* @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently
- * than in the UCA.
+ * than in the root collator.
* @stable ICU 2.4
*/
+ @Override
public UnicodeSet getTailoredSet() {
- try {
- CollationRuleParser src = new CollationRuleParser(getRules());
- return src.getTailoredSet();
- } catch (Exception e) {
- throw new IllegalStateException("A tailoring rule should not " + "have errors. Something is quite wrong!");
- }
- }
-
- private static class contContext {
- RuleBasedCollator coll;
- UnicodeSet contractions;
- UnicodeSet expansions;
- UnicodeSet removedContractions;
- boolean addPrefixes;
-
- contContext(RuleBasedCollator coll, UnicodeSet contractions, UnicodeSet expansions,
- UnicodeSet removedContractions, boolean addPrefixes) {
- this.coll = coll;
- this.contractions = contractions;
- this.expansions = expansions;
- this.removedContractions = removedContractions;
- this.addPrefixes = addPrefixes;
- }
- }
-
- private void addSpecial(contContext c, StringBuilder buffer, int CE) {
- StringBuilder b = new StringBuilder();
- int offset = (CE & 0xFFFFFF) - c.coll.m_contractionOffset_;
- int newCE = c.coll.m_contractionCE_[offset];
- // we might have a contraction that ends from previous level
- if (newCE != CollationElementIterator.CE_NOT_FOUND_) {
- if (isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_ && isSpecial(newCE)
- && getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) {
- addSpecial(c, buffer, newCE);
- }
- if (buffer.length() > 1) {
- if (c.contractions != null) {
- c.contractions.add(buffer.toString());
- }
- if (c.expansions != null && isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
- c.expansions.add(buffer.toString());
- }
- }
- }
-
- offset++;
- // check whether we're doing contraction or prefix
- if (getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) {
- while (c.coll.m_contractionIndex_[offset] != 0xFFFF) {
- b.delete(0, b.length());
- b.append(buffer);
- newCE = c.coll.m_contractionCE_[offset];
- b.insert(0, c.coll.m_contractionIndex_[offset]);
- if (isSpecial(newCE)
- && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {
- addSpecial(c, b, newCE);
- } else {
- if (c.contractions != null) {
- c.contractions.add(b.toString());
- }
- if (c.expansions != null && isSpecial(newCE)
- && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {
- c.expansions.add(b.toString());
- }
- }
- offset++;
- }
- } else if (getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_) {
- while (c.coll.m_contractionIndex_[offset] != 0xFFFF) {
- b.delete(0, b.length());
- b.append(buffer);
- newCE = c.coll.m_contractionCE_[offset];
- b.append(c.coll.m_contractionIndex_[offset]);
- if (isSpecial(newCE)
- && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {
- addSpecial(c, b, newCE);
- } else {
- if (c.contractions != null) {
- c.contractions.add(b.toString());
- }
- if (c.expansions != null && isSpecial(newCE)
- && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {
- c.expansions.add(b.toString());
- }
- }
- offset++;
- }
- }
- }
-
- private void processSpecials(contContext c) {
- int internalBufferSize = 512;
- TrieIterator trieiterator = new TrieIterator(c.coll.m_trie_);
- RangeValueIterator.Element element = new RangeValueIterator.Element();
- while (trieiterator.next(element)) {
- int start = element.start;
- int limit = element.limit;
- int CE = element.value;
- StringBuilder contraction = new StringBuilder(internalBufferSize);
-
- if (isSpecial(CE)) {
- if (((getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) || getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_)) {
- while (start < limit) {
- // if there are suppressed contractions, we don't
- // want to add them.
- if (c.removedContractions != null && c.removedContractions.contains(start)) {
- start++;
- continue;
- }
- // we start our contraction from middle, since we don't know if it
- // will grow toward right or left
- contraction.append((char) start);
- addSpecial(c, contraction, CE);
- start++;
- }
- } else if (c.expansions != null && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
- while (start < limit) {
- c.expansions.add(start++);
- }
- }
- }
+ UnicodeSet tailored = new UnicodeSet();
+ if(data.base != null) {
+ new TailoredSet(tailored).forData(data);
}
+ return tailored;
}
/**
* @stable ICU 3.4
*/
public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)
- throws Exception {
+ throws Exception {
if (contractions != null) {
contractions.clear();
}
if (expansions != null) {
expansions.clear();
}
- String rules = getRules();
- try {
- CollationRuleParser src = new CollationRuleParser(rules);
- contContext c = new contContext(RuleBasedCollator.UCA_, contractions, expansions, src.m_removeSet_,
- addPrefixes);
+ new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data);
+ }
- // Add the UCA contractions
- processSpecials(c);
- // This is collator specific. Add contractions from a collator
- c.coll = this;
- c.removedContractions = null;
- processSpecials(c);
- } catch (Exception e) {
- throw e;
- }
+ /**
+ * Adds the contractions that start with character c to the set.
+ * Ignores prefixes. Used by AlphabeticIndex.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ void internalAddContractions(int c, UnicodeSet set) {
+ new ContractionsAndExpansions(set, null, null, false).forCodePoint(data, c);
}
/**
* @see #getRawCollationKey
* @stable ICU 2.8
*/
+ @Override
public CollationKey getCollationKey(String source) {
if (source == null) {
return null;
}
private CollationKey getCollationKey(String source, CollationBuffer buffer) {
- buffer.m_utilRawCollationKey_ = getRawCollationKey(source, buffer.m_utilRawCollationKey_, buffer);
- return new CollationKey(source, buffer.m_utilRawCollationKey_);
+ buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer);
+ return new CollationKey(source, buffer.rawCollationKey);
}
/**
* @see RawCollationKey
* @stable ICU 2.8
*/
+ @Override
public RawCollationKey getRawCollationKey(String source, RawCollationKey key) {
if (source == null) {
return null;
}
}
- private RawCollationKey getRawCollationKey(String source, RawCollationKey key, CollationBuffer buffer) {
- int strength = getStrength();
- buffer.m_utilCompare0_ = m_isCaseLevel_;
- // m_utilCompare1_ = true;
- buffer.m_utilCompare2_ = strength >= SECONDARY;
- buffer.m_utilCompare3_ = strength >= TERTIARY;
- buffer.m_utilCompare4_ = strength >= QUATERNARY;
- buffer.m_utilCompare5_ = strength == IDENTICAL;
+ private static final class CollationKeyByteSink extends SortKeyByteSink {
+ CollationKeyByteSink(RawCollationKey key) {
+ super(key.bytes);
+ key_ = key;
+ }
- boolean doFrench = m_isFrenchCollation_ && buffer.m_utilCompare2_;
- // TODO: UCOL_COMMON_BOT4 should be a function of qShifted.
- // If we have no qShifted, we don't need to set UCOL_COMMON_BOT4 so
- // high.
- int commonBottom4 = ((m_variableTopValue_ >>> 8) + 1) & LAST_BYTE_MASK_;
- byte hiragana4 = 0;
- if (m_isHiragana4_ && buffer.m_utilCompare4_) {
- // allocate one more space for hiragana, value for hiragana
- hiragana4 = (byte) commonBottom4;
- commonBottom4++;
+ @Override
+ protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) {
+ // n > 0 && appended_ > capacity_
+ if (Resize(n, length)) {
+ System.arraycopy(bytes, start, buffer_, length, n);
+ }
}
- int bottomCount4 = 0xFF - commonBottom4;
- // If we need to normalize, we'll do it all at once at the beginning!
- if (buffer.m_utilCompare5_ && Normalizer.quickCheck(source, Normalizer.NFD, 0) != Normalizer.YES) {
- // if it is identical strength, we have to normalize the string to
- // NFD so that it will be appended correctly to the end of the sort
- // key
- source = Normalizer.decompose(source, false);
- } else if (getDecomposition() != NO_DECOMPOSITION
- && Normalizer.quickCheck(source, Normalizer.FCD, 0) != Normalizer.YES) {
- // for the rest of the strength, if decomposition is on, FCD is
- // enough for us to work on.
- source = Normalizer.normalize(source, Normalizer.FCD);
+ @Override
+ protected boolean Resize(int appendCapacity, int length) {
+ int newCapacity = 2 * buffer_.length;
+ int altCapacity = length + 2 * appendCapacity;
+ if (newCapacity < altCapacity) {
+ newCapacity = altCapacity;
+ }
+ if (newCapacity < 200) {
+ newCapacity = 200;
+ }
+ // Do not call key_.ensureCapacity(newCapacity) because we do not
+ // keep key_.size in sync with appended_.
+ // We only set it when we are done.
+ byte[] newBytes = new byte[newCapacity];
+ System.arraycopy(buffer_, 0, newBytes, 0, length);
+ buffer_ = key_.bytes = newBytes;
+ return true;
}
- getSortKeyBytes(source, doFrench, hiragana4, commonBottom4, bottomCount4, buffer);
+
+ private RawCollationKey key_;
+ }
+
+ private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) {
if (key == null) {
- key = new RawCollationKey();
+ key = new RawCollationKey(simpleKeyLengthEstimate(source));
+ } else if (key.bytes == null) {
+ key.bytes = new byte[simpleKeyLengthEstimate(source)];
}
- getSortKey(source, doFrench, commonBottom4, bottomCount4, key, buffer);
+ CollationKeyByteSink sink = new CollationKeyByteSink(key);
+ writeSortKey(source, sink, buffer);
+ key.size = sink.NumberOfBytesAppended();
return key;
}
+ private int simpleKeyLengthEstimate(CharSequence source) {
+ return 2 * source.length() + 10;
+ }
+
+ private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) {
+ boolean numeric = settings.readOnly().isNumeric();
+ if(settings.readOnly().dontCheckFCD()) {
+ buffer.leftUTF16CollIter.setText(numeric, s, 0);
+ CollationKeys.writeSortKeyUpToQuaternary(
+ buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(),
+ sink, Collation.PRIMARY_LEVEL,
+ CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
+ } else {
+ buffer.leftFCDUTF16Iter.setText(numeric, s, 0);
+ CollationKeys.writeSortKeyUpToQuaternary(
+ buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(),
+ sink, Collation.PRIMARY_LEVEL,
+ CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
+ }
+ if(settings.readOnly().getStrength() == IDENTICAL) {
+ writeIdenticalLevel(s, sink);
+ }
+ sink.Append(Collation.TERMINATOR_BYTE);
+ }
+
+ private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) {
+ // NFD quick check
+ int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null);
+ sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
+ // Sync the ByteArrayWrapper size with the key length.
+ sink.key_.size = sink.NumberOfBytesAppended();
+ int prev = 0;
+ if(nfdQCYesLimit != 0) {
+ prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_);
+ }
+ // Is there non-NFD text?
+ if(nfdQCYesLimit < s.length()) {
+ int destLengthEstimate = s.length() - nfdQCYesLimit;
+ StringBuilder nfd = new StringBuilder();
+ data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate);
+ BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_);
+ }
+ // Sync the key with the buffer again which got bytes appended and may have been reallocated.
+ sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size);
+ }
+
+ /**
+ * Returns the CEs for the string.
+ * @param str the string
+ * @internal for tests & tools
+ * @deprecated This API is ICU internal only.
+ */
+ public long[] internalGetCEs(CharSequence str) {
+ CollationBuffer buffer = null;
+ try {
+ buffer = getCollationBuffer();
+ boolean numeric = settings.readOnly().isNumeric();
+ CollationIterator iter;
+ if(settings.readOnly().dontCheckFCD()) {
+ buffer.leftUTF16CollIter.setText(numeric, str, 0);
+ iter = buffer.leftUTF16CollIter;
+ } else {
+ buffer.leftFCDUTF16Iter.setText(numeric, str, 0);
+ iter = buffer.leftFCDUTF16Iter;
+ }
+ int length = iter.fetchCEs() - 1;
+ assert length >= 0 && iter.getCE(length) == Collation.NO_CE;
+ long[] ces = new long[length];
+ System.arraycopy(iter.getCEs(), 0, ces, 0, length);
+ return ces;
+ } finally {
+ releaseCollationBuffer(buffer);
+ }
+ }
+
+ /**
+ * Returns this Collator's strength attribute. The strength attribute
+ * determines the minimum level of difference considered significant.
+ *
+ * <p>{@icunote} This can return QUATERNARY strength, which is not supported by the
+ * JDK version.
+ *
+ * <p>See the Collator class description for more details.
+ *
+ * @return this Collator's current strength attribute.
+ * @see #setStrength
+ * @see #PRIMARY
+ * @see #SECONDARY
+ * @see #TERTIARY
+ * @see #QUATERNARY
+ * @see #IDENTICAL
+ * @stable ICU 2.8
+ */
+ @Override
+ public int getStrength() {
+ return settings.readOnly().getStrength();
+ }
+
+ /**
+ * Returns the decomposition mode of this Collator. The decomposition mode
+ * determines how Unicode composed characters are handled.
+ *
+ * <p>See the Collator class description for more details.
+ *
+ * @return the decomposition mode
+ * @see #setDecomposition
+ * @see #NO_DECOMPOSITION
+ * @see #CANONICAL_DECOMPOSITION
+ * @stable ICU 2.8
+ */
+ @Override
+ public int getDecomposition() {
+ return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ?
+ CANONICAL_DECOMPOSITION : NO_DECOMPOSITION;
+ }
+
/**
* Return true if an uppercase character is sorted before the corresponding lowercase character. See
* setCaseFirst(boolean) for details.
* @stable ICU 2.8
*/
public boolean isUpperCaseFirst() {
- return (m_caseFirst_ == AttributeValue.UPPER_FIRST_);
+ return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK);
}
/**
* @stable ICU 2.8
*/
public boolean isLowerCaseFirst() {
- return (m_caseFirst_ == AttributeValue.LOWER_FIRST_);
+ return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST);
}
/**
- * Checks if the alternate handling behaviour is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true,
+ * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true,
* then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the
* alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more
* details.
* @stable ICU 2.8
*/
public boolean isAlternateHandlingShifted() {
- return m_isAlternateHandlingShifted_;
+ return settings.readOnly().getAlternateHandling();
}
/**
* @stable ICU 2.8
*/
public boolean isCaseLevel() {
- return m_isCaseLevel_;
+ return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0;
}
/**
* @stable ICU 2.8
*/
public boolean isFrenchCollation() {
- return m_isFrenchCollation_;
+ return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0;
}
/**
*/
@Deprecated
public boolean isHiraganaQuaternary() {
- return m_isHiragana4_;
+ return false; // TODO: change docs to say always returns false?
}
/**
- * Gets the variable top value of a Collator. Lower 16 bits are undefined and should be ignored.
+ * {@icu} Gets the variable top value of a Collator.
*
- * @return the variable top value of a Collator.
- * @see #setVariableTop
+ * @return the variable top primary weight
+ * @see #getMaxVariable
* @stable ICU 2.6
*/
+ @Override
public int getVariableTop() {
- return m_variableTopValue_ << 16;
+ return (int)settings.readOnly().variableTop;
}
/**
* @stable ICU 2.8
*/
public boolean getNumericCollation() {
- return m_isNumericCollation_;
+ return (settings.readOnly().options & CollationSettings.NUMERIC) != 0;
}
/**
* @return a copy of the reordering codes for this collator;
* if none are set then returns an empty array
* @see #setReorderCodes
- * @see #getEquivalentReorderCodes
+ * @see Collator#getEquivalentReorderCodes
* @stable ICU 4.8
*/
+ @Override
public int[] getReorderCodes() {
- if (m_reorderCodes_ != null) {
- return m_reorderCodes_.clone();
- } else {
- return LeadByteConstants.EMPTY_INT_ARRAY;
- }
+ return settings.readOnly().reorderCodes.clone();
}
+ // public other methods -------------------------------------------------
+
/**
- * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
- * codes are grouped and must reorder together.
- *
- * @param reorderCode code for which equivalents to be retrieved
- * @return the set of all reorder codes in the same group as the given reorder code.
- * @see #setReorderCodes
- * @see #getReorderCodes
- * @stable ICU 4.8
+ * {@inheritDoc}
*/
- public static int[] getEquivalentReorderCodes(int reorderCode) {
- Set<Integer> equivalentCodesSet = new HashSet<Integer>();
- int[] leadBytes = RuleBasedCollator.LEADBYTE_CONSTANTS_.getLeadBytesForReorderCode(reorderCode);
- for (int leadByte : leadBytes) {
- int[] codes = RuleBasedCollator.LEADBYTE_CONSTANTS_.getReorderCodesForLeadByte(leadByte);
- for (int code : codes) {
- equivalentCodesSet.add(code);
- }
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
}
- int[] equivalentCodes = new int[equivalentCodesSet.size()];
- int i = 0;
- for (int code : equivalentCodesSet) {
- equivalentCodes[i++] = code;
+ if (!super.equals(obj)) {
+ return false;
}
- return equivalentCodes;
+ RuleBasedCollator o = (RuleBasedCollator) obj;
+ if(!settings.readOnly().equals(o.settings.readOnly())) { return false; }
+ if(data == o.data) { return true; }
+ boolean thisIsRoot = data.base == null;
+ boolean otherIsRoot = o.data.base == null;
+ assert(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be ==
+ if(thisIsRoot != otherIsRoot) { return false; }
+ if((thisIsRoot || tailoring.rules.length() != 0) &&
+ (otherIsRoot || o.tailoring.rules.length() != 0)) {
+ // Shortcut: If both collators have valid rule strings, then compare those.
+ if(tailoring.rules.equals(o.tailoring.rules)) { return true; }
+ }
+ // Different rule strings can result in the same or equivalent tailoring.
+ // The rule strings are optional in ICU resource bundles, although included by default.
+ // cloneBinary() drops the rule string.
+ UnicodeSet thisTailored = getTailoredSet();
+ UnicodeSet otherTailored = o.getTailoredSet();
+ if(!thisTailored.equals(otherTailored)) { return false; }
+ // For completeness, we should compare all of the mappings;
+ // or we should create a list of strings, sort it with one collator,
+ // and check if both collators compare adjacent strings the same
+ // (order & strength, down to quaternary); or similar.
+ // Testing equality of collators seems unusual.
+ return true;
}
- // public other methods -------------------------------------------------
-
/**
- * Compares the equality of two RuleBasedCollator objects. RuleBasedCollator objects are equal if they have the same
- * collation rules and the same attributes.
+ * Generates a unique hash code for this RuleBasedCollator.
*
- * @param obj
- * the RuleBasedCollator to be compared to.
- * @return true if this RuleBasedCollator has exactly the same collation behaviour as obj, false otherwise.
+ * @return the unique hash code for this Collator
* @stable ICU 2.8
*/
- public boolean equals(Object obj) {
- if (obj == null) {
- return false; // super does class check
+ @Override
+ public int hashCode() {
+ int h = settings.readOnly().hashCode();
+ if(data.base == null) { return h; } // root collator
+ // Do not rely on the rule string, see comments in operator==().
+ UnicodeSet set = getTailoredSet();
+ UnicodeSetIterator iter = new UnicodeSetIterator(set);
+ while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) {
+ h ^= data.getCE32(iter.codepoint);
}
- if (this == obj) {
- return true;
- }
- if (getClass() != obj.getClass()) {
- return false;
- }
- RuleBasedCollator other = (RuleBasedCollator) obj;
- // all other non-transient information is also contained in rules.
- if (getStrength() != other.getStrength() || getDecomposition() != other.getDecomposition()
- || other.m_caseFirst_ != m_caseFirst_ || other.m_caseSwitch_ != m_caseSwitch_
- || other.m_isAlternateHandlingShifted_ != m_isAlternateHandlingShifted_
- || other.m_isCaseLevel_ != m_isCaseLevel_ || other.m_isFrenchCollation_ != m_isFrenchCollation_
- || other.m_isHiragana4_ != m_isHiragana4_) {
- return false;
- }
- if (m_reorderCodes_ != null ^ other.m_reorderCodes_ != null) {
- return false;
- }
- if (m_reorderCodes_ != null) {
- if (m_reorderCodes_.length != other.m_reorderCodes_.length) {
- return false;
- }
- for (int i = 0; i < m_reorderCodes_.length; i++) {
- if (m_reorderCodes_[i] != other.m_reorderCodes_[i]) {
- return false;
- }
- }
- }
- boolean rules = m_rules_ == other.m_rules_;
- if (!rules && (m_rules_ != null && other.m_rules_ != null)) {
- rules = m_rules_.equals(other.m_rules_);
- }
- if (!rules || !ICUDebug.enabled("collation")) {
- return rules;
- }
- if (m_addition3_ != other.m_addition3_ || m_bottom3_ != other.m_bottom3_
- || m_bottomCount3_ != other.m_bottomCount3_ || m_common3_ != other.m_common3_
- || m_isSimple3_ != other.m_isSimple3_ || m_mask3_ != other.m_mask3_
- || m_minContractionEnd_ != other.m_minContractionEnd_ || m_minUnsafe_ != other.m_minUnsafe_
- || m_top3_ != other.m_top3_ || m_topCount3_ != other.m_topCount3_
- || !Arrays.equals(m_unsafe_, other.m_unsafe_)) {
- return false;
- }
- if (!m_trie_.equals(other.m_trie_)) {
- // we should use the trie iterator here, but then this part is
- // only used in the test.
- for (int i = UCharacter.MAX_VALUE; i >= UCharacter.MIN_VALUE; i--) {
- int v = m_trie_.getCodePointValue(i);
- int otherv = other.m_trie_.getCodePointValue(i);
- if (v != otherv) {
- int mask = v & (CE_TAG_MASK_ | CE_SPECIAL_FLAG_);
- if (mask == (otherv & 0xff000000)) {
- v &= 0xffffff;
- otherv &= 0xffffff;
- if (mask == 0xf1000000) {
- v -= (m_expansionOffset_ << 4);
- otherv -= (other.m_expansionOffset_ << 4);
- } else if (mask == 0xf2000000) {
- v -= m_contractionOffset_;
- otherv -= other.m_contractionOffset_;
- }
- if (v == otherv) {
- continue;
- }
- }
- return false;
- }
- }
- }
- if (!Arrays.equals(m_contractionCE_, other.m_contractionCE_)
- || !Arrays.equals(m_contractionEnd_, other.m_contractionEnd_)
- || !Arrays.equals(m_contractionIndex_, other.m_contractionIndex_)
- || !Arrays.equals(m_expansion_, other.m_expansion_)
- || !Arrays.equals(m_expansionEndCE_, other.m_expansionEndCE_)) {
- return false;
- }
- // not comparing paddings
- for (int i = 0; i < m_expansionEndCE_.length; i++) {
- if (m_expansionEndCEMaxSize_[i] != other.m_expansionEndCEMaxSize_[i]) {
- return false;
- }
- }
- return true;
- }
-
- /**
- * Generates a unique hash code for this RuleBasedCollator.
- *
- * @return the unique hash code for this Collator
- * @stable ICU 2.8
- */
- public int hashCode() {
- String rules = getRules();
- if (rules == null) {
- rules = "";
- }
- return rules.hashCode();
- }
-
- /**
- * Compares the source text String to the target text String according to the collation rules, strength and
- * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero
- * depending on whether the source String is less than, equal to or greater than the target String. See the Collator
- * class description for an example of use. </p>
- * <p>
- * General recommendation: <br>
- * If comparison are to be done to the same String multiple times, it would be more efficient to generate
- * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed
- * performance is critical and object instantiation is to be reduced, further optimization may be achieved by
- * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method
- * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey
- * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key
- * comparisons. If the each Strings are compared to only once, using the method RuleBasedCollator.compare(String,
- * String) will have a better performance.
- * </p>
- *
- * @param source
- * the source text String.
- * @param target
- * the target text String.
- * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source
- * and target are equal, value is greater than zero if source is greater than target.
- * @see CollationKey
- * @see #getCollationKey
- * @stable ICU 2.8
- */
- public int compare(String source, String target) {
- if (source.equals(target)) {
- return 0;
- }
- CollationBuffer buffer = null;
- try {
- buffer = getCollationBuffer();
- return compare(source, target, buffer);
- } finally {
- releaseCollationBuffer(buffer);
- }
- }
-
- private int compare(String source, String target, CollationBuffer buffer) {
- // Find the length of any leading portion that is equal
- int offset = getFirstUnmatchedOffset(source, target);
- // return compareRegular(source, target, offset);
- if (latinOneUse_) {
- if ((offset < source.length() && source.charAt(offset) > ENDOFLATINONERANGE_)
- || (offset < target.length() && target.charAt(offset) > ENDOFLATINONERANGE_)) {
- // source or target start with non-latin-1
- return compareRegular(source, target, offset, buffer);
- } else {
- return compareUseLatin1(source, target, offset, buffer);
- }
- } else {
- return compareRegular(source, target, offset, buffer);
- }
- }
-
- // package private inner interfaces --------------------------------------
-
- /**
- * Attribute values to be used when setting the Collator options
- */
- static interface AttributeValue {
- /**
- * Indicates that the default attribute value will be used. See individual attribute for details on its default
- * value.
- */
- static final int DEFAULT_ = -1;
- /**
- * Primary collation strength
- */
- static final int PRIMARY_ = Collator.PRIMARY;
- /**
- * Secondary collation strength
- */
- static final int SECONDARY_ = Collator.SECONDARY;
- /**
- * Tertiary collation strength
- */
- static final int TERTIARY_ = Collator.TERTIARY;
- /**
- * Default collation strength
- */
- static final int DEFAULT_STRENGTH_ = Collator.TERTIARY;
- /**
- * Internal use for strength checks in Collation elements
- */
- static final int CE_STRENGTH_LIMIT_ = Collator.TERTIARY + 1;
- /**
- * Quaternary collation strength
- */
- static final int QUATERNARY_ = 3;
- /**
- * Identical collation strength
- */
- static final int IDENTICAL_ = Collator.IDENTICAL;
- /**
- * Internal use for strength checks
- */
- static final int STRENGTH_LIMIT_ = Collator.IDENTICAL + 1;
- /**
- * Turn the feature off - works for FRENCH_COLLATION, CASE_LEVEL, HIRAGANA_QUATERNARY_MODE and
- * DECOMPOSITION_MODE
- */
- static final int OFF_ = 16;
- /**
- * Turn the feature on - works for FRENCH_COLLATION, CASE_LEVEL, HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE
- */
- static final int ON_ = 17;
- /**
- * Valid for ALTERNATE_HANDLING. Alternate handling will be shifted
- */
- static final int SHIFTED_ = 20;
- /**
- * Valid for ALTERNATE_HANDLING. Alternate handling will be non ignorable
- */
- static final int NON_IGNORABLE_ = 21;
- /**
- * Valid for CASE_FIRST - lower case sorts before upper case
- */
- static final int LOWER_FIRST_ = 24;
- /**
- * Upper case sorts before lower case
- */
- static final int UPPER_FIRST_ = 25;
- /**
- * Number of attribute values
- */
- static final int LIMIT_ = 29;
- }
-
- /**
- * Attributes that collation service understands. All the attributes can take DEFAULT value, as well as the values
- * specific to each one.
- */
- static interface Attribute {
- /**
- * Attribute for direction of secondary weights - used in French. Acceptable values are ON, which results in
- * secondary weights being considered backwards and OFF which treats secondary weights in the order they appear.
- */
- static final int FRENCH_COLLATION_ = 0;
- /**
- * Attribute for handling variable elements. Acceptable values are NON_IGNORABLE (default) which treats all the
- * codepoints with non-ignorable primary weights in the same way, and SHIFTED which causes codepoints with
- * primary weights that are equal or below the variable top value to be ignored on primary level and moved to
- * the quaternary level.
- */
- static final int ALTERNATE_HANDLING_ = 1;
- /**
- * Controls the ordering of upper and lower case letters. Acceptable values are OFF (default), which orders
- * upper and lower case letters in accordance to their tertiary weights, UPPER_FIRST which forces upper case
- * letters to sort before lower case letters, and LOWER_FIRST which does the opposite.
- */
- static final int CASE_FIRST_ = 2;
- /**
- * Controls whether an extra case level (positioned before the third level) is generated or not. Acceptable
- * values are OFF (default), when case level is not generated, and ON which causes the case level to be
- * generated. Contents of the case level are affected by the value of CASE_FIRST attribute. A simple way to
- * ignore accent differences in a string is to set the strength to PRIMARY and enable case level.
- */
- static final int CASE_LEVEL_ = 3;
- /**
- * Controls whether the normalization check and necessary normalizations are performed. When set to OFF
- * (default) no normalization check is performed. The correctness of the result is guaranteed only if the input
- * data is in so-called FCD form (see users manual for more info). When set to ON, an incremental check is
- * performed to see whether the input data is in the FCD form. If the data is not in the FCD form, incremental
- * NFD normalization is performed.
- */
- static final int NORMALIZATION_MODE_ = 4;
- /**
- * The strength attribute. Can be either PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. The usual
- * strength for most locales (except Japanese) is tertiary. Quaternary strength is useful when combined with
- * shifted setting for alternate handling attribute and for JIS x 4061 collation, when it is used to distinguish
- * between Katakana and Hiragana (this is achieved by setting the HIRAGANA_QUATERNARY mode to on. Otherwise,
- * quaternary level is affected only by the number of non ignorable code points in the string. Identical
- * strength is rarely useful, as it amounts to codepoints of the NFD form of the string.
- */
- static final int STRENGTH_ = 5;
- /**
- * When turned on, this attribute positions Hiragana before all non-ignorables on quaternary level. This is a
- * sneaky way to produce JIS sort order.
- */
- static final int HIRAGANA_QUATERNARY_MODE_ = 6;
- /**
- * Attribute count
- */
- static final int LIMIT_ = 7;
- }
-
- /**
- * DataManipulate singleton
- */
- static class DataManipulate implements Trie.DataManipulate {
- // public methods ----------------------------------------------------
-
- /**
- * Internal method called to parse a lead surrogate's ce for the offset to the next trail surrogate data.
- *
- * @param ce
- * collation element of the lead surrogate
- * @return data offset or 0 for the next trail surrogate
- * @stable ICU 2.8
- */
- public final int getFoldingOffset(int ce) {
- if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) {
- return (ce & 0xFFFFFF);
- }
- return 0;
- }
-
- /**
- * Get singleton object
- */
- public static final DataManipulate getInstance() {
- if (m_instance_ == null) {
- m_instance_ = new DataManipulate();
- }
- return m_instance_;
- }
-
- // private data member ----------------------------------------------
-
- /**
- * Singleton instance
- */
- private static DataManipulate m_instance_;
-
- // private constructor ----------------------------------------------
-
- /**
- * private to prevent initialization
- */
- private DataManipulate() {
- }
- }
-
- /**
- * UCAConstants
- */
- static final class UCAConstants {
- int FIRST_TERTIARY_IGNORABLE_[] = new int[2]; // 0x00000000
- int LAST_TERTIARY_IGNORABLE_[] = new int[2]; // 0x00000000
- int FIRST_PRIMARY_IGNORABLE_[] = new int[2]; // 0x00008705
- int FIRST_SECONDARY_IGNORABLE_[] = new int[2]; // 0x00000000
- int LAST_SECONDARY_IGNORABLE_[] = new int[2]; // 0x00000500
- int LAST_PRIMARY_IGNORABLE_[] = new int[2]; // 0x0000DD05
- int FIRST_VARIABLE_[] = new int[2]; // 0x05070505
- int LAST_VARIABLE_[] = new int[2]; // 0x13CF0505
- int FIRST_NON_VARIABLE_[] = new int[2]; // 0x16200505
- int LAST_NON_VARIABLE_[] = new int[2]; // 0x767C0505
- int RESET_TOP_VALUE_[] = new int[2]; // 0x9F000303
- int FIRST_IMPLICIT_[] = new int[2];
- int LAST_IMPLICIT_[] = new int[2];
- int FIRST_TRAILING_[] = new int[2];
- int LAST_TRAILING_[] = new int[2];
- int PRIMARY_TOP_MIN_;
- int PRIMARY_IMPLICIT_MIN_; // 0xE8000000
- int PRIMARY_IMPLICIT_MAX_; // 0xF0000000
- int PRIMARY_TRAILING_MIN_; // 0xE8000000
- int PRIMARY_TRAILING_MAX_; // 0xF0000000
- int PRIMARY_SPECIAL_MIN_; // 0xE8000000
- int PRIMARY_SPECIAL_MAX_; // 0xF0000000
- }
-
- /**
- * Script to Lead Byte and Lead Byte to Script Data
- *
- */
- static final class LeadByteConstants {
- private static final int DATA_MASK_FOR_INDEX = 0x8000;
- private static final int[] EMPTY_INT_ARRAY = new int[0];
-
- private int serializedSize = 0;
-
- private Map<Integer, Integer> SCRIPT_TO_LEAD_BYTES_INDEX;
- private byte[] SCRIPT_TO_LEAD_BYTES_DATA;
-
- private int[] LEAD_BYTE_TO_SCRIPTS_INDEX;
- private byte[] LEAD_BYTE_TO_SCRIPTS_DATA;
-
- LeadByteConstants() {
- }
-
- void read(DataInputStream dis) throws IOException {
- int readcount = 0;
- int indexCount;
- int dataSize;
-
- // script to lead bytes
- indexCount = dis.readShort();
- readcount += 2;
- dataSize = dis.readShort();
- readcount += 2;
- this.SCRIPT_TO_LEAD_BYTES_INDEX = new HashMap<Integer, Integer>();
- //System.out.println("Script to Lead Bytes Index - Count = " + indexCount);
- for (int index = 0; index < indexCount; index++) {
- int reorderCode = dis.readShort(); // reorder code
- readcount += 2;
- int dataOffset = 0xffff & dis.readShort(); // data offset
- readcount += 2;
- // System.out.println("\t-------------");
- // System.out.println("\toffset = " + Integer.toHexString(readcount - 4));
- // System.out.println("\treorderCode = " + Integer.toHexString(reorderCode));
- // System.out.println("\tdataOffset = " + Integer.toHexString(dataOffset));
- this.SCRIPT_TO_LEAD_BYTES_INDEX.put(reorderCode, dataOffset);
- }
-
- this.SCRIPT_TO_LEAD_BYTES_DATA = new byte[dataSize * 2];
- dis.readFully(this.SCRIPT_TO_LEAD_BYTES_DATA, 0, this.SCRIPT_TO_LEAD_BYTES_DATA.length);
- readcount += this.SCRIPT_TO_LEAD_BYTES_DATA.length;
-
- // lead byte to scripts
- indexCount = dis.readShort();
- readcount += 2;
- dataSize = dis.readShort();
- readcount += 2;
- this.LEAD_BYTE_TO_SCRIPTS_INDEX = new int[indexCount];
- //System.out.println("Lead Byte to Scripts Index - Count = " + indexCount);
- for (int index = 0; index < indexCount; index++) {
- this.LEAD_BYTE_TO_SCRIPTS_INDEX[index] = 0xffff & dis.readShort();
- readcount += 2;
- // System.out.println("\t-------------");
- // System.out.println("\toffset = " + Integer.toHexString(readcount - 2));
- // System.out.println("\tindex = " + Integer.toHexString(index));
- // System.out.println("\tdataOffset = " + Integer.toHexString(this.LEAD_BYTE_TO_SCRIPTS_INDEX[index]));
- }
-
- this.LEAD_BYTE_TO_SCRIPTS_DATA = new byte[dataSize * 2];
- dis.readFully(this.LEAD_BYTE_TO_SCRIPTS_DATA, 0, this.LEAD_BYTE_TO_SCRIPTS_DATA.length);
- readcount += this.LEAD_BYTE_TO_SCRIPTS_DATA.length;
-
- this.serializedSize = readcount;
- }
-
- int getSerializedDataSize() {
- return this.serializedSize;
- }
-
- int[] getReorderCodesForLeadByte(int leadByte) {
- if (leadByte >= this.LEAD_BYTE_TO_SCRIPTS_INDEX.length) {
- return EMPTY_INT_ARRAY;
- }
- int offset = this.LEAD_BYTE_TO_SCRIPTS_INDEX[leadByte];
- if (offset == 0) {
- return EMPTY_INT_ARRAY;
- }
- int[] reorderCodes;
- if ((offset & DATA_MASK_FOR_INDEX) == DATA_MASK_FOR_INDEX) {
- reorderCodes = new int[1];
- reorderCodes[0] = offset & ~DATA_MASK_FOR_INDEX;
- } else {
- int length = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
- offset++;
-
- reorderCodes = new int[length];
- for (int code = 0; code < length; code++, offset++) {
- reorderCodes[code] = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
- }
- }
- return reorderCodes;
- }
-
- int[] getLeadBytesForReorderCode(int reorderCode) {
- if (!this.SCRIPT_TO_LEAD_BYTES_INDEX.containsKey(reorderCode)) {
- return EMPTY_INT_ARRAY;
- }
- int offset = this.SCRIPT_TO_LEAD_BYTES_INDEX.get(reorderCode);
-
- if (offset == 0) {
- return EMPTY_INT_ARRAY;
- }
-
- int[] leadBytes;
- if ((offset & DATA_MASK_FOR_INDEX) == DATA_MASK_FOR_INDEX) {
- leadBytes = new int[1];
- leadBytes[0] = offset & ~DATA_MASK_FOR_INDEX;
- } else {
- int length = readShort(this.SCRIPT_TO_LEAD_BYTES_DATA, offset);
- offset++;
-
- leadBytes = new int[length];
- for (int leadByte = 0; leadByte < length; leadByte++, offset++) {
- leadBytes[leadByte] = readShort(this.SCRIPT_TO_LEAD_BYTES_DATA, offset);
- }
- }
- return leadBytes;
- }
-
- private static int readShort(byte[] data, int offset) {
- return (0xff & data[offset * 2]) << 8 | (data[offset * 2 + 1] & 0xff);
- }
- }
-
- // package private data member -------------------------------------------
-
- static final byte BYTE_FIRST_TAILORED_ = (byte) 0x04;
- static final byte BYTE_COMMON_ = (byte) 0x05;
- static final int COMMON_TOP_2_ = 0x86; // int for unsigness
- static final int COMMON_BOTTOM_2_ = BYTE_COMMON_;
- static final int COMMON_BOTTOM_3 = 0x05;
- /**
- * Case strength mask
- */
- static final int CE_CASE_BIT_MASK_ = 0xC0;
- static final int CE_TAG_SHIFT_ = 24;
- static final int CE_TAG_MASK_ = 0x0F000000;
-
- static final int CE_SPECIAL_FLAG_ = 0xF0000000;
- /**
- * Lead surrogate that is tailored and doesn't start a contraction
- */
- static final int CE_SURROGATE_TAG_ = 5;
- /**
- * Mask to get the primary strength of the collation element
- */
- static final int CE_PRIMARY_MASK_ = 0xFFFF0000;
- /**
- * Mask to get the secondary strength of the collation element
- */
- static final int CE_SECONDARY_MASK_ = 0xFF00;
- /**
- * Mask to get the tertiary strength of the collation element
- */
- static final int CE_TERTIARY_MASK_ = 0xFF;
- /**
- * Primary strength shift
- */
- static final int CE_PRIMARY_SHIFT_ = 16;
- /**
- * Secondary strength shift
- */
- static final int CE_SECONDARY_SHIFT_ = 8;
- /**
- * Continuation marker
- */
- static final int CE_CONTINUATION_MARKER_ = 0xC0;
-
- /**
- * Size of collator raw data headers and options before the expansion data. This is used when expansion ces are to
- * be retrieved. ICU4C uses the expansion offset starting from UCollator.UColHeader, hence ICU4J will have to minus
- * that off to get the right expansion ce offset. In number of ints.
- */
- int m_expansionOffset_;
- /**
- * Size of collator raw data headers, options and expansions before contraction data. This is used when contraction
- * ces are to be retrieved. ICU4C uses contraction offset starting from UCollator.UColHeader, hence ICU4J will have
- * to minus that off to get the right contraction ce offset. In number of chars.
- */
- int m_contractionOffset_;
- /**
- * Flag indicator if Jamo is special
- */
- boolean m_isJamoSpecial_;
-
- // Collator options ------------------------------------------------------
-
- int m_defaultVariableTopValue_;
- boolean m_defaultIsFrenchCollation_;
- boolean m_defaultIsAlternateHandlingShifted_;
- int m_defaultCaseFirst_;
- boolean m_defaultIsCaseLevel_;
- int m_defaultDecomposition_;
- int m_defaultStrength_;
- boolean m_defaultIsHiragana4_;
- boolean m_defaultIsNumericCollation_;
- /**
- * Default script order - the one created at initial rule parse time
- */
- int[] m_defaultReorderCodes_;
-
- /**
- * Value of the variable top
- */
- int m_variableTopValue_;
- /**
- * Attribute for special Hiragana
- */
- boolean m_isHiragana4_;
- /**
- * Case sorting customization
- */
- int m_caseFirst_;
- /**
- * Numeric collation option
- */
- boolean m_isNumericCollation_;
- /**
- * Script order
- */
- int[] m_reorderCodes_;
-
- // end Collator options --------------------------------------------------
-
- /**
- * Expansion table
- */
- int m_expansion_[];
- /**
- * Contraction index table
- */
- char m_contractionIndex_[];
- /**
- * Contraction CE table
- */
- int m_contractionCE_[];
- /**
- * Data trie
- */
- IntTrie m_trie_;
- /**
- * Table to store all collation elements that are the last element of an expansion. This is for use in StringSearch.
- */
- int m_expansionEndCE_[];
- /**
- * Table to store the maximum size of any expansions that end with the corresponding collation element in
- * m_expansionEndCE_. For use in StringSearch too
- */
- byte m_expansionEndCEMaxSize_[];
- /**
- * Heuristic table to store information on whether a char character is considered "unsafe". "Unsafe" character are
- * combining marks or those belonging to some contraction sequence from the offset 1 onwards. E.g. if "ABC" is the
- * only contraction, then 'B' and 'C' are considered unsafe. If we have another contraction "ZA" with the one above,
- * then 'A', 'B', 'C' are "unsafe" but 'Z' is not.
- */
- byte m_unsafe_[];
- /**
- * Table to store information on whether a codepoint can occur as the last character in a contraction
- */
- byte m_contractionEnd_[];
- /**
- * Original collation rules
- */
- String m_rules_;
- /**
- * The smallest "unsafe" codepoint
- */
- char m_minUnsafe_;
- /**
- * The smallest codepoint that could be the end of a contraction
- */
- char m_minContractionEnd_;
- /**
- * General version of the collator
- */
- VersionInfo m_version_;
- /**
- * UCA version
- */
- VersionInfo m_UCA_version_;
- /**
- * UCD version
- */
- VersionInfo m_UCD_version_;
- /**
- * Lead byte and script data
- */
- int m_leadByteToScripts;
- int m_scriptToLeadBytes;
- /**
- * UnicodeData.txt property object
- */
- static final RuleBasedCollator UCA_;
- /**
- * UCA Constants
- */
- static final UCAConstants UCA_CONSTANTS_;
- /**
- * Lead Byte Constants
- */
- static LeadByteConstants LEADBYTE_CONSTANTS_;
- /**
- * Table for UCA and builder use
- */
- static final char UCA_CONTRACTIONS_[];
- static final int MAX_UCA_CONTRACTION_LENGTH;
-
- private static boolean UCA_INIT_COMPLETE;
-
- /**
- * Implicit generator
- */
- static final ImplicitCEGenerator impCEGen_;
-
- static final byte SORT_LEVEL_TERMINATOR_ = 1;
-
- // These are values from UCA required for
- // implicit generation and supressing sort key compression
- // they should regularly be in the UCA, but if one
- // is running without UCA, it could be a problem
- static final int maxRegularPrimary = 0x7A;
- static final int minImplicitPrimary = 0xE0;
- static final int maxImplicitPrimary = 0xE4;
-
- // block to initialise character property database
- static {
- // take pains to let static class init succeed, otherwise the class itself won't exist and
- // clients will get a NoClassDefFoundException. Instead, make the constructors fail if
- // we can't load the UCA data.
-
- RuleBasedCollator iUCA_ = null;
- UCAConstants iUCA_CONSTANTS_ = null;
- LeadByteConstants iLEADBYTE_CONSTANTS = null;
- char iUCA_CONTRACTIONS_[] = null;
- Output<Integer> maxUCAContractionLength = new Output<Integer>();
- ImplicitCEGenerator iimpCEGen_ = null;
- try {
- // !!! note what's going on here...
- // even though the static init of the class is not yet complete, we
- // instantiate an instance of the class. So we'd better be sure that
- // instantiation doesn't rely on the static initialization that's
- // not complete yet!
- iUCA_ = new RuleBasedCollator();
- iUCA_CONSTANTS_ = new UCAConstants();
- iLEADBYTE_CONSTANTS = new LeadByteConstants();
- iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_, iLEADBYTE_CONSTANTS, maxUCAContractionLength);
-
- // called before doing canonical closure for the UCA.
- iimpCEGen_ = new ImplicitCEGenerator(minImplicitPrimary, maxImplicitPrimary);
- // iimpCEGen_ = new ImplicitCEGenerator(iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_,
- // iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_);
- iUCA_.init();
- ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
- ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ENGLISH);
- iUCA_.m_rules_ = (String) rb.getObject("UCARules");
- } catch (MissingResourceException ex) {
- // throw ex;
- } catch (IOException e) {
- // e.printStackTrace();
- // throw new MissingResourceException(e.getMessage(),"","");
- }
-
- UCA_ = iUCA_;
- UCA_CONSTANTS_ = iUCA_CONSTANTS_;
- LEADBYTE_CONSTANTS_ = iLEADBYTE_CONSTANTS;
- UCA_CONTRACTIONS_ = iUCA_CONTRACTIONS_;
- MAX_UCA_CONTRACTION_LENGTH = maxUCAContractionLength.value;
- impCEGen_ = iimpCEGen_;
-
- UCA_INIT_COMPLETE = true;
- }
-
- private static void checkUCA() throws MissingResourceException {
- if (UCA_INIT_COMPLETE && UCA_ == null) {
- throw new MissingResourceException("Collator UCA data unavailable", "", "");
- }
- }
-
- // package private constructors ------------------------------------------
-
- /**
- * <p>
- * Private contructor for use by subclasses. Public access to creating Collators is handled by the API
- * Collator.getInstance() or RuleBasedCollator(String rules).
- * </p>
- * <p>
- * This constructor constructs the UCA collator internally
- * </p>
- */
- RuleBasedCollator() {
- checkUCA();
- }
-
- /**
- * Constructs a RuleBasedCollator from the argument locale.
- * If no resource bundle is associated with the locale, UCA is used instead.
- *
- * @param locale
- */
- RuleBasedCollator(ULocale locale) {
- checkUCA();
- try {
- ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
- ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale);
- if (rb != null) {
- ICUResourceBundle elements = null;
-
- // Use keywords, if supplied for lookup
- String collkey = locale.getKeywordValue("collation");
- if (collkey != null) {
- try {
- elements = rb.getWithFallback("collations/" + collkey);
- } catch (MissingResourceException e) {
- // fall through
- }
- }
- if (elements == null) {
- // either collation keyword was not supplied or
- // the keyword was invalid - use default collation for the locale
-
- // collations/default should always give a string back
- // keyword for the real collation data
- collkey = rb.getStringWithFallback("collations/default");
- elements = rb.getWithFallback("collations/" + collkey);
- }
-
- // TODO: Determine actual & valid locale correctly
- ULocale uloc = rb.getULocale();
- setLocale(uloc, uloc);
-
- m_rules_ = elements.getString("Sequence");
- ByteBuffer buf = elements.get("%%CollationBin").getBinary();
- // %%CollationBin
- if (buf != null) {
- // m_rules_ = (String)rules[1][1];
- CollatorReader.initRBC(this, buf);
- /*
- * BufferedInputStream input = new BufferedInputStream( new ByteArrayInputStream(map)); /*
- * CollatorReader reader = new CollatorReader(input, false); if (map.length >
- * MIN_BINARY_DATA_SIZE_) { reader.read(this, null); } else { reader.readHeader(this);
- * reader.readOptions(this); // duplicating UCA_'s data setWithUCATables(); }
- */
- // at this point, we have read in the collator
- // now we need to check whether the binary image has
- // the right UCA and other versions
- if (!m_UCA_version_.equals(UCA_.m_UCA_version_) || !m_UCD_version_.equals(UCA_.m_UCD_version_)) {
- init(m_rules_);
- return;
- }
- init();
- try {
- UResourceBundle reorderRes = elements.get("%%ReorderCodes");
- if (reorderRes != null) {
- int[] reorderCodes = reorderRes.getIntVector();
- setReorderCodes(reorderCodes);
- m_defaultReorderCodes_ = reorderCodes.clone();
- }
- } catch (MissingResourceException e) {
- // ignore
- }
- return;
- } else {
- init(m_rules_);
- return;
- }
- }
- } catch (Exception e) {
- // fallthrough
- }
- setWithUCAData();
- }
-
- // package private methods -----------------------------------------------
-
- /**
- * Sets this collator to use the tables in UCA. Note options not taken care of here.
- */
- final void setWithUCATables() {
- m_contractionOffset_ = UCA_.m_contractionOffset_;
- m_expansionOffset_ = UCA_.m_expansionOffset_;
- m_expansion_ = UCA_.m_expansion_;
- m_contractionIndex_ = UCA_.m_contractionIndex_;
- m_contractionCE_ = UCA_.m_contractionCE_;
- m_trie_ = UCA_.m_trie_;
- m_expansionEndCE_ = UCA_.m_expansionEndCE_;
- m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_;
- m_unsafe_ = UCA_.m_unsafe_;
- m_contractionEnd_ = UCA_.m_contractionEnd_;
- m_minUnsafe_ = UCA_.m_minUnsafe_;
- m_minContractionEnd_ = UCA_.m_minContractionEnd_;
- }
-
- /**
- * Sets this collator to use the all options and tables in UCA.
- */
- final void setWithUCAData() {
- latinOneFailed_ = true;
-
- m_addition3_ = UCA_.m_addition3_;
- m_bottom3_ = UCA_.m_bottom3_;
- m_bottomCount3_ = UCA_.m_bottomCount3_;
- m_caseFirst_ = UCA_.m_caseFirst_;
- m_caseSwitch_ = UCA_.m_caseSwitch_;
- m_common3_ = UCA_.m_common3_;
- m_contractionOffset_ = UCA_.m_contractionOffset_;
- setDecomposition(UCA_.getDecomposition());
- m_defaultCaseFirst_ = UCA_.m_defaultCaseFirst_;
- m_defaultDecomposition_ = UCA_.m_defaultDecomposition_;
- m_defaultIsAlternateHandlingShifted_ = UCA_.m_defaultIsAlternateHandlingShifted_;
- m_defaultIsCaseLevel_ = UCA_.m_defaultIsCaseLevel_;
- m_defaultIsFrenchCollation_ = UCA_.m_defaultIsFrenchCollation_;
- m_defaultIsHiragana4_ = UCA_.m_defaultIsHiragana4_;
- m_defaultStrength_ = UCA_.m_defaultStrength_;
- m_defaultVariableTopValue_ = UCA_.m_defaultVariableTopValue_;
- m_defaultIsNumericCollation_ = UCA_.m_defaultIsNumericCollation_;
- m_expansionOffset_ = UCA_.m_expansionOffset_;
- m_isAlternateHandlingShifted_ = UCA_.m_isAlternateHandlingShifted_;
- m_isCaseLevel_ = UCA_.m_isCaseLevel_;
- m_isFrenchCollation_ = UCA_.m_isFrenchCollation_;
- m_isHiragana4_ = UCA_.m_isHiragana4_;
- m_isJamoSpecial_ = UCA_.m_isJamoSpecial_;
- m_isSimple3_ = UCA_.m_isSimple3_;
- m_mask3_ = UCA_.m_mask3_;
- m_minContractionEnd_ = UCA_.m_minContractionEnd_;
- m_minUnsafe_ = UCA_.m_minUnsafe_;
- m_rules_ = UCA_.m_rules_;
- setStrength(UCA_.getStrength());
- m_top3_ = UCA_.m_top3_;
- m_topCount3_ = UCA_.m_topCount3_;
- m_variableTopValue_ = UCA_.m_variableTopValue_;
- m_isNumericCollation_ = UCA_.m_isNumericCollation_;
- setWithUCATables();
- latinOneFailed_ = false;
- }
-
- /**
- * Test whether a char character is potentially "unsafe" for use as a collation starting point. "Unsafe" characters
- * are combining marks or those belonging to some contraction sequence from the offset 1 onwards. E.g. if "ABC" is
- * the only contraction, then 'B' and 'C' are considered unsafe. If we have another contraction "ZA" with the one
- * above, then 'A', 'B', 'C' are "unsafe" but 'Z' is not.
- *
- * @param ch
- * character to determin
- * @return true if ch is unsafe, false otherwise
- */
- final boolean isUnsafe(char ch) {
- if (ch < m_minUnsafe_) {
- return false;
- }
-
- if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
- if (UTF16.isLeadSurrogate(ch) || UTF16.isTrailSurrogate(ch)) {
- // Trail surrogate are always considered unsafe.
- return true;
- }
- ch &= HEURISTIC_OVERFLOW_MASK_;
- ch += HEURISTIC_OVERFLOW_OFFSET_;
- }
- int value = m_unsafe_[ch >> HEURISTIC_SHIFT_];
- return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0;
- }
-
- /**
- * Approximate determination if a char character is at a contraction end. Guaranteed to be true if a character is at
- * the end of a contraction, otherwise it is not deterministic.
- *
- * @param ch
- * character to be determined
- */
- final boolean isContractionEnd(char ch) {
- if (UTF16.isTrailSurrogate(ch)) {
- return true;
- }
-
- if (ch < m_minContractionEnd_) {
- return false;
- }
-
- if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
- ch &= HEURISTIC_OVERFLOW_MASK_;
- ch += HEURISTIC_OVERFLOW_OFFSET_;
- }
- int value = m_contractionEnd_[ch >> HEURISTIC_SHIFT_];
- return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0;
- }
-
- /**
- * Retrieve the tag of a special ce
- *
- * @param ce
- * ce to test
- * @return tag of ce
- */
- static int getTag(int ce) {
- return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_;
- }
-
- /**
- * Checking if ce is special
- *
- * @param ce
- * to check
- * @return true if ce is special
- */
- static boolean isSpecial(int ce) {
- return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_;
- }
-
- /**
- * Checks if the argument ce is a continuation
- *
- * @param ce
- * collation element to test
- * @return true if ce is a continuation
- */
- static final boolean isContinuation(int ce) {
- return ce != CollationElementIterator.NULLORDER && (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_;
- }
-
- // private inner classes ------------------------------------------------
-
- // private variables -----------------------------------------------------
-
    /**
     * The smallest natural unsafe or contraction end char character before tailoring. This is a combining mark.
     */
    private static final int DEFAULT_MIN_HEURISTIC_ = 0x300;
    /**
     * Heuristic table size: 1 bit for each Latin-1 char, and some power of two for hashing the
     * rest of the chars. Size in bytes.
     */
    private static final char HEURISTIC_SIZE_ = 1056;
    /**
     * Mask value down to "some power of two" - 1, number of bits, not num of bytes.
     */
    private static final char HEURISTIC_OVERFLOW_MASK_ = 0x1fff;
    /**
     * Unsafe character shift: index = ch >> shift, bit = ch & HEURISTIC_MASK_.
     */
    private static final int HEURISTIC_SHIFT_ = 3;
    /**
     * Unsafe character addition for character too large, it has to be folded then incremented.
     */
    private static final char HEURISTIC_OVERFLOW_OFFSET_ = 256;
    /**
     * Mask value to get offset in heuristic table.
     */
    private static final char HEURISTIC_MASK_ = 7;

    // tertiary-level processing state (set up from the case-first option)
    private int m_caseSwitch_;
    private int m_common3_;
    private int m_mask3_;
    /**
     * When switching case, we need to add or subtract different values.
     */
    private int m_addition3_;
    /**
     * Upper range when compressing tertiary runs.
     */
    private int m_top3_;
    /**
     * Lower range when compressing tertiary runs.
     */
    private int m_bottom3_;
    private int m_topCount3_;
    private int m_bottomCount3_;
    /**
     * Script reordering table (permutes primary lead bytes); null when no reordering is set.
     */
    private byte[] m_leadBytePermutationTable_;
    /**
     * Case first constants
     */
    private static final int CASE_SWITCH_ = 0xC0;
    private static final int NO_CASE_SWITCH_ = 0;
    /**
     * Case level constants
     */
    private static final int CE_REMOVE_CASE_ = 0x3F;
    private static final int CE_KEEP_CASE_ = 0xFF;
    /**
     * Case strength mask
     */
    private static final int CE_CASE_MASK_3_ = 0xFF;
    /**
     * Sortkey size factor. Values can be changed.
     */
    private static final double PROPORTION_2_ = 0.5;
    private static final double PROPORTION_3_ = 0.667;
-
    // These values come from the UCA ----------------------------------------

    /**
     * Magic special byte values from the fractional UCA.
     */
    // private static final byte BYTE_ZERO_ = 0x0;
    // private static final byte BYTE_LEVEL_SEPARATOR_ = (byte)0x01;
    // private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02;
    private static final byte BYTE_SHIFT_PREFIX_ = (byte) 0x03;
    /* private */static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_;
    // private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_;
    // TODO: Make the following values dynamic since they change with almost every UCA version.
    static final byte CODAN_PLACEHOLDER = 0x12;
    private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte) 0x5B;

    private static final byte BYTE_UNSHIFTED_MAX_ = (byte) 0xFF;
    // number of secondary weights between the common bottom and top bytes
    private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1;
    private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80;
    private static final int FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40;
    private static final int COMMON_TOP_CASE_SWITCH_OFF_3_ = 0x85;
    private static final int COMMON_TOP_CASE_SWITCH_LOWER_3_ = 0x45;
    private static final int COMMON_TOP_CASE_SWITCH_UPPER_3_ = 0xC5;
    private static final int COMMON_BOTTOM_3_ = 0x05;
    private static final int COMMON_BOTTOM_CASE_SWITCH_UPPER_3_ = 0x86;
    private static final int COMMON_BOTTOM_CASE_SWITCH_LOWER_3_ = COMMON_BOTTOM_3_;
    // split of TOTAL_2_ into top-counted and bottom-counted compression runs
    private static final int TOP_COUNT_2_ = (int) (PROPORTION_2_ * TOTAL_2_);
    private static final int BOTTOM_COUNT_2_ = TOTAL_2_ - TOP_COUNT_2_;
    private static final int COMMON_2_ = COMMON_BOTTOM_2_;
    private static final int COMMON_UPPER_FIRST_3_ = 0xC5;
    private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_;
    // private static final int COMMON_4_ = (byte)0xFF;

    /*
     * Minimum size required for the binary collation data in bytes. Size of UCA header + size of options to 4 bytes
     */
    // private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;

    /**
     * If this collator is to generate only simple tertiaries for fast path
     */
    private boolean m_isSimple3_;

    /**
     * French collation sorting flag
     */
    private boolean m_isFrenchCollation_;
    /**
     * Flag indicating if shifted is requested for Quaternary alternate handling. If this is not true, the default for
     * alternate handling will be non-ignorable.
     */
    private boolean m_isAlternateHandlingShifted_;
    /**
     * Extra case level for sorting
     */
    private boolean m_isCaseLevel_;
    /**
     * Frozen state of the collator.
     */
    private Lock frozenLock;


    // initial per-level sort key buffer sizes (buffers grow on demand)
    private static final int SORT_BUFFER_INIT_SIZE_ = 128;
    private static final int SORT_BUFFER_INIT_SIZE_1_ = SORT_BUFFER_INIT_SIZE_ << 3;
    private static final int SORT_BUFFER_INIT_SIZE_2_ = SORT_BUFFER_INIT_SIZE_;
    private static final int SORT_BUFFER_INIT_SIZE_3_ = SORT_BUFFER_INIT_SIZE_;
    private static final int SORT_BUFFER_INIT_SIZE_CASE_ = SORT_BUFFER_INIT_SIZE_ >> 2;
    private static final int SORT_BUFFER_INIT_SIZE_4_ = SORT_BUFFER_INIT_SIZE_;

    // a continuation CE carries both of these tag bits set
    private static final int CE_CONTINUATION_TAG_ = 0xC0;
    private static final int CE_REMOVE_CONTINUATION_MASK_ = 0xFFFFFF3F;

    private static final int LAST_BYTE_MASK_ = 0xFF;

    // private static final int CE_RESET_TOP_VALUE_ = 0x9F000303;
    // private static final int CE_NEXT_TOP_VALUE_ = 0xE8960303;

    private static final byte SORT_CASE_BYTE_START_ = (byte) 0x80;
    private static final byte SORT_CASE_SHIFT_START_ = (byte) 7;

    /**
     * CE buffer size
     */
    private static final int CE_BUFFER_SIZE_ = 512;

    // variables for Latin-1 processing
    boolean latinOneUse_ = false;
    boolean latinOneRegenTable_ = false;
    boolean latinOneFailed_ = false;

    int latinOneTableLen_ = 0;
    int latinOneCEs_[] = null;
-
- private final class CollationBuffer {
- /**
- * Bunch of utility iterators
- */
- protected StringUCharacterIterator m_srcUtilIter_;
- protected CollationElementIterator m_srcUtilColEIter_;
- protected StringUCharacterIterator m_tgtUtilIter_;
- protected CollationElementIterator m_tgtUtilColEIter_;
-
- /**
- * Utility comparison flags
- */
- protected boolean m_utilCompare0_;
- // private boolean m_utilCompare1_;
- protected boolean m_utilCompare2_;
- protected boolean m_utilCompare3_;
- protected boolean m_utilCompare4_;
- protected boolean m_utilCompare5_;
-
- /**
- * Utility byte buffer
- */
- protected byte m_utilBytes0_[];
- protected byte m_utilBytes1_[];
- protected byte m_utilBytes2_[];
- protected byte m_utilBytes3_[];
- protected byte m_utilBytes4_[];
- // private byte m_utilBytes5_[];
-
- protected RawCollationKey m_utilRawCollationKey_;
-
- protected int m_utilBytesCount0_;
- protected int m_utilBytesCount1_;
- protected int m_utilBytesCount2_;
- protected int m_utilBytesCount3_;
- protected int m_utilBytesCount4_;
- // private int m_utilBytesCount5_;
-
- // private int m_utilCount0_;
- // private int m_utilCount1_;
- protected int m_utilCount2_;
- protected int m_utilCount3_;
- protected int m_utilCount4_;
- // private int m_utilCount5_;
-
- protected int m_utilFrenchStart_;
- protected int m_utilFrenchEnd_;
-
- /**
- * Preparing the CE buffers. will be filled during the primary phase
- */
- protected int m_srcUtilCEBuffer_[];
- protected int m_tgtUtilCEBuffer_[];
- protected int m_srcUtilCEBufferSize_;
- protected int m_tgtUtilCEBufferSize_;
-
- protected int m_srcUtilContOffset_;
- protected int m_tgtUtilContOffset_;
-
- protected int m_srcUtilOffset_;
- protected int m_tgtUtilOffset_;
-
- private CollationBuffer() {
- initBuffers();
- }
-
- /**
- * Initializes utility iterators and byte buffer used by compare
- */
- protected final void initBuffers() {
- resetBuffers();
- m_srcUtilIter_ = new StringUCharacterIterator();
- m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, RuleBasedCollator.this);
- m_tgtUtilIter_ = new StringUCharacterIterator();
- m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, RuleBasedCollator.this);
- m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case
- m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary
- m_utilBytes2_ = new byte[SORT_BUFFER_INIT_SIZE_2_]; // secondary
- m_utilBytes3_ = new byte[SORT_BUFFER_INIT_SIZE_3_]; // tertiary
- m_utilBytes4_ = new byte[SORT_BUFFER_INIT_SIZE_4_]; // Quaternary
- m_srcUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];
- m_tgtUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];
- }
-
- protected final void resetBuffers() {
- m_utilCompare0_ = false;
- // private boolean m_utilCompare1_;
- m_utilCompare2_ = false;
- m_utilCompare3_ = false;
- m_utilCompare4_ = false;
- m_utilCompare5_ = false;
-
- m_utilBytesCount0_ = 0;
- m_utilBytesCount1_ = 0;
- m_utilBytesCount2_ = 0;
- m_utilBytesCount3_ = 0;
- m_utilBytesCount4_ = 0;
- // private int m_utilBytesCount5_;
-
- m_utilCount2_ = 0;
- m_utilCount3_ = 0;
- m_utilCount4_ = 0;
-
- m_utilFrenchStart_ = 0;
- m_utilFrenchEnd_ = 0;
-
- m_srcUtilContOffset_ = 0;
- m_tgtUtilContOffset_ = 0;
-
- m_srcUtilOffset_ = 0;
- m_tgtUtilOffset_ = 0;
- }
- }
-
- // private methods -------------------------------------------------------
-
    /**
     * Builds this collator from a rule string: starts from the full UCA data,
     * then applies the tailoring parsed from the rules.
     *
     * @param rules collation rules in the ICU tailoring syntax
     * @throws Exception when the rules cannot be parsed or applied
     */
    private void init(String rules) throws Exception {
        setWithUCAData(); // baseline: full UCA tables and options
        CollationParsedRuleBuilder builder = new CollationParsedRuleBuilder(rules);
        builder.setRules(this); // installs the tailored tables into this collator
        m_rules_ = rules;
        init();
        buildPermutationTable(); // presumably rebuilds the script-reorder table — confirm
    }
-
- private final int compareRegular(String source, String target, int offset, CollationBuffer buffer) {
- buffer.resetBuffers();
-
- int strength = getStrength();
- // setting up the collator parameters
- buffer.m_utilCompare0_ = m_isCaseLevel_;
- // m_utilCompare1_ = true;
- buffer.m_utilCompare2_ = strength >= SECONDARY;
- buffer.m_utilCompare3_ = strength >= TERTIARY;
- buffer.m_utilCompare4_ = strength >= QUATERNARY;
- buffer.m_utilCompare5_ = strength == IDENTICAL;
- boolean doFrench = m_isFrenchCollation_ && buffer.m_utilCompare2_;
- boolean doShift4 = m_isAlternateHandlingShifted_ && buffer.m_utilCompare4_;
- boolean doHiragana4 = m_isHiragana4_ && buffer.m_utilCompare4_;
-
- if (doHiragana4 && doShift4) {
- String sourcesub = source.substring(offset);
- String targetsub = target.substring(offset);
- return compareBySortKeys(sourcesub, targetsub, buffer);
- }
-
- // This is the lowest primary value that will not be ignored if shifted
- int lowestpvalue = m_isAlternateHandlingShifted_ ? m_variableTopValue_ << 16 : 0;
- buffer.m_srcUtilCEBufferSize_ = 0;
- buffer.m_tgtUtilCEBufferSize_ = 0;
- int result = doPrimaryCompare(doHiragana4, lowestpvalue, source, target, offset, buffer);
- if (buffer.m_srcUtilCEBufferSize_ == -1 && buffer.m_tgtUtilCEBufferSize_ == -1) {
- // since the cebuffer is cleared when we have determined that
- // either source is greater than target or vice versa, the return
- // result is the comparison result and not the hiragana result
- return result;
- }
-
- int hiraganaresult = result;
-
- if (buffer.m_utilCompare2_) {
- result = doSecondaryCompare(doFrench, buffer);
- if (result != 0) {
- return result;
- }
- }
- // doing the case bit
- if (buffer.m_utilCompare0_) {
- result = doCaseCompare(buffer);
- if (result != 0) {
- return result;
- }
- }
- // Tertiary level
- if (buffer.m_utilCompare3_) {
- result = doTertiaryCompare(buffer);
- if (result != 0) {
- return result;
- }
- }
-
- if (doShift4) { // checkQuad
- result = doQuaternaryCompare(lowestpvalue, buffer);
- if (result != 0) {
- return result;
- }
- } else if (doHiragana4 && hiraganaresult != 0) {
- // If we're fine on quaternaries, we might be different
- // on Hiragana. This, however, might fail us in shifted.
- return hiraganaresult;
- }
-
- // For IDENTICAL comparisons, we use a bitwise character comparison
- // as a tiebreaker if all else is equal.
- // Getting here should be quite rare - strings are not identical -
- // that is checked first, but compared == through all other checks.
- if (buffer.m_utilCompare5_) {
- return doIdenticalCompare(source, target, offset, true);
- }
- return 0;
- }
-
- // Is this primary weight compressible?
- // Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit).
- // TODO: This should use per-lead-byte flags from FractionalUCA.txt.
- static boolean isCompressible(int primary1) {
- return BYTE_FIRST_NON_LATIN_PRIMARY_ <= primary1 && primary1 <= maxRegularPrimary;
- }
-
    /**
     * Gets the 2 bytes of primary order and adds it to the primary byte array
     *
     * @param ce
     *            current ce
     * @param notIsContinuation
     *            flag indicating if the current bytes belong to a continuation ce
     * @param doShift
     *            flag indicating if ce is to be shifted
     * @param leadPrimary
     *            lead primary used for compression
     * @param commonBottom4
     *            common byte value for Quaternary
     * @param bottomCount4
     *            smallest byte value for Quaternary
     * @param buffer
     *            collation buffer temporary state
     * @return the new lead primary for compression
     */
    private final int doPrimaryBytes(int ce, boolean notIsContinuation, boolean doShift, int leadPrimary,
            int commonBottom4, int bottomCount4, CollationBuffer buffer) {

        // ce is shifted right in place: p1 is the lead primary byte and p2
        // the second primary byte of the original 32-bit CE
        int p2 = (ce >>>= 16) & LAST_BYTE_MASK_; // in ints for unsigned
        int p1 = ce >>> 8; // comparison
        int originalP1 = p1;
        if (notIsContinuation) {
            if (m_leadBytePermutationTable_ != null) {
                // script reordering permutes the lead byte only
                p1 = 0xff & m_leadBytePermutationTable_[p1];
            }
        }

        if (doShift) {
            // Shifted CE: its primary bytes go into the quaternary buffer.
            // First flush any pending run of common quaternary bytes as a
            // counted sequence.
            if (buffer.m_utilCount4_ > 0) {
                while (buffer.m_utilCount4_ > bottomCount4) {
                    buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonBottom4 + bottomCount4));
                    buffer.m_utilBytesCount4_++;
                    buffer.m_utilCount4_ -= bottomCount4;
                }
                buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonBottom4 + (buffer.m_utilCount4_ - 1)));
                buffer.m_utilBytesCount4_++;
                buffer.m_utilCount4_ = 0;
            }
            // dealing with a variable and we're treating them as shifted
            // This is a shifted ignorable
            if (p1 != 0) {
                // we need to check this since we could be in continuation
                buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) p1);
                buffer.m_utilBytesCount4_++;
            }
            if (p2 != 0) {
                buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) p2);
                buffer.m_utilBytesCount4_++;
            }
        } else {
            // Note: This code assumes that the table is well built
            // i.e. not having 0 bytes where they are not supposed to be.
            // Usually, we'll have non-zero primary1 & primary2, except
            // in cases of LatinOne and friends, when primary2 will be
            // regular and simple sortkey calc
            if (p1 != CollationElementIterator.IGNORABLE) {
                if (notIsContinuation) {
                    if (leadPrimary == p1) {
                        // same lead byte as the previous CE: emit only p2
                        buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p2);
                        buffer.m_utilBytesCount1_++;
                    } else {
                        if (leadPrimary != 0) {
                            // terminate the previous compression run with a
                            // byte ordered relative to the new lead byte
                            buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
                                    ((p1 > leadPrimary) ? BYTE_UNSHIFTED_MAX_ : BYTE_UNSHIFTED_MIN_));
                            buffer.m_utilBytesCount1_++;
                        }
                        if (p2 == CollationElementIterator.IGNORABLE) {
                            // one byter, not compressed
                            buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p1);
                            buffer.m_utilBytesCount1_++;
                            leadPrimary = 0;
                        } else if (isCompressible(originalP1)) {
                            // compress: remember the lead byte for the next CE
                            leadPrimary = p1;
                            buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p1);
                            buffer.m_utilBytesCount1_++;
                            buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p2);
                            buffer.m_utilBytesCount1_++;
                        } else {
                            leadPrimary = 0;
                            buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p1);
                            buffer.m_utilBytesCount1_++;
                            buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p2);
                            buffer.m_utilBytesCount1_++;
                        }
                    }
                } else {
                    // continuation, add primary to the key, no compression
                    buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p1);
                    buffer.m_utilBytesCount1_++;
                    if (p2 != CollationElementIterator.IGNORABLE) {
                        buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p2);
                        // second part
                        buffer.m_utilBytesCount1_++;
                    }
                }
            }
        }
        return leadPrimary;
    }
-
    /**
     * Gets the secondary byte and adds it to the secondary byte array
     *
     * @param ce current ce
     * @param notIsContinuation flag indicating if the current bytes belong to a continuation ce
     * @param doFrench flag indicator if french sort is to be performed
     * @param buffer collation buffer temporary state
     */
    private final void doSecondaryBytes(int ce, boolean notIsContinuation, boolean doFrench, CollationBuffer buffer) {
        int s = (ce >> 8) & LAST_BYTE_MASK_; // int for comparison
        if (s != 0) {
            if (!doFrench) {
                // This is compression code: runs of the common secondary byte
                // are counted and emitted later as counted bytes.
                if (s == COMMON_2_ && notIsContinuation) {
                    buffer.m_utilCount2_++;
                } else {
                    if (buffer.m_utilCount2_ > 0) {
                        // Flush the pending run: count down from the top when
                        // the terminating byte is larger than COMMON_2_, up
                        // from the bottom otherwise.
                        if (s > COMMON_2_) { // not necessary for 4th level.
                            while (buffer.m_utilCount2_ > TOP_COUNT_2_) {
                                buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_,
                                        (byte) (COMMON_TOP_2_ - TOP_COUNT_2_));
                                buffer.m_utilBytesCount2_++;
                                buffer.m_utilCount2_ -= TOP_COUNT_2_;
                            }
                            buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_,
                                    (byte) (COMMON_TOP_2_ - (buffer.m_utilCount2_ - 1)));
                            buffer.m_utilBytesCount2_++;
                        } else {
                            while (buffer.m_utilCount2_ > BOTTOM_COUNT_2_) {
                                buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_,
                                        (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
                                buffer.m_utilBytesCount2_++;
                                buffer.m_utilCount2_ -= BOTTOM_COUNT_2_;
                            }
                            buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_,
                                    (byte) (COMMON_BOTTOM_2_ + (buffer.m_utilCount2_ - 1)));
                            buffer.m_utilBytesCount2_++;
                        }
                        buffer.m_utilCount2_ = 0;
                    }
                    buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_, (byte) s);
                    buffer.m_utilBytesCount2_++;
                }
            } else {
                buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_, (byte) s);
                buffer.m_utilBytesCount2_++;
                // Do the special handling for French secondaries
                // We need to get continuation elements and do intermediate
                // restore
                // abc1c2c3de with french secondaries need to be edc1c2c3ba
                // NOT edc3c2c1ba
                if (notIsContinuation) {
                    if (buffer.m_utilFrenchStart_ != -1) {
                        // reverse secondaries from frenchStartPtr up to
                        // frenchEndPtr
                        reverseBuffer(buffer.m_utilBytes2_, buffer.m_utilFrenchStart_, buffer.m_utilFrenchEnd_);
                        buffer.m_utilFrenchStart_ = -1;
                    }
                } else {
                    // inside a continuation: track the run so it is reversed
                    // as a unit once the continuation ends
                    if (buffer.m_utilFrenchStart_ == -1) {
                        buffer.m_utilFrenchStart_ = buffer.m_utilBytesCount2_ - 2;
                    }
                    buffer.m_utilFrenchEnd_ = buffer.m_utilBytesCount2_ - 1;
                }
            }
        }
    }
-
- /**
- * Reverse the argument buffer
- *
- * @param buffer to reverse
- * @param start index in buffer to start from
- * @param end index in buffer to end at
- */
- private static void reverseBuffer(byte buffer[], int start, int end) {
- while (start < end) {
- byte b = buffer[start];
- buffer[start++] = buffer[end];
- buffer[end--] = b;
- }
- }
-
- /**
- * Insert the case shifting byte if required
- *
- * @param caseshift value
- * @return new caseshift value
- */
- private final int doCaseShift(int caseshift, CollationBuffer buffer) {
- if (caseshift == 0) {
- buffer.m_utilBytes0_ = append(buffer.m_utilBytes0_, buffer.m_utilBytesCount0_, SORT_CASE_BYTE_START_);
- buffer.m_utilBytesCount0_++;
- caseshift = SORT_CASE_SHIFT_START_;
- }
- return caseshift;
- }
-
    /**
     * Performs the casing sort: packs case bits from the tertiary weight into
     * the case-level byte buffer.
     *
     * @param tertiary byte in ints for easy comparison
     * @param notIsContinuation flag indicating if the current bytes belong to a continuation ce
     * @param caseshift remaining bit capacity of the current case byte
     * @param buffer collation buffer temporary state
     * @return the new value of case shift
     */
    private final int doCaseBytes(int tertiary, boolean notIsContinuation, int caseshift, CollationBuffer buffer) {
        caseshift = doCaseShift(caseshift, buffer);

        if (notIsContinuation && tertiary != 0) {
            // the top two bits of the tertiary carry the case information
            byte casebits = (byte) (tertiary & 0xC0);
            if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
                if (casebits == 0) {
                    buffer.m_utilBytes0_[buffer.m_utilBytesCount0_ - 1] |= (1 << (--caseshift));
                } else {
                    // second bit; ensure both bits fit in the current byte
                    caseshift = doCaseShift(caseshift - 1, buffer);
                    buffer.m_utilBytes0_[buffer.m_utilBytesCount0_ - 1] |= ((casebits >> 6) & 1) << (--caseshift);
                }
            } else {
                if (casebits != 0) {
                    buffer.m_utilBytes0_[buffer.m_utilBytesCount0_ - 1] |= 1 << (--caseshift);
                    // second bit
                    caseshift = doCaseShift(caseshift, buffer);
                    buffer.m_utilBytes0_[buffer.m_utilBytesCount0_ - 1] |= ((casebits >> 7) & 1) << (--caseshift);
                } else {
                    // lowercase consumes a single zero bit
                    caseshift--;
                }
            }
        }

        return caseshift;
    }
-
    /**
     * Gets the tertiary byte and adds it to the tertiary byte array
     *
     * @param tertiary byte in int for easy comparison
     * @param notIsContinuation flag indicating if the current bytes belong to a continuation ce
     * @param buffer collation buffer temporary state
     */
    private final void doTertiaryBytes(int tertiary, boolean notIsContinuation, CollationBuffer buffer) {
        if (tertiary != 0) {
            // This is compression code: runs of the common tertiary byte are
            // counted and emitted later as counted bytes.
            // sequence size check is included in the if clause
            if (tertiary == m_common3_ && notIsContinuation) {
                buffer.m_utilCount3_++;
            } else {
                int common3 = m_common3_ & LAST_BYTE_MASK_;
                // move the byte out of the counted common range, direction
                // depending on the case-first setting
                if (tertiary > common3 && m_common3_ == COMMON_NORMAL_3_) {
                    tertiary += m_addition3_;
                } else if (tertiary <= common3 && m_common3_ == COMMON_UPPER_FIRST_3_) {
                    tertiary -= m_addition3_;
                }
                if (buffer.m_utilCount3_ > 0) {
                    // flush the pending run: count down from the top when the
                    // terminating byte is larger, up from the bottom otherwise
                    if (tertiary > common3) {
                        while (buffer.m_utilCount3_ > m_topCount3_) {
                            buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_top3_ - m_topCount3_));
                            buffer.m_utilBytesCount3_++;
                            buffer.m_utilCount3_ -= m_topCount3_;
                        }
                        buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_,
                                (byte) (m_top3_ - (buffer.m_utilCount3_ - 1)));
                        buffer.m_utilBytesCount3_++;
                    } else {
                        while (buffer.m_utilCount3_ > m_bottomCount3_) {
                            buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_,
                                    (byte) (m_bottom3_ + m_bottomCount3_));
                            buffer.m_utilBytesCount3_++;
                            buffer.m_utilCount3_ -= m_bottomCount3_;
                        }
                        buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_,
                                (byte) (m_bottom3_ + (buffer.m_utilCount3_ - 1)));
                        buffer.m_utilBytesCount3_++;
                    }
                    buffer.m_utilCount3_ = 0;
                }
                buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) tertiary);
                buffer.m_utilBytesCount3_++;
            }
        }
    }
-
    /**
     * Gets the Quaternary byte and adds it to the Quaternary byte array
     *
     * @param isCodePointHiragana flag indicator if the previous codepoint we dealt with was Hiragana
     * @param commonBottom4 smallest common Quaternary byte
     * @param bottomCount4 smallest Quaternary byte
     * @param hiragana4 hiragana Quaternary byte
     * @param buffer collation buffer temporary state
     */
    private final void doQuaternaryBytes(boolean isCodePointHiragana, int commonBottom4, int bottomCount4,
            byte hiragana4, CollationBuffer buffer) {
        if (isCodePointHiragana) { // This was Hiragana, need to note it
            if (buffer.m_utilCount4_ > 0) { // Close this part
                // flush the pending run of common quaternary bytes as a
                // counted sequence before the hiragana marker
                while (buffer.m_utilCount4_ > bottomCount4) {
                    buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonBottom4 + bottomCount4));
                    buffer.m_utilBytesCount4_++;
                    buffer.m_utilCount4_ -= bottomCount4;
                }
                buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonBottom4 + (buffer.m_utilCount4_ - 1)));
                buffer.m_utilBytesCount4_++;
                buffer.m_utilCount4_ = 0;
            }
            buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, hiragana4); // Add the Hiragana
            buffer.m_utilBytesCount4_++;
        } else { // This wasn't Hiragana, so we can continue adding stuff
            buffer.m_utilCount4_++;
        }
    }
-
    /**
     * Iterates through the argument string for all ces. Split the ces into their relevant primaries, secondaries etc.
     *
     * @param source normalized string
     * @param doFrench flag indicator if special handling of French has to be done
     * @param hiragana4 offset for Hiragana quaternary
     * @param commonBottom4 smallest common quaternary byte
     * @param bottomCount4 smallest quaternary byte
     * @param buffer collation buffer temporary state
     */
    private final void getSortKeyBytes(String source, boolean doFrench, byte hiragana4, int commonBottom4,
            int bottomCount4, CollationBuffer buffer)

    {
        int backupDecomposition = getDecomposition();
        // TODO- hack fix around frozen state - stop self-modification
        internalSetDecomposition(NO_DECOMPOSITION); // have to revert to backup later
        buffer.m_srcUtilIter_.setText(source);
        buffer.m_srcUtilColEIter_.setText(buffer.m_srcUtilIter_);
        buffer.m_utilFrenchStart_ = -1;
        buffer.m_utilFrenchEnd_ = -1;

        // doShift carries over loop iterations so continuations and trailing
        // ignorables inherit the shifted state of the CE that started them
        boolean doShift = false;
        boolean notIsContinuation = false;

        int leadPrimary = 0; // int for easier comparison
        int caseShift = 0;

        while (true) {
            int ce = buffer.m_srcUtilColEIter_.next();
            if (ce == CollationElementIterator.NULLORDER) {
                break;
            }

            if (ce == CollationElementIterator.IGNORABLE) {
                continue;
            }

            notIsContinuation = !isContinuation(ce);

            boolean isPrimaryByteIgnorable = (ce & CE_PRIMARY_MASK_) == 0;
            // actually we can just check that the first byte is 0
            // generation stuffs the order left first
            boolean isSmallerThanVariableTop = (ce >>> CE_PRIMARY_SHIFT_) <= m_variableTopValue_;
            doShift = (m_isAlternateHandlingShifted_
                    && ((notIsContinuation && isSmallerThanVariableTop && !isPrimaryByteIgnorable) // primary byte not 0
                    || (!notIsContinuation && doShift)) || (doShift && isPrimaryByteIgnorable));
            if (doShift && isPrimaryByteIgnorable) {
                // amendment to the UCA says that primary ignorables and other
                // ignorables should be removed if following a shifted code
                // point
                // if we were shifted and we got an ignorable code point
                // we should just completely ignore it
                continue;
            }
            leadPrimary = doPrimaryBytes(ce, notIsContinuation, doShift, leadPrimary, commonBottom4, bottomCount4, buffer);

            if (doShift) {
                // shifted CEs contribute only to the quaternary level
                continue;
            }
            if (buffer.m_utilCompare2_) {
                doSecondaryBytes(ce, notIsContinuation, doFrench, buffer);
            }

            int t = ce & LAST_BYTE_MASK_;
            if (!notIsContinuation) {
                t = ce & CE_REMOVE_CONTINUATION_MASK_;
            }

            if (buffer.m_utilCompare0_ && (!isPrimaryByteIgnorable || buffer.m_utilCompare2_)) {
                // do the case level if we need to do it. We don't want to calculate
                // case level for primary ignorables if we have only primary strength and case level
                // otherwise we would break well formedness of CEs
                caseShift = doCaseBytes(t, notIsContinuation, caseShift, buffer);
            } else if (notIsContinuation) {
                t ^= m_caseSwitch_;
            }

            t &= m_mask3_;

            if (buffer.m_utilCompare3_) {
                doTertiaryBytes(t, notIsContinuation, buffer);
            }

            if (buffer.m_utilCompare4_ && notIsContinuation) { // compare quad
                doQuaternaryBytes(buffer.m_srcUtilColEIter_.m_isCodePointHiragana_, commonBottom4, bottomCount4, hiragana4, buffer);
            }
        }
        // TODO - hack fix around frozen state - stop self-modification
        internalSetDecomposition(backupDecomposition); // reverts to original
        if (buffer.m_utilFrenchStart_ != -1) {
            // one last round of checks: reverse any still-open French run
            reverseBuffer(buffer.m_utilBytes2_, buffer.m_utilFrenchStart_, buffer.m_utilFrenchEnd_);
        }
    }
-
- /**
- * From the individual strength byte results the final compact sortkey will be calculated.
- *
- * @param source text string
- * @param doFrench flag indicating that special handling of French has to be done
- * @param commonBottom4 smallest common quaternary byte
- * @param bottomCount4 smallest quaternary byte
- * @param key output RawCollationKey to store results, key cannot be null
- * @param buffer collation buffer temporary state
- */
- private final void getSortKey(String source, boolean doFrench, int commonBottom4, int bottomCount4,
- RawCollationKey key, CollationBuffer buffer) {
- // we have done all the CE's, now let's put them together to form
- // a key
- if (buffer.m_utilCompare2_) {
- doSecondary(doFrench, buffer);
- }
- // adding case level should be independent of secondary level
- if (buffer.m_utilCompare0_) {
- doCase(buffer);
- }
- if (buffer.m_utilCompare3_) {
- doTertiary(buffer);
- if (buffer.m_utilCompare4_) {
- doQuaternary(commonBottom4, bottomCount4, buffer);
- if (buffer.m_utilCompare5_) {
- doIdentical(source, buffer);
- }
-
- }
- }
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) 0);
- buffer.m_utilBytesCount1_++;
-
- key.set(buffer.m_utilBytes1_, 0, buffer.m_utilBytesCount1_);
- }
-
- /**
- * Packs the French bytes
- * @param buffer collation buffer temporary state
- */
- private static final void doFrench(CollationBuffer buffer) {
- for (int i = 0; i < buffer.m_utilBytesCount2_; i++) {
- byte s = buffer.m_utilBytes2_[buffer.m_utilBytesCount2_ - i - 1];
- // This is compression code.
- if (s == COMMON_2_) {
- ++buffer.m_utilCount2_;
- } else {
- if (buffer.m_utilCount2_ > 0) {
- // getting the unsigned value
- if ((s & LAST_BYTE_MASK_) > COMMON_2_) {
- // not necessary for 4th level.
- while (buffer.m_utilCount2_ > TOP_COUNT_2_) {
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
- (byte) (COMMON_TOP_2_ - TOP_COUNT_2_));
- buffer.m_utilBytesCount1_++;
- buffer.m_utilCount2_ -= TOP_COUNT_2_;
- }
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
- (byte) (COMMON_TOP_2_ - (buffer.m_utilCount2_ - 1)));
- buffer.m_utilBytesCount1_++;
- } else {
- while (buffer.m_utilCount2_ > BOTTOM_COUNT_2_) {
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
- (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
- buffer.m_utilBytesCount1_++;
- buffer.m_utilCount2_ -= BOTTOM_COUNT_2_;
- }
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
- (byte) (COMMON_BOTTOM_2_ + (buffer.m_utilCount2_ - 1)));
- buffer.m_utilBytesCount1_++;
- }
- buffer.m_utilCount2_ = 0;
- }
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, s);
- buffer.m_utilBytesCount1_++;
- }
- }
- if (buffer.m_utilCount2_ > 0) {
- while (buffer.m_utilCount2_ > BOTTOM_COUNT_2_) {
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
- buffer.m_utilBytesCount1_++;
- buffer.m_utilCount2_ -= BOTTOM_COUNT_2_;
- }
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) (COMMON_BOTTOM_2_ + (buffer.m_utilCount2_ - 1)));
- buffer.m_utilBytesCount1_++;
- }
- }
-
- /**
- * Compacts the secondary bytes and stores them into the primary array
- *
- * @param doFrench flag indicator that French has to be handled specially
- * @param buffer collation buffer temporary state
- */
- private static final void doSecondary(boolean doFrench, CollationBuffer buffer) {
- if (buffer.m_utilCount2_ > 0) {
- while (buffer.m_utilCount2_ > BOTTOM_COUNT_2_) {
- buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_, (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
- buffer.m_utilBytesCount2_++;
- buffer.m_utilCount2_ -= BOTTOM_COUNT_2_;
- }
- buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_, (byte) (COMMON_BOTTOM_2_ + (buffer.m_utilCount2_ - 1)));
- buffer.m_utilBytesCount2_++;
- }
-
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_);
- buffer.m_utilBytesCount1_++;
-
- if (doFrench) { // do the reverse copy
- doFrench(buffer);
- } else {
- if (buffer.m_utilBytes1_.length <= buffer.m_utilBytesCount1_ + buffer.m_utilBytesCount2_) {
- buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount2_);
- }
- System.arraycopy(buffer.m_utilBytes2_, 0, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount2_);
- buffer.m_utilBytesCount1_ += buffer.m_utilBytesCount2_;
- }
- }
-
- /**
- * Increase buffer size
- *
- * @param buffer array of bytes
- * @param size of the byte array
- * @param incrementsize size to increase
- * @return the new buffer
- */
- private static final byte[] increase(byte buffer[], int size, int incrementsize) {
- byte result[] = new byte[buffer.length + incrementsize];
- System.arraycopy(buffer, 0, result, 0, size);
- return result;
- }
-
- /**
- * Increase buffer size
- *
- * @param buffer array of ints
- * @param size of the byte array
- * @param incrementsize size to increase
- * @return the new buffer
- */
- private static final int[] increase(int buffer[], int size, int incrementsize) {
- int result[] = new int[buffer.length + incrementsize];
- System.arraycopy(buffer, 0, result, 0, size);
- return result;
- }
-
- /**
- * Compacts the case bytes and stores them into the primary array
- *
- * @param buffer collation buffer temporary state
- */
- private static final void doCase(CollationBuffer buffer) {
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_);
- buffer.m_utilBytesCount1_++;
- if (buffer.m_utilBytes1_.length <= buffer.m_utilBytesCount1_ + buffer.m_utilBytesCount0_) {
- buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount0_);
- }
- System.arraycopy(buffer.m_utilBytes0_, 0, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount0_);
- buffer.m_utilBytesCount1_ += buffer.m_utilBytesCount0_;
- }
-
- /**
- * Compacts the tertiary bytes and stores them into the primary array
- *
- * @param buffer collation buffer temporary state
- */
- private final void doTertiary(CollationBuffer buffer) {
- if (buffer.m_utilCount3_ > 0) {
- if (m_common3_ != COMMON_BOTTOM_3_) {
- while (buffer.m_utilCount3_ >= m_topCount3_) {
- buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_top3_ - m_topCount3_));
- buffer.m_utilBytesCount3_++;
- buffer.m_utilCount3_ -= m_topCount3_;
- }
- buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_top3_ - buffer.m_utilCount3_));
- buffer.m_utilBytesCount3_++;
- } else {
- while (buffer.m_utilCount3_ > m_bottomCount3_) {
- buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_bottom3_ + m_bottomCount3_));
- buffer.m_utilBytesCount3_++;
- buffer.m_utilCount3_ -= m_bottomCount3_;
- }
- buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_bottom3_ + (buffer.m_utilCount3_ - 1)));
- buffer.m_utilBytesCount3_++;
- }
- }
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_);
- buffer.m_utilBytesCount1_++;
- if (buffer.m_utilBytes1_.length <= buffer.m_utilBytesCount1_ + buffer.m_utilBytesCount3_) {
- buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount3_);
- }
- System.arraycopy(buffer.m_utilBytes3_, 0, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount3_);
- buffer.m_utilBytesCount1_ += buffer.m_utilBytesCount3_;
- }
-
- /**
- * Compacts the quaternary bytes and stores them into the primary array
- *
- * @param buffer collation buffer temporary state
- */
- private final void doQuaternary(int commonbottom4, int bottomcount4, CollationBuffer buffer) {
- if (buffer.m_utilCount4_ > 0) {
- while (buffer.m_utilCount4_ > bottomcount4) {
- buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonbottom4 + bottomcount4));
- buffer.m_utilBytesCount4_++;
- buffer.m_utilCount4_ -= bottomcount4;
- }
- buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonbottom4 + (buffer.m_utilCount4_ - 1)));
- buffer.m_utilBytesCount4_++;
- }
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_);
- buffer.m_utilBytesCount1_++;
- if (buffer.m_utilBytes1_.length <= buffer.m_utilBytesCount1_ + buffer.m_utilBytesCount4_) {
- buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount4_);
- }
- System.arraycopy(buffer.m_utilBytes4_, 0, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount4_);
- buffer.m_utilBytesCount1_ += buffer.m_utilBytesCount4_;
- }
-
- /**
- * Deals with the identical sort. Appends the BOCSU version of the source string to the ends of the byte buffer.
- *
- * @param source text string
- * @param buffer collation buffer temporary state
- */
- private static final void doIdentical(String source, CollationBuffer buffer) {
- int isize = BOCU.getCompressionLength(source);
- buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_);
- buffer.m_utilBytesCount1_++;
- if (buffer.m_utilBytes1_.length <= buffer.m_utilBytesCount1_ + isize) {
- buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, 1 + isize);
- }
- buffer.m_utilBytesCount1_ = BOCU.compress(source, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_);
- }
-
- /**
- * Gets the offset of the first unmatched characters in source and target. This method returns the offset of the
- * start of a contraction or a combining sequence, if the first difference is in the middle of such a sequence.
- *
- * @param source
- * string
- * @param target
- * string
- * @return offset of the first unmatched characters in source and target.
- */
- private final int getFirstUnmatchedOffset(String source, String target) {
- int result = 0;
- int slength = source.length();
- int tlength = target.length();
- int minlength = slength;
- if (minlength > tlength) {
- minlength = tlength;
- }
- while (result < minlength && source.charAt(result) == target.charAt(result)) {
- result++;
- }
- if (result > 0) {
- // There is an identical portion at the beginning of the two
- // strings. If the identical portion ends within a contraction or a
- // combining character sequence, back up to the start of that
- // sequence.
- char schar = 0;
- char tchar = 0;
- if (result < minlength) {
- schar = source.charAt(result); // first differing chars
- tchar = target.charAt(result);
- } else {
- schar = source.charAt(minlength - 1);
- if (isUnsafe(schar)) {
- tchar = schar;
- } else if (slength == tlength) {
- return result;
- } else if (slength < tlength) {
- tchar = target.charAt(result);
- } else {
- schar = source.charAt(result);
- }
- }
- if (isUnsafe(schar) || isUnsafe(tchar)) {
- // We are stopped in the middle of a contraction or combining
- // sequence.
- // Look backwards for the part of the string for the start of
- // the sequence
- // It doesn't matter which string we scan, since they are the
- // same in this region.
- do {
- result--;
- } while (result > 0 && isUnsafe(source.charAt(result)));
- }
- }
- return result;
- }
-
- /**
- * Appending an byte to an array of bytes and increases it if we run out of space
- *
- * @param array
- * of byte arrays
- * @param appendindex
- * index in the byte array to append
- * @param value
- * to append
- * @return array if array size can accomodate the new value, otherwise a bigger array will be created and returned
- */
- private static final byte[] append(byte array[], int appendindex, byte value) {
- try {
- array[appendindex] = value;
- } catch (ArrayIndexOutOfBoundsException e) {
- array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);
- array[appendindex] = value;
- }
- return array;
- }
-
- /**
- * This is a trick string compare function that goes in and uses sortkeys to compare. It is used when compare gets
- * in trouble and needs to bail out.
- *
- * @param source text string
- * @param target text string
- * @param buffer collation buffer temporary state
- */
- private final int compareBySortKeys(String source, String target, CollationBuffer buffer)
- {
- buffer.m_utilRawCollationKey_ = getRawCollationKey(source, buffer.m_utilRawCollationKey_);
- // this method is very seldom called
- RawCollationKey targetkey = getRawCollationKey(target, null);
- return buffer.m_utilRawCollationKey_.compareTo(targetkey);
- }
-
- /**
- * Performs the primary comparisons, and fills up the CE buffer at the same time. The return value toggles between
- * the comparison result and the hiragana result. If either the source is greater than target or vice versa, the
- * return result is the comparison result, ie 1 or -1, furthermore the cebuffers will be cleared when that happens.
- * If the primary comparisons are equal, we'll have to continue with secondary comparison. In this case the cebuffer
- * will not be cleared and the return result will be the hiragana result.
- *
- * @param doHiragana4 flag indicator that Hiragana Quaternary has to be observed
- * @param lowestpvalue the lowest primary value that will not be ignored if alternate handling is shifted
- * @param source text string
- * @param target text string
- * @param textoffset offset in text to start the comparison
- * @param buffer collation buffer temporary state
- * @return comparion result if a primary difference is found, otherwise hiragana result
- */
- private final int doPrimaryCompare(boolean doHiragana4, int lowestpvalue, String source, String target,
- int textoffset, CollationBuffer buffer)
-
- {
- // Preparing the context objects for iterating over strings
- buffer.m_srcUtilIter_.setText(source);
- buffer.m_srcUtilColEIter_.setText(buffer.m_srcUtilIter_, textoffset);
- buffer.m_tgtUtilIter_.setText(target);
- buffer.m_tgtUtilColEIter_.setText(buffer.m_tgtUtilIter_, textoffset);
-
- // Non shifted primary processing is quite simple
- if (!m_isAlternateHandlingShifted_) {
- int hiraganaresult = 0;
- while (true) {
- int sorder = 0;
- int sPrimary;
- // We fetch CEs until we hit a non ignorable primary or end.
- do {
- sorder = buffer.m_srcUtilColEIter_.next();
- buffer.m_srcUtilCEBuffer_ = append(buffer.m_srcUtilCEBuffer_, buffer.m_srcUtilCEBufferSize_, sorder);
- buffer.m_srcUtilCEBufferSize_++;
- sPrimary = sorder & CE_PRIMARY_MASK_;
- } while (sPrimary == CollationElementIterator.IGNORABLE);
-
- int torder = 0;
- int tPrimary;
- do {
- torder = buffer.m_tgtUtilColEIter_.next();
- buffer.m_tgtUtilCEBuffer_ = append(buffer.m_tgtUtilCEBuffer_, buffer.m_tgtUtilCEBufferSize_, torder);
- buffer.m_tgtUtilCEBufferSize_++;
- tPrimary = torder & CE_PRIMARY_MASK_;
- } while (tPrimary == CollationElementIterator.IGNORABLE);
-
- // if both primaries are the same
- if (sPrimary == tPrimary) {
- // and there are no more CEs, we advance to the next level
- // see if we are at the end of either string
- if (buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
- if (buffer.m_tgtUtilCEBuffer_[buffer.m_tgtUtilCEBufferSize_ - 1] != CollationElementIterator.NULLORDER) {
- return -1;
- }
- break;
- } else if (buffer.m_tgtUtilCEBuffer_[buffer.m_tgtUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- if (doHiragana4 && hiraganaresult == 0
- && buffer.m_srcUtilColEIter_.m_isCodePointHiragana_ != buffer.m_tgtUtilColEIter_.m_isCodePointHiragana_) {
- if (buffer.m_srcUtilColEIter_.m_isCodePointHiragana_) {
- hiraganaresult = -1;
- } else {
- hiraganaresult = 1;
- }
- }
- } else {
- if (!isContinuation(sorder) && m_leadBytePermutationTable_ != null) {
- sPrimary = (m_leadBytePermutationTable_[sPrimary >>> 24] << 24) | (sPrimary & 0x00FFFFFF);
- tPrimary = (m_leadBytePermutationTable_[tPrimary >>> 24] << 24) | (tPrimary & 0x00FFFFFF);
- }
- // if two primaries are different, we are done
- return endPrimaryCompare(sPrimary, tPrimary, buffer);
- }
- }
- // no primary difference... do the rest from the buffers
- return hiraganaresult;
- } else { // shifted - do a slightly more complicated processing :)
- while (true) {
- int sorder = getPrimaryShiftedCompareCE(buffer.m_srcUtilColEIter_, lowestpvalue, true, buffer);
- int torder = getPrimaryShiftedCompareCE(buffer.m_tgtUtilColEIter_, lowestpvalue, false, buffer);
- if (sorder == torder) {
- if (buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
- break;
- } else {
- continue;
- }
- } else {
- return endPrimaryCompare(sorder, torder, buffer);
- }
- } // no primary difference... do the rest from the buffers
- }
- return 0;
- }
-
- /**
- * This is used only for primary strength when we know that sorder is already different from torder. Compares sorder
- * and torder, returns -1 if sorder is less than torder. Clears the cebuffer at the same time.
- *
- * @param sorder source strength order
- * @param torder target strength order
- * @param buffer collation buffer temporary state
- * @return the comparison result of sorder and torder
- */
- private static final int endPrimaryCompare(int sorder, int torder, CollationBuffer buffer) {
- // if we reach here, the ce offset accessed is the last ce
- // appended to the buffer
- boolean isSourceNullOrder = (buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER);
- boolean isTargetNullOrder = (buffer.m_tgtUtilCEBuffer_[buffer.m_tgtUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER);
- buffer.m_srcUtilCEBufferSize_ = -1;
- buffer.m_tgtUtilCEBufferSize_ = -1;
- if (isSourceNullOrder) {
- return -1;
- }
- if (isTargetNullOrder) {
- return 1;
- }
- // getting rid of the sign
- sorder >>>= CE_PRIMARY_SHIFT_;
- torder >>>= CE_PRIMARY_SHIFT_;
- if (sorder < torder) {
- return -1;
- }
- return 1;
- }
-
- /**
- * Calculates the next primary shifted value and fills up cebuffer with the next non-ignorable ce.
- *
- * @param coleiter collation element iterator
- * @param doHiragana4 flag indicator if hiragana quaternary is to be handled
- * @param lowestpvalue lowest primary shifted value that will not be ignored
- * @param buffer collation buffer temporary state
- * @return result next modified ce
- */
- private static final int getPrimaryShiftedCompareCE(CollationElementIterator coleiter, int lowestpvalue, boolean isSrc, CollationBuffer buffer)
- {
- boolean shifted = false;
- int result = CollationElementIterator.IGNORABLE;
- int cebuffer[] = buffer.m_srcUtilCEBuffer_;
- int cebuffersize = buffer.m_srcUtilCEBufferSize_;
- if (!isSrc) {
- cebuffer = buffer.m_tgtUtilCEBuffer_;
- cebuffersize = buffer.m_tgtUtilCEBufferSize_;
- }
- while (true) {
- result = coleiter.next();
- if (result == CollationElementIterator.NULLORDER) {
- cebuffer = append(cebuffer, cebuffersize, result);
- cebuffersize++;
- break;
- } else if (result == CollationElementIterator.IGNORABLE
- || (shifted && (result & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE)) {
- // UCA amendment - ignore ignorables that follow shifted code
- // points
- continue;
- } else if (isContinuation(result)) {
- if ((result & CE_PRIMARY_MASK_) != CollationElementIterator.IGNORABLE) {
- // There is primary value
- if (shifted) {
- result = (result & CE_PRIMARY_MASK_) | CE_CONTINUATION_MARKER_;
- // preserve interesting continuation
- cebuffer = append(cebuffer, cebuffersize, result);
- cebuffersize++;
- continue;
- } else {
- cebuffer = append(cebuffer, cebuffersize, result);
- cebuffersize++;
- break;
- }
- } else { // Just lower level values
- if (!shifted) {
- cebuffer = append(cebuffer, cebuffersize, result);
- cebuffersize++;
- }
- }
- } else { // regular
- if (Utility.compareUnsigned(result & CE_PRIMARY_MASK_, lowestpvalue) > 0) {
- cebuffer = append(cebuffer, cebuffersize, result);
- cebuffersize++;
- break;
- } else {
- if ((result & CE_PRIMARY_MASK_) != 0) {
- shifted = true;
- result &= CE_PRIMARY_MASK_;
- cebuffer = append(cebuffer, cebuffersize, result);
- cebuffersize++;
- continue;
- } else {
- cebuffer = append(cebuffer, cebuffersize, result);
- cebuffersize++;
- shifted = false;
- continue;
- }
- }
- }
- }
- if (isSrc) {
- buffer.m_srcUtilCEBuffer_ = cebuffer;
- buffer.m_srcUtilCEBufferSize_ = cebuffersize;
- } else {
- buffer.m_tgtUtilCEBuffer_ = cebuffer;
- buffer.m_tgtUtilCEBufferSize_ = cebuffersize;
- }
- result &= CE_PRIMARY_MASK_;
- return result;
- }
-
- /**
- * Appending an int to an array of ints and increases it if we run out of space
- *
- * @param array
- * of int arrays
- * @param appendindex
- * index at which value will be appended
- * @param value
- * to append
- * @return array if size is not increased, otherwise a new array will be returned
- */
- private static final int[] append(int array[], int appendindex, int value) {
- if (appendindex + 1 >= array.length) {
- array = increase(array, appendindex, CE_BUFFER_SIZE_);
- }
- array[appendindex] = value;
- return array;
- }
-
- /**
- * Does secondary strength comparison based on the collected ces.
- *
- * @param doFrench flag indicates if French ordering is to be done
- * @param buffer collation buffer temporary state
- * @return the secondary strength comparison result
- */
- private static final int doSecondaryCompare(boolean doFrench, CollationBuffer buffer) {
- // now, we're gonna reexamine collected CEs
- if (!doFrench) { // normal
- int soffset = 0;
- int toffset = 0;
- while (true) {
- int sorder = CollationElementIterator.IGNORABLE;
- while (sorder == CollationElementIterator.IGNORABLE) {
- sorder = buffer.m_srcUtilCEBuffer_[soffset++] & CE_SECONDARY_MASK_;
- }
- int torder = CollationElementIterator.IGNORABLE;
- while (torder == CollationElementIterator.IGNORABLE) {
- torder = buffer.m_tgtUtilCEBuffer_[toffset++] & CE_SECONDARY_MASK_;
- }
-
- if (sorder == torder) {
- if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- if (buffer.m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
- return -1;
- }
- break;
- } else if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- } else {
- if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- return -1;
- }
- if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- return (sorder < torder) ? -1 : 1;
- }
- }
- } else { // do the French
- buffer.m_srcUtilContOffset_ = 0;
- buffer.m_tgtUtilContOffset_ = 0;
- buffer.m_srcUtilOffset_ = buffer.m_srcUtilCEBufferSize_ - 2;
- buffer.m_tgtUtilOffset_ = buffer.m_tgtUtilCEBufferSize_ - 2;
- while (true) {
- int sorder = getSecondaryFrenchCE(true, buffer);
- int torder = getSecondaryFrenchCE(false, buffer);
- if (sorder == torder) {
- if ((buffer.m_srcUtilOffset_ < 0 && buffer.m_tgtUtilOffset_ < 0)
- || (buffer.m_srcUtilOffset_ >= 0 && buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilOffset_] == CollationElementIterator.NULLORDER)) {
- break;
- }
- } else {
- return (sorder < torder) ? -1 : 1;
- }
- }
- }
- return 0;
- }
-
- /**
- * Calculates the next secondary french CE.
- *
- * @param isSrc flag indicator if we are calculating the src ces
- * @param buffer collation buffer temporary state
- * @return result next modified ce
- */
- private static final int getSecondaryFrenchCE(boolean isSrc, CollationBuffer buffer) {
- int result = CollationElementIterator.IGNORABLE;
- int offset = buffer.m_srcUtilOffset_;
- int continuationoffset = buffer.m_srcUtilContOffset_;
- int cebuffer[] = buffer.m_srcUtilCEBuffer_;
- if (!isSrc) {
- offset = buffer.m_tgtUtilOffset_;
- continuationoffset = buffer.m_tgtUtilContOffset_;
- cebuffer = buffer.m_tgtUtilCEBuffer_;
- }
-
- while (result == CollationElementIterator.IGNORABLE && offset >= 0) {
- if (continuationoffset == 0) {
- result = cebuffer[offset];
- while (isContinuation(cebuffer[offset--])) {
- }
- // after this, sorder is at the start of continuation,
- // and offset points before that
- if (isContinuation(cebuffer[offset + 1])) {
- // save offset for later
- continuationoffset = offset;
- offset += 2;
- }
- } else {
- result = cebuffer[offset++];
- if (!isContinuation(result)) {
- // we have finished with this continuation
- offset = continuationoffset;
- // reset the pointer to before continuation
- continuationoffset = 0;
- continue;
- }
- }
- result &= CE_SECONDARY_MASK_; // remove continuation bit
- }
- if (isSrc) {
- buffer.m_srcUtilOffset_ = offset;
- buffer.m_srcUtilContOffset_ = continuationoffset;
- } else {
- buffer.m_tgtUtilOffset_ = offset;
- buffer.m_tgtUtilContOffset_ = continuationoffset;
- }
- return result;
- }
-
- /**
- * Does case strength comparison based on the collected ces.
- *
- * @param buffer collation buffer temporary state
- * @return the case strength comparison result
- */
- private final int doCaseCompare(CollationBuffer buffer) {
- int soffset = 0;
- int toffset = 0;
- while (true) {
- int sorder = CollationElementIterator.IGNORABLE;
- int torder = CollationElementIterator.IGNORABLE;
- while ((sorder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
- sorder = buffer.m_srcUtilCEBuffer_[soffset++];
- if (!isContinuation(sorder) && ((sorder & CE_PRIMARY_MASK_) != 0 || buffer.m_utilCompare2_ == true)) {
- // primary ignorables should not be considered on the case level when the strength is primary
- // otherwise, the CEs stop being well-formed
- sorder &= CE_CASE_MASK_3_;
- sorder ^= m_caseSwitch_;
- } else {
- sorder = CollationElementIterator.IGNORABLE;
- }
- }
-
- while ((torder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
- torder = buffer.m_tgtUtilCEBuffer_[toffset++];
- if (!isContinuation(torder) && ((torder & CE_PRIMARY_MASK_) != 0 || buffer.m_utilCompare2_ == true)) {
- // primary ignorables should not be considered on the case level when the strength is primary
- // otherwise, the CEs stop being well-formed
- torder &= CE_CASE_MASK_3_;
- torder ^= m_caseSwitch_;
- } else {
- torder = CollationElementIterator.IGNORABLE;
- }
- }
-
- sorder &= CE_CASE_BIT_MASK_;
- torder &= CE_CASE_BIT_MASK_;
- if (sorder == torder) {
- // checking end of strings
- if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- if (buffer.m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
- return -1;
- }
- break;
- } else if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- } else {
- if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- return -1;
- }
- if (buffer.m_tgtUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- return (sorder < torder) ? -1 : 1;
- }
- }
- return 0;
- }
-
- /**
- * Does tertiary strength comparison based on the collected ces.
- *
- * @param buffer collation buffer temporary state
- * @return the tertiary strength comparison result
- */
- private final int doTertiaryCompare(CollationBuffer buffer) {
- int soffset = 0;
- int toffset = 0;
- while (true) {
- int sorder = CollationElementIterator.IGNORABLE;
- int torder = CollationElementIterator.IGNORABLE;
- while ((sorder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
- sorder = buffer.m_srcUtilCEBuffer_[soffset++];
- if (!isContinuation(sorder)) {
- sorder = (sorder & m_mask3_) ^ m_caseSwitch_;
- } else {
- sorder = (sorder & m_mask3_) & CE_REMOVE_CASE_;
- }
- }
-
- while ((torder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
- torder = buffer.m_tgtUtilCEBuffer_[toffset++];
- if (!isContinuation(torder)) {
- torder = (torder & m_mask3_) ^ m_caseSwitch_;
- } else {
- torder = (torder & m_mask3_) & CE_REMOVE_CASE_;
- }
- }
-
- if (sorder == torder) {
- if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- if (buffer.m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
- return -1;
- }
- break;
- } else if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- } else {
- if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- return -1;
- }
- if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- return (sorder < torder) ? -1 : 1;
- }
- }
- return 0;
- }
-
- /**
- * Does quaternary strength comparison based on the collected ces.
- *
- * @param lowestpvalue the lowest primary value that will not be ignored if alternate handling is shifted
- * @param buffer collation buffer temporary state
- * @return the quaternary strength comparison result
- */
- private final int doQuaternaryCompare(int lowestpvalue, CollationBuffer buffer) {
- boolean sShifted = true;
- boolean tShifted = true;
- int soffset = 0;
- int toffset = 0;
- while (true) {
- int sorder = CollationElementIterator.IGNORABLE;
- int torder = CollationElementIterator.IGNORABLE;
- while (sorder == CollationElementIterator.IGNORABLE || (isContinuation(sorder) && !sShifted)) {
- sorder = buffer.m_srcUtilCEBuffer_[soffset++];
- if (isContinuation(sorder)) {
- if (!sShifted) {
- continue;
- }
- } else if (Utility.compareUnsigned(sorder, lowestpvalue) > 0
- || (sorder & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE) {
- // non continuation
- sorder = CE_PRIMARY_MASK_;
- sShifted = false;
- } else {
- sShifted = true;
- }
- }
- sorder >>>= CE_PRIMARY_SHIFT_;
- while (torder == CollationElementIterator.IGNORABLE || (isContinuation(torder) && !tShifted)) {
- torder = buffer.m_tgtUtilCEBuffer_[toffset++];
- if (isContinuation(torder)) {
- if (!tShifted) {
- continue;
- }
- } else if (Utility.compareUnsigned(torder, lowestpvalue) > 0
- || (torder & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE) {
- // non continuation
- torder = CE_PRIMARY_MASK_;
- tShifted = false;
- } else {
- tShifted = true;
- }
- }
- torder >>>= CE_PRIMARY_SHIFT_;
-
- if (sorder == torder) {
- if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- if (buffer.m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
- return -1;
- }
- break;
- } else if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- } else {
- if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
- return -1;
- }
- if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
- return 1;
- }
- return (sorder < torder) ? -1 : 1;
- }
- }
- return 0;
+ return h;
}
/**
- * Internal function. Does byte level string compare. Used by strcoll if strength == identical and strings are
- * otherwise equal. This is a rare case. Comparison must be done on NFD normalized strings. FCD is not good enough.
+ * Compares the source text String to the target text String according to the collation rules, strength and
+ * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero
+ * depending on whether the source String is less than, equal to or greater than the target String. See the Collator
+ * class description for an example of use. </p>
+ * <p>
+ * General recommendation: <br>
+ * If comparison are to be done to the same String multiple times, it would be more efficient to generate
+ * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed
+ * performance is critical and object instantiation is to be reduced, further optimization may be achieved by
+ * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method
+ * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey
+ * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key
+ * comparisons. If each String is compared only once, using the method RuleBasedCollator.compare(String,
+ * String) will give better performance.
+ * </p>
*
* @param source
- * text
+ * the source text String.
* @param target
- * text
- * @param offset
- * of the first difference in the text strings
- * @param normalize
- * flag indicating if we are to normalize the text before comparison
- * @return 1 if source is greater than target, -1 less than and 0 if equals
+ * the target text String.
+ * @return an integer value. Value is less than zero if source is less than target, value is zero if source
+ * and target are equal, value is greater than zero if source is greater than target.
+ * @see CollationKey
+ * @see #getCollationKey
+ * @stable ICU 2.8
*/
- private static final int doIdenticalCompare(String source, String target, int offset, boolean normalize)
-
- {
- if (normalize) {
- if (Normalizer.quickCheck(source, Normalizer.NFD, 0) != Normalizer.YES) {
- source = Normalizer.decompose(source, false);
- }
-
- if (Normalizer.quickCheck(target, Normalizer.NFD, 0) != Normalizer.YES) {
- target = Normalizer.decompose(target, false);
- }
- offset = 0;
- }
-
- return doStringCompare(source, target, offset);
+ @Override
+ public int compare(String source, String target) {
+ return doCompare(source, target);
}
/**
- * Compares string for their codepoint order. This comparison handles surrogate characters and place them after the
- * all non surrogate characters.
- *
- * @param source
- * text
- * @param target
- * text
- * @param offset
- * start offset for comparison
- * @return 1 if source is greater than target, -1 less than and 0 if equals
- */
- private static final int doStringCompare(String source, String target, int offset) {
- // compare identical prefixes - they do not need to be fixed up
- char schar = 0;
- char tchar = 0;
- int slength = source.length();
- int tlength = target.length();
- int minlength = Math.min(slength, tlength);
- while (offset < minlength) {
- schar = source.charAt(offset);
- tchar = target.charAt(offset++);
- if (schar != tchar) {
- break;
- }
+ * Abstract iterator for identical-level string comparisons.
+ * Returns FCD code points and handles temporary switching to NFD.
+ *
+ * <p>As with CollationIterator,
+ * Java NFDIterator instances are partially constructed and cached,
+ * and completed when reset for use.
+ * C++ NFDIterator instances are stack-allocated.
+ */
+ private static abstract class NFDIterator {
+ /**
+ * Partial constructor, must call reset().
+ */
+ NFDIterator() {}
+ final void reset() {
+ index = -1;
}
- if (schar == tchar && offset == minlength) {
- if (slength > minlength) {
- return 1;
- }
- if (tlength > minlength) {
- return -1;
+ /**
+ * Returns the next code point from the internal normalization buffer,
+ * or else the next text code point.
+ * Returns -1 at the end of the text.
+ */
+ final int nextCodePoint() {
+ if(index >= 0) {
+ if(index == decomp.length()) {
+ index = -1;
+ } else {
+ int c = Character.codePointAt(decomp, index);
+ index += Character.charCount(c);
+ return c;
+ }
}
- return 0;
+ return nextRawCodePoint();
}
-
- // if both values are in or above the surrogate range, Fix them up.
- if (schar >= UTF16.LEAD_SURROGATE_MIN_VALUE && tchar >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
- schar = fixupUTF16(schar);
- tchar = fixupUTF16(tchar);
+ /**
+ * @param nfcImpl the Normalizer2Impl used to look up canonical decompositions
+ * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
+ * @return the first code point in c's decomposition,
+ * or c itself if it was decomposed already or if it does not decompose
+ */
+ final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) {
+ if(index >= 0) { return c; }
+ decomp = nfcImpl.getDecomposition(c);
+ if(decomp == null) { return c; }
+ c = Character.codePointAt(decomp, 0);
+ index = Character.charCount(c);
+ return c;
}
- // now c1 and c2 are in UTF-32-compatible order
- return (schar < tchar) ? -1 : 1; // schar and tchar has to be different
- }
+ /**
+ * Returns the next text code point in FCD order.
+ * Returns -1 at the end of the text.
+ */
+ protected abstract int nextRawCodePoint();
- /**
- * Rotate surrogates to the top to get code point order
- */
- private static final char fixupUTF16(char ch) {
- if (ch >= 0xe000) {
- ch -= 0x800;
- } else {
- ch += 0x2000;
- }
- return ch;
+ private String decomp;
+ private int index;
}
- private static final int UCOL_REORDER_CODE_IGNORE = ReorderCodes.LIMIT + 1;
- /**
- * Builds the lead byte permuatation table
- */
- private void buildPermutationTable() {
- if (m_reorderCodes_ == null || m_reorderCodes_.length == 0 || (m_reorderCodes_.length == 1 && m_reorderCodes_[0] == ReorderCodes.NONE)) {
- m_leadBytePermutationTable_ = null;
- return;
- }
-
- if (m_reorderCodes_[0] == ReorderCodes.DEFAULT) {
- if (m_reorderCodes_.length != 1) {
- throw new IllegalArgumentException("Illegal collation reorder codes - default reorder code must be the only code in the list.");
- }
- // swap the reorder codes for those at build of the rules
- if (m_defaultReorderCodes_ == null || m_defaultReorderCodes_.length == 0) {
- m_leadBytePermutationTable_ = null;
- return;
- }
- m_reorderCodes_ = m_defaultReorderCodes_.clone();
- }
-
- // TODO - these need to be read in from the UCA data file
- // The lowest byte that hasn't been assigned a mapping
- int toBottom = 0x03;
- // The highest byte that hasn't been assigned a mapping
- int toTop = 0xe4;
-
- // filled slots in the output m_scriptOrder_
- boolean[] permutationSlotFilled = new boolean[256];
-
- // used lead bytes
- boolean[] newLeadByteUsed = new boolean[256];
-
- if (m_leadBytePermutationTable_ == null) {
- m_leadBytePermutationTable_ = new byte[256];
+ private static class UTF16NFDIterator extends NFDIterator {
+ UTF16NFDIterator() {}
+ void setText(CharSequence seq, int start) {
+ reset();
+ s = seq;
+ pos = start;
}
- // prefill the reordering codes with the leading entries
- int[] internalReorderCodes = new int[m_reorderCodes_.length + (ReorderCodes.LIMIT - ReorderCodes.FIRST)];
- for (int codeIndex = 0; codeIndex < ReorderCodes.LIMIT - ReorderCodes.FIRST; codeIndex++) {
- internalReorderCodes[codeIndex] = ReorderCodes.FIRST + codeIndex;
- }
- for (int codeIndex = 0; codeIndex < m_reorderCodes_.length; codeIndex++) {
- internalReorderCodes[codeIndex + (ReorderCodes.LIMIT - ReorderCodes.FIRST)] = m_reorderCodes_[codeIndex];
- if (m_reorderCodes_[codeIndex] >= ReorderCodes.FIRST && m_reorderCodes_[codeIndex] < ReorderCodes.LIMIT) {
- internalReorderCodes[m_reorderCodes_[codeIndex] - ReorderCodes.FIRST] = UCOL_REORDER_CODE_IGNORE;
- }
+ @Override
+ protected int nextRawCodePoint() {
+ if(pos == s.length()) { return Collation.SENTINEL_CP; }
+ int c = Character.codePointAt(s, pos);
+ pos += Character.charCount(c);
+ return c;
}
- /*
- * Start from the front of the list and place each script we encounter at the earliest possible locatation
- * in the permutation table. If we encounter UNKNOWN, start processing from the back, and place each script
- * in the last possible location. At each step, we also need to make sure that any scripts that need to not
- * be moved are copied to their same location in the final table.
- */
- boolean fromTheBottom = true;
- int reorderCodesIndex = -1;
- for (int reorderCodesCount = 0; reorderCodesCount < internalReorderCodes.length; reorderCodesCount++) {
- reorderCodesIndex += fromTheBottom ? 1 : -1;
- int next = internalReorderCodes[reorderCodesIndex];
- if (next == UCOL_REORDER_CODE_IGNORE) {
- continue;
- }
- if (next == UScript.UNKNOWN) {
- if (fromTheBottom == false) {
- // double turnaround
- m_leadBytePermutationTable_ = null;
- throw new IllegalArgumentException("Illegal collation reorder codes - two \"from the end\" markers.");
- }
- fromTheBottom = false;
- reorderCodesIndex = internalReorderCodes.length;
- continue;
- }
+ protected CharSequence s;
+ protected int pos;
+ }
- int[] leadBytes = RuleBasedCollator.LEADBYTE_CONSTANTS_.getLeadBytesForReorderCode(next);
- if (fromTheBottom) {
- for (int leadByte : leadBytes) {
- // don't place a lead byte twice in the permutation table
- if (permutationSlotFilled[leadByte]) {
- // lead byte already used
- m_leadBytePermutationTable_ = null;
- throw new IllegalArgumentException("Illegal reorder codes specified - multiple codes with the same lead byte.");
- }
- m_leadBytePermutationTable_[leadByte] = (byte) toBottom;
- newLeadByteUsed[toBottom] = true;
- permutationSlotFilled[leadByte] = true;
- toBottom++;
- }
+ private static final class FCDUTF16NFDIterator extends UTF16NFDIterator {
+ FCDUTF16NFDIterator() {}
+ void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) {
+ reset();
+ int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null);
+ if(spanLimit == seq.length()) {
+ s = seq;
+ pos = start;
} else {
- for (int leadByteIndex = leadBytes.length - 1; leadByteIndex >= 0; leadByteIndex--) {
- int leadByte = leadBytes[leadByteIndex];
- // don't place a lead byte twice in the permutation table
- if (permutationSlotFilled[leadByte]) {
- // lead byte already used
- m_leadBytePermutationTable_ = null;
- throw new IllegalArgumentException("Illegal reorder codes specified - multiple codes with the same lead byte.");
- }
-
- m_leadBytePermutationTable_[leadByte] = (byte) toTop;
- newLeadByteUsed[toTop] = true;
- permutationSlotFilled[leadByte] = true;
- toTop--;
+ if(str == null) {
+ str = new StringBuilder();
+ } else {
+ str.setLength(0);
}
+ str.append(seq, start, spanLimit);
+ ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start);
+ nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer);
+ s = str;
+ pos = 0;
}
}
- /* Copy everything that's left over */
- int reorderCode = 0;
- for (int i = 0; i < 256; i++) {
- if (!permutationSlotFilled[i]) {
- while (newLeadByteUsed[reorderCode]) {
- if (reorderCode > 255) {
- throw new IllegalArgumentException("Unable to fill collation reordering table slots - no available reordering code.");
- }
- reorderCode++;
- }
- m_leadBytePermutationTable_[i] = (byte) reorderCode;
- permutationSlotFilled[i] = true;
- newLeadByteUsed[reorderCode] = true;
- }
- }
-
- // for (int i = 0; i < 256; i++){
- // System.out.println(Integer.toString(i, 16) + " -> " + Integer.toString(m_scriptReorderTable_[i], 16));
- // }
- latinOneRegenTable_ = true;
- updateInternalState();
+ private StringBuilder str;
}
- /**
- * Resets the internal case data members and compression values.
- */
- private void updateInternalState() {
- if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
- m_caseSwitch_ = CASE_SWITCH_;
- } else {
- m_caseSwitch_ = NO_CASE_SWITCH_;
- }
-
- if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) {
- m_mask3_ = CE_REMOVE_CASE_;
- m_common3_ = COMMON_NORMAL_3_;
- m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_;
- m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_;
- m_bottom3_ = COMMON_BOTTOM_3_;
- } else {
- m_mask3_ = CE_KEEP_CASE_;
- m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_;
- if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
- m_common3_ = COMMON_UPPER_FIRST_3_;
- m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_;
- m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_;
+ private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) {
+ for(;;) {
+ // Fetch the next FCD code point from each string.
+ int leftCp = left.nextCodePoint();
+ int rightCp = right.nextCodePoint();
+ if(leftCp == rightCp) {
+ if(leftCp < 0) { break; }
+ continue;
+ }
+ // If they are different, then decompose each and compare again.
+ if(leftCp < 0) {
+ leftCp = -2; // end of string
+ } else if(leftCp == 0xfffe) {
+ leftCp = -1; // U+FFFE: merge separator
} else {
- m_common3_ = COMMON_NORMAL_3_;
- m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_;
- m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_;
+ leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
}
- }
-
- // Set the compression values
- int total3 = m_top3_ - m_bottom3_ - 1;
- // we multilply double with int, but need only int
- m_topCount3_ = (int) (PROPORTION_3_ * total3);
- m_bottomCount3_ = total3 - m_topCount3_;
-
- if (!m_isCaseLevel_ && getStrength() == AttributeValue.TERTIARY_ && !m_isFrenchCollation_
- && !m_isAlternateHandlingShifted_) {
- m_isSimple3_ = true;
- } else {
- m_isSimple3_ = false;
- }
- if (!m_isCaseLevel_ && getStrength() <= AttributeValue.TERTIARY_ && !m_isNumericCollation_
- && !m_isAlternateHandlingShifted_ && !latinOneFailed_) {
- if (latinOneCEs_ == null || latinOneRegenTable_) {
- if (setUpLatinOne()) { // if we succeed in building latin1 table, we'll use it
- latinOneUse_ = true;
- } else {
- latinOneUse_ = false;
- latinOneFailed_ = true;
- }
- latinOneRegenTable_ = false;
- } else { // latin1Table exists and it doesn't need to be regenerated, just use it
- latinOneUse_ = true;
+ if(rightCp < 0) {
+ rightCp = -2; // end of string
+ } else if(rightCp == 0xfffe) {
+ rightCp = -1; // U+FFFE: merge separator
+ } else {
+ rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
}
- } else {
- latinOneUse_ = false;
+ if(leftCp < rightCp) { return Collation.LESS; }
+ if(leftCp > rightCp) { return Collation.GREATER; }
}
-
+ return Collation.EQUAL;
}
/**
- * Initializes the RuleBasedCollator
+ * Compares two CharSequences.
+ * @internal
+ * @deprecated This API is ICU internal only.
*/
- private final void init() {
- for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_; m_minUnsafe_++) {
- // Find the smallest unsafe char.
- if (isUnsafe(m_minUnsafe_)) {
- break;
- }
+ @Override
+ protected int doCompare(CharSequence left, CharSequence right) {
+ if(left == right) {
+ return Collation.EQUAL;
}
- for (m_minContractionEnd_ = 0; m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_; m_minContractionEnd_++) {
- // Find the smallest contraction-ending char.
- if (isContractionEnd(m_minContractionEnd_)) {
+ // Identical-prefix test.
+ int equalPrefixLength = 0;
+ for(;;) {
+ if(equalPrefixLength == left.length()) {
+ if(equalPrefixLength == right.length()) { return Collation.EQUAL; }
+ break;
+ } else if(equalPrefixLength == right.length() ||
+ left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) {
break;
}
- }
- latinOneFailed_ = true;
- setStrength(m_defaultStrength_);
- setDecomposition(m_defaultDecomposition_);
- m_variableTopValue_ = m_defaultVariableTopValue_;
- m_isFrenchCollation_ = m_defaultIsFrenchCollation_;
- m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;
- m_isCaseLevel_ = m_defaultIsCaseLevel_;
- m_caseFirst_ = m_defaultCaseFirst_;
- m_isHiragana4_ = m_defaultIsHiragana4_;
- m_isNumericCollation_ = m_defaultIsNumericCollation_;
- latinOneFailed_ = false;
- if (m_defaultReorderCodes_ != null) {
- m_reorderCodes_ = m_defaultReorderCodes_.clone();
- } else {
- m_reorderCodes_ = null;
- }
- updateInternalState();
- }
-
- // Consts for Latin-1 special processing
- private static final int ENDOFLATINONERANGE_ = 0xFF;
- private static final int LATINONETABLELEN_ = (ENDOFLATINONERANGE_ + 50);
- private static final int BAIL_OUT_CE_ = 0xFF000000;
-
- /**
- * Generate latin-1 tables
- */
-
- private static class shiftValues {
- int primShift = 24;
- int secShift = 24;
- int terShift = 24;
- }
-
- private final void addLatinOneEntry(char ch, int CE, shiftValues sh) {
- int primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
- boolean continuation = isContinuation(CE);
- boolean reverseSecondary = false;
- if (!continuation) {
- tertiary = ((CE & m_mask3_));
- tertiary ^= m_caseSwitch_;
- reverseSecondary = true;
+ ++equalPrefixLength;
+ }
+
+ CollationSettings roSettings = settings.readOnly();
+ boolean numeric = roSettings.isNumeric();
+ if(equalPrefixLength > 0) {
+ if((equalPrefixLength != left.length() &&
+ data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) ||
+ (equalPrefixLength != right.length() &&
+ data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) {
+ // Identical prefix: Back up to the start of a contraction or reordering sequence.
+ while(--equalPrefixLength > 0 &&
+ data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {}
+ }
+ // Notes:
+ // - A longer string can compare equal to a prefix of it if only ignorables follow.
+ // - With a backward level, a longer string can compare less-than a prefix of it.
+
+ // Pass the actual start of each string into the CollationIterators,
+ // plus the equalPrefixLength position,
+ // so that prefix matches back into the equal prefix work.
+ }
+
+ int result;
+ int fastLatinOptions = roSettings.fastLatinOptions;
+ if(fastLatinOptions >= 0 &&
+ (equalPrefixLength == left.length() ||
+ left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) &&
+ (equalPrefixLength == right.length() ||
+ right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) {
+ result = CollationFastLatin.compareUTF16(data.fastLatinTable,
+ roSettings.fastLatinPrimaries,
+ fastLatinOptions,
+ left, right, equalPrefixLength);
} else {
- tertiary = (byte) ((CE & CE_REMOVE_CONTINUATION_MASK_));
- tertiary &= CE_REMOVE_CASE_;
- reverseSecondary = false;
- }
-
- secondary = ((CE >>>= 8) & LAST_BYTE_MASK_);
- primary2 = ((CE >>>= 8) & LAST_BYTE_MASK_);
- primary1 = (CE >>> 8);
-
- if (primary1 != 0) {
- if (m_leadBytePermutationTable_ != null && !continuation) {
- primary1 = m_leadBytePermutationTable_[primary1];
- }
- latinOneCEs_[ch] |= (primary1 << sh.primShift);
- sh.primShift -= 8;
- }
- if (primary2 != 0) {
- if (sh.primShift < 0) {
- latinOneCEs_[ch] = BAIL_OUT_CE_;
- latinOneCEs_[latinOneTableLen_ + ch] = BAIL_OUT_CE_;
- latinOneCEs_[2 * latinOneTableLen_ + ch] = BAIL_OUT_CE_;
- return;
- }
- latinOneCEs_[ch] |= (primary2 << sh.primShift);
- sh.primShift -= 8;
- }
- if (secondary != 0) {
- if (reverseSecondary && m_isFrenchCollation_) { // reverse secondary
- latinOneCEs_[latinOneTableLen_ + ch] >>>= 8; // make space for secondary
- latinOneCEs_[latinOneTableLen_ + ch] |= (secondary << 24);
- } else { // normal case
- latinOneCEs_[latinOneTableLen_ + ch] |= (secondary << sh.secShift);
+ result = CollationFastLatin.BAIL_OUT_RESULT;
+ }
+
+ if(result == CollationFastLatin.BAIL_OUT_RESULT) {
+ CollationBuffer buffer = null;
+ try {
+ buffer = getCollationBuffer();
+ if(roSettings.dontCheckFCD()) {
+ buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength);
+ buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength);
+ result = CollationCompare.compareUpToQuaternary(
+ buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings);
+ } else {
+ buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength);
+ buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength);
+ result = CollationCompare.compareUpToQuaternary(
+ buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings);
+ }
+ } finally {
+ releaseCollationBuffer(buffer);
}
- sh.secShift -= 8;
- }
- if (tertiary != 0) {
- latinOneCEs_[2 * latinOneTableLen_ + ch] |= (tertiary << sh.terShift);
- sh.terShift -= 8;
- }
- }
-
- private final void resizeLatinOneTable(int newSize) {
- int newTable[] = new int[3 * newSize];
- int sizeToCopy = ((newSize < latinOneTableLen_) ? newSize : latinOneTableLen_);
- // uprv_memset(newTable, 0, newSize*sizeof(uint32_t)*3); // automatically cleared.
- System.arraycopy(latinOneCEs_, 0, newTable, 0, sizeToCopy);
- System.arraycopy(latinOneCEs_, latinOneTableLen_, newTable, newSize, sizeToCopy);
- System.arraycopy(latinOneCEs_, 2 * latinOneTableLen_, newTable, 2 * newSize, sizeToCopy);
- latinOneTableLen_ = newSize;
- latinOneCEs_ = newTable;
- }
-
- private final boolean setUpLatinOne() {
- if (latinOneCEs_ == null || m_reallocLatinOneCEs_) {
- latinOneCEs_ = new int[3 * LATINONETABLELEN_];
- latinOneTableLen_ = LATINONETABLELEN_;
- m_reallocLatinOneCEs_ = false;
- } else {
- Arrays.fill(latinOneCEs_, 0);
}
- if (m_ContInfo_ == null) {
- m_ContInfo_ = new ContractionInfo();
+ if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) {
+ return result;
}
- char ch = 0;
- // StringBuffer sCh = new StringBuffer();
- // CollationElementIterator it = getCollationElementIterator(sCh.toString());
- CollationElementIterator it = getCollationElementIterator("");
- shiftValues s = new shiftValues();
- int CE = 0;
- char contractionOffset = ENDOFLATINONERANGE_ + 1;
-
- for (ch = 0; ch <= ENDOFLATINONERANGE_; ch++) {
- s.primShift = 24;
- s.secShift = 24;
- s.terShift = 24;
- if (ch < 0x100) {
- CE = m_trie_.getLatin1LinearValue(ch);
- } else {
- CE = m_trie_.getLeadValue(ch);
- if (CE == CollationElementIterator.CE_NOT_FOUND_) {
- CE = UCA_.m_trie_.getLeadValue(ch);
- }
- }
- if (!isSpecial(CE)) {
- addLatinOneEntry(ch, CE, s);
+ CollationBuffer buffer = null;
+ try {
+ buffer = getCollationBuffer();
+ // Compare identical level.
+ Normalizer2Impl nfcImpl = data.nfcImpl;
+ if(roSettings.dontCheckFCD()) {
+ buffer.leftUTF16NFDIter.setText(left, equalPrefixLength);
+ buffer.rightUTF16NFDIter.setText(right, equalPrefixLength);
+ return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter);
} else {
- switch (RuleBasedCollator.getTag(CE)) {
- case CollationElementIterator.CE_EXPANSION_TAG_:
- case CollationElementIterator.CE_DIGIT_TAG_:
- // sCh.delete(0, sCh.length());
- // sCh.append(ch);
- // it.setText(sCh.toString());
- it.setText(UCharacter.toString(ch));
- while ((CE = it.next()) != CollationElementIterator.NULLORDER) {
- if (s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
- latinOneCEs_[ch] = BAIL_OUT_CE_;
- latinOneCEs_[latinOneTableLen_ + ch] = BAIL_OUT_CE_;
- latinOneCEs_[2 * latinOneTableLen_ + ch] = BAIL_OUT_CE_;
- break;
- }
- addLatinOneEntry(ch, CE, s);
- }
- break;
- case CollationElementIterator.CE_CONTRACTION_TAG_:
- // here is the trick
- // F2 is contraction. We do something very similar to contractions
- // but have two indices, one in the real contraction table and the
- // other to where we stuffed things. This hopes that we don't have
- // many contractions (this should work for latin-1 tables).
- {
- if ((CE & 0x00FFF000) != 0) {
- latinOneFailed_ = true;
- return false;
- }
-
- int UCharOffset = (CE & 0xFFFFFF) - m_contractionOffset_; // getContractionOffset(CE)]
-
- CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
-
- latinOneCEs_[ch] = CE;
- latinOneCEs_[latinOneTableLen_ + ch] = CE;
- latinOneCEs_[2 * latinOneTableLen_ + ch] = CE;
-
- // We're going to jump into contraction table, pick the elements
- // and use them
- do {
- // CE = *(contractionCEs + (UCharOffset - contractionIndex));
- CE = m_contractionCE_[UCharOffset];
- if (isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
- int i; /* general counter */
- // uint32_t *CEOffset = (uint32_t *)image+getExpansionOffset(CE); /* find the offset to
- // expansion table */
- int offset = ((CE & 0xFFFFF0) >> 4) - m_expansionOffset_; // it.getExpansionOffset(this,
- // CE);
- int size = CE & 0xF; // getExpansionCount(CE);
- // CE = *CEOffset++;
- if (size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
- for (i = 0; i < size; i++) {
- if (s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
- latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
- latinOneCEs_[latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
- latinOneCEs_[2 * latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
- break;
- }
- addLatinOneEntry(contractionOffset, m_expansion_[offset + i], s);
- }
- } else { /* else, we do */
- while (m_expansion_[offset] != 0) {
- if (s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
- latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
- latinOneCEs_[latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
- latinOneCEs_[2 * latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
- break;
- }
- addLatinOneEntry(contractionOffset, m_expansion_[offset++], s);
- }
- }
- contractionOffset++;
- } else if (!isSpecial(CE)) {
- addLatinOneEntry(contractionOffset++, CE, s);
- } else {
- latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
- latinOneCEs_[latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
- latinOneCEs_[2 * latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
- contractionOffset++;
- }
- UCharOffset++;
- s.primShift = 24;
- s.secShift = 24;
- s.terShift = 24;
- if (contractionOffset == latinOneTableLen_) { // we need to reallocate
- resizeLatinOneTable(2 * latinOneTableLen_);
- }
- } while (m_contractionIndex_[UCharOffset] != 0xFFFF);
- }
- break;
- case CollationElementIterator.CE_SPEC_PROC_TAG_: {
- // 0xB7 is a precontext character defined in UCA5.1, a special
- // handle is implemeted in order to save LatinOne table for
- // most locales.
- if (ch == 0xb7) {
- addLatinOneEntry(ch, CE, s);
- } else {
- latinOneFailed_ = true;
- return false;
- }
- }
- break;
- default:
- latinOneFailed_ = true;
- return false;
- }
+ buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength);
+ buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength);
+ return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter);
}
+ } finally {
+ releaseCollationBuffer(buffer);
}
- // compact table
- if (contractionOffset < latinOneTableLen_) {
- resizeLatinOneTable(contractionOffset);
- }
- return true;
- }
-
- private static class ContractionInfo {
- int index;
}
- ContractionInfo m_ContInfo_;
-
- private int getLatinOneContraction(int strength, int CE, String s) {
- // int strength, int CE, String s, Integer ind) {
- int len = s.length();
- // const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);
- int UCharOffset = (CE & 0xFFF) - m_contractionOffset_;
- int offset = 1;
- int latinOneOffset = (CE & 0x00FFF000) >>> 12;
- char schar = 0, tchar = 0;
+ // package private constructors ------------------------------------------
- for (;;) {
- /*
- * if(len == -1) { if(s[*index] == 0) { // end of string
- * return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); } else { schar = s[*index]; }
- * } else {
- */
- if (m_ContInfo_.index == len) {
- return (latinOneCEs_[strength * latinOneTableLen_ + latinOneOffset]);
- } else {
- schar = s.charAt(m_ContInfo_.index);
- }
- // }
+ RuleBasedCollator(CollationTailoring t, ULocale vl) {
+ data = t.data;
+ settings = t.settings.clone();
+ tailoring = t;
+ validLocale = vl;
+ actualLocaleIsSameAsValid = false;
+ }
- while (schar > (tchar = m_contractionIndex_[UCharOffset + offset]/** (UCharOffset+offset) */
- )) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
- offset++;
- }
+ private void adoptTailoring(CollationTailoring t) {
+ assert(settings == null && data == null && tailoring == null);
+ data = t.data;
+ settings = t.settings.clone();
+ tailoring = t;
+ validLocale = t.actualLocale;
+ actualLocaleIsSameAsValid = false;
+ }
- if (schar == tchar) {
- m_ContInfo_.index++;
- return (latinOneCEs_[strength * latinOneTableLen_ + latinOneOffset + offset]);
- } else {
- if (schar > ENDOFLATINONERANGE_ /* & 0xFF00 */) {
- return BAIL_OUT_CE_;
- }
- // skip completely ignorables
- int isZeroCE = m_trie_.getLeadValue(schar); // UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
- if (isZeroCE == 0) { // we have to ignore completely ignorables
- m_ContInfo_.index++;
- continue;
- }
+ // package private methods -----------------------------------------------
- return (latinOneCEs_[strength * latinOneTableLen_ + latinOneOffset]);
- }
- }
+ /**
+ * Tests whether a character is "unsafe" for use as a collation starting point.
+ *
+ * @param c code point or code unit
+ * @return true if c is unsafe
+ * @see CollationElementIterator#setOffset(int)
+ */
+ final boolean isUnsafe(int c) {
+ return data.isUnsafeBackward(c, settings.readOnly().isNumeric());
}
/**
- * This is a fast strcoll, geared towards text in Latin-1. It supports contractions of size two, French secondaries
- * and case switching. You can use it with strengths primary to tertiary. It does not support shifted and case
- * level. It relies on the table build by setupLatin1Table. If it doesn't understand something, it will go to the
- * regular strcoll.
- * @param buffer collation buffer temporary state
+ * Frozen state of the collator.
*/
- private final int compareUseLatin1(String source, String target, int startOffset, CollationBuffer buffer) {
- int sLen = source.length();
- int tLen = target.length();
-
- int strength = getStrength();
-
- int sIndex = startOffset, tIndex = startOffset;
- char sChar = 0, tChar = 0;
- int sOrder = 0, tOrder = 0;
-
- boolean endOfSource = false;
-
- // uint32_t *elements = coll->latinOneCEs;
-
- boolean haveContractions = false; // if we have contractions in our string
- // we cannot do French secondary
-
- int offset = latinOneTableLen_;
-
- // Do the primary level
- primLoop:
- for (;;) {
- while (sOrder == 0) { // this loop skips primary ignorables
- // sOrder=getNextlatinOneCE(source);
- if (sIndex == sLen) {
- endOfSource = true;
- break;
- }
- sChar = source.charAt(sIndex++); // [sIndex++];
- // }
- if (sChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out
- // fprintf(stderr, "R");
- return compareRegular(source, target, startOffset, buffer);
- }
- sOrder = latinOneCEs_[sChar];
- if (isSpecial(sOrder)) { // if we got a special
- // specials can basically be either contractions or bail-out signs. If we get anything
- // else, we'll bail out anywasy
- if (getTag(sOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {
- m_ContInfo_.index = sIndex;
- sOrder = getLatinOneContraction(0, sOrder, source);
- sIndex = m_ContInfo_.index;
- haveContractions = true; // if there are contractions, we cannot do French secondary
- // However, if there are contractions in the table, but we always use just one char,
- // we might be able to do French. This should be checked out.
- }
- if (isSpecial(sOrder) /* == UCOL_BAIL_OUT_CE */) {
- // fprintf(stderr, "S");
- return compareRegular(source, target, startOffset, buffer);
- }
- }
- }
-
- while (tOrder == 0) { // this loop skips primary ignorables
- // tOrder=getNextlatinOneCE(target);
- if (tIndex == tLen) {
- if (endOfSource) {
- break primLoop;
- } else {
- return 1;
- }
- }
- tChar = target.charAt(tIndex++); // [tIndex++];
- if (tChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out
- // fprintf(stderr, "R");
- return compareRegular(source, target, startOffset, buffer);
- }
- tOrder = latinOneCEs_[tChar];
- if (isSpecial(tOrder)) {
- // Handling specials, see the comments for source
- if (getTag(tOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {
- m_ContInfo_.index = tIndex;
- tOrder = getLatinOneContraction(0, tOrder, target);
- tIndex = m_ContInfo_.index;
- haveContractions = true;
- }
- if (isSpecial(tOrder)/* == UCOL_BAIL_OUT_CE */) {
- // fprintf(stderr, "S");
- return compareRegular(source, target, startOffset, buffer);
- }
- }
- }
- if (endOfSource) { // source is finished, but target is not, say the result.
- return -1;
- }
-
- if (sOrder == tOrder) { // if we have same CEs, we continue the loop
- sOrder = 0;
- tOrder = 0;
- continue;
- } else {
- // compare current top bytes
- if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
- // top bytes differ, return difference
- if (sOrder >>> 8 < tOrder >>> 8) {
- return -1;
- } else {
- return 1;
- }
- // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
- // since we must return enum value
- }
-
- // top bytes match, continue with following bytes
- sOrder <<= 8;
- tOrder <<= 8;
- }
- }
-
- // after primary loop, we definitely know the sizes of strings,
- // so we set it and use simpler loop for secondaries and tertiaries
- // sLen = sIndex; tLen = tIndex;
- if (strength >= SECONDARY) {
- // adjust the table beggining
- // latinOneCEs_ += coll->latinOneTableLen;
- endOfSource = false;
-
- if (!m_isFrenchCollation_) { // non French
- // This loop is a simplified copy of primary loop
- // at this point we know that whole strings are latin-1, so we don't
- // check for that. We also know that we only have contractions as
- // specials.
- // sIndex = 0; tIndex = 0;
- sIndex = startOffset;
- tIndex = startOffset;
- secLoop: for (;;) {
- while (sOrder == 0) {
- if (sIndex == sLen) {
- endOfSource = true;
- break;
- }
- sChar = source.charAt(sIndex++); // [sIndex++];
- sOrder = latinOneCEs_[offset + sChar];
- if (isSpecial(sOrder)) {
- m_ContInfo_.index = sIndex;
- sOrder = getLatinOneContraction(1, sOrder, source);
- sIndex = m_ContInfo_.index;
- }
- }
-
- while (tOrder == 0) {
- if (tIndex == tLen) {
- if (endOfSource) {
- break secLoop;
- } else {
- return 1;
- }
- }
- tChar = target.charAt(tIndex++); // [tIndex++];
- tOrder = latinOneCEs_[offset + tChar];
- if (isSpecial(tOrder)) {
- m_ContInfo_.index = tIndex;
- tOrder = getLatinOneContraction(1, tOrder, target);
- tIndex = m_ContInfo_.index;
- }
- }
- if (endOfSource) {
- return -1;
- }
+ private Lock frozenLock;
- if (sOrder == tOrder) {
- sOrder = 0;
- tOrder = 0;
- continue;
- } else {
- // see primary loop for comments on this
- if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
- if (sOrder >>> 8 < tOrder >>> 8) {
- return -1;
- } else {
- return 1;
- }
- }
- sOrder <<= 8;
- tOrder <<= 8;
- }
- }
- } else { // French
- if (haveContractions) { // if we have contractions, we have to bail out
- // since we don't really know how to handle them here
- return compareRegular(source, target, startOffset, buffer);
- }
- // For French, we go backwards
- sIndex = sLen;
- tIndex = tLen;
- secFLoop: for (;;) {
- while (sOrder == 0) {
- if (sIndex == startOffset) {
- endOfSource = true;
- break;
- }
- sChar = source.charAt(--sIndex); // [--sIndex];
- sOrder = latinOneCEs_[offset + sChar];
- // don't even look for contractions
- }
+ private static final class CollationBuffer {
+ private CollationBuffer(CollationData data) {
+ // Eagerly create the reusable left/right iterator pairs; the buffer
+ // itself is cached and handed out by getCollationBuffer(), so these
+ // allocations happen once per collator rather than per comparison.
+ leftUTF16CollIter = new UTF16CollationIterator(data);
+ rightUTF16CollIter = new UTF16CollationIterator(data);
+ leftFCDUTF16Iter = new FCDUTF16CollationIterator(data);
+ rightFCDUTF16Iter = new FCDUTF16CollationIterator(data);
+ leftUTF16NFDIter = new UTF16NFDIterator();
+ rightUTF16NFDIter = new UTF16NFDIterator();
+ leftFCDUTF16NFDIter = new FCDUTF16NFDIterator();
+ rightFCDUTF16NFDIter = new FCDUTF16NFDIterator();
+ }
- while (tOrder == 0) {
- if (tIndex == startOffset) {
- if (endOfSource) {
- break secFLoop;
- } else {
- return 1;
- }
- }
- tChar = target.charAt(--tIndex); // [--tIndex];
- tOrder = latinOneCEs_[offset + tChar];
- // don't even look for contractions
- }
- if (endOfSource) {
- return -1;
- }
+ UTF16CollationIterator leftUTF16CollIter;
+ UTF16CollationIterator rightUTF16CollIter;
+ FCDUTF16CollationIterator leftFCDUTF16Iter;
+ FCDUTF16CollationIterator rightFCDUTF16Iter;
- if (sOrder == tOrder) {
- sOrder = 0;
- tOrder = 0;
- continue;
- } else {
- // see the primary loop for comments
- if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
- if (sOrder >>> 8 < tOrder >>> 8) {
- return -1;
- } else {
- return 1;
- }
- }
- sOrder <<= 8;
- tOrder <<= 8;
- }
- }
- }
- }
+ UTF16NFDIterator leftUTF16NFDIter;
+ UTF16NFDIterator rightUTF16NFDIter;
+ FCDUTF16NFDIterator leftFCDUTF16NFDIter;
+ FCDUTF16NFDIterator rightFCDUTF16NFDIter;
- if (strength >= TERTIARY) {
- // tertiary loop is the same as secondary (except no French)
- offset += latinOneTableLen_;
- // sIndex = 0; tIndex = 0;
- sIndex = startOffset;
- tIndex = startOffset;
- endOfSource = false;
- for (;;) {
- while (sOrder == 0) {
- if (sIndex == sLen) {
- endOfSource = true;
- break;
- }
- sChar = source.charAt(sIndex++); // [sIndex++];
- sOrder = latinOneCEs_[offset + sChar];
- if (isSpecial(sOrder)) {
- m_ContInfo_.index = sIndex;
- sOrder = getLatinOneContraction(2, sOrder, source);
- sIndex = m_ContInfo_.index;
- }
- }
- while (tOrder == 0) {
- if (tIndex == tLen) {
- if (endOfSource) {
- return 0; // if both strings are at the end, they are equal
- } else {
- return 1;
- }
- }
- tChar = target.charAt(tIndex++); // [tIndex++];
- tOrder = latinOneCEs_[offset + tChar];
- if (isSpecial(tOrder)) {
- m_ContInfo_.index = tIndex;
- tOrder = getLatinOneContraction(2, tOrder, target);
- tIndex = m_ContInfo_.index;
- }
- }
- if (endOfSource) {
- return -1;
- }
- if (sOrder == tOrder) {
- sOrder = 0;
- tOrder = 0;
- continue;
- } else {
- if (((sOrder ^ tOrder) & 0xff000000) != 0) {
- if (sOrder >>> 8 < tOrder >>> 8) {
- return -1;
- } else {
- return 1;
- }
- }
- sOrder <<= 8;
- tOrder <<= 8;
- }
- }
- }
- return 0;
+ RawCollationKey rawCollationKey;
}
/**
* @return the version object associated with this collator
* @stable ICU 2.8
*/
+ @Override
public VersionInfo getVersion() {
- /* RunTime version */
+ VersionInfo version = tailoring.version;
int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
- /* Builder version */
- int bdVersion = m_version_.getMajor();
-
- /*
- * Charset Version. Need to get the version from cnv files makeconv should populate cnv files with version and
- * an api has to be provided in ucnv.h to obtain this version
- */
- int csVersion = 0;
-
- /* combine the version info */
- int cmbVersion = ((rtVersion << 11) | (bdVersion << 6) | (csVersion)) & 0xFFFF;
-
- /* Tailoring rules */
- return VersionInfo.getInstance(cmbVersion >> 8, cmbVersion & 0xFF, m_version_.getMinor(),
- UCA_.m_UCA_version_.getMajor());
-
- // versionInfo[0] = (uint8_t)(cmbVersion>>8);
- // versionInfo[1] = (uint8_t)cmbVersion;
- // versionInfo[2] = coll->image->version[1];
- // versionInfo[3] = coll->UCA->image->UCAVersion[0];
+ // Fold the collation runtime version into the major field of the
+ // tailoring's version, so the reported version changes when either the
+ // tailoring data or the collation runtime implementation changes.
+ return VersionInfo.getInstance(
+ version.getMajor() + (rtVersion << 4) + (rtVersion >> 4),
+ version.getMinor(), version.getMilli(), version.getMicro());
}
/**
* @return the version of the Unicode Collation Algorithm (UCA) data
* on which this collator is based
* @stable ICU 2.8
*/
+ @Override
public VersionInfo getUCAVersion() {
- return UCA_.m_UCA_version_;
+ VersionInfo v = getVersion();
+ // Note: This is tied to how the current implementation encodes the UCA version
+ // in the overall getVersion().
+ // Alternatively, we could load the root collator and get at lower-level data from there.
+ // Either way, it will reflect the input collator's UCA version only
+ // if it is a known implementation.
+ // (C++ comment) It would be cleaner to make this a virtual Collator method.
+ // (In Java, it is virtual.)
+ return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0);
}
- private transient boolean m_reallocLatinOneCEs_;
-
private CollationBuffer collationBuffer;
private final CollationBuffer getCollationBuffer() {
if (isFrozen()) {
frozenLock.lock();
- }
- if (collationBuffer == null) {
- collationBuffer = new CollationBuffer();
- } else {
- collationBuffer.resetBuffers();
+ } else if (collationBuffer == null) {
+ collationBuffer = new CollationBuffer(data);
}
return collationBuffer;
}
frozenLock.unlock();
}
}
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public ULocale getLocale(ULocale.Type type) {
+ if (type == ULocale.ACTUAL_LOCALE) {
+ // If setLocale() found the actual locale to differ from the tailoring's
+ // (and thus equal the valid locale), report the valid locale; otherwise
+ // fall back to the tailoring's own actual locale.
+ return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale;
+ } else if(type == ULocale.VALID_LOCALE) {
+ return validLocale;
+ } else {
+ throw new IllegalArgumentException("unknown ULocale.Type " + type);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ void setLocale(ULocale valid, ULocale actual) {
+ // This method is called
+ // by other protected functions that check and make sure that
+ // valid and actual are not null before passing them in
+ assert (valid == null) == (actual == null);
+ // Another check we could do is that the actual locale is at
+ // the same level or less specific than the valid locale.
+ // TODO: Starting with Java 7, use Objects.equals(a, b).
+ if(Utility.objectEquals(actual, tailoring.actualLocale)) {
+ // actual matches the tailoring's locale: keep reading it from the tailoring.
+ actualLocaleIsSameAsValid = false;
+ } else {
+ // Otherwise the actual locale must have been adjusted to the valid locale.
+ assert(Utility.objectEquals(actual, valid));
+ actualLocaleIsSameAsValid = true;
+ }
+ // Do not modify tailoring.actualLocale:
+ // We cannot be sure that that would be thread-safe.
+ validLocale = valid;
+ }
+
+ CollationData data;
+ SharedObject.Reference<CollationSettings> settings; // reference-counted
+ CollationTailoring tailoring; // C++: reference-counted
+ private ULocale validLocale;
+ // Note: No need in Java to track which attributes have been set explicitly.
+ // int or EnumSet explicitlySetAttributes;
+
+ private boolean actualLocaleIsSameAsValid;
}
/*
*******************************************************************************
- * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
*/
public abstract class SearchIterator
{
-
+ /**
+ * The BreakIterator to define the boundaries of a logical match.
+ * This value can be a null.
+ * See class documentation for more information.
+ * @see #setBreakIterator(BreakIterator)
+ * @see #getBreakIterator
+ * @see BreakIterator
+ * @stable ICU 2.0
+ */
+ protected BreakIterator breakIterator;
+
+ /**
+ * Target text for searching.
+ * @see #setTarget(CharacterIterator)
+ * @see #getTarget
+ * @stable ICU 2.0
+ */
+ protected CharacterIterator targetText;
+ /**
+ * Length of the most current match in target text.
+ * Value 0 is the default value.
+ * @see #setMatchLength
+ * @see #getMatchLength
+ * @stable ICU 2.0
+ */
+ protected int matchLength;
+
+ /**
+ * Java port of ICU4C struct USearch (usrchimp.h)
+ *
+ * Note:
+ *
+ * ICU4J already exposed some protected members such as
+ * targetText, breakIterator and matchLength as a part of stable
+ * APIs. In ICU4C, they are exposed through USearch struct,
+ * although USearch struct itself is internal API.
+ *
+ * This class was created for making ICU4J code in parallel to
+ * ICU4C implementation. ICU4J implementation accesses member
+ * fields like C struct (e.g. search_.isOverlap_) mostly, except
+ * fields already exposed as protected member (e.g. search_.text()).
+ *
+ */
+ final class Search {
+
+ CharacterIterator text() {
+ return SearchIterator.this.targetText;
+ }
+
+ void setTarget(CharacterIterator text) {
+ SearchIterator.this.targetText = text;
+ }
+
+ /** Flag to indicate if overlapping search is to be done.
+ E.g. looking for "aa" in "aaa" will yield matches at offset 0 and 1. */
+ boolean isOverlap_;
+
+ /** Flag to indicate if canonical matching is to be done. */
+ boolean isCanonicalMatch_;
+
+ /** Option controlling how collation elements are compared; see ElementComparisonType. */
+ ElementComparisonType elementComparisonType_;
+
+ // NOTE(review): presumably used when the caller has not supplied a
+ // BreakIterator — confirm against the subclasses that create it.
+ BreakIterator internalBreakIter_;
+
+ BreakIterator breakIter() {
+ return SearchIterator.this.breakIterator;
+ }
+
+ void setBreakIter(BreakIterator breakIter) {
+ SearchIterator.this.breakIterator = breakIter;
+ }
+
+ /** Index of the most recent match in the target text, or DONE if there is none. */
+ int matchedIndex_;
+
+ int matchedLength() {
+ return SearchIterator.this.matchLength;
+ }
+
+ void setMatchedLength(int matchedLength) {
+ SearchIterator.this.matchLength = matchedLength;
+ }
+
+ /** Flag indicates if we are doing a forwards search */
+ boolean isForwardSearching_;
+
+ /** Flag indicates if we are at the start of a string search.
+ This indicates that we are in forward search and at the start of m_text. */
+ boolean reset_;
+
+ // Convenient methods for accessing begin/end index of the
+ // target text. These are ICU4J only and are not data fields.
+ int beginIndex() {
+ if (targetText == null) {
+ return 0;
+ }
+ return targetText.getBeginIndex();
+ }
+
+ int endIndex() {
+ if (targetText == null) {
+ return 0;
+ }
+ return targetText.getEndIndex();
+ }
+ }
+
+ Search search_ = new Search();
+
// public data members -------------------------------------------------
/**
* @stable ICU 2.8
*/
public void setIndex(int position) {
- if (position < targetText.getBeginIndex()
- || position > targetText.getEndIndex()) {
+ if (position < search_.beginIndex()
+ || position > search_.endIndex()) {
throw new IndexOutOfBoundsException(
"setIndex(int) expected position to be between " +
- targetText.getBeginIndex() + " and " + targetText.getEndIndex());
+ search_.beginIndex() + " and " + search_.endIndex());
}
- m_setOffset_ = position;
- m_reset_ = false;
- matchLength = 0;
+ search_.reset_ = false;
+ search_.setMatchedLength(0);
+ search_.matchedIndex_ = DONE;
}
/**
*/
public void setOverlapping(boolean allowOverlap)
{
- m_isOverlap_ = allowOverlap;
+ search_.isOverlap_ = allowOverlap;
}
/**
*/
public void setBreakIterator(BreakIterator breakiter)
{
- breakIterator = breakiter;
- if (breakIterator != null) {
- breakIterator.setText(targetText);
+ search_.setBreakIter(breakiter);
+ if (search_.breakIter() != null) {
+ // Create a clone of CharacterIterator, so it won't
+ // affect the position currently held by search_.text()
+ if (search_.text() != null) {
+ search_.breakIter().setText((CharacterIterator)search_.text().clone());
+ }
}
}
-
+
/**
* Set the target text to be searched. Text iteration will then begin at
* the start of the text string. This method is useful if you want to
if (text == null || text.getEndIndex() == text.getIndex()) {
throw new IllegalArgumentException("Illegal null or empty text");
}
-
- targetText = text;
- targetText.setIndex(targetText.getBeginIndex());
- matchLength = 0;
- m_reset_ = true;
- m_isForwardSearching_ = true;
- if (breakIterator != null) {
- breakIterator.setText(targetText);
+
+ text.setIndex(text.getBeginIndex());
+ search_.setTarget(text);
+ search_.matchedIndex_ = DONE;
+ search_.setMatchedLength(0);
+ search_.reset_ = true;
+ search_.isForwardSearching_ = true;
+ if (search_.breakIter() != null) {
+ // Create a clone of CharacterIterator, so it won't
+ // affect the position currently held by search_.text()
+ search_.breakIter().setText((CharacterIterator)text.clone());
+ }
+ if (search_.internalBreakIter_ != null) {
+ search_.internalBreakIter_.setText((CharacterIterator)text.clone());
}
}
+ //TODO: We should add APIs below to match ICU4C APIs
+ // setCanonicalMatch
+ // setElementComparison
+
// public getters ----------------------------------------------------
/**
*/
public int getMatchStart()
{
- return m_lastMatchStart_;
+ return search_.matchedIndex_;
}
/**
*/
public int getMatchLength()
{
- return matchLength;
+ return search_.matchedLength();
}
/**
*/
public BreakIterator getBreakIterator()
{
- return breakIterator;
+ return search_.breakIter();
}
/**
*/
public CharacterIterator getTarget()
{
- return targetText;
+ return search_.text();
}
/**
*/
public String getMatchedText()
{
- if (matchLength > 0) {
- int limit = m_lastMatchStart_ + matchLength;
- StringBuilder result = new StringBuilder(matchLength);
- result.append(targetText.current());
- targetText.next();
- while (targetText.getIndex() < limit) {
- result.append(targetText.current());
- targetText.next();
+ if (search_.matchedLength() > 0) {
+ int limit = search_.matchedIndex_ + search_.matchedLength();
+ StringBuilder result = new StringBuilder(search_.matchedLength());
+ CharacterIterator it = search_.text();
+ it.setIndex(search_.matchedIndex_);
+ while (it.getIndex() < limit) {
+ result.append(it.current());
+ it.next();
}
- targetText.setIndex(m_lastMatchStart_);
+ it.setIndex(search_.matchedIndex_);
return result.toString();
}
return null;
*/
public int next()
{
- int start = targetText.getIndex();
- if (m_setOffset_ != DONE) {
- start = m_setOffset_;
- m_setOffset_ = DONE;
- }
- if (m_isForwardSearching_) {
- if (!m_reset_ &&
- start + matchLength >= targetText.getEndIndex()) {
- // not enough characters to match
- matchLength = 0;
- targetText.setIndex(targetText.getEndIndex());
- m_lastMatchStart_ = DONE;
- return DONE;
+ int index = getIndex(); // offset = getOffset() in ICU4C
+ int matchindex = search_.matchedIndex_;
+ int matchlength = search_.matchedLength();
+ search_.reset_ = false;
+ if (search_.isForwardSearching_) {
+ int endIdx = search_.endIndex();
+ if (index == endIdx || matchindex == endIdx ||
+ (matchindex != DONE &&
+ matchindex + matchlength >= endIdx)) {
+ setMatchNotFound();
+ return DONE;
}
- m_reset_ = false;
- }
- else {
- // switching direction.
- // if matchedIndex == USEARCH_DONE, it means that either a
- // setIndex has been called or that previous ran off the text
+ } else {
+ // switching direction.
+ // if matchedIndex == DONE, it means that either a
+ // setIndex (setOffset in C) has been called or that previous ran off the text
// string. the iterator would have been set to offset 0 if a
// match is not found.
- m_isForwardSearching_ = true;
- if (start != DONE) {
+ search_.isForwardSearching_ = true;
+ if (search_.matchedIndex_ != DONE) {
// there's no need to set the collation element iterator
// the next call to next will set the offset.
- return start;
+ return matchindex;
}
}
-
- if (start == DONE) {
- start = targetText.getBeginIndex();
- }
- if (matchLength > 0) {
- // if match length is 0 we are at the start of the iteration
- if (m_isOverlap_) {
- start ++;
- }
- else {
- start += matchLength;
+
+ if (matchlength > 0) {
+ // if matchlength is 0 we are at the start of the iteration
+ if (search_.isOverlap_) {
+ index++;
+ } else {
+ index += matchlength;
}
}
- m_lastMatchStart_ = handleNext(start);
- return m_lastMatchStart_;
+
+ return handleNext(index);
}
/**
*/
public int previous()
{
- int start = targetText.getIndex();
- if (m_setOffset_ != DONE) {
- start = m_setOffset_;
- m_setOffset_ = DONE;
- }
- if (m_reset_) {
- m_isForwardSearching_ = false;
- m_reset_ = false;
- start = targetText.getEndIndex();
+ int index; // offset in ICU4C
+ if (search_.reset_) {
+ index = search_.endIndex(); // m_search_->textLength in ICU4C
+ search_.isForwardSearching_ = false;
+ search_.reset_ = false;
+ setIndex(index);
+ } else {
+ index = getIndex();
}
-
- if (m_isForwardSearching_ == true) {
+
+ int matchindex = search_.matchedIndex_;
+ if (search_.isForwardSearching_) {
// switching direction.
- // if matchedIndex == USEARCH_DONE, it means that either a
- // setIndex has been called or that next ran off the text
+ // if matchedIndex == DONE, it means that either a
+ // setIndex (setOffset in C) has been called or that next ran off the text
// string. the iterator would have been set to offset textLength if
// a match is not found.
- m_isForwardSearching_ = false;
- if (start != targetText.getEndIndex()) {
- return start;
+ search_.isForwardSearching_ = false;
+ if (matchindex != DONE) {
+ return matchindex;
}
- }
- else {
- if (start == targetText.getBeginIndex()) {
+ } else {
+ int startIdx = search_.beginIndex();
+ if (index == startIdx || matchindex == startIdx) {
// not enough characters to match
- matchLength = 0;
- targetText.setIndex(targetText.getBeginIndex());
- m_lastMatchStart_ = DONE;
+ setMatchNotFound();
return DONE;
}
}
- m_lastMatchStart_ = handlePrevious(start);
- return m_lastMatchStart_;
+ if (matchindex != DONE) {
+ if (search_.isOverlap_) {
+ matchindex += search_.matchedLength() - 2;
+ }
+
+ return handlePrevious(matchindex);
+ }
+
+ return handlePrevious(index);
}
/**
*/
public boolean isOverlapping()
{
- return m_isOverlap_;
+ return search_.isOverlap_;
}
-
+
+ //TODO: We should add APIs below to match ICU4C APIs
+ // isCanonicalMatch
+ // getElementComparison
+
/**
* <p>
* Resets the search iteration. All properties will be reset to their
*/
public void reset()
{
- // reset is setting the attributes that are already in string search
- matchLength = 0;
- setIndex(targetText.getBeginIndex());
- m_isOverlap_ = false;
- m_isForwardSearching_ = true;
- m_reset_ = true;
- m_setOffset_ = DONE;
+ setMatchNotFound();
+ setIndex(search_.beginIndex());
+ search_.isOverlap_ = false;
+ search_.isCanonicalMatch_ = false;
+ search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
+ search_.isForwardSearching_ = true;
+ search_.reset_ = true;
}
/**
*/
public final int first()
{
- m_isForwardSearching_ = true;
- setIndex(targetText.getBeginIndex());
- return next();
+ int startIdx = search_.beginIndex();
+ setIndex(startIdx);
+ return handleNext(startIdx);
}
/**
*/
public final int following(int position)
{
- m_isForwardSearching_ = true;
- // position checked in usearch_setOffset
setIndex(position);
- return next();
+ return handleNext(position);
}
/**
*/
public final int last()
{
- m_isForwardSearching_ = false;
- setIndex(targetText.getEndIndex());
- return previous();
+ int endIdx = search_.endIndex();
+ setIndex(endIdx);
+ return handlePrevious(endIdx);
}
/**
*/
public final int preceding(int position)
{
- m_isForwardSearching_ = false;
- // position checked in usearch_setOffset
setIndex(position);
- return previous();
+ return handlePrevious(position);
}
-
- // protected data member ----------------------------------------------
-
- /**
- * The BreakIterator to define the boundaries of a logical match.
- * This value can be a null.
- * See class documentation for more information.
- * @see #setBreakIterator(BreakIterator)
- * @see #getBreakIterator
- * @see BreakIterator
- * @stable ICU 2.0
- */
- protected BreakIterator breakIterator;
- /**
- * Target text for searching.
- * @see #setTarget(CharacterIterator)
- * @see #getTarget
- * @stable ICU 2.0
- */
- protected CharacterIterator targetText;
- /**
- * Length of the most current match in target text.
- * Value 0 is the default value.
- * @see #setMatchLength
- * @see #getMatchLength
- * @stable ICU 2.0
- */
- protected int matchLength;
-
// protected constructor ----------------------------------------------
/**
"Illegal argument target. " +
" Argument can not be null or of length 0");
}
- targetText = target;
- breakIterator = breaker;
- if (breakIterator != null) {
- breakIterator.setText(target);
+
+ search_.setTarget(target);
+ search_.setBreakIter(breaker);
+ if (search_.breakIter() != null) {
+ search_.breakIter().setText((CharacterIterator)target.clone());
}
- matchLength = 0;
- m_lastMatchStart_ = DONE;
- m_isOverlap_ = false;
- m_isForwardSearching_ = true;
- m_reset_ = true;
- m_setOffset_ = DONE;
+ search_.isOverlap_ = false;
+ search_.isCanonicalMatch_ = false;
+ search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
+ search_.isForwardSearching_ = true;
+ search_.reset_ = true;
+ search_.matchedIndex_ = DONE;
+ search_.setMatchedLength(0);
}
-
+
// protected methods --------------------------------------------------
*/
protected void setMatchLength(int length)
{
- matchLength = length;
+ search_.setMatchedLength(length);
}
/**
* @stable ICU 2.0
*/
protected abstract int handlePrevious(int startAt);
-
- // private data members ------------------------------------------------
-
+
/**
- * Flag indicates if we are doing a forwards search
+ * Resets the match state to "no match": match index DONE and length 0.
+ *
+ * @internal
+ * @deprecated This API is ICU internal only.
*/
- private boolean m_isForwardSearching_;
+ //TODO: This protected method is @stable 2.0 in ICU4C
+ protected void setMatchNotFound() {
+ search_.matchedIndex_ = DONE;
+ search_.setMatchedLength(0);
+ }
+
/**
- * Flag to indicate if overlapping search is to be done.
- * E.g. looking for "aa" in "aaa" will yield matches at offset 0 and 1.
+ * Option to control how collation elements are compared.
+ * The default value will be {@link #STANDARD_ELEMENT_COMPARISON}.
+ *
+ * @see #setElementComparisonType(ElementComparisonType)
+ * @see #getElementComparisonType()
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
*/
- private boolean m_isOverlap_;
- /**
- * Flag indicates if we are at the start of a string search.
- * This indicates that we are in forward search and at the start of m_text.
- */
- private boolean m_reset_;
+ public enum ElementComparisonType {
+ /**
+ * Standard collation element comparison at the specified collator strength.
+ *
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
+ */
+ STANDARD_ELEMENT_COMPARISON,
+ /**
+ * <p>Collation element comparison is modified to effectively provide behavior
+ * between the specified strength and strength - 1.</p>
+ *
+ * <p>Collation elements in the pattern that have the base weight for the specified
+ * strength are treated as "wildcards" that match an element with any other
+ * weight at that collation level in the searched text. For example, with a
+ * secondary-strength English collator, a plain 'e' in the pattern will match
+ * a plain e or an e with any diacritic in the searched text, but an e with
+ * diacritic in the pattern will only match an e with the same diacritic in
+ * the searched text.</p>
+ *
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
+ */
+ PATTERN_BASE_WEIGHT_IS_WILDCARD,
+
+ /**
+ * <p>Collation element comparison is modified to effectively provide behavior
+ * between the specified strength and strength - 1.</p>
+ *
+ * <p>Collation elements in either the pattern or the searched text that have the
+ * base weight for the specified strength are treated as "wildcards" that match
+ * an element with any other weight at that collation level. For example, with
+ * a secondary-strength English collator, a plain 'e' in the pattern will match
+ * a plain e or an e with any diacritic in the searched text, but an e with
+ * diacritic in the pattern will only match an e with the same diacritic or a
+ * plain e in the searched text.</p>
+ *
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
+ */
+ ANY_BASE_WEIGHT_IS_WILDCARD
+ }
+
/**
- * Data member to store user defined position in setIndex().
- * If setIndex() is not called, this value will be DONE.
- */
- private int m_setOffset_;
+ * <p>Sets the collation element comparison type.</p>
+ *
+ * <p>The default comparison type is {@link ElementComparisonType#STANDARD_ELEMENT_COMPARISON}.</p>
+ *
+ * @param type the element comparison type to use for subsequent searches
+ * @see ElementComparisonType
+ * @see #getElementComparisonType()
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
+ */
+ public void setElementComparisonType(ElementComparisonType type) {
+ search_.elementComparisonType_ = type;
+ }
+
/**
- * Offset of the beginning of the last match
+ * <p>Returns the collation element comparison type.</p>
+ *
+ * @return the element comparison type currently in effect
+ * @see ElementComparisonType
+ * @see #setElementComparisonType(ElementComparisonType)
+ * @draft ICU 53
+ * @provisional This API might change or be removed in a future release.
*/
- private int m_lastMatchStart_;
+ public ElementComparisonType getElementComparisonType() {
+ return search_.elementComparisonType_;
+ }
}
/*
*******************************************************************************
- * Copyright (C) 1996-2011, International Business Machines Corporation and
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
-
package com.ibm.icu.text;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Locale;
-import com.ibm.icu.impl.CharacterIteratorWrapper;
-import com.ibm.icu.impl.Norm2AllModes;
-import com.ibm.icu.impl.Normalizer2Impl;
-import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;
+// Java porting note:
+// ICU4C implementation contains dead code in many places.
+// While porting the ICU4C linear search implementation, this dead code
+// was not fully ported. The code blocks tagged with "// *** Boyer-Moore ***"
+// are that dead code, still available in ICU4C.
+
+//TODO: ICU4C implementation does not seem to handle a UCharacterIterator pointing
+// to a fragment of text properly. ICU4J uses CharacterIterator to navigate through
+// the input text. We need to carefully review the code ported from ICU4C
+// that assumes the start index is 0.
+
+//TODO: ICU4C implementation initializes pattern.CE and pattern.PCE. It looks
+// like CE is no longer used, except in a few places checking CELength. This looks
+// like a leftover from the already disabled Boyer-Moore search code. This Java
+// implementation preserves the code, but we should clean it up later.
+
+//TODO: We need to update document to remove the term "Boyer-Moore search".
+
/**
* <p>
* <code>StringSearch</code> is the concrete subclass of
// internal notes: all methods do not guarantee the correct status of the
// characteriterator. the caller has to maintain the original index position
// if necessary. methods could change the index position as it deems fit
-public final class StringSearch extends SearchIterator
-{
-
- // public constructors --------------------------------------------------
+public final class StringSearch extends SearchIterator {
+ /**
+ * DONE is returned by previous() and next() after all valid matches have
+ * been returned, and by first() and last() if there are no matches at all.
+ * @see #previous
+ * @see #next
+ * @stable ICU 2.0
+ */
+ public static final int DONE = -1;
+
+ private Pattern pattern_;
+ private RuleBasedCollator collator_;
+
+ // the position within the collation element iterator is used to determine
+ // if we are at the start of the text.
+ private CollationElementIterator textIter_;
+ private CollationPCE textProcessedIter_;
+
+ // utility collation element, used throughout program for temporary
+ // iteration.
+ private CollationElementIterator utilIter_;
+
+ private int strength_;
+ int ceMask_;
+ int variableTop_;
+
+ private boolean toShift_;
+
+ // *** Boyer-Moore ***
+ // private char[] canonicalPrefixAccents_;
+ // private char[] canonicalSuffixAccents_;
+
/**
* Initializes the iterator to use the language-specific rules defined in
* the argument collator to search for argument pattern in the argument
* @see SearchIterator
* @stable ICU 2.0
*/
- public StringSearch(String pattern, CharacterIterator target,
- RuleBasedCollator collator, BreakIterator breakiter)
- {
+ public StringSearch(String pattern, CharacterIterator target, RuleBasedCollator collator,
+ BreakIterator breakiter) {
+
+ // This implementation is ported from ICU4C usearch_open()
+
super(target, breakiter);
- m_textBeginOffset_ = targetText.getBeginIndex();
- m_textLimitOffset_ = targetText.getEndIndex();
- m_collator_ = collator;
- m_colEIter_ = m_collator_.getCollationElementIterator(target);
- m_utilColEIter_ = collator.getCollationElementIterator("");
- m_ceMask_ = getMask(m_collator_.getStrength());
- m_isCanonicalMatch_ = false;
- m_pattern_ = new Pattern(pattern);
- m_matchedIndex_ = DONE;
- m_charBreakIter_ = BreakIterator.getCharacterInstance(/*m_collator_.getLocale(ULocale.ACTUAL_LOCALE)*/);
- m_charBreakIter_.setText(target);
+
+ // string search does not really work when numeric collation is turned on
+ if (collator.getNumericCollation()) {
+ throw new UnsupportedOperationException("Numeric collation is not supported by StringSearch");
+ }
+
+ collator_ = collator;
+ strength_ = collator.getStrength();
+ ceMask_ = getMask(strength_);
+ toShift_ = collator.isAlternateHandlingShifted();
+ variableTop_ = collator.getVariableTop();
+
+ pattern_ = new Pattern(pattern);
+
+ search_.setMatchedLength(0);
+ search_.matchedIndex_ = DONE;
+
+ utilIter_ = null;
+ textIter_ = new CollationElementIterator(target, collator);
+
+ textProcessedIter_ = null;
+
+ // This is done by super class constructor
+ /*
+ search_.isOverlap_ = false;
+ search_.isCanonicalMatch_ = false;
+ search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
+ search_.isForwardSearching_ = true;
+ search_.reset_ = true;
+ */
+ ULocale collLocale = collator.getLocale(ULocale.VALID_LOCALE);
+ search_.internalBreakIter_ = BreakIterator.getCharacterInstance(collLocale == null ? ULocale.ROOT : collLocale);
+ search_.internalBreakIter_.setText((CharacterIterator)target.clone()); // We need to create a clone
+
initialize();
}
* @see SearchIterator
* @stable ICU 2.0
*/
- public StringSearch(String pattern, CharacterIterator target,
- RuleBasedCollator collator)
- {
- this(pattern, target, collator, null/*BreakIterator.getCharacterInstance()*/);
+ public StringSearch(String pattern, CharacterIterator target, RuleBasedCollator collator) {
+ this(pattern, target, collator, null);
}
/**
* @see SearchIterator
* @stable ICU 2.0
*/
- public StringSearch(String pattern, CharacterIterator target, Locale locale)
- {
+ public StringSearch(String pattern, CharacterIterator target, Locale locale) {
this(pattern, target, ULocale.forLocale(locale));
}
* @see SearchIterator
* @stable ICU 3.2
*/
- public StringSearch(String pattern, CharacterIterator target, ULocale locale)
- {
- this(pattern, target, (RuleBasedCollator)Collator.getInstance(locale),
- null/*BreakIterator.getCharacterInstance(locale)*/);
+ public StringSearch(String pattern, CharacterIterator target, ULocale locale) {
+ this(pattern, target, (RuleBasedCollator) Collator.getInstance(locale), null);
}
/**
* @see SearchIterator
* @stable ICU 2.0
*/
- public StringSearch(String pattern, String target)
- {
+ public StringSearch(String pattern, String target) {
this(pattern, new StringCharacterIterator(target),
- (RuleBasedCollator)Collator.getInstance(),
- null/*BreakIterator.getCharacterInstance()*/);
+ (RuleBasedCollator) Collator.getInstance(), null);
}
- // public getters -----------------------------------------------------
-
/**
* <p>
* Gets the RuleBasedCollator used for the language rules.
* @see #setCollator
* @stable ICU 2.0
*/
- public RuleBasedCollator getCollator()
- {
- return m_collator_;
- }
-
- /**
- * Returns the pattern for which StringSearch is searching for.
- * @return the pattern searched for
- * @stable ICU 2.0
- */
- public String getPattern()
- {
- return m_pattern_.targetText;
- }
-
- /**
- * Return the index in the target text where the iterator is currently
- * positioned at.
- * If the iteration has gone past the end of the target text or past
- * the beginning for a backwards search, {@link #DONE} is returned.
- * @return index in the target text where the iterator is currently
- * positioned at
- * @stable ICU 2.8
- */
- public int getIndex()
- {
- int result = m_colEIter_.getOffset();
- if (isOutOfBounds(m_textBeginOffset_, m_textLimitOffset_, result)) {
- return DONE;
- }
- return result;
- }
-
- /**
- * Determines whether canonical matches (option 1, as described in the
- * class documentation) is set.
- * See setCanonical(boolean) for more information.
- * @see #setCanonical
- * @return true if canonical matches is set, false otherwise
- * @stable ICU 2.8
- */
- public boolean isCanonical()
- {
- return m_isCanonicalMatch_;
+ public RuleBasedCollator getCollator() {
+ return collator_;
}
-
- // public setters -----------------------------------------------------
-
+
/**
* <p>
* Sets the RuleBasedCollator to be used for language-specific searching.
* @see #getCollator
* @stable ICU 2.0
*/
- public void setCollator(RuleBasedCollator collator)
- {
+ public void setCollator(RuleBasedCollator collator) {
if (collator == null) {
throw new IllegalArgumentException("Collator can not be null");
}
- m_collator_ = collator;
- m_ceMask_ = getMask(m_collator_.getStrength());
- // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
+ collator_ = collator;
+ ceMask_ = getMask(collator_.getStrength());
+
+ ULocale collLocale = collator.getLocale(ULocale.VALID_LOCALE);
+ search_.internalBreakIter_ = BreakIterator.getCharacterInstance(collLocale == null ? ULocale.ROOT : collLocale);
+ search_.internalBreakIter_.setText((CharacterIterator)search_.text().clone()); // We need to create a clone
+
+ toShift_ = collator.isAlternateHandlingShifted();
+ variableTop_ = collator.getVariableTop();
+ textIter_ = new CollationElementIterator(pattern_.text_, collator);
+ utilIter_ = new CollationElementIterator(pattern_.text_, collator);
+
+ // initialize() _after_ setting the iterators for the new collator.
initialize();
- m_colEIter_.setCollator(m_collator_);
- m_utilColEIter_.setCollator(m_collator_);
- m_charBreakIter_ = BreakIterator.getCharacterInstance(/*collator.getLocale(ULocale.VALID_LOCALE)*/);
- m_charBreakIter_.setText(targetText);
}
-
+
+ /**
+ * Returns the pattern for which StringSearch is searching for.
+ * @return the pattern searched for
+ * @stable ICU 2.0
+ */
+ public String getPattern() {
+ return pattern_.text_;
+ }
+
/**
* <p>
* Set the pattern to search for.
* length 0
* @stable ICU 2.0
*/
- public void setPattern(String pattern)
- {
+ public void setPattern(String pattern) {
if (pattern == null || pattern.length() <= 0) {
throw new IllegalArgumentException(
"Pattern to search for can not be null or of length 0");
}
- m_pattern_.targetText = pattern;
+ pattern_.text_ = pattern;
initialize();
}
-
+
+ /**
+ * Determines whether canonical matches (option 1, as described in the
+ * class documentation) is set.
+ * See setCanonical(boolean) for more information.
+ * @see #setCanonical
+ * @return true if canonical matches is set, false otherwise
+ * @stable ICU 2.8
+ */
+ //TODO: hoist this to SearchIterator
+ public boolean isCanonical() {
+ return search_.isCanonicalMatch_;
+ }
+
+ /**
+ * <p>
+ * Set the canonical match mode. See class documentation for details.
+ * The default setting for this property is false.
+ * </p>
+ * @param allowCanonical flag indicator if canonical matches are allowed
+ * @see #isCanonical
+ * @stable ICU 2.8
+ */
+ //TODO: hoist this to SearchIterator
+ public void setCanonical(boolean allowCanonical) {
+ search_.isCanonicalMatch_ = allowCanonical;
+ }
+
/**
- * Set the target text to be searched. Text iteration will hence begin at
+ * Set the target text to be searched. Text iteration will hence begin at
* the start of the text string. This method is useful if you want to
* re-use an iterator to search within a different body of text.
* @param text new text iterator to look for match,
* @see #getTarget
* @stable ICU 2.8
*/
- public void setTarget(CharacterIterator text)
- {
+ @Override
+ public void setTarget(CharacterIterator text) {
super.setTarget(text);
- m_textBeginOffset_ = targetText.getBeginIndex();
- m_textLimitOffset_ = targetText.getEndIndex();
- m_colEIter_.setText(targetText);
- m_charBreakIter_.setText(targetText);
+ textIter_.setText(text);
}
-
+
+ /**
+ * Return the index in the target text where the iterator is currently
+ * positioned at.
+ * If the iteration has gone past the end of the target text or past
+ * the beginning for a backwards search, {@link #DONE} is returned.
+ * @return index in the target text where the iterator is currently
+ * positioned at
+ * @stable ICU 2.8
+ */
+ @Override
+ public int getIndex() {
+ int result = textIter_.getOffset();
+ if (isOutOfBounds(search_.beginIndex(), search_.endIndex(), result)) {
+ return DONE;
+ }
+ return result;
+ }
+
/**
* <p>
* Sets the position in the target text which the next search will start
* @see #getIndex
* @stable ICU 2.8
*/
- public void setIndex(int position)
- {
+ @Override
+ public void setIndex(int position) {
+ // Java porting note: This method is equivalent to setOffset() in ICU4C.
+ // ICU4C SearchIterator::setOffset() is a pure virtual method, while
+ // ICU4J SearchIterator.setIndex() is not abstract method.
+
super.setIndex(position);
- m_matchedIndex_ = DONE;
- m_colEIter_.setExactOffset(position);
- }
-
- /**
- * <p>
- * Set the canonical match mode. See class documentation for details.
- * The default setting for this property is false.
- * </p>
- * @param allowCanonical flag indicator if canonical matches are allowed
- * @see #isCanonical
- * @stable ICU 2.8
- */
- public void setCanonical(boolean allowCanonical)
- {
- m_isCanonicalMatch_ = allowCanonical;
- if (m_isCanonicalMatch_ == true) {
- if (m_canonicalPrefixAccents_ == null) {
- m_canonicalPrefixAccents_ = new StringBuilder();
- }
- else {
- m_canonicalPrefixAccents_.delete(0,
- m_canonicalPrefixAccents_.length());
- }
- if (m_canonicalSuffixAccents_ == null) {
- m_canonicalSuffixAccents_ = new StringBuilder();
- }
- else {
- m_canonicalSuffixAccents_.delete(0,
- m_canonicalSuffixAccents_.length());
- }
- }
+ textIter_.setOffset(position);
}
-
- // public miscellaneous methods -----------------------------------------
-
+
/**
* <p>
* Resets the search iteration. All properties will be reset to the
* </p>
* @stable ICU 2.8
*/
- public void reset()
- {
- // reset is setting the attributes that are already in string search,
- // hence all attributes in the collator should be retrieved without any
- // problems
- super.reset();
- m_isCanonicalMatch_ = false;
- m_ceMask_ = getMask(m_collator_.getStrength());
- // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
- initialize();
- m_colEIter_.setCollator(m_collator_);
- m_colEIter_.reset();
- m_utilColEIter_.setCollator(m_collator_);
+ @Override
+ public void reset() {
+ // reset is setting the attributes that are already in
+ // string search, hence all attributes in the collator should
+ // be retrieved without any problems
+
+ boolean sameCollAttribute = true;
+ int ceMask;
+ boolean shift;
+ int varTop;
+
+ // **** hack to deal w/ how processed CEs encode quaternary ****
+ int newStrength = collator_.getStrength();
+ if ((strength_ < Collator.QUATERNARY && newStrength >= Collator.QUATERNARY)
+ || (strength_ >= Collator.QUATERNARY && newStrength < Collator.QUATERNARY)) {
+ sameCollAttribute = false;
+ }
+
+ strength_ = collator_.getStrength();
+ ceMask = getMask(strength_);
+ if (ceMask_ != ceMask) {
+ ceMask_ = ceMask;
+ sameCollAttribute = false;
+ }
+
+ shift = collator_.isAlternateHandlingShifted();
+ if (toShift_ != shift) {
+ toShift_ = shift;
+ sameCollAttribute = false;
+ }
+
+ varTop = collator_.getVariableTop();
+ if (variableTop_ != varTop) {
+ variableTop_ = varTop;
+ sameCollAttribute = false;
+ }
+
+ if (!sameCollAttribute) {
+ initialize();
+ }
+
+ textIter_.setText(search_.text());
+
+ search_.setMatchedLength(0);
+ search_.matchedIndex_ = DONE;
+ search_.isOverlap_ = false;
+ search_.isCanonicalMatch_ = false;
+ search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
+ search_.isForwardSearching_ = true;
+ search_.reset_ = true;
}
- // protected methods -----------------------------------------------------
-
/**
* <p>
* Concrete method to provide the mechanism
* for finding the next <b>forwards</b> match in the target text.
* See super class documentation for its use.
* </p>
- * @param start index in the target text at which the forwards search
+ * @param position index in the target text at which the forwards search
* should begin.
* @return the starting index of the next forwards match if found, DONE
* otherwise
* @see #DONE
* @stable ICU 2.8
*/
- protected int handleNext(int start)
- {
- if (m_pattern_.m_CELength_ == 0) {
- matchLength = 0;
- if (m_matchedIndex_ == DONE && start == m_textBeginOffset_) {
- m_matchedIndex_ = start;
- return m_matchedIndex_;
- }
-
- targetText.setIndex(start);
- char ch = targetText.current();
- // ch can never be done, it is handled by next()
- char ch2 = targetText.next();
- if (ch2 == CharacterIterator.DONE) {
- m_matchedIndex_ = DONE;
- }
- else {
- m_matchedIndex_ = targetText.getIndex();
- }
- if (UTF16.isLeadSurrogate(ch) && UTF16.isTrailSurrogate(ch2)) {
- targetText.next();
- m_matchedIndex_ = targetText.getIndex();
+ @Override
+ protected int handleNext(int position) {
+ if (pattern_.CELength_ == 0) {
+ search_.matchedIndex_ = search_.matchedIndex_ == DONE ?
+ getIndex() : search_.matchedIndex_ + 1;
+ search_.setMatchedLength(0);
+ textIter_.setOffset(search_.matchedIndex_);
+ if (search_.matchedIndex_ == search_.endIndex()) {
+ search_.matchedIndex_ = DONE;
}
- }
- else {
- if (matchLength <= 0) {
- // we must have reversed direction after we reached the start
- // of the target text
- // see SearchIterator next(), it checks the bounds and returns
- // if it exceeds the range. It does not allow setting of
- // m_matchedIndex
- if (start == m_textBeginOffset_) {
- m_matchedIndex_ = DONE;
- }
- else {
- // for boundary check purposes. this will ensure that the
- // next match will not preceed the current offset
- // note search->matchedIndex will always be set to something
- // in the code
- m_matchedIndex_ = start - 1;
- }
+ } else {
+ if (search_.matchedLength() <= 0) {
+ // the flipping direction issue has already been handled
+ // in next()
+ // for boundary check purposes. this will ensure that the
+ // next match will not precede the current offset
+ // note search_.matchedIndex_ will always be set to something
+ // in the code
+ search_.matchedIndex_ = position - 1;
}
-
- // status checked below
- if (m_isCanonicalMatch_) {
- // can't use exact here since extra accents are allowed.
- handleNextCanonical(start);
+
+ textIter_.setOffset(position);
+
+ // ICU4C comment:
+ // if strsrch_->breakIter is always the same as m_breakiterator_
+ // then we don't need to check the match boundaries here because
+ // usearch_handleNextXXX will already have done it.
+ if (search_.isCanonicalMatch_) {
+ // *could* actually use exact here 'cause no extra accents allowed...
+ handleNextCanonical();
+ } else {
+ handleNextExact();
}
- else {
- handleNextExact(start);
+
+ if (search_.matchedIndex_ == DONE) {
+ textIter_.setOffset(search_.endIndex());
+ } else {
+ textIter_.setOffset(search_.matchedIndex_);
}
+
+ return search_.matchedIndex_;
}
- if (m_matchedIndex_ == DONE) {
- targetText.setIndex(m_textLimitOffset_);
- }
- else {
- targetText.setIndex(m_matchedIndex_);
- }
- return m_matchedIndex_;
+
+ return DONE;
}
-
+
/**
* <p>
* Concrete method to provide the mechanism
* for finding the next <b>backwards</b> match in the target text.
* See super class documentation for its use.
* </p>
- * @param start index in the target text at which the backwards search
+ * @param position index in the target text at which the backwards search
* should begin.
* @return the starting index of the next backwards match if found, DONE
* otherwise
* @see #DONE
* @stable ICU 2.8
*/
- protected int handlePrevious(int start)
- {
- if (m_pattern_.m_CELength_ == 0) {
- matchLength = 0;
- // start can never be DONE or 0, it is handled in previous
- targetText.setIndex(start);
- char ch = targetText.previous();
- if (ch == CharacterIterator.DONE) {
- m_matchedIndex_ = DONE;
- }
- else {
- m_matchedIndex_ = targetText.getIndex();
- if (UTF16.isTrailSurrogate(ch)) {
- if (UTF16.isLeadSurrogate(targetText.previous())) {
- m_matchedIndex_ = targetText.getIndex();
- }
- }
- }
- }
- else {
- if (matchLength == 0) {
- // we must have reversed direction after we reached the end
- // of the target text
- // see SearchIterator next(), it checks the bounds and returns
- // if it exceeds the range. It does not allow setting of
- // m_matchedIndex
- m_matchedIndex_ = DONE;
- }
- if (m_isCanonicalMatch_) {
- // can't use exact here since extra accents are allowed.
- handlePreviousCanonical(start);
- }
- else {
- handlePreviousExact(start);
+ @Override
+ protected int handlePrevious(int position) {
+ if (pattern_.CELength_ == 0) {
+ search_.matchedIndex_ =
+ search_.matchedIndex_ == DONE ? getIndex() : search_.matchedIndex_;
+ if (search_.matchedIndex_ == search_.beginIndex()) {
+ setMatchNotFound();
+ } else {
+ search_.matchedIndex_--;
+ textIter_.setOffset(search_.matchedIndex_);
+ search_.setMatchedLength(0);
}
- }
+ } else {
+ textIter_.setOffset(position);
- if (m_matchedIndex_ == DONE) {
- targetText.setIndex(m_textBeginOffset_);
- }
- else {
- targetText.setIndex(m_matchedIndex_);
+ if (search_.isCanonicalMatch_) {
+ // *could* use exact match here since extra accents *not* allowed!
+ handlePreviousCanonical();
+ } else {
+ handlePreviousExact();
+ }
}
- return m_matchedIndex_;
- }
- // private static inner classes ----------------------------------------
-
- private static class Pattern
- {
- // protected methods -----------------------------------------------
-
- /**
- * Pattern string
- */
- protected String targetText;
- /**
- * Array containing the collation elements of targetText
- */
- protected int m_CE_[];
- /**
- * Number of collation elements in m_CE_
- */
- protected int m_CELength_;
- /**
- * Flag indicator if targetText starts with an accent
- */
- protected boolean m_hasPrefixAccents_;
- /**
- * Flag indicator if targetText ends with an accent
- */
- protected boolean m_hasSuffixAccents_;
- /**
- * Default number of characters to shift for Boyer Moore
- */
- protected int m_defaultShiftSize_;
- /**
- * Number of characters to shift for Boyer Moore, depending on the
- * source text to search
- */
- protected char m_shift_[];
- /**
- * Number of characters to shift backwards for Boyer Moore, depending
- * on the source text to search
- */
- protected char m_backShift_[];
-
- // protected constructors ------------------------------------------
-
- /**
- * Empty constructor
- */
- protected Pattern(String pattern)
- {
- targetText = pattern;
- m_CE_ = new int[INITIAL_ARRAY_SIZE_];
- m_CELength_ = 0;
- m_hasPrefixAccents_ = false;
- m_hasSuffixAccents_ = false;
- m_defaultShiftSize_ = 1;
- m_shift_ = new char[MAX_TABLE_SIZE_];
- m_backShift_ = new char[MAX_TABLE_SIZE_];
- }
+ return search_.matchedIndex_;
}
+ // ------------------ Internal implementation code ---------------------------
- // private data members ------------------------------------------------
-
- /**
- * target text begin offset. Each targetText has a valid contiguous region
- * to iterate and this data member is the offset to the first such
- * character in the region.
- */
- private int m_textBeginOffset_;
- /**
- * target text limit offset. Each targetText has a valid contiguous region
- * to iterate and this data member is the offset to 1 after the last such
- * character in the region.
- */
- private int m_textLimitOffset_;
- /**
- * Upon completion of a search, m_matchIndex_ will store starting offset in
- * m_text for the match. The Value DONE is the default value.
- * If we are not at the start of the text or the end of the text and
- * m_matchedIndex_ is DONE it means that we can find any more matches in
- * that particular direction
- */
- private int m_matchedIndex_;
- /**
- * Current pattern to search for
- */
- private Pattern m_pattern_;
- /**
- * Collator whose rules are used to perform the search
- */
- private RuleBasedCollator m_collator_;
- /**
- * The collation element iterator for the text source.
- */
- private CollationElementIterator m_colEIter_;
- /**
- * Utility collation element, used throughout program for temporary
- * iteration.
- */
- private CollationElementIterator m_utilColEIter_;
- /**
- * The mask used on the collation elements to retrieve the valid strength
- * weight
- */
- private int m_ceMask_;
- /**
- * Buffer storing accents during a canonical search
- */
- private StringBuilder m_canonicalPrefixAccents_;
- /**
- * Buffer storing accents during a canonical search
- */
- private StringBuilder m_canonicalSuffixAccents_;
- /**
- * Flag to indicate if canonical search is to be done.
- * E.g looking for "a\u0300" in "a\u0318\u0300" will yield the match at 0.
- */
- private boolean m_isCanonicalMatch_;
- /**
- * Character break iterator for boundary checking.
- */
- private BreakIterator m_charBreakIter_;
- private final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
- /**
- * Size of the shift tables
- */
- private static final int MAX_TABLE_SIZE_ = 257;
- /**
- * Initial array size
- */
private static final int INITIAL_ARRAY_SIZE_ = 256;
- /**
- * Utility mask
- */
- private static final int SECOND_LAST_BYTE_SHIFT_ = 8;
- /**
- * Utility mask
- */
- private static final int LAST_BYTE_MASK_ = 0xff;
- /**
- * Utility buffer for return values and temporary storage
- */
- private int m_utilBuffer_[] = new int[2];
- /**
- * Unsigned 32-Bit Integer Mask
- */
- private static final long UNSIGNED_32BIT_MASK = 0xffffffffL;
- // private methods -------------------------------------------------------
+ // *** Boyer-Moore ***
+ // private static final Normalizer2Impl nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
+ // private static final int LAST_BYTE_MASK_ = 0xff;
+ // private static final int SECOND_LAST_BYTE_SHIFT_ = 8;
+
+ private static final int PRIMARYORDERMASK = 0xffff0000;
+ private static final int SECONDARYORDERMASK = 0x0000ff00;
+ private static final int TERTIARYORDERMASK = 0x000000ff;
/**
- * Hash a collation element from its full size (32 bits) down into a
- * value that can be used as an index into the shift tables. Right
- * now we do a modulus by the size of the hash table.
- * @param ce collation element
- * @return collapsed version of the collation element
+ * Getting the mask for collation strength
+ * @param strength collation strength
+ * @return collation element mask
*/
- private static final int hash(int ce)
- {
- // the old value UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_ does not work
- // well with the new collation where most of the latin 1 characters
- // are of the value xx000xxx. their hashes will most of the time be 0
- // to be discussed on the hash algo.
- return CollationElementIterator.primaryOrder(ce) % MAX_TABLE_SIZE_;
+ private static int getMask(int strength) {
+ switch (strength) {
+ case Collator.PRIMARY:
+ return PRIMARYORDERMASK;
+ case Collator.SECONDARY:
+ return SECONDARYORDERMASK | PRIMARYORDERMASK;
+ default:
+ return TERTIARYORDERMASK | SECONDARYORDERMASK | PRIMARYORDERMASK;
+ }
}
- private final char getFCD(int c) {
- return (char)m_nfcImpl_.getFCD16(c);
- }
- /**
- * Gets the fcd value for a character at the argument index.
- * This method takes into accounts of the supplementary characters.
- * Note this method changes the offset in the character iterator.
- * @param str UTF16 string where character for fcd retrieval resides
- * @param offset position of the character whose fcd is to be retrieved
- * @return fcd value
- */
- private final char getFCD(CharacterIterator str, int offset)
- {
- char ch = str.setIndex(offset);
+
+ // *** Boyer-Moore ***
+ /*
+ private final char getFCD(String str, int offset) {
+ char ch = str.charAt(offset);
if (ch < 0x180) {
- return (char)m_nfcImpl_.getFCD16FromBelow180(ch);
- } else if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(ch)) {
+ return (char) nfcImpl_.getFCD16FromBelow180(ch);
+ } else if (nfcImpl_.singleLeadMightHaveNonZeroFCD16(ch)) {
if (!Character.isHighSurrogate(ch)) {
- return (char)m_nfcImpl_.getFCD16FromNormData(ch);
+ return (char) nfcImpl_.getFCD16FromNormData(ch);
} else {
- char c2 = str.next();
- if (Character.isLowSurrogate(c2)) {
- return (char)m_nfcImpl_.getFCD16FromNormData(Character.toCodePoint(ch, c2));
+ char c2;
+ if (++offset < str.length() && Character.isLowSurrogate(c2 = str.charAt(offset))) {
+ return (char) nfcImpl_.getFCD16FromNormData(Character.toCodePoint(ch, c2));
}
}
}
return 0;
}
+
+ private final char getFCD(int c) {
+ return (char)nfcImpl_.getFCD16(c);
+ }
+ */
+
/**
- * Gets the FCD value for the code point before the input offset.
- * Modifies the iterator's index.
- * @param iter text iterator
- * @param offset index after the character to test
- * @return FCD value for the character before offset
+ * Getting the modified collation elements taking into account the collation
+ * attributes.
+ *
+ * @param sourcece
+ * @return the modified collation element
*/
- private final int getFCDBefore(CharacterIterator iter, int offset) {
- iter.setIndex(offset);
- char c = iter.previous();
- if (c < 0x180) {
- return (char)m_nfcImpl_.getFCD16FromBelow180(c);
- } else if (!Character.isLowSurrogate(c)) {
- if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(c)) {
- return (char)m_nfcImpl_.getFCD16FromNormData(c);
- }
- } else {
- char lead = iter.previous();
- if (Character.isHighSurrogate(lead)) {
- return (char)m_nfcImpl_.getFCD16FromNormData(Character.toCodePoint(lead, c));
- }
- }
- return 0;
- }
- /**
- * Gets the fcd value for a character at the argument index.
- * This method takes into accounts of the supplementary characters.
- * @param str UTF16 string where character for fcd retrieval resides
- * @param offset position of the character whose fcd is to be retrieved
- * @return fcd value
- */
- private final char getFCD(String str, int offset)
- {
- char ch = str.charAt(offset);
- if (ch < 0x180) {
- return (char)m_nfcImpl_.getFCD16FromBelow180(ch);
- } else if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(ch)) {
- if (!Character.isHighSurrogate(ch)) {
- return (char)m_nfcImpl_.getFCD16FromNormData(ch);
- } else {
- char c2;
- if (++offset < str.length() && Character.isLowSurrogate(c2 = str.charAt(offset))) {
- return (char)m_nfcImpl_.getFCD16FromNormData(Character.toCodePoint(ch, c2));
- }
- }
- }
- return 0;
- }
-
- /**
- * Getting the modified collation elements taking into account the collation
- * attributes
- * @param ce
- * @return the modified collation element
- */
- private final int getCE(int ce)
- {
+ private int getCE(int sourcece) {
// note for tertiary we can't use the collator->tertiaryMask, that
// is a preprocessed mask that takes into account case options. since
// we are only concerned with exact matches, we don't need that.
- ce &= m_ceMask_;
-
- if (m_collator_.isAlternateHandlingShifted()) {
- // alternate handling here, since only the 16 most significant
- // digits is only used, we can safely do a compare without masking
+ sourcece &= ceMask_;
+
+ if (toShift_) {
+ // alternate handling here, since only the 16 most significant digits
+ // are used, we can safely do a compare without masking
// if the ce is a variable, we mask and get only the primary values
// no shifting to quartenary is required since all primary values
// less than variabletop will need to be masked off anyway.
- if (((m_collator_.m_variableTopValue_ << 16) & UNSIGNED_32BIT_MASK) > (ce & UNSIGNED_32BIT_MASK)) {
- if (m_collator_.getStrength() == Collator.QUATERNARY) {
- ce = CollationElementIterator.primaryOrder(ce);
- }
- else {
- ce = CollationElementIterator.IGNORABLE;
+ if (variableTop_ > sourcece) {
+ if (strength_ >= Collator.QUATERNARY) {
+ sourcece &= PRIMARYORDERMASK;
+ } else {
+ sourcece = CollationElementIterator.IGNORABLE;
}
}
+ } else if (strength_ >= Collator.QUATERNARY && sourcece == CollationElementIterator.IGNORABLE) {
+ sourcece = 0xFFFF;
}
-
- return ce;
+
+ return sourcece;
}
-
+
/**
- * Appends a int to a int array, increasing the size of the array when
- * we are out of space.
- * @param offset in array to append to
- * @param value to append
- * @param array to append to
- * @return the array appended to, this could be a new and bigger array
- */
- private static final int[] append(int offset, int value, int array[])
- {
- if (offset >= array.length) {
- int temp[] = new int[offset + INITIAL_ARRAY_SIZE_];
- System.arraycopy(array, 0, temp, 0, array.length);
- array = temp;
- }
- array[offset] = value;
- return array;
+ * Direct port of ICU4C static int32_t * addTouint32_tArray(...) in usearch.cpp.
+ * This is used for appending a PCE to Pattern.PCE_ buffer. We probably should
+ * implement this in Pattern class.
+ *
+ * @param destination target array
+ * @param offset destination offset to add value
+ * @param destinationlength target array size
+ * @param value to be added
+ * @param increments incremental size expected
+ * @return new destination array, destination if there was no new allocation
+ */
+ private static int[] addToIntArray(int[] destination, int offset, int destinationlength,
+ int value, int increments) {
+ int newlength = destinationlength;
+ if (offset + 1 == newlength) {
+ newlength += increments;
+ int temp[] = new int[newlength];
+ System.arraycopy(destination, 0, temp, 0, offset);
+ destination = temp;
+ }
+ destination[offset] = value;
+ return destination;
}
-
+
/**
- * Initializing the ce table for a pattern. Stores non-ignorable collation
- * keys. Table size will be estimated by the size of the pattern text.
- * Table expansion will be perform as we go along. Adding 1 to ensure that
- * the table size definitely increases.
- * Internal method, status assumed to be a success.
- * @return total number of expansions
- */
- private final int initializePatternCETable()
- {
- m_utilColEIter_.setText(m_pattern_.targetText);
-
+ * Direct port of ICU4C static int64_t * addTouint64_tArray(...) in usearch.cpp.
+ * This is used for appending a PCE to Pattern.PCE_ buffer. We probably should
+ * implement this in Pattern class.
+ *
+ * @param destination target array
+ * @param offset destination offset to add value
+ * @param destinationlength target array size
+ * @param value to be added
+ * @param increments incremental size expected
+ * @return new destination array, destination if there was no new allocation
+ */
+ private static long[] addToLongArray(long[] destination, int offset, int destinationlength,
+ long value, int increments) {
+ int newlength = destinationlength;
+ if (offset + 1 == newlength) {
+ newlength += increments;
+ long temp[] = new long[newlength];
+ System.arraycopy(destination, 0, temp, 0, offset);
+ destination = temp;
+ }
+ destination[offset] = value;
+ return destination;
+ }
+
+ /**
+ * Initializing the ce table for a pattern.
+ * Stores non-ignorable collation keys.
+ * Table size will be estimated by the size of the pattern text. Table
+ * expansion will be performed as we go along. Adding 1 to ensure that the table
+ * size definitely increases.
+ * @return total number of expansions
+ */
+ // TODO: We probably do not need Pattern CE table.
+ private int initializePatternCETable() {
+ int[] cetable = new int[INITIAL_ARRAY_SIZE_];
+ int cetablesize = cetable.length;
+ int patternlength = pattern_.text_.length();
+ CollationElementIterator coleiter = utilIter_;
+
+ if (coleiter == null) {
+ coleiter = new CollationElementIterator(pattern_.text_, collator_);
+ utilIter_ = coleiter;
+ } else {
+ coleiter.setText(pattern_.text_);
+ }
+
int offset = 0;
int result = 0;
- int ce = m_utilColEIter_.next();
-
- while (ce != CollationElementIterator.NULLORDER) {
+ int ce;
+
+ while ((ce = coleiter.next()) != CollationElementIterator.NULLORDER) {
int newce = getCE(ce);
- if (newce != CollationElementIterator.IGNORABLE) {
- m_pattern_.m_CE_ = append(offset, newce, m_pattern_.m_CE_);
- offset ++;
+ if (newce != CollationElementIterator.IGNORABLE /* 0 */) {
+ int[] temp = addToIntArray(cetable, offset, cetablesize, newce,
+ patternlength - coleiter.getOffset() + 1);
+ offset++;
+ cetable = temp;
}
- result += m_utilColEIter_.getMaxExpansion(ce) - 1;
- ce = m_utilColEIter_.next();
+ result += (coleiter.getMaxExpansion(ce) - 1);
}
-
- m_pattern_.m_CE_ = append(offset, 0, m_pattern_.m_CE_);
- m_pattern_.m_CELength_ = offset;
-
+
+ cetable[offset] = 0;
+ pattern_.CE_ = cetable;
+ pattern_.CELength_ = offset;
+
return result;
}
-
+
/**
- * Initializes the pattern struct.
- * Internal method, status assumed to be success.
- * @return expansionsize the total expansion size of the pattern
- */
- private final int initializePattern()
- {
- if (m_collator_.getStrength() == Collator.PRIMARY) {
- m_pattern_.m_hasPrefixAccents_ = false;
- m_pattern_.m_hasSuffixAccents_ = false;
+ * Initializing the pce table for a pattern.
+ * Stores non-ignorable collation keys.
+ * Table size will be estimated by the size of the pattern text. Table
+ * expansion will be performed as we go along. Adding 1 to ensure that the table
+ * size definitely increases.
+ * @return total number of expansions
+ */
+ private int initializePatternPCETable() {
+ long[] pcetable = new long[INITIAL_ARRAY_SIZE_];
+ int pcetablesize = pcetable.length;
+ int patternlength = pattern_.text_.length();
+ CollationElementIterator coleiter = utilIter_;
+
+ if (coleiter == null) {
+ coleiter = new CollationElementIterator(pattern_.text_, collator_);
+ utilIter_ = coleiter;
} else {
- m_pattern_.m_hasPrefixAccents_ = (getFCD(m_pattern_.targetText, 0)
- >> SECOND_LAST_BYTE_SHIFT_) != 0;
- m_pattern_.m_hasSuffixAccents_ = (getFCD(m_pattern_.targetText.codePointBefore(
- m_pattern_.targetText.length()))
- & LAST_BYTE_MASK_) != 0;
- }
- // since intializePattern is an internal method status is a success.
- return initializePatternCETable();
- }
-
- /**
- * Initializing shift tables, with the default values.
- * If a corresponding default value is 0, the shift table is not set.
- * @param shift table for forwards shift
- * @param backshift table for backwards shift
- * @param cetable table containing pattern ce
- * @param cesize size of the pattern ces
- * @param expansionsize total size of the expansions
- * @param defaultforward the default forward value
- * @param defaultbackward the default backward value
- */
- private final void setShiftTable(char shift[],
- char backshift[],
- int cetable[], int cesize,
- int expansionsize,
- char defaultforward,
- char defaultbackward)
- {
- // estimate the value to shift. to do that we estimate the smallest
- // number of characters to give the relevant ces, ie approximately
- // the number of ces minus their expansion, since expansions can come
- // from a character.
- for (int count = 0; count < MAX_TABLE_SIZE_; count ++) {
- shift[count] = defaultforward;
- }
- cesize --; // down to the last index
- for (int count = 0; count < cesize; count ++) {
- // number of ces from right of array to the count
- int temp = defaultforward - count - 1;
- shift[hash(cetable[count])] = temp > 1 ? ((char)temp) : 1;
- }
- shift[hash(cetable[cesize])] = 1;
- // for ignorables we just shift by one. see test examples.
- shift[hash(0)] = 1;
-
- for (int count = 0; count < MAX_TABLE_SIZE_; count ++) {
- backshift[count] = defaultbackward;
- }
- for (int count = cesize; count > 0; count --) {
- // the original value count does not seem to work
- backshift[hash(cetable[count])] = (char)(count > expansionsize ?
- count - expansionsize : 1);
- }
- backshift[hash(cetable[0])] = 1;
- backshift[hash(0)] = 1;
- }
-
- /**
- * <p>Building of the pattern collation element list and the Boyer Moore
- * StringSearch table.</p>
- * <p>The canonical match will only be performed after the default match
- * fails.</p>
- * <p>For both cases we need to remember the size of the composed and
- * decomposed versions of the string. Since the Boyer-Moore shift
- * calculations shifts by a number of characters in the text and tries to
- * match the pattern from that offset, the shift value can not be too large
- * in case we miss some characters. To choose a right shift size, we
- * estimate the NFC form of the and use its size as a shift guide. The NFC
- * form should be the small possible representation of the pattern. Anyways,
- * we'll err on the smaller shift size. Hence the calculation for
- * minlength. Canonical match will be performed slightly differently. We'll
- * split the pattern into 3 parts, the prefix accents (PA), the middle
- * string bounded by the first and last base character (MS), the ending
- * accents (EA). Matches will be done on MS first, and only when we match
- * MS then some processing will be required for the prefix and end accents
- * in order to determine if they match PA and EA. Hence the default shift
- * values for the canonical match will take the size of either end's accent
- * into consideration. Forwards search will take the end accents into
- * consideration for the default shift values and the backwards search will
- * take the prefix accents into consideration.</p>
- * <p>If pattern has no non-ignorable ce, we return a illegal argument
- * error.</p>
- */
- private final void initialize()
- {
- int expandlength = initializePattern();
- if (m_pattern_.m_CELength_ > 0) {
- char minlength = (char)(m_pattern_.m_CELength_ > expandlength
- ? m_pattern_.m_CELength_ - expandlength : 1);
- m_pattern_.m_defaultShiftSize_ = minlength;
- setShiftTable(m_pattern_.m_shift_, m_pattern_.m_backShift_,
- m_pattern_.m_CE_, m_pattern_.m_CELength_,
- expandlength, minlength, minlength);
- }
- else {
- m_pattern_.m_defaultShiftSize_ = 0;
- }
- }
-
- /**
- * Determine whether the search text bounded by the offset start and end is
- * one or more whole units of text as determined by the breakiterator in
- * StringSearch.
- * @param start target text start offset
- * @param end target text end offset
- */
- private final boolean isBreakUnit(int start, int end)
- {
- if (breakIterator != null) {
- int startindex = breakIterator.first();
- int endindex = breakIterator.last();
-
- // out-of-range indexes are never boundary positions
- if (start < startindex || start > endindex || end < startindex
- || end > endindex) {
- return false;
- }
- // otherwise, we can use following() on the position before the
- // specified one and return true of the position we get back is the
- // one the user specified
- boolean result = (start == startindex
- || breakIterator.following(start - 1) == start)
- && (end == endindex
- || breakIterator.following(end - 1) == end);
- if (result) {
- // iterates the individual ces
- m_utilColEIter_.setText(
- new CharacterIteratorWrapper(targetText), start);
- for (int count = 0; count < m_pattern_.m_CELength_;
- count ++) {
- int ce = getCE(m_utilColEIter_.next());
- if (ce == CollationElementIterator.IGNORABLE) {
- count --;
- continue;
- }
- if (ce != m_pattern_.m_CE_[count]) {
- return false;
- }
- }
- int nextce = m_utilColEIter_.next();
- while (m_utilColEIter_.getOffset() == end
- && getCE(nextce) == CollationElementIterator.IGNORABLE) {
- nextce = m_utilColEIter_.next();
- }
- if (nextce != CollationElementIterator.NULLORDER
- && m_utilColEIter_.getOffset() == end) {
- // extra collation elements at the end of the match
- return false;
- }
- }
- return result;
+ coleiter.setText(pattern_.text_);
}
- return true;
- }
- /**
- * Getting the next base character offset if current offset is an accent,
- * or the current offset if the current character contains a base character.
- * accents the following base character will be returned
- * @param text string
- * @param textoffset current offset
- * @param textlength length of text string
- * @return the next base character or the current offset
- * if the current character is contains a base character.
- */
- private final int getNextBaseOffset(CharacterIterator text, int textoffset)
- {
- if (textoffset >= text.getEndIndex()) {
- return textoffset;
- }
- // iteration ends with reading CharacterIterator.DONE which has fcd==0
- char c = text.setIndex(textoffset);
- for (;;) {
- if (c < Normalizer2Impl.MIN_CCC_LCCC_CP || !m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(c)) {
- return textoffset;
- }
- char next = text.next();
- if (Character.isSurrogatePair(c, next)) {
- int fcd = m_nfcImpl_.getFCD16FromNormData(Character.toCodePoint(c, next));
- if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
- return textoffset;
- }
- next = text.next();
- textoffset += 2;
- } else {
- int fcd = m_nfcImpl_.getFCD16FromNormData(c);
- if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
- return textoffset;
- }
- ++textoffset;
- }
- c = next;
+ int offset = 0;
+ int result = 0;
+ long pce;
+
+ CollationPCE iter = new CollationPCE(coleiter);
+
+ // ** Should processed CEs be signed or unsigned?
+ // ** (the rest of the code in this file seems to play fast-and-loose with
+ // ** whether a CE is signed or unsigned. For example, look at routine above this one.)
+ while ((pce = iter.nextProcessed(null)) != CollationPCE.PROCESSED_NULLORDER) {
+ long[] temp = addToLongArray(pcetable, offset, pcetablesize, pce, patternlength - coleiter.getOffset() + 1);
+ offset++;
+ pcetable = temp;
}
+
+ pcetable[offset] = 0;
+ pattern_.PCE_ = pcetable;
+ pattern_.PCELength_ = offset;
+
+ return result;
}
- /**
- * Gets the next base character offset depending on the string search
- * pattern data
- * @param textoffset one offset away from the last character
- * to search for.
- * @return start index of the next base character or the current offset
- * if the current character is contains a base character.
- */
- private final int getNextBaseOffset(int textoffset)
- {
- if (m_pattern_.m_hasSuffixAccents_ && textoffset < m_textLimitOffset_) {
- if ((getFCDBefore(targetText, textoffset) & LAST_BYTE_MASK_) != 0) {
- return getNextBaseOffset(targetText, textoffset);
- }
+ // TODO: This method only triggers initializePatternCETable(), which is probably no
+ // longer needed.
+ private int initializePattern() {
+ // Since the strength is primary, accents are ignored in the pattern.
+
+ // *** Boyer-Moore ***
+ /*
+ if (strength_ == Collator.PRIMARY) {
+ pattern_.hasPrefixAccents_ = false;
+ pattern_.hasSuffixAccents_ = false;
+ } else {
+ pattern_.hasPrefixAccents_ = (getFCD(pattern_.text_, 0) >>> SECOND_LAST_BYTE_SHIFT_) != 0;
+ pattern_.hasSuffixAccents_ = (getFCD(pattern_.text_.codePointBefore(pattern_.text_.length())) & LAST_BYTE_MASK_) != 0;
}
- return textoffset;
+ */
+
+ pattern_.PCE_ = null;
+
+ // since initializePattern is an internal method, status is a success.
+ return initializePatternCETable();
}
- /**
- * Shifting the collation element iterator position forward to prepare for
- * a following match. If the last character is a unsafe character, we'll
- * only shift by 1 to capture contractions, normalization etc.
- * Internal method, status assumed to be success.
- * @param textoffset start text position to do search
- * @param ce the text ce which failed the match.
- * @param patternceindex index of the ce within the pattern ce buffer which
- * failed the match
- * @return final offset
+ // *** Boyer-Moore ***
+ /*
+ private final void setShiftTable(char shift[],
+ char backshift[],
+ int cetable[], int cesize,
+ int expansionsize,
+ int defaultforward,
+ int defaultbackward) {
+ // No implementation
+ }
*/
- private int shiftForward(int textoffset, int ce, int patternceindex)
-
- {
- if (ce != CollationElementIterator.NULLORDER) {
- int shift = m_pattern_.m_shift_[hash(ce)];
- // this is to adjust for characters in the middle of the
- // substring for matching that failed.
- int adjust = m_pattern_.m_CELength_ - patternceindex;
- if (adjust > 1 && shift >= adjust) {
- shift -= adjust - 1;
- }
- textoffset += shift;
- }
- else {
- textoffset += m_pattern_.m_defaultShiftSize_;
- }
-
- textoffset = getNextBaseOffset(textoffset);
- // check for unsafe characters
- // * if it is the start or middle of a contraction: to be done after
- // a initial match is found
- // * thai or lao base consonant character: similar to contraction
- // * high surrogate character: similar to contraction
- // * next character is a accent: shift to the next base character
- return textoffset;
+
+ // TODO: This method only triggers initializePattern(), which is probably no
+ // longer needed.
+ private void initialize() {
+ /* int expandlength = */ initializePattern();
+
+ // *** Boyer-Moore ***
+ /*
+ if (pattern_.CELength_ > 0) {
+ int cesize = pattern_.CELength_;
+ int minlength = cesize > expandlength ? cesize - expandlength : 1;
+ pattern_.defaultShiftSize_ = minlength;
+ setShiftTable(pattern_.shift_, pattern_.backShift_, pattern_.CE_, cesize,
+ expandlength, minlength, minlength);
+ return;
+ }
+ return pattern_.defaultShiftSize_;
+ */
}
-
+
/**
- * Gets the offset to the next safe point in text.
- * ie. not the middle of a contraction, swappable characters or
- * supplementary characters.
- * @param textoffset offset in string
- * @param end offset in string
- * @return offset to the next safe character
- */
- private final int getNextSafeOffset(int textoffset, int end)
- {
- int result = textoffset; // first contraction character
- targetText.setIndex(result);
- while (result != end &&
- m_collator_.isUnsafe(targetText.current())) {
- result ++;
- targetText.setIndex(result);
- }
- return result;
- }
-
- /**
- * This checks for accents in the potential match started with a composite
- * character.
- * This is really painful... we have to check that composite character do
- * not have any extra accents. We have to normalize the potential match and
- * find the immediate decomposed character before the match.
- * The first composite character would have been taken care of by the fcd
- * checks in checkForwardExactMatch.
- * This is the slow path after the fcd of the first character and
- * the last character has been checked by checkForwardExactMatch and we
- * determine that the potential match has extra non-ignorable preceding
- * ces.
- * E.g. looking for \u0301 acute in \u01FA A ring above and acute,
- * checkExtraMatchAccent should fail since there is a middle ring in
- * \u01FA Note here that accents checking are slow and cautioned in the API
- * docs.
- * Internal method, status assumed to be a success, caller should check
- * status before calling this method
- * @param start index of the potential unfriendly composite character
- * @param end index of the potential unfriendly composite character
- * @return true if there is non-ignorable accents before at the beginning
- * of the match, false otherwise.
+ * @internal
+ * @deprecated This API is ICU internal only.
*/
- private final boolean checkExtraMatchAccents(int start, int end)
- {
- boolean result = false;
- if (m_pattern_.m_hasPrefixAccents_) {
- targetText.setIndex(start);
-
- if (UTF16.isLeadSurrogate(targetText.next())) {
- if (!UTF16.isTrailSurrogate(targetText.next())) {
- targetText.previous();
- }
- }
- // we are only concerned with the first composite character
- String str = getString(targetText, start, end);
- if (Normalizer.quickCheck(str, Normalizer.NFD,0)
- == Normalizer.NO) {
- int safeoffset = getNextSafeOffset(start, end);
- if (safeoffset != end) {
- safeoffset ++;
- }
- String decomp = Normalizer.decompose(
- str.substring(0, safeoffset - start), false);
- m_utilColEIter_.setText(decomp);
- int firstce = m_pattern_.m_CE_[0];
- boolean ignorable = true;
- int ce = CollationElementIterator.IGNORABLE;
- int offset = 0;
- while (ce != firstce) {
- offset = m_utilColEIter_.getOffset();
- if (ce != firstce
- && ce != CollationElementIterator.IGNORABLE) {
- ignorable = false;
- }
- ce = m_utilColEIter_.next();
- }
- m_utilColEIter_.setExactOffset(offset); // back up 1 to the
- m_utilColEIter_.previous(); // right offset
- offset = m_utilColEIter_.getOffset();
- result = !ignorable && (UCharacter.getCombiningClass(
- UTF16.charAt(decomp, offset)) != 0);
- }
- }
-
- return result;
- }
-
- /**
- * Used by exact matches, checks if there are accents before the match.
- * This is really painful... we have to check that composite characters at
- * the start of the matches have to not have any extra accents.
- * We check the FCD of the character first, if it starts with an accent and
- * the first pattern ce does not match the first ce of the character, we
- * bail.
- * Otherwise we try normalizing the first composite
- * character and find the immediate decomposed character before the match to
- * see if it is an non-ignorable accent.
- * Now normalizing the first composite character is enough because we ensure
- * that when the match is passed in here with extra beginning ces, the
- * first or last ce that match has to occur within the first character.
- * E.g. looking for \u0301 acute in \u01FA A ring above and acute,
- * checkExtraMatchAccent should fail since there is a middle ring in \u01FA
- * Note here that accents checking are slow and cautioned in the API docs.
- * @param start offset
- * @param end offset
- * @return true if there are accents on either side of the match,
- * false otherwise
- */
- private final boolean hasAccentsBeforeMatch(int start, int end)
- {
- if (m_pattern_.m_hasPrefixAccents_) {
- // we have been iterating forwards previously
- boolean ignorable = true;
- int firstce = m_pattern_.m_CE_[0];
- m_colEIter_.setExactOffset(start);
- int ce = getCE(m_colEIter_.next());
- while (ce != firstce) {
- if (ce != CollationElementIterator.IGNORABLE) {
- ignorable = false;
- }
- ce = getCE(m_colEIter_.next());
- }
- if (!ignorable && m_colEIter_.isInBuffer()) {
- // within normalization buffer, discontiguous handled here
- return true;
- }
-
- // within text
- boolean accent = (getFCD(targetText, start) >> SECOND_LAST_BYTE_SHIFT_)
- != 0;
- if (!accent) {
- return checkExtraMatchAccents(start, end);
- }
- if (!ignorable) {
- return true;
- }
- if (start > m_textBeginOffset_) {
- targetText.setIndex(start);
- targetText.previous();
- if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_)
- != 0) {
- m_colEIter_.setExactOffset(start);
- ce = m_colEIter_.previous();
- if (ce != CollationElementIterator.NULLORDER
- && ce != CollationElementIterator.IGNORABLE) {
- return true;
- }
- }
- }
+ protected void setMatchNotFound() {
+ super.setMatchNotFound();
+ // SearchIterator#setMatchNotFound() does following:
+ // search_.matchedIndex_ = DONE;
+ // search_.setMatchedLength(0);
+ if (search_.isForwardSearching_) {
+ textIter_.setOffset(search_.text().getEndIndex());
+ } else {
+ textIter_.setOffset(0);
}
-
- return false;
}
-
+
/**
- * Used by exact matches, checks if there are accents bounding the match.
- * Note this is the initial boundary check. If the potential match
- * starts or ends with composite characters, the accents in those
- * characters will be determined later.
- * Not doing backwards iteration here, since discontiguos contraction for
- * backwards collation element iterator, use up too many characters.
- * E.g. looking for \u030A ring in \u01FA A ring above and acute,
- * should fail since there is a acute at the end of \u01FA
- * Note here that accents checking are slow and cautioned in the API docs.
- * @param start offset of match
- * @param end end offset of the match
- * @return true if there are accents on either side of the match,
- * false otherwise
+ * Checks if the offset runs out of the text string range
+ * @param textstart offset of the first character in the range
+ * @param textlimit limit offset of the text string range
+ * @param offset to test
+ * @return true if offset is out of bounds, false otherwise
*/
- private final boolean hasAccentsAfterMatch(int start, int end)
- {
- if (m_pattern_.m_hasSuffixAccents_) {
- targetText.setIndex(end);
- if (end > m_textBeginOffset_
- && UTF16.isTrailSurrogate(targetText.previous())) {
- if (targetText.getIndex() > m_textBeginOffset_ &&
- !UTF16.isLeadSurrogate(targetText.previous())) {
- targetText.next();
- }
- }
- if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) != 0) {
- int firstce = m_pattern_.m_CE_[0];
- m_colEIter_.setExactOffset(start);
- while (getCE(m_colEIter_.next()) != firstce) {
- }
- int count = 1;
- while (count < m_pattern_.m_CELength_) {
- if (getCE(m_colEIter_.next())
- == CollationElementIterator.IGNORABLE) {
- count --;
- }
- count ++;
- }
- //int ce = getCE(m_colEIter_.next());
- int ce = m_colEIter_.next();
- if (ce != CollationElementIterator.NULLORDER
- && ce != CollationElementIterator.IGNORABLE) {
- ce = getCE(ce);
- }
- if (ce != CollationElementIterator.NULLORDER
- && ce != CollationElementIterator.IGNORABLE) {
- if (m_colEIter_.getOffset() <= end) {
- return true;
- }
- if ((getFCD(targetText, end) >> SECOND_LAST_BYTE_SHIFT_)
- != 0) {
- return true;
- }
- }
- }
- }
- return false;
- }
-
- /**
- * Checks if the offset runs out of the text string range
- * @param textstart offset of the first character in the range
- * @param textlimit limit offset of the text string range
- * @param offset to test
- * @return true if offset is out of bounds, false otherwise
- */
- private static final boolean isOutOfBounds(int textstart, int textlimit,
- int offset)
- {
+ private static final boolean isOutOfBounds(int textstart, int textlimit, int offset) {
return offset < textstart || offset > textlimit;
}
-
+
/**
* Checks for identical match
- * @param strsrch string search data
* @param start offset of possible match
* @param end offset of possible match
- * @return true if identical match is found
+ * @return TRUE if identical match is found
*/
- private final boolean checkIdentical(int start, int end)
- {
- if (m_collator_.getStrength() != Collator.IDENTICAL) {
+ private boolean checkIdentical(int start, int end) {
+ if (strength_ != Collator.IDENTICAL) {
return true;
}
-
+ // Note: We could use Normalizer::compare() or similar, but for short strings
+ // which may not be in FCD it might be faster to just NFD them.
String textstr = getString(targetText, start, end - start);
- if (Normalizer.quickCheck(textstr, Normalizer.NFD,0)
- == Normalizer.NO) {
+ if (Normalizer.quickCheck(textstr, Normalizer.NFD, 0) == Normalizer.NO) {
textstr = Normalizer.decompose(textstr, false);
}
- String patternstr = m_pattern_.targetText;
- if (Normalizer.quickCheck(patternstr, Normalizer.NFD,0)
- == Normalizer.NO) {
+ String patternstr = pattern_.text_;
+ if (Normalizer.quickCheck(patternstr, Normalizer.NFD, 0) == Normalizer.NO) {
patternstr = Normalizer.decompose(patternstr, false);
}
return textstr.equals(patternstr);
}
-
- /**
- * Checks to see if the match is repeated
- * @param start new match start index
- * @param limit new match limit index
- * @return true if the the match is repeated, false otherwise
- */
- private final boolean checkRepeatedMatch(int start, int limit)
- {
- if (m_matchedIndex_ == DONE) {
- return false;
- }
- int end = limit - 1; // last character in the match
- int lastmatchend = m_matchedIndex_ + matchLength - 1;
- if (!isOverlapping()) {
- return (start >= m_matchedIndex_ && start <= lastmatchend)
- || (end >= m_matchedIndex_ && end <= lastmatchend)
- || (start <= m_matchedIndex_ && end >= lastmatchend);
-
+
+ private boolean initTextProcessedIter() {
+ if (textProcessedIter_ == null) {
+ textProcessedIter_ = new CollationPCE(textIter_);
+ } else {
+ textProcessedIter_.init(textIter_);
}
- return start <= m_matchedIndex_ && end >= lastmatchend;
- }
-
- /**
- * Checks match for contraction.
- * If the match ends with a partial contraction we fail.
- * If the match starts too far off (because of backwards iteration) we try
- * to chip off the extra characters depending on whether a breakiterator
- * has been used.
- * Temporary utility buffer used to return modified start and end.
- * @param start offset of potential match, to be modified if necessary
- * @param end offset of potential match, to be modified if necessary
- * @return true if match passes the contraction test, false otherwise.
- */
- private final boolean checkNextExactContractionMatch(int start, int end)
- {
- // This part checks if either ends of the match contains potential
- // contraction. If so we'll have to iterate through them
- char endchar = 0;
- if (end < m_textLimitOffset_) {
- targetText.setIndex(end);
- endchar = targetText.current();
- }
- char poststartchar = 0;
- if (start + 1 < m_textLimitOffset_) {
- targetText.setIndex(start + 1);
- poststartchar = targetText.current();
- }
- if (m_collator_.isUnsafe(endchar)
- || m_collator_.isUnsafe(poststartchar)) {
- // expansion prefix, what's left to iterate
- int bufferedCEOffset = m_colEIter_.m_CEBufferOffset_;
- boolean hasBufferedCE = bufferedCEOffset > 0;
- m_colEIter_.setExactOffset(start);
- int temp = start;
- while (bufferedCEOffset > 0) {
- // getting rid of the redundant ce, caused by setOffset.
- // since backward contraction/expansion may have extra ces if
- // we are in the normalization buffer, hasAccentsBeforeMatch
- // would have taken care of it.
- // E.g. the character \u01FA will have an expansion of 3, but
- // if we are only looking for acute and ring \u030A and \u0301,
- // we'll have to skip the first ce in the expansion buffer.
- m_colEIter_.next();
- if (m_colEIter_.getOffset() != temp) {
- start = temp;
- temp = m_colEIter_.getOffset();
- }
- bufferedCEOffset --;
- }
-
- int count = 0;
- while (count < m_pattern_.m_CELength_) {
- int ce = getCE(m_colEIter_.next());
- if (ce == CollationElementIterator.IGNORABLE) {
- continue;
- }
- if (hasBufferedCE && count == 0
- && m_colEIter_.getOffset() != temp) {
- start = temp;
- temp = m_colEIter_.getOffset();
- }
- if (ce != m_pattern_.m_CE_[count]) {
- end ++;
- end = getNextBaseOffset(end);
- m_utilBuffer_[0] = start;
- m_utilBuffer_[1] = end;
- return false;
- }
- count ++;
- }
- }
- m_utilBuffer_[0] = start;
- m_utilBuffer_[1] = end;
return true;
}
-
-
- /**
- * Checks and sets the match information if found.
- * Checks
- * <ul>
- * <li> the potential match does not repeat the previous match
- * <li> boundaries are correct
- * <li> exact matches has no extra accents
- * <li> identical matchesb
- * <li> potential match does not end in the middle of a contraction
- * </ul>
- * Otherwise the offset will be shifted to the next character.
- * The result m_matchIndex_ and m_matchLength_ will be set to the truncated
- * more fitting result value.
- * Uses the temporary utility buffer for storing the modified textoffset.
- * @param textoffset offset in the collation element text.
- * @return true if the match is valid, false otherwise
+
+ /*
+ * Find the next break boundary after startIndex. If the UStringSearch object
+ * has an external break iterator, use that. Otherwise use the internal character
+ * break iterator.
*/
- private final boolean checkNextExactMatch(int textoffset)
- {
- int start = m_colEIter_.getOffset();
- if (!checkNextExactContractionMatch(start, textoffset)) {
- // returns the modified textoffset
- m_utilBuffer_[0] = m_utilBuffer_[1];
- return false;
- }
-
- start = m_utilBuffer_[0];
- textoffset = m_utilBuffer_[1];
- // this totally matches, however we need to check if it is repeating
- if (!isBreakUnit(start, textoffset)
- || checkRepeatedMatch(start, textoffset)
- || hasAccentsBeforeMatch(start, textoffset)
- || !checkIdentical(start, textoffset)
- || hasAccentsAfterMatch(start, textoffset)) {
- textoffset ++;
- textoffset = getNextBaseOffset(textoffset);
- m_utilBuffer_[0] = textoffset;
- return false;
- }
-
- if (m_collator_.getStrength() == Collator.PRIMARY) {
- textoffset = checkBreakBoundary(textoffset);
- }
-
- // totally match, we will get rid of the ending ignorables.
- m_matchedIndex_ = start;
- matchLength = textoffset - start;
- return true;
- }
-
- /**
- * Getting the previous base character offset, or the current offset if the
- * current character is a base character
- * @param text the source text to work on
- * @param textoffset one offset after the current character
- * @return the offset of the next character after the base character or the
- * first composed character with accents
- */
- private final int getPreviousBaseOffset(CharacterIterator text,
- int textoffset)
- {
- if (textoffset > m_textBeginOffset_) {
- while (true) {
- int result = textoffset;
- text.setIndex(result);
- if (UTF16.isTrailSurrogate(text.previous())) {
- if (text.getIndex() != text.getBeginIndex() &&
- !UTF16.isLeadSurrogate(text.previous())) {
- text.next();
- }
- }
- textoffset = text.getIndex();
- char fcd = getFCD(text, textoffset);
- if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
- if ((fcd & LAST_BYTE_MASK_) != 0) {
- return textoffset;
- }
- return result;
- }
- if (textoffset == m_textBeginOffset_) {
- return m_textBeginOffset_;
- }
- }
+ private int nextBoundaryAfter(int startIndex) {
+ BreakIterator breakiterator = search_.breakIter();
+
+ if (breakiterator == null) {
+ breakiterator = search_.internalBreakIter_;
}
- return textoffset;
- }
-
- /**
- * Getting the indexes of the accents that are not blocked in the argument
- * accent array
- * @param accents accents in nfd.
- * @param accentsindex array to store the indexes of accents in accents that
- * are not blocked
- * @return the length of populated accentsindex
- */
- private int getUnblockedAccentIndex(StringBuilder accents,
- int accentsindex[])
- {
- int index = 0;
- int length = accents.length();
- int cclass = 0;
- int result = 0;
- while (index < length) {
- int codepoint = UTF16.charAt(accents, index);
- int tempclass = UCharacter.getCombiningClass(codepoint);
- if (tempclass != cclass) {
- cclass = tempclass;
- accentsindex[result] = index;
- result ++;
- }
- if (UCharacter.isSupplementary(codepoint)) {
- index += 2;
- }
- else {
- index ++;
- }
+
+ if (breakiterator != null) {
+ return breakiterator.following(startIndex);
}
- accentsindex[result] = length;
- return result;
+
+ return startIndex;
}
- /**
- * Appends 3 StringBuilder/CharacterIterator together into a destination
- * string buffer.
- * @param source1 string buffer
- * @param source2 character iterator
- * @param start2 start of the character iterator to merge
- * @param end2 end of the character iterator to merge
- * @param source3 string buffer
- * @return appended string buffer
+ /*
+ * Returns TRUE if index is on a break boundary. If the UStringSearch
+ * has an external break iterator, test using that, otherwise test
+ * using the internal character break iterator.
*/
- private static final StringBuilder merge(StringBuilder source1,
- CharacterIterator source2,
- int start2, int end2,
- StringBuilder source3)
- {
- StringBuilder result = new StringBuilder();
- if (source1 != null && source1.length() != 0) {
- result.append(source1);
- }
- source2.setIndex(start2);
- while (source2.getIndex() < end2) {
- result.append(source2.current());
- source2.next();
- }
- if (source3 != null && source3.length() != 0) {
- result.append(source3);
+ private boolean isBreakBoundary(int index) {
+ BreakIterator breakiterator = search_.breakIter();
+
+ if (breakiterator == null) {
+ breakiterator = search_.internalBreakIter_;
}
- return result;
+
+ return (breakiterator != null && breakiterator.isBoundary(index));
}
-
- /**
- * Running through a collation element iterator to see if the contents
- * matches pattern in string search data
- * @param coleiter collation element iterator to test
- * @return true if a match if found, false otherwise
- */
- private final boolean checkCollationMatch(CollationElementIterator coleiter)
- {
- int patternceindex = m_pattern_.m_CELength_;
- int offset = 0;
- while (patternceindex > 0) {
- int ce = getCE(coleiter.next());
- if (ce == CollationElementIterator.IGNORABLE) {
- continue;
- }
- if (ce != m_pattern_.m_CE_[offset]) {
- return false;
- }
- offset ++;
- patternceindex --;
+
+
+ // Java porting note: the following constants correspond to the UCompareCEsResult enum
+ private static final int CE_MATCH = -1;
+ private static final int CE_NO_MATCH = 0;
+ private static final int CE_SKIP_TARG = 1;
+ private static final int CE_SKIP_PATN = 2;
+
+ private static int CE_LEVEL2_BASE = 0x00000005;
+ private static int CE_LEVEL3_BASE = 0x00050000;
+
+ private static int compareCE64s(long targCE, long patCE, ElementComparisonType compareType) {
+ if (targCE == patCE) {
+ return CE_MATCH;
}
- return true;
- }
-
- /**
- * Rearranges the front accents to try matching.
- * Prefix accents in the text will be grouped according to their combining
- * class and the groups will be mixed and matched to try find the perfect
- * match with the pattern.
- * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
- * step 1: split "\u030A\u0301" into 6 other type of potential accent
- * substrings "\u030A", "\u0301", "\u0325", "\u030A\u0301",
- * "\u030A\u0325", "\u0301\u0325".
- * step 2: check if any of the generated substrings matches the pattern.
- * Internal method, status is assumed to be success, caller has to check
- * status before calling this method.
- * @param start first offset of the accents to start searching
- * @param end start of the last accent set
- * @return DONE if a match is not found, otherwise return the starting
- * offset of the match. Note this start includes all preceding
- * accents.
- */
- private int doNextCanonicalPrefixMatch(int start, int end)
- {
- if ((getFCD(targetText, start) & LAST_BYTE_MASK_) == 0) {
- // die... failed at a base character
- return DONE;
+ if (compareType == ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
+ return CE_NO_MATCH;
}
-
- start = targetText.getIndex(); // index changed by fcd
- int offset = getNextBaseOffset(targetText, start);
- start = getPreviousBaseOffset(start);
-
- StringBuilder accents = new StringBuilder();
- String accentstr = getString(targetText, start, offset - start);
- // normalizing the offensive string
- if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0)
- == Normalizer.NO) {
- accentstr = Normalizer.decompose(accentstr, false);
- }
- accents.append(accentstr);
-
- int accentsindex[] = new int[INITIAL_ARRAY_SIZE_];
- int accentsize = getUnblockedAccentIndex(accents, accentsindex);
- int count = (2 << (accentsize - 1)) - 1;
- while (count > 0) {
- // copy the base characters
- m_canonicalPrefixAccents_.delete(0,
- m_canonicalPrefixAccents_.length());
- int k = 0;
- for (; k < accentsindex[0]; k ++) {
- m_canonicalPrefixAccents_.append(accents.charAt(k));
+
+ long targCEshifted = targCE >>> 32;
+ long patCEshifted = patCE >>> 32;
+ long mask;
+
+ mask = 0xFFFF0000L;
+ int targLev1 = (int)(targCEshifted & mask);
+ int patLev1 = (int)(patCEshifted & mask);
+ if (targLev1 != patLev1) {
+ if (targLev1 == 0) {
+ return CE_SKIP_TARG;
}
- // forming all possible canonical rearrangement by dropping
- // sets of accents
- for (int i = 0; i <= accentsize - 1; i ++) {
- int mask = 1 << (accentsize - i - 1);
- if ((count & mask) != 0) {
- for (int j = accentsindex[i]; j < accentsindex[i + 1];
- j ++) {
- m_canonicalPrefixAccents_.append(accents.charAt(j));
- }
- }
+ if (patLev1 == 0
+ && compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD) {
+ return CE_SKIP_PATN;
}
- StringBuilder match = merge(m_canonicalPrefixAccents_,
- targetText, offset, end,
- m_canonicalSuffixAccents_);
-
- // if status is a failure, ucol_setText does nothing.
- // run the collator iterator through this match
- m_utilColEIter_.setText(match.toString());
- if (checkCollationMatch(m_utilColEIter_)) {
- return start;
+ return CE_NO_MATCH;
+ }
+
+ mask = 0x0000FFFFL;
+ int targLev2 = (int)(targCEshifted & mask);
+ int patLev2 = (int)(patCEshifted & mask);
+ if (targLev2 != patLev2) {
+ if (targLev2 == 0) {
+ return CE_SKIP_TARG;
}
- count --;
+ if (patLev2 == 0
+ && compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD) {
+ return CE_SKIP_PATN;
+ }
+ return (patLev2 == CE_LEVEL2_BASE ||
+ (compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD &&
+ targLev2 == CE_LEVEL2_BASE)) ? CE_MATCH : CE_NO_MATCH;
}
- return DONE;
- }
- /**
- * Gets the offset to the safe point in text before textoffset.
- * ie. not the middle of a contraction, swappable characters or
- * supplementary characters.
- * @param start offset in string
- * @param textoffset offset in string
- * @return offset to the previous safe character
- */
- private final int getPreviousSafeOffset(int start, int textoffset)
- {
- int result = textoffset; // first contraction character
- targetText.setIndex(textoffset);
- while (result >= start && m_collator_.isUnsafe(targetText.previous())) {
- result = targetText.getIndex();
- }
- if (result != start) {
- // the first contraction character is consider unsafe here
- result = targetText.getIndex(); // originally result --;
- }
- return result;
+ mask = 0xFFFF0000L;
+ int targLev3 = (int)(targCE & mask);
+ int patLev3 = (int)(patCE & mask);
+ if (targLev3 != patLev3) {
+ return (patLev3 == CE_LEVEL3_BASE ||
+ (compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD &&
+ targLev3 == CE_LEVEL3_BASE) )? CE_MATCH: CE_NO_MATCH;
+ }
+
+ return CE_MATCH;
}
/**
- * Take the rearranged end accents and tries matching. If match failed at
- * a seperate preceding set of accents (seperated from the rearranged on by
- * at least a base character) then we rearrange the preceding accents and
- * tries matching again.
- * We allow skipping of the ends of the accent set if the ces do not match.
- * However if the failure is found before the accent set, it fails.
- * Internal method, status assumed to be success, caller has to check
- * status before calling this method.
- * @param textoffset of the start of the rearranged accent
- * @return DONE if a match is not found, otherwise return the starting
- * offset of the match. Note this start includes all preceding
- * accents.
+ * An object used for receiving matched index in search() and
+ * searchBackwards().
*/
- private int doNextCanonicalSuffixMatch(int textoffset)
- {
- int safelength = 0;
- StringBuilder safetext;
- int safeoffset = m_textBeginOffset_;
-
- if (textoffset != m_textBeginOffset_
- && m_canonicalSuffixAccents_.length() > 0
- && m_collator_.isUnsafe(m_canonicalSuffixAccents_.charAt(0))) {
- safeoffset = getPreviousSafeOffset(m_textBeginOffset_,
- textoffset);
- safelength = textoffset - safeoffset;
- safetext = merge(null, targetText, safeoffset, textoffset,
- m_canonicalSuffixAccents_);
- }
- else {
- safetext = m_canonicalSuffixAccents_;
+ private static class Match {
+ int start_ = -1;
+ int limit_ = -1;
+ }
+
+ private boolean search(int startIdx, Match m) {
+ // Input parameter sanity check.
+ if (pattern_.CELength_ == 0
+ || startIdx < search_.beginIndex()
+ || startIdx > search_.endIndex()) {
+ throw new IllegalArgumentException("search(" + startIdx + ", m) - expected position to be between " +
+ search_.beginIndex() + " and " + search_.endIndex());
}
-
- // if status is a failure, ucol_setText does nothing
- CollationElementIterator coleiter = m_utilColEIter_;
- coleiter.setText(safetext.toString());
- // status checked in loop below
-
- int ceindex = m_pattern_.m_CELength_ - 1;
- boolean isSafe = true; // indication flag for position in safe zone
-
- while (ceindex >= 0) {
- int textce = coleiter.previous();
- if (textce == CollationElementIterator.NULLORDER) {
- // check if we have passed the safe buffer
- if (coleiter == m_colEIter_) {
- return DONE;
- }
- coleiter = m_colEIter_;
- if (safetext != m_canonicalSuffixAccents_) {
- safetext.delete(0, safetext.length());
- }
- coleiter.setExactOffset(safeoffset);
- // status checked at the start of the loop
- isSafe = false;
- continue;
+
+ if (pattern_.PCE_ == null) {
+ initializePatternPCETable();
+ }
+
+ textIter_.setOffset(startIdx);
+ CEBuffer ceb = new CEBuffer(this);
+
+ int targetIx = 0;
+ CEI targetCEI = null;
+ int patIx;
+ boolean found;
+
+ int mStart = -1;
+ int mLimit = -1;
+ int minLimit;
+ int maxLimit;
+
+ // Outer loop moves over match starting positions in the
+ // target CE space.
+ // Here we see the target as a sequence of collation elements, resulting from the following:
+ // 1. Target characters were decomposed, and (if appropriate) other compressions and expansions are applied
+ // (for example, digraphs such as IJ may be broken into two characters).
+ // 2. An int64_t CE weight is determined for each resulting unit (high 16 bits are primary strength, next
+ // 16 bits are secondary, next 16 (the high 16 bits of the low 32-bit half) are tertiary. Any of these
+ // fields that are for strengths below that of the collator are set to 0. If this makes the int64_t
+ // CE weight 0 (as for a combining diacritic with secondary weight when the collator strength is primary),
+ // then the CE is deleted, so the following code sees only CEs that are relevant.
+ // For each CE, the lowIndex and highIndex correspond to where this CE begins and ends in the original text.
+ // If lowIndex==highIndex, either the CE resulted from an expansion/decomposition of one of the original text
+ // characters, or the CE marks the limit of the target text (in which case the CE weight is UCOL_PROCESSED_NULLORDER).
+ for (targetIx = 0; ; targetIx++) {
+ found = true;
+ // Inner loop checks for a match beginning at each
+ // position from the outer loop.
+ int targetIxOffset = 0;
+ long patCE = 0;
+ // For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer
+ // (compared to the last CE fetched for the previous targetIx value) as we need to go
+ // for this targetIx value, so if it is non-NULL then other ceb.get calls should be OK.
+ CEI firstCEI = ceb.get(targetIx);
+ if (firstCEI == null) {
+ throw new RuntimeException("CEBuffer.get(" + targetIx + ") returned null.");
}
- textce = getCE(textce);
- if (textce != CollationElementIterator.IGNORABLE
- && textce != m_pattern_.m_CE_[ceindex]) {
- // do the beginning stuff
- int failedoffset = coleiter.getOffset();
- if (isSafe && failedoffset >= safelength) {
- // alas... no hope. failed at rearranged accent set
- return DONE;
- }
- else {
- if (isSafe) {
- failedoffset += safeoffset;
- }
-
- // try rearranging the front accents
- int result = doNextCanonicalPrefixMatch(failedoffset,
- textoffset);
- if (result != DONE) {
- // if status is a failure, ucol_setOffset does nothing
- m_colEIter_.setExactOffset(result);
+
+ for (patIx = 0; patIx < pattern_.PCELength_; patIx++) {
+ patCE = pattern_.PCE_[patIx];
+ targetCEI = ceb.get(targetIx + patIx + targetIxOffset);
+ // Compare CE from target string with CE from the pattern.
+ // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
+ // which will fail the compare, below.
+ int ceMatch = compareCE64s(targetCEI.ce_, patCE, search_.elementComparisonType_);
+ if (ceMatch == CE_NO_MATCH) {
+ found = false;
+ break;
+ } else if (ceMatch > CE_NO_MATCH) {
+ if (ceMatch == CE_SKIP_TARG) {
+ // redo with same patCE, next targCE
+ patIx--;
+ targetIxOffset++;
+ } else { // ceMatch == CE_SKIP_PATN
+ // redo with same targCE, next patCE
+ targetIxOffset--;
}
- return result;
}
}
- if (textce == m_pattern_.m_CE_[ceindex]) {
- ceindex --;
- }
- }
- // set offset here
- if (isSafe) {
- int result = coleiter.getOffset();
- // sets the text iterator with the correct expansion and offset
- int leftoverces = coleiter.m_CEBufferOffset_;
- if (result >= safelength) {
- result = textoffset;
- }
- else {
- result += safeoffset;
+ targetIxOffset += pattern_.PCELength_; // this is now the offset in target CE space to end of the match so far
+
+ if (!found && ((targetCEI == null) || (targetCEI.ce_ != CollationPCE.PROCESSED_NULLORDER))) {
+ // No match at this targetIx. Try again at the next.
+ continue;
}
- m_colEIter_.setExactOffset(result);
- m_colEIter_.m_CEBufferOffset_ = leftoverces;
- return result;
- }
-
- return coleiter.getOffset();
- }
-
- /**
- * Trying out the substring and sees if it can be a canonical match.
- * This will try normalizing the end accents and arranging them into
- * canonical equivalents and check their corresponding ces with the pattern
- * ce.
- * Suffix accents in the text will be grouped according to their combining
- * class and the groups will be mixed and matched to try find the perfect
- * match with the pattern.
- * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
- * step 1: split "\u030A\u0301" into 6 other type of potential accent
- * substrings
- * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
- * "\u0301\u0325".
- * step 2: check if any of the generated substrings matches the pattern.
- * @param textoffset end offset in the collation element text that ends with
- * the accents to be rearranged
- * @return true if the match is valid, false otherwise
- */
- private boolean doNextCanonicalMatch(int textoffset)
- {
- int offset = m_colEIter_.getOffset();
- targetText.setIndex(textoffset);
- if (UTF16.isTrailSurrogate(targetText.previous())
- && targetText.getIndex() > m_textBeginOffset_) {
- if (!UTF16.isLeadSurrogate(targetText.previous())) {
- targetText.next();
+
+ if (!found) {
+ // No match at all, we have run off the end of the target text.
+ break;
}
- }
- if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) == 0) {
- if (m_pattern_.m_hasPrefixAccents_) {
- offset = doNextCanonicalPrefixMatch(offset, textoffset);
- if (offset != DONE) {
- m_colEIter_.setExactOffset(offset);
- return true;
+
+ // We have found a match in CE space.
+ // Now determine the bounds in string index space.
+ // There still is a chance of match failure if the CE range does not correspond to
+ // an acceptable character range.
+ //
+ CEI lastCEI = ceb.get(targetIx + targetIxOffset -1);
+
+ mStart = firstCEI.lowIndex_;
+ minLimit = lastCEI.lowIndex_;
+
+ // Look at the CE following the match. If it is UCOL_NULLORDER the match
+ // extended to the end of input, and the match is good.
+
+ // Look at the high and low indices of the CE following the match. If
+ // they are the same it means one of two things:
+ // 1. The match extended to the last CE from the target text, which is OK, or
+ // 2. The last CE that was part of the match is in an expansion that extends
+ // to the first CE after the match. In this case, we reject the match.
+ CEI nextCEI = null;
+ if (search_.elementComparisonType_ == ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
+ nextCEI = ceb.get(targetIx + targetIxOffset);
+ maxLimit = nextCEI.lowIndex_;
+ if (nextCEI.lowIndex_ == nextCEI.highIndex_ && nextCEI.ce_ != CollationPCE.PROCESSED_NULLORDER) {
+ found = false;
+ }
+ } else {
+ for (;; ++targetIxOffset) {
+ nextCEI = ceb.get(targetIx + targetIxOffset);
+ maxLimit = nextCEI.lowIndex_;
+ // If we are at the end of the target too, match succeeds
+ if (nextCEI.ce_ == CollationPCE.PROCESSED_NULLORDER) {
+ break;
+ }
+ // As long as the next CE has primary weight of 0,
+ // it is part of the last target element matched by the pattern;
+ // make sure it can be part of a match with the last patCE
+ if ((((nextCEI.ce_) >>> 32) & 0xFFFF0000L) == 0) {
+ int ceMatch = compareCE64s(nextCEI.ce_, patCE, search_.elementComparisonType_);
+ if (ceMatch == CE_NO_MATCH || ceMatch == CE_SKIP_PATN ) {
+ found = false;
+ break;
+ }
+ // If lowIndex == highIndex, this target CE is part of an expansion of the last matched
+ // target element, but it has non-zero primary weight => match fails
+ } else if ( nextCEI.lowIndex_ == nextCEI.highIndex_ ) {
+ found = false;
+ break;
+ // Else the target CE is not part of an expansion of the last matched element, match succeeds
+ } else {
+ break;
+ }
}
}
- return false;
- }
-
- if (!m_pattern_.m_hasSuffixAccents_) {
- return false;
- }
-
- StringBuilder accents = new StringBuilder();
- // offset to the last base character in substring to search
- int baseoffset = getPreviousBaseOffset(targetText, textoffset);
- // normalizing the offensive string
- String accentstr = getString(targetText, baseoffset,
- textoffset - baseoffset);
- if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0)
- == Normalizer.NO) {
- accentstr = Normalizer.decompose(accentstr, false);
- }
- accents.append(accentstr);
- // status checked in loop below
-
- int accentsindex[] = new int[INITIAL_ARRAY_SIZE_];
- int size = getUnblockedAccentIndex(accents, accentsindex);
-
- // 2 power n - 1 plus the full set of accents
- int count = (2 << (size - 1)) - 1;
- while (count > 0) {
- m_canonicalSuffixAccents_.delete(0,
- m_canonicalSuffixAccents_.length());
- // copy the base characters
- for (int k = 0; k < accentsindex[0]; k ++) {
- m_canonicalSuffixAccents_.append(accents.charAt(k));
+
+ // Check for the start of the match being within a combining sequence.
+ // This can happen if the pattern itself begins with a combining char, and
+ // the match found combining marks in the target text that were attached
+ // to something else.
+ // This type of match should be rejected for not completely consuming a
+ // combining sequence.
+ if (!isBreakBoundary(mStart)) {
+ found = false;
+ }
+
+ // Check for the start of the match being within an Collation Element Expansion,
+ // meaning that the first char of the match is only partially matched.
+ // With expansions, the first CE will report the index of the source
+ // character, and all subsequent (expansions) CEs will report the source index of the
+ // _following_ character.
+ int secondIx = firstCEI.highIndex_;
+ if (mStart == secondIx) {
+ found = false;
}
- // forming all possible canonical rearrangement by dropping
- // sets of accents
- for (int i = 0; i <= size - 1; i ++) {
- int mask = 1 << (size - i - 1);
- if ((count & mask) != 0) {
- for (int j = accentsindex[i]; j < accentsindex[i + 1];
- j ++) {
- m_canonicalSuffixAccents_.append(accents.charAt(j));
+
+ // Advance the match end position to the first acceptable match boundary.
+ // This advances the index over any combining characters.
+ mLimit = maxLimit;
+ if (minLimit < maxLimit) {
+ // When the last CE's low index is the same as its high index, the CE is likely
+ // part of an expansion. In this case, the index is located just after the
+ // character corresponding to the CEs compared above. If the index is right
+ // at the break boundary, moving the position to the next boundary would result
+ // in an incorrect match length when ignorable characters exist between
+ // the position and the next character that produces CE(s). See ticket#8482.
+ if (minLimit == lastCEI.highIndex_ && isBreakBoundary(minLimit)) {
+ mLimit = minLimit;
+ } else {
+ int nba = nextBoundaryAfter(minLimit);
+ if (nba >= lastCEI.highIndex_) {
+ mLimit = nba;
}
}
}
- offset = doNextCanonicalSuffixMatch(baseoffset);
- if (offset != DONE) {
- return true; // match found
+
+ // If advancing to the end of a combining sequence in character indexing space
+ // advanced us beyond the end of the match in CE space, reject this match.
+ if (mLimit > maxLimit) {
+ found = false;
}
- count --;
- }
- return false;
- }
-
- /**
- * Gets the previous base character offset depending on the string search
- * pattern data
- * @param strsrch string search data
- * @param textoffset current offset, current character
- * @return the offset of the next character after this base character or
- * itself if it is a composed character with accents
- */
- private final int getPreviousBaseOffset(int textoffset)
- {
- if (m_pattern_.m_hasPrefixAccents_ && textoffset > m_textBeginOffset_) {
- int offset = textoffset;
- if ((getFCD(targetText, offset) >> SECOND_LAST_BYTE_SHIFT_) != 0) {
- return getPreviousBaseOffset(targetText, textoffset);
+
+ if (!isBreakBoundary(mLimit)) {
+ found = false;
+ }
+
+ if (!checkIdentical(mStart, mLimit)) {
+ found = false;
+ }
+
+ if (found) {
+ break;
}
}
- return textoffset;
+
+ // All Done. Store back the match bounds to the caller.
+ //
+ if (found == false) {
+ mLimit = -1;
+ mStart = -1;
+ }
+
+ if (m != null) {
+ m.start_ = mStart;
+ m.limit_ = mLimit;
+ }
+
+ return found;
}
-
- /**
- * Checks match for contraction.
- * If the match ends with a partial contraction we fail.
- * If the match starts too far off (because of backwards iteration) we try
- * to chip off the extra characters.
- * Uses the temporary util buffer for return values of the modified start
- * and end.
- * @param start offset of potential match, to be modified if necessary
- * @param end offset of potential match, to be modified if necessary
- * @return true if match passes the contraction test, false otherwise.
- */
- private boolean checkNextCanonicalContractionMatch(int start, int end)
- {
- // This part checks if either ends of the match contains potential
- // contraction. If so we'll have to iterate through them
- char schar = 0;
- char echar = 0;
- if (end < m_textLimitOffset_) {
- targetText.setIndex(end);
- echar = targetText.current();
- }
- if (start < m_textLimitOffset_) {
- targetText.setIndex(start + 1);
- schar = targetText.current();
- }
- if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) {
- int expansion = m_colEIter_.m_CEBufferOffset_;
- boolean hasExpansion = expansion > 0;
- m_colEIter_.setExactOffset(start);
- int temp = start;
- while (expansion > 0) {
- // getting rid of the redundant ce, caused by setOffset.
- // since backward contraction/expansion may have extra ces if
- // we are in the normalization buffer, hasAccentsBeforeMatch
- // would have taken care of it.
- // E.g. the character \u01FA will have an expansion of 3, but
- // if we are only looking for acute and ring \u030A and \u0301,
- // we'll have to skip the first ce in the expansion buffer.
- m_colEIter_.next();
- if (m_colEIter_.getOffset() != temp) {
- start = temp;
- temp = m_colEIter_.getOffset();
+
+ private boolean searchBackwards(int startIdx, Match m) {
+ //ICU4C_TODO comment: reject search patterns beginning with a combining char.
+
+ // Input parameter sanity check.
+ if (pattern_.CELength_ == 0
+ || startIdx < search_.beginIndex()
+ || startIdx > search_.endIndex()) {
+ throw new IllegalArgumentException("searchBackwards(" + startIdx + ", m) - expected position to be between " +
+ search_.beginIndex() + " and " + search_.endIndex());
+ }
+
+ if (pattern_.PCE_ == null) {
+ initializePatternPCETable();
+ }
+
+ CEBuffer ceb = new CEBuffer(this);
+ int targetIx = 0;
+
+ /*
+ * Pre-load the buffer with the CE's for the grapheme
+ * after our starting position so that we're sure that
+ * we can look at the CE following the match when we
+ * check the match boundaries.
+ *
+ * This will also pre-fetch the first CE that we'll
+ * consider for the match.
+ */
+ if (startIdx < search_.endIndex()) {
+ BreakIterator bi = search_.internalBreakIter_;
+ int next = bi.following(startIdx);
+
+ textIter_.setOffset(next);
+
+ for (targetIx = 0; ; targetIx++) {
+ if (ceb.getPrevious(targetIx).lowIndex_ < startIdx) {
+ break;
}
- expansion --;
}
-
- int count = 0;
- while (count < m_pattern_.m_CELength_) {
- int ce = getCE(m_colEIter_.next());
- // status checked below, note that if status is a failure
- // ucol_next returns UCOL_NULLORDER
- if (ce == CollationElementIterator.IGNORABLE) {
- continue;
+ } else {
+ textIter_.setOffset(startIdx);
+ }
+
+ CEI targetCEI = null;
+ int patIx;
+ boolean found;
+
+ int limitIx = targetIx;
+ int mStart = -1;
+ int mLimit = -1;
+ int minLimit;
+ int maxLimit;
+
+ // Outer loop moves over match starting positions in the
+ // target CE space.
+ // Here, targetIx values increase toward the beginning of the base text (i.e. we get the text CEs in reverse order).
+ // But patIx is 0 at the beginning of the pattern and increases toward the end.
+ // So this loop performs a comparison starting with the end of the pattern, and proceeds toward the beginning of the pattern
+ // and the beginning of the base text.
+ for (targetIx = limitIx; ; targetIx++) {
+ found = true;
+ // For targetIx > limitIx, this ceb.getPrevious gets a CE that is as far back in the ring buffer
+ // (compared to the last CE fetched for the previous targetIx value) as we need to go
+ // for this targetIx value, so if it is non-NULL then other ceb.getPrevious calls should be OK.
+ CEI lastCEI = ceb.getPrevious(targetIx);
+ if (lastCEI == null) {
+ throw new RuntimeException("CEBuffer.getPrevious(" + targetIx + ") returned null.");
+ }
+ // Inner loop checks for a match beginning at each
+ // position from the outer loop.
+ int targetIxOffset = 0;
+ for (patIx = pattern_.PCELength_ - 1; patIx >= 0; patIx--) {
+ long patCE = pattern_.PCE_[patIx];
+
+ targetCEI = ceb.getPrevious(targetIx + pattern_.PCELength_ - 1 - patIx + targetIxOffset);
+ // Compare CE from target string with CE from the pattern.
+ // Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
+ // which will fail the compare, below.
+ int ceMatch = compareCE64s(targetCEI.ce_, patCE, search_.elementComparisonType_);
+ if (ceMatch == CE_NO_MATCH) {
+ found = false;
+ break;
+ } else if (ceMatch > CE_NO_MATCH) {
+ if (ceMatch == CE_SKIP_TARG) {
+ // redo with same patCE, next targCE
+ patIx++;
+ targetIxOffset++;
+ } else { // ceMatch == CE_SKIP_PATN
+ // redo with same targCE, next patCE
+ targetIxOffset--;
+ }
}
- if (hasExpansion && count == 0
- && m_colEIter_.getOffset() != temp) {
- start = temp;
- temp = m_colEIter_.getOffset();
+ }
+
+ if (!found && ((targetCEI == null) || (targetCEI.ce_ != CollationPCE.PROCESSED_NULLORDER))) {
+ // No match at this targetIx. Try again at the next.
+ continue;
+ }
+
+ if (!found) {
+ // No match at all, we have run off the end of the target text.
+ break;
+ }
+
+ // We have found a match in CE space.
+ // Now determine the bounds in string index space.
+ // There still is a chance of match failure if the CE range does not correspond to
+ // an acceptable character range.
+ //
+ CEI firstCEI = ceb.getPrevious(targetIx + pattern_.PCELength_ - 1 + targetIxOffset);
+ mStart = firstCEI.lowIndex_;
+
+ // Check for the start of the match being within a combining sequence.
+ // This can happen if the pattern itself begins with a combining char, and
+ // the match found combining marks in the target text that were attached
+ // to something else.
+ // This type of match should be rejected for not completely consuming a
+ // combining sequence.
+ if (!isBreakBoundary(mStart)) {
+ found = false;
+ }
+
+ // Look at the high index of the first CE in the match. If it's the same as the
+ // low index, the first CE in the match is in the middle of an expansion.
+ if (mStart == firstCEI.highIndex_) {
+ found = false;
+ }
+
+ minLimit = lastCEI.lowIndex_;
+
+ if (targetIx > 0) {
+ // Look at the CE following the match. If it is UCOL_NULLORDER the match
+ // extended to the end of input, and the match is good.
+
+ // Look at the high and low indices of the CE following the match. If
+ // they are the same it means one of two things:
+ // 1. The match extended to the last CE from the target text, which is OK, or
+ // 2. The last CE that was part of the match is in an expansion that extends
+ // to the first CE after the match. In this case, we reject the match.
+ CEI nextCEI = ceb.getPrevious(targetIx - 1);
+
+ if (nextCEI.lowIndex_ == nextCEI.highIndex_ && nextCEI.ce_ != CollationPCE.PROCESSED_NULLORDER) {
+ found = false;
}
-
- if (count == 0 && ce != m_pattern_.m_CE_[0]) {
- // accents may have extra starting ces, this occurs when a
- // pure accent pattern is matched without rearrangement
- // text \u0325\u0300 and looking for \u0300
- int expected = m_pattern_.m_CE_[0];
- if ((getFCD(targetText, start) & LAST_BYTE_MASK_) != 0) {
- ce = getCE(m_colEIter_.next());
- while (ce != expected
- && ce != CollationElementIterator.NULLORDER
- && m_colEIter_.getOffset() <= end) {
- ce = getCE(m_colEIter_.next());
- }
+
+ mLimit = maxLimit = nextCEI.lowIndex_;
+
+ // Advance the match end position to the first acceptable match boundary.
+ // This advances the index over any combining characters.
+ if (minLimit < maxLimit) {
+ int nba = nextBoundaryAfter(minLimit);
+
+ if (nba >= lastCEI.highIndex_) {
+ mLimit = nba;
}
}
- if (ce != m_pattern_.m_CE_[count]) {
- end ++;
- end = getNextBaseOffset(end);
- m_utilBuffer_[0] = start;
- m_utilBuffer_[1] = end;
- return false;
+
+ // If advancing to the end of a combining sequence in character indexing space
+ // advanced us beyond the end of the match in CE space, reject this match.
+ if (mLimit > maxLimit) {
+ found = false;
}
- count ++;
+
+ // Make sure the end of the match is on a break boundary
+ if (!isBreakBoundary(mLimit)) {
+ found = false;
+ }
+
+ } else {
+ // No non-ignorable CEs after this point.
+ // The maximum position is detected by boundary after
+ // the last non-ignorable CE. Combining sequence
+ // across the start index will be truncated.
+ int nba = nextBoundaryAfter(minLimit);
+ mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx;
}
- }
- m_utilBuffer_[0] = start;
- m_utilBuffer_[1] = end;
- return true;
- }
- /**
- * Checks and sets the match information if found.
- * Checks
- * <ul>
- * <li> the potential match does not repeat the previous match
- * <li> boundaries are correct
- * <li> potential match does not end in the middle of a contraction
- * <li> identical matches
- * </ul>
- * Otherwise the offset will be shifted to the next character.
- * The result m_matchIndex_ and m_matchLength_ will be set to the truncated
- * more fitting result value.
- * Uses the temporary utility buffer for storing the modified textoffset.
- * @param textoffset offset in the collation element text.
- * @return true if the match is valid, false otherwise
- */
- private boolean checkNextCanonicalMatch(int textoffset)
- {
- // to ensure that the start and ends are not composite characters
- // if we have a canonical accent match
- if ((m_pattern_.m_hasSuffixAccents_
- && m_canonicalSuffixAccents_.length() != 0) ||
- (m_pattern_.m_hasPrefixAccents_
- && m_canonicalPrefixAccents_.length() != 0)) {
- m_matchedIndex_ = getPreviousBaseOffset(m_colEIter_.getOffset());
- matchLength = textoffset - m_matchedIndex_;
- return true;
+ if (!checkIdentical(mStart, mLimit)) {
+ found = false;
+ }
+
+ if (found) {
+ break;
+ }
}
-
- int start = m_colEIter_.getOffset();
- if (!checkNextCanonicalContractionMatch(start, textoffset)) {
- // return the modified textoffset
- m_utilBuffer_[0] = m_utilBuffer_[1];
- return false;
+
+ // All Done. Store back the match bounds to the caller.
+ //
+ if (found == false) {
+ mLimit = -1;
+ mStart = -1;
}
- start = m_utilBuffer_[0];
- textoffset = m_utilBuffer_[1];
- start = getPreviousBaseOffset(start);
- // this totally matches, however we need to check if it is repeating
- if (checkRepeatedMatch(start, textoffset)
- || !isBreakUnit(start, textoffset)
- || !checkIdentical(start, textoffset)) {
- textoffset ++;
- textoffset = getNextBaseOffset(targetText, textoffset);
- m_utilBuffer_[0] = textoffset;
- return false;
+
+ if (m != null) {
+ m.start_ = mStart;
+ m.limit_ = mLimit;
}
-
- m_matchedIndex_ = start;
- matchLength = textoffset - start;
- return true;
+
+ return found;
}
-
- /**
- * Shifting the collation element iterator position forward to prepare for
- * a preceding match. If the first character is a unsafe character, we'll
- * only shift by 1 to capture contractions, normalization etc.
- * @param textoffset start text position to do search
- * @param ce the text ce which failed the match.
- * @param patternceindex index of the ce within the pattern ce buffer which
- * failed the match
- * @return final offset
- */
- private int reverseShift(int textoffset, int ce, int patternceindex)
- {
- if (isOverlapping()) {
- if (textoffset != m_textLimitOffset_) {
- textoffset --;
- }
- else {
- textoffset -= m_pattern_.m_defaultShiftSize_;
- }
+
+ // Java porting note:
+ //
+ // ICU4C usearch_handleNextExact() is identical to usearch_handleNextCanonical()
+ // for the linear search implementation. The differences are addressed in search().
+ //
+ private boolean handleNextExact() {
+ return handleNextCommonImpl();
+ }
+
+ private boolean handleNextCanonical() {
+ return handleNextCommonImpl();
+ }
+
+ private boolean handleNextCommonImpl() {
+ int textOffset = textIter_.getOffset();
+ Match match = new Match();
+
+ if (search(textOffset, match)) {
+ search_.matchedIndex_ = match.start_;
+ search_.setMatchedLength(match.limit_ - match.start_);
+ return true;
+ } else {
+ setMatchNotFound();
+ return false;
}
- else {
- if (ce != CollationElementIterator.NULLORDER) {
- int shift = m_pattern_.m_backShift_[hash(ce)];
-
- // this is to adjust for characters in the middle of the substring
- // for matching that failed.
- int adjust = patternceindex;
- if (adjust > 1 && shift > adjust) {
- shift -= adjust - 1;
- }
- textoffset -= shift;
- }
- else {
- textoffset -= m_pattern_.m_defaultShiftSize_;
- }
- }
-
- textoffset = getPreviousBaseOffset(textoffset);
- return textoffset;
}
- /**
- * Checks match for contraction.
- * If the match starts with a partial contraction we fail.
- * Uses the temporary utility buffer to return the modified start and end.
- * @param start offset of potential match, to be modified if necessary
- * @param end offset of potential match, to be modified if necessary
- * @return true if match passes the contraction test, false otherwise.
- */
- private boolean checkPreviousExactContractionMatch(int start, int end)
- {
- // This part checks if either ends of the match contains potential
- // contraction. If so we'll have to iterate through them
- char echar = 0;
- if (end < m_textLimitOffset_) {
- targetText.setIndex(end);
- echar = targetText.current();
- }
- char schar = 0;
- if (start + 1 < m_textLimitOffset_) {
- targetText.setIndex(start + 1);
- schar = targetText.current();
- }
- if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) {
- // expansion suffix, what's left to iterate
- int expansion = m_colEIter_.m_CEBufferSize_
- - m_colEIter_.m_CEBufferOffset_;
- boolean hasExpansion = expansion > 0;
- m_colEIter_.setExactOffset(end);
- int temp = end;
- while (expansion > 0) {
- // getting rid of the redundant ce
- // since forward contraction/expansion may have extra ces
- // if we are in the normalization buffer, hasAccentsBeforeMatch
- // would have taken care of it.
- // E.g. the character \u01FA will have an expansion of 3, but if
- // we are only looking for A ring A\u030A, we'll have to skip the
- // last ce in the expansion buffer
- m_colEIter_.previous();
- if (m_colEIter_.getOffset() != temp) {
- end = temp;
- temp = m_colEIter_.getOffset();
- }
- expansion --;
- }
-
- int count = m_pattern_.m_CELength_;
- while (count > 0) {
- int ce = getCE(m_colEIter_.previous());
- // status checked below, note that if status is a failure
- // ucol_previous returns UCOL_NULLORDER
- if (ce == CollationElementIterator.IGNORABLE) {
- continue;
- }
- if (hasExpansion && count == 0
- && m_colEIter_.getOffset() != temp) {
- end = temp;
- temp = m_colEIter_.getOffset();
- }
- if (ce != m_pattern_.m_CE_[count - 1]) {
- start --;
- start = getPreviousBaseOffset(targetText, start);
- m_utilBuffer_[0] = start;
- m_utilBuffer_[1] = end;
+ // Java porting note:
+ //
+ // ICU4C usearch_handlePreviousExact() is identical to usearch_handlePreviousCanonical()
+ // for the linear search implementation. The differences are addressed in searchBackwards().
+ //
+ private boolean handlePreviousExact() {
+ return handlePreviousCommonImpl();
+ }
+
+ private boolean handlePreviousCanonical() {
+ return handlePreviousCommonImpl();
+ }
+
+ private boolean handlePreviousCommonImpl() {
+ int textOffset;
+
+ if (search_.isOverlap_) {
+ if (search_.matchedIndex_ != DONE) {
+ textOffset = search_.matchedIndex_ + search_.matchedLength() - 1;
+ } else {
+ // move the start position at the end of possible match
+ initializePatternPCETable();
+ if (!initTextProcessedIter()) {
+ setMatchNotFound();
return false;
}
- count --;
+ for (int nPCEs = 0; nPCEs < pattern_.PCELength_ - 1; nPCEs++) {
+ long pce = textProcessedIter_.nextProcessed(null);
+ if (pce == CollationPCE.PROCESSED_NULLORDER) {
+ // at the end of the text
+ break;
+ }
+ }
+ textOffset = textIter_.getOffset();
}
- }
- m_utilBuffer_[0] = start;
- m_utilBuffer_[1] = end;
- return true;
- }
-
- /**
- * Checks and sets the match information if found.
- * Checks
- * <ul>
- * <li> the current match does not repeat the last match
- * <li> boundaries are correct
- * <li> exact matches has no extra accents
- * <li> identical matches
- * </ul>
- * Otherwise the offset will be shifted to the preceding character.
- * Uses the temporary utility buffer to store the modified textoffset.
- * @param textoffset offset in the collation element text. the returned value
- * will be the truncated start offset of the match or the new start
- * search offset.
- * @return true if the match is valid, false otherwise
- */
- private final boolean checkPreviousExactMatch(int textoffset)
- {
- // to ensure that the start and ends are not composite characters
- int end = m_colEIter_.getOffset();
- if (!checkPreviousExactContractionMatch(textoffset, end)) {
- return false;
+ } else {
+ textOffset = textIter_.getOffset();
}
- textoffset = m_utilBuffer_[0];
- end = m_utilBuffer_[1];
-
- // this totally matches, however we need to check if it is repeating
- // the old match
- if (checkRepeatedMatch(textoffset, end)
- || !isBreakUnit(textoffset, end)
- || hasAccentsBeforeMatch(textoffset, end)
- || !checkIdentical(textoffset, end)
- || hasAccentsAfterMatch(textoffset, end)) {
- textoffset --;
- textoffset = getPreviousBaseOffset(targetText, textoffset);
- m_utilBuffer_[0] = textoffset;
+
+ Match match = new Match();
+ if (searchBackwards(textOffset, match)) {
+ search_.matchedIndex_ = match.start_;
+ search_.setMatchedLength(match.limit_ - match.start_);
+ return true;
+ } else {
+ setMatchNotFound();
return false;
}
-
- if (m_collator_.getStrength() == Collator.PRIMARY) {
- end = checkBreakBoundary(end);
- }
-
- m_matchedIndex_ = textoffset;
- matchLength = end - textoffset;
- return true;
}
/**
- * Rearranges the end accents to try matching.
- * Suffix accents in the text will be grouped according to their combining
- * class and the groups will be mixed and matched to try find the perfect
- * match with the pattern.
- * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
- * step 1: split "\u030A\u0301" into 6 other type of potential accent
- * substrings
- * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
- * "\u0301\u0325".
- * step 2: check if any of the generated substrings matches the pattern.
- * @param start offset of the first base character
- * @param end start of the last accent set
- * @return DONE if a match is not found, otherwise return the ending
- * offset of the match. Note this start includes all following
- * accents.
+ * Gets a substring out of a CharacterIterator
+ *
+ * Java porting note: Not available in ICU4C
+ *
+ * @param text CharacterIterator
+ * @param start start offset
+ * @param length of substring
+ * @return substring from text starting at start and length length
*/
- private int doPreviousCanonicalSuffixMatch(int start, int end)
- {
- targetText.setIndex(end);
- if (UTF16.isTrailSurrogate(targetText.previous())
- && targetText.getIndex() > m_textBeginOffset_) {
- if (!UTF16.isLeadSurrogate(targetText.previous())) {
- targetText.next();
- }
- }
- if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) == 0) {
- // die... failed at a base character
- return DONE;
+ private static final String getString(CharacterIterator text, int start, int length) {
+ StringBuilder result = new StringBuilder(length);
+ int offset = text.getIndex();
+ text.setIndex(start);
+ for (int i = 0; i < length; i++) {
+ result.append(text.current());
+ text.next();
}
- end = getNextBaseOffset(targetText, end);
-
- StringBuilder accents = new StringBuilder();
- int offset = getPreviousBaseOffset(targetText, end);
- // normalizing the offensive string
- String accentstr = getString(targetText, offset, end - offset);
- if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0)
- == Normalizer.NO) {
- accentstr = Normalizer.decompose(accentstr, false);
- }
- accents.append(accentstr);
-
- int accentsindex[] = new int[INITIAL_ARRAY_SIZE_];
- int accentsize = getUnblockedAccentIndex(accents, accentsindex);
- int count = (2 << (accentsize - 1)) - 1;
- while (count > 0) {
- m_canonicalSuffixAccents_.delete(0,
- m_canonicalSuffixAccents_.length());
- // copy the base characters
- for (int k = 0; k < accentsindex[0]; k ++) {
- m_canonicalSuffixAccents_.append(accents.charAt(k));
- }
- // forming all possible canonical rearrangement by dropping
- // sets of accents
- for (int i = 0; i <= accentsize - 1; i ++) {
- int mask = 1 << (accentsize - i - 1);
- if ((count & mask) != 0) {
- for (int j = accentsindex[i]; j < accentsindex[i + 1];
- j ++) {
- m_canonicalSuffixAccents_.append(accents.charAt(j));
- }
- }
- }
- StringBuilder match = merge(m_canonicalPrefixAccents_, targetText,
- start, offset,
- m_canonicalSuffixAccents_);
- // run the collator iterator through this match
- // if status is a failure ucol_setText does nothing
- m_utilColEIter_.setText(match.toString());
- if (checkCollationMatch(m_utilColEIter_)) {
- return end;
- }
- count --;
+ text.setIndex(offset);
+ return result.toString();
+ }
+
+ /**
+ * Java port of ICU4C struct UPattern (usrchimp.h)
+ */
+ private static final class Pattern {
+ /** Pattern string */
+ String text_;
+
+ long[] PCE_;
+ int PCELength_ = 0;
+
+ // TODO: We probably do not need CE_ / CELength_
+ @SuppressWarnings("unused")
+ int[] CE_;
+ int CELength_ = 0;
+
+ // *** Boyer-Moore ***
+ // boolean hasPrefixAccents_ = false;
+ // boolean hasSuffixAccents_ = false;
+ // int defaultShiftSize_;
+ // char[] shift_;
+ // char[] backShift_;
+
+ protected Pattern(String pattern) {
+ text_ = pattern;
}
- return DONE;
}
-
+
/**
- * Take the rearranged start accents and tries matching. If match failed at
- * a seperate following set of accents (seperated from the rearranged on by
- * at least a base character) then we rearrange the preceding accents and
- * tries matching again.
- * We allow skipping of the ends of the accent set if the ces do not match.
- * However if the failure is found before the accent set, it fails.
- * Internal method, status assumed to be success, caller has to check
- * status before calling this method.
- * @param textoffset of the ends of the rearranged accent
- * @return DONE if a match is not found, otherwise return the ending offset
- * of the match. Note this start includes all following accents.
+ * Java port of ICU4C UCollationPCE (usrchimp.h)
*/
- private int doPreviousCanonicalPrefixMatch(int textoffset)
- {
- // int safelength = 0;
- StringBuilder safetext;
- int safeoffset = textoffset;
-
- if (textoffset > m_textBeginOffset_
- && m_collator_.isUnsafe(m_canonicalPrefixAccents_.charAt(
- m_canonicalPrefixAccents_.length() - 1))) {
- safeoffset = getNextSafeOffset(textoffset, m_textLimitOffset_);
- //safelength = safeoffset - textoffset;
- safetext = merge(m_canonicalPrefixAccents_, targetText, textoffset,
- safeoffset, null);
+ private static class CollationPCE {
+ public static final long PROCESSED_NULLORDER = -1;
+
+ private static final int DEFAULT_BUFFER_SIZE = 16;
+ private static final int BUFFER_GROW = 8;
+
+ // Note: PRIMARYORDERMASK is also duplicated in StringSearch class
+ private static final int PRIMARYORDERMASK = 0xffff0000;
+ private static final int CONTINUATION_MARKER = 0xc0;
+
+ private PCEBuffer pceBuffer_ = new PCEBuffer();
+ private CollationElementIterator cei_;
+ private int strength_;
+ private boolean toShift_;
+ private boolean isShifted_;
+ private int variableTop_;
+
+ public CollationPCE(CollationElementIterator iter) {
+ init(iter);
}
- else {
- safetext = m_canonicalPrefixAccents_;
+
+ public void init(CollationElementIterator iter) {
+ cei_ = iter;
+ init(iter.getRuleBasedCollator());
}
-
- // if status is a failure, ucol_setText does nothing
- CollationElementIterator coleiter = m_utilColEIter_;
- coleiter.setText(safetext.toString());
- // status checked in loop below
-
- int ceindex = 0;
- boolean isSafe = true; // safe zone indication flag for position
- int prefixlength = m_canonicalPrefixAccents_.length();
-
- while (ceindex < m_pattern_.m_CELength_) {
- int textce = coleiter.next();
- if (textce == CollationElementIterator.NULLORDER) {
- // check if we have passed the safe buffer
- if (coleiter == m_colEIter_) {
- return DONE;
+
+ private void init(RuleBasedCollator coll) {
+ strength_ = coll.getStrength();
+ toShift_ = coll.isAlternateHandlingShifted();
+ isShifted_ = false;
+ variableTop_ = coll.getVariableTop();
+ }
+
+ @SuppressWarnings("fallthrough")
+ private long processCE(int ce) {
+ long primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
+
+ // This is clean, but somewhat slow...
+ // We could apply the mask to ce and then
+ // just get all three orders...
+ switch (strength_) {
+ default:
+ tertiary = CollationElementIterator.tertiaryOrder(ce);
+ /* note fall-through */
+
+ case Collator.SECONDARY:
+ secondary = CollationElementIterator.secondaryOrder(ce);
+ /* note fall-through */
+
+ case Collator.PRIMARY:
+ primary = CollationElementIterator.primaryOrder(ce);
+ }
+
+ // **** This should probably handle continuations too. ****
+ // **** That means that we need 24 bits for the primary ****
+ // **** instead of the 16 that we're currently using. ****
+ // **** So we can lay out the 64 bits as: 24.12.12.16. ****
+ // **** Another complication with continuations is that ****
+ // **** the *second* CE is marked as a continuation, so ****
+ // **** we always have to peek ahead to know how long ****
+ // **** the primary is... ****
+ if ((toShift_ && variableTop_ > ce && primary != 0) || (isShifted_ && primary == 0)) {
+
+ if (primary == 0) {
+ return CollationElementIterator.IGNORABLE;
}
- if (safetext != m_canonicalPrefixAccents_) {
- safetext.delete(0, safetext.length());
+
+ if (strength_ >= Collator.QUATERNARY) {
+ quaternary = primary;
}
- coleiter = m_colEIter_;
- coleiter.setExactOffset(safeoffset);
- // status checked at the start of the loop
- isSafe = false;
- continue;
+
+ primary = secondary = tertiary = 0;
+ isShifted_ = true;
+ } else {
+ if (strength_ >= Collator.QUATERNARY) {
+ quaternary = 0xFFFF;
+ }
+
+ isShifted_ = false;
}
- textce = getCE(textce);
- if (textce != CollationElementIterator.IGNORABLE
- && textce != m_pattern_.m_CE_[ceindex]) {
- // do the beginning stuff
- int failedoffset = coleiter.getOffset();
- if (isSafe && failedoffset <= prefixlength) {
- // alas... no hope. failed at rearranged accent set
- return DONE;
+
+ return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
+ }
+
+ /**
+ * Get the processed ordering priority of the next collation element in the text.
+ * A single character may contain more than one collation element.
+ *
+ * Note: This is equivalent to
+ * UCollationPCE::nextProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
+ *
+ * @param range receiving the iterator index before/after fetching the CE.
+     * @return The next collation element's ordering, otherwise returns PROCESSED_NULLORDER
+ * if an error has occurred or if the end of string has been reached
+ */
+ public long nextProcessed(Range range) {
+ long result = CollationElementIterator.IGNORABLE;
+ int low = 0, high = 0;
+
+ pceBuffer_.reset();
+
+ do {
+ low = cei_.getOffset();
+ int ce = cei_.next();
+ high = cei_.getOffset();
+
+ if (ce == CollationElementIterator.NULLORDER) {
+ result = PROCESSED_NULLORDER;
+ break;
}
- else {
- if (isSafe) {
- failedoffset = safeoffset - failedoffset;
- if (safetext != m_canonicalPrefixAccents_) {
- safetext.delete(0, safetext.length());
+
+ result = processCE(ce);
+ } while (result == CollationElementIterator.IGNORABLE);
+
+ if (range != null) {
+ range.ixLow_ = low;
+ range.ixHigh_ = high;
+ }
+
+ return result;
+ }
+
+ /**
+ * Get the processed ordering priority of the previous collation element in the text.
+ * A single character may contain more than one collation element.
+ *
+ * Note: This is equivalent to
+ * UCollationPCE::previousProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
+ *
+ * @param range receiving the iterator index before/after fetching the CE.
+     * @return The previous collation element's ordering, otherwise returns
+ * PROCESSED_NULLORDER if an error has occurred or if the start of
+ * string has been reached.
+ */
+ public long previousProcessed(Range range) {
+ long result = CollationElementIterator.IGNORABLE;
+ int low = 0, high = 0;
+
+ // pceBuffer_.reset();
+
+ while (pceBuffer_.empty()) {
+ // buffer raw CEs up to non-ignorable primary
+ RCEBuffer rceb = new RCEBuffer();
+ int ce;
+
+ boolean finish = false;
+
+ // **** do we need to reset rceb, or will it always be empty at this point ****
+ do {
+ high = cei_.getOffset();
+ ce = cei_.previous();
+ low = cei_.getOffset();
+
+ if (ce == CollationElementIterator.NULLORDER) {
+ if (!rceb.empty()) {
+ break;
}
+
+ finish = true;
+ break;
}
-
- // try rearranging the end accents
- int result = doPreviousCanonicalSuffixMatch(textoffset,
- failedoffset);
- if (result != DONE) {
- // if status is a failure, ucol_setOffset does nothing
- m_colEIter_.setExactOffset(result);
+
+ rceb.put(ce, low, high);
+ } while ((ce & PRIMARYORDERMASK) == 0 || isContinuation(ce));
+
+ if (finish) {
+ break;
+ }
+
+ // process the raw CEs
+ while (!rceb.empty()) {
+ RCEI rcei = rceb.get();
+
+ result = processCE(rcei.ce_);
+
+ if (result != CollationElementIterator.IGNORABLE) {
+ pceBuffer_.put(result, rcei.low_, rcei.high_);
}
- return result;
}
}
- if (textce == m_pattern_.m_CE_[ceindex]) {
- ceindex ++;
- }
- }
- // set offset here
- if (isSafe) {
- int result = coleiter.getOffset();
- // sets the text iterator here with the correct expansion and offset
- int leftoverces = coleiter.m_CEBufferSize_
- - coleiter.m_CEBufferOffset_;
- if (result <= prefixlength) {
- result = textoffset;
+
+ if (pceBuffer_.empty()) {
+ // **** Is -1 the right value for ixLow, ixHigh? ****
+ if (range != null) {
+ range.ixLow_ = -1;
+ range.ixHigh_ = -1;
+ }
+ return CollationElementIterator.NULLORDER;
}
- else {
- result = textoffset + (safeoffset - result);
+
+ PCEI pcei = pceBuffer_.get();
+
+ if (range != null) {
+ range.ixLow_ = pcei.low_;
+ range.ixHigh_ = pcei.high_;
}
- m_colEIter_.setExactOffset(result);
- m_colEIter_.m_CEBufferOffset_ = m_colEIter_.m_CEBufferSize_
- - leftoverces;
- return result;
+
+ return pcei.ce_;
}
-
- return coleiter.getOffset();
- }
-
- /**
- * Trying out the substring and sees if it can be a canonical match.
- * This will try normalizing the starting accents and arranging them into
- * canonical equivalents and check their corresponding ces with the pattern
- * ce.
- * Prefix accents in the text will be grouped according to their combining
- * class and the groups will be mixed and matched to try find the perfect
- * match with the pattern.
- * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
- * step 1: split "\u030A\u0301" into 6 other type of potential accent
- * substrings
- * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
- * "\u0301\u0325".
- * step 2: check if any of the generated substrings matches the pattern.
- * @param textoffset start offset in the collation element text that starts
- * with the accents to be rearranged
- * @return true if the match is valid, false otherwise
- */
- private boolean doPreviousCanonicalMatch(int textoffset)
- {
- int offset = m_colEIter_.getOffset();
- if ((getFCD(targetText, textoffset) >> SECOND_LAST_BYTE_SHIFT_) == 0) {
- if (m_pattern_.m_hasSuffixAccents_) {
- offset = doPreviousCanonicalSuffixMatch(textoffset, offset);
- if (offset != DONE) {
- m_colEIter_.setExactOffset(offset);
- return true;
- }
- }
- return false;
+
+ private static boolean isContinuation(int ce) {
+ return ((ce & CONTINUATION_MARKER) == CONTINUATION_MARKER);
}
-
- if (!m_pattern_.m_hasPrefixAccents_) {
- return false;
+
+ public static final class Range {
+ int ixLow_;
+ int ixHigh_;
}
-
- StringBuilder accents = new StringBuilder();
- // offset to the last base character in substring to search
- int baseoffset = getNextBaseOffset(targetText, textoffset);
- // normalizing the offensive string
- String textstr = getString(targetText, textoffset,
- baseoffset - textoffset);
- if (Normalizer.quickCheck(textstr, Normalizer.NFD,0)
- == Normalizer.NO) {
- textstr = Normalizer.decompose(textstr, false);
+
+ /** Processed collation element buffer stuff ported from ICU4C ucoleitr.cpp */
+ private static final class PCEI {
+ long ce_;
+ int low_;
+ int high_;
}
- accents.append(textstr);
- // status checked in loop
-
- int accentsindex[] = new int[INITIAL_ARRAY_SIZE_];
- int size = getUnblockedAccentIndex(accents, accentsindex);
-
- // 2 power n - 1 plus the full set of accents
- int count = (2 << (size - 1)) - 1;
- while (count > 0) {
- m_canonicalPrefixAccents_.delete(0,
- m_canonicalPrefixAccents_.length());
- // copy the base characters
- for (int k = 0; k < accentsindex[0]; k ++) {
- m_canonicalPrefixAccents_.append(accents.charAt(k));
- }
- // forming all possible canonical rearrangement by dropping
- // sets of accents
- for (int i = 0; i <= size - 1; i ++) {
- int mask = 1 << (size - i - 1);
- if ((count & mask) != 0) {
- for (int j = accentsindex[i]; j < accentsindex[i + 1];
- j ++) {
- m_canonicalPrefixAccents_.append(accents.charAt(j));
- }
- }
+
+ private static final class PCEBuffer {
+ private PCEI[] buffer_ = new PCEI[DEFAULT_BUFFER_SIZE];
+ private int bufferIndex_ = 0;
+
+ void reset() {
+ bufferIndex_ = 0;
}
- offset = doPreviousCanonicalPrefixMatch(baseoffset);
- if (offset != DONE) {
- return true; // match found
+
+ boolean empty() {
+ return bufferIndex_ <= 0;
}
- count --;
- }
- return false;
- }
-
- /**
- * Checks match for contraction.
- * If the match starts with a partial contraction we fail.
- * Uses the temporary utility buffer to return the modified start and end.
- * @param start offset of potential match, to be modified if necessary
- * @param end offset of potential match, to be modified if necessary
- * @return true if match passes the contraction test, false otherwise.
- */
- private boolean checkPreviousCanonicalContractionMatch(int start, int end)
- {
- int temp = end;
- // This part checks if either ends of the match contains potential
- // contraction. If so we'll have to iterate through them
- char echar = 0;
- char schar = 0;
- if (end < m_textLimitOffset_) {
- targetText.setIndex(end);
- echar = targetText.current();
- }
- if (start + 1 < m_textLimitOffset_) {
- targetText.setIndex(start + 1);
- schar = targetText.current();
- }
- if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) {
- int expansion = m_colEIter_.m_CEBufferSize_
- - m_colEIter_.m_CEBufferOffset_;
- boolean hasExpansion = expansion > 0;
- m_colEIter_.setExactOffset(end);
- while (expansion > 0) {
- // getting rid of the redundant ce
- // since forward contraction/expansion may have extra ces
- // if we are in the normalization buffer, hasAccentsBeforeMatch
- // would have taken care of it.
- // E.g. the character \u01FA will have an expansion of 3, but
- // if we are only looking for A ring A\u030A, we'll have to
- // skip the last ce in the expansion buffer
- m_colEIter_.previous();
- if (m_colEIter_.getOffset() != temp) {
- end = temp;
- temp = m_colEIter_.getOffset();
- }
- expansion --;
+
+ void put(long ce, int ixLow, int ixHigh)
+ {
+ if (bufferIndex_ >= buffer_.length) {
+ PCEI[] newBuffer = new PCEI[buffer_.length + BUFFER_GROW];
+ System.arraycopy(buffer_, 0, newBuffer, 0, buffer_.length);
+ buffer_ = newBuffer;
+ }
+ buffer_[bufferIndex_] = new PCEI();
+ buffer_[bufferIndex_].ce_ = ce;
+ buffer_[bufferIndex_].low_ = ixLow;
+ buffer_[bufferIndex_].high_ = ixHigh;
+
+ bufferIndex_ += 1;
}
-
- int count = m_pattern_.m_CELength_;
- while (count > 0) {
- int ce = getCE(m_colEIter_.previous());
- // status checked below, note that if status is a failure
- // previous() returns NULLORDER
- if (ce == CollationElementIterator.IGNORABLE) {
- continue;
- }
- if (hasExpansion && count == 0
- && m_colEIter_.getOffset() != temp) {
- end = temp;
- temp = m_colEIter_.getOffset();
- }
- if (count == m_pattern_.m_CELength_
- && ce != m_pattern_.m_CE_[m_pattern_.m_CELength_ - 1]) {
- // accents may have extra starting ces, this occurs when a
- // pure accent pattern is matched without rearrangement
- int expected = m_pattern_.m_CE_[m_pattern_.m_CELength_ - 1];
- targetText.setIndex(end);
- if (UTF16.isTrailSurrogate(targetText.previous())) {
- if (targetText.getIndex() > m_textBeginOffset_ &&
- !UTF16.isLeadSurrogate(targetText.previous())) {
- targetText.next();
- }
- }
- end = targetText.getIndex();
- if ((getFCD(targetText, end) & LAST_BYTE_MASK_) != 0) {
- ce = getCE(m_colEIter_.previous());
- while (ce != expected
- && ce != CollationElementIterator.NULLORDER
- && m_colEIter_.getOffset() <= start) {
- ce = getCE(m_colEIter_.previous());
- }
- }
- }
- if (ce != m_pattern_.m_CE_[count - 1]) {
- start --;
- start = getPreviousBaseOffset(start);
- m_utilBuffer_[0] = start;
- m_utilBuffer_[1] = end;
- return false;
+
+ PCEI get() {
+ if (bufferIndex_ > 0) {
+ return buffer_[--bufferIndex_];
}
- count --;
+ return null;
}
- }
- m_utilBuffer_[0] = start;
- m_utilBuffer_[1] = end;
- return true;
- }
-
- /**
- * Checks and sets the match information if found.
- * Checks
- * <ul>
- * <li> the potential match does not repeat the previous match
- * <li> boundaries are correct
- * <li> potential match does not end in the middle of a contraction
- * <li> identical matches
- * </ul>
- * Otherwise the offset will be shifted to the next character.
- * Uses the temporary utility buffer for storing the modified textoffset.
- * @param textoffset offset in the collation element text. the returned
- * value will be the truncated start offset of the match or the
- * new start search offset.
- * @return true if the match is valid, false otherwise
- */
- private boolean checkPreviousCanonicalMatch(int textoffset)
- {
- // to ensure that the start and ends are not composite characters
- // if we have a canonical accent match
- if (m_pattern_.m_hasSuffixAccents_
- && m_canonicalSuffixAccents_.length() != 0
- || m_pattern_.m_hasPrefixAccents_
- && m_canonicalPrefixAccents_.length() != 0) {
- m_matchedIndex_ = textoffset;
- matchLength = getNextBaseOffset(m_colEIter_.getOffset())
- - textoffset;
- return true;
}
-
- int end = m_colEIter_.getOffset();
- if (!checkPreviousCanonicalContractionMatch(textoffset, end)) {
- // storing the modified textoffset
- return false;
- }
- textoffset = m_utilBuffer_[0];
- end = m_utilBuffer_[1];
- end = getNextBaseOffset(end);
- // this totally matches, however we need to check if it is repeating
- if (checkRepeatedMatch(textoffset, end)
- || !isBreakUnit(textoffset, end)
- || !checkIdentical(textoffset, end)) {
- textoffset --;
- textoffset = getPreviousBaseOffset(textoffset);
- m_utilBuffer_[0] = textoffset;
- return false;
+
+ /** Raw collation element buffer stuff ported from ICU4C ucoleitr.cpp */
+ private static final class RCEI {
+ int ce_;
+ int low_;
+ int high_;
}
-
- m_matchedIndex_ = textoffset;
- matchLength = end - textoffset;
- return true;
- }
-
- /**
- * Method that does the next exact match
- * @param start the offset to start shifting from and performing the
- * next exact match
- */
- private void handleNextExact(int start)
- {
- int textoffset = shiftForward(start,
- CollationElementIterator.NULLORDER,
- m_pattern_.m_CELength_);
- int targetce = CollationElementIterator.IGNORABLE;
- while (textoffset <= m_textLimitOffset_) {
- m_colEIter_.setExactOffset(textoffset);
- int patternceindex = m_pattern_.m_CELength_ - 1;
- boolean found = false;
- int lastce = CollationElementIterator.NULLORDER;
-
- while (true) {
- // finding the last pattern ce match, imagine composite
- // characters. for example: search for pattern A in text \u00C0
- // we'll have to skip \u0300 the grave first before we get to A
- targetce = m_colEIter_.previous();
- if (targetce == CollationElementIterator.NULLORDER) {
- found = false;
- break;
- }
- targetce = getCE(targetce);
- if (targetce == CollationElementIterator.IGNORABLE &&
- m_colEIter_.isInBuffer()) {
- // this is for the text \u0315\u0300 that requires
- // normalization and pattern \u0300, where \u0315 is ignorable
- continue;
- }
- if (lastce == CollationElementIterator.NULLORDER
- || lastce == CollationElementIterator.IGNORABLE) {
- lastce = targetce;
- }
- if (targetce == m_pattern_.m_CE_[patternceindex]) {
- // the first ce can be a contraction
- found = true;
- break;
- }
- if (m_colEIter_.m_CEBufferOffset_ <= 0) {
- found = false;
- break;
- }
- }
-
- while (found && patternceindex > 0) {
- lastce = targetce;
- targetce = m_colEIter_.previous();
- if (targetce == CollationElementIterator.NULLORDER) {
- found = false;
- break;
- }
- targetce = getCE(targetce);
- if (targetce == CollationElementIterator.IGNORABLE) {
- continue;
- }
-
- patternceindex --;
- found = found && targetce == m_pattern_.m_CE_[patternceindex];
+
+ private static final class RCEBuffer {
+ private RCEI[] buffer_ = new RCEI[DEFAULT_BUFFER_SIZE];
+ private int bufferIndex_ = 0;
+
+ boolean empty() {
+ return bufferIndex_ <= 0;
}
-
- targetce = lastce;
-
- if (!found) {
- textoffset = shiftForward(textoffset, lastce, patternceindex);
- // status checked at loop.
- patternceindex = m_pattern_.m_CELength_;
- continue;
+
+ void put(int ce, int ixLow, int ixHigh) {
+ if (bufferIndex_ >= buffer_.length) {
+ RCEI[] newBuffer = new RCEI[buffer_.length + BUFFER_GROW];
+ System.arraycopy(buffer_, 0, newBuffer, 0, buffer_.length);
+ buffer_ = newBuffer;
+ }
+ buffer_[bufferIndex_] = new RCEI();
+ buffer_[bufferIndex_].ce_ = ce;
+ buffer_[bufferIndex_].low_ = ixLow;
+ buffer_[bufferIndex_].high_ = ixHigh;
+
+ bufferIndex_ += 1;
}
-
- if (checkNextExactMatch(textoffset)) {
- // status checked in ucol_setOffset
- return;
+
+ RCEI get() {
+ if (bufferIndex_ > 0) {
+ return buffer_[--bufferIndex_];
+ }
+ return null;
}
- textoffset = m_utilBuffer_[0];
}
- setMatchNotFound();
}
/**
- * Method that does the next canonical match
- * @param start the offset to start shifting from and performing the
- * next canonical match
+ * Java port of ICU4C CEI (usearch.cpp)
+ *
+ * CEI Collation Element + source text index.
+ * These structs are kept in the circular buffer.
*/
- private void handleNextCanonical(int start)
- {
- boolean hasPatternAccents =
- m_pattern_.m_hasSuffixAccents_ || m_pattern_.m_hasPrefixAccents_;
-
- // shifting it check for setting offset
- // if setOffset is called previously or there was no previous match, we
- // leave the offset as it is.
- int textoffset = shiftForward(start, CollationElementIterator.NULLORDER,
- m_pattern_.m_CELength_);
- m_canonicalPrefixAccents_.delete(0, m_canonicalPrefixAccents_.length());
- m_canonicalSuffixAccents_.delete(0, m_canonicalSuffixAccents_.length());
- int targetce = CollationElementIterator.IGNORABLE;
-
- while (textoffset <= m_textLimitOffset_)
- {
- m_colEIter_.setExactOffset(textoffset);
- int patternceindex = m_pattern_.m_CELength_ - 1;
- boolean found = false;
- int lastce = CollationElementIterator.NULLORDER;
-
- while (true) {
- // finding the last pattern ce match, imagine composite characters
- // for example: search for pattern A in text \u00C0
- // we'll have to skip \u0300 the grave first before we get to A
- targetce = m_colEIter_.previous();
- if (targetce == CollationElementIterator.NULLORDER) {
- found = false;
- break;
- }
- targetce = getCE(targetce);
- if (lastce == CollationElementIterator.NULLORDER
- || lastce == CollationElementIterator.IGNORABLE) {
- lastce = targetce;
- }
- if (targetce == m_pattern_.m_CE_[patternceindex]) {
- // the first ce can be a contraction
- found = true;
- break;
- }
- if (m_colEIter_.m_CEBufferOffset_ <= 0) {
- found = false;
- break;
- }
- }
-
- while (found && patternceindex > 0) {
- targetce = m_colEIter_.previous();
- if (targetce == CollationElementIterator.NULLORDER) {
- found = false;
- break;
- }
- targetce = getCE(targetce);
- if (targetce == CollationElementIterator.IGNORABLE) {
- continue;
+ private static class CEI {
+ long ce_;
+ int lowIndex_;
+ int highIndex_;
+ }
+
+ /**
+ * CEBuffer A circular buffer of CEs from the text being searched
+ */
+ private static class CEBuffer {
+ // Java porting note: ICU4C uses the size for stack buffer
+ // static final int DEFAULT_CEBUFFER_SIZE = 96;
+
+ static final int CEBUFFER_EXTRA = 32;
+ static final int MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L = 8;
+ static final int MAX_TARGET_IGNORABLES_PER_PAT_OTHER = 3;
+
+ CEI[] buf_;
+ int bufSize_;
+ int firstIx_;
+ int limitIx_;
+
+ // Java porting note: No references in ICU4C implementation
+ // CollationElementIterator ceIter_;
+
+ StringSearch strSearch_;
+
+ CEBuffer(StringSearch ss) {
+ strSearch_ = ss;
+ bufSize_ = ss.pattern_.PCELength_ + CEBUFFER_EXTRA;
+ if (ss.search_.elementComparisonType_ != ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
+ String patText = ss.pattern_.text_;
+ if (patText != null) {
+ for (int i = 0; i < patText.length(); i++) {
+ char c = patText.charAt(i);
+ if (MIGHT_BE_JAMO_L(c)) {
+ bufSize_ += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L;
+ } else {
+ // No check for surrogates, we might allocate slightly more buffer than necessary.
+ bufSize_ += MAX_TARGET_IGNORABLES_PER_PAT_OTHER;
+ }
+ }
}
-
- patternceindex --;
- found = found && targetce == m_pattern_.m_CE_[patternceindex];
- }
-
- // initializing the rearranged accent array
- if (hasPatternAccents && !found) {
- found = doNextCanonicalMatch(textoffset);
- }
-
- if (!found) {
- textoffset = shiftForward(textoffset, lastce, patternceindex);
- // status checked at loop
- patternceindex = m_pattern_.m_CELength_;
- continue;
}
-
- if (checkNextCanonicalMatch(textoffset)) {
+
+ // Not used - see above
+ // ceIter_ = ss.textIter_;
+
+ firstIx_ = 0;
+ limitIx_ = 0;
+
+ if (!ss.initTextProcessedIter()) {
return;
}
- textoffset = m_utilBuffer_[0];
+
+ buf_ = new CEI[bufSize_];
}
- setMatchNotFound();
- }
-
- /**
- * Method that does the previous exact match
- * @param start the offset to start shifting from and performing the
- * previous exact match
- */
- private void handlePreviousExact(int start)
- {
- int textoffset = reverseShift(start, CollationElementIterator.NULLORDER,
- m_pattern_.m_CELength_);
- while (textoffset >= m_textBeginOffset_)
- {
- m_colEIter_.setExactOffset(textoffset);
- int patternceindex = 1;
- int targetce = CollationElementIterator.IGNORABLE;
- boolean found = false;
- int firstce = CollationElementIterator.NULLORDER;
-
- while (true) {
- // finding the first pattern ce match, imagine composite
- // characters. for example: search for pattern \u0300 in text
- // \u00C0, we'll have to skip A first before we get to
- // \u0300 the grave accent
- targetce = m_colEIter_.next();
- if (targetce == CollationElementIterator.NULLORDER) {
- found = false;
- break;
- }
- targetce = getCE(targetce);
- if (firstce == CollationElementIterator.NULLORDER
- || firstce == CollationElementIterator.IGNORABLE) {
- firstce = targetce;
- }
- if (targetce == CollationElementIterator.IGNORABLE && m_collator_.getStrength() != Collator.PRIMARY) {
- continue;
- }
- if (targetce == m_pattern_.m_CE_[0]) {
- found = true;
- break;
- }
- if (m_colEIter_.m_CEBufferOffset_ == -1
- || m_colEIter_.m_CEBufferOffset_
- == m_colEIter_.m_CEBufferSize_) {
- // checking for accents in composite character
- found = false;
- break;
- }
+
+ // Get the CE with the specified index.
+ // Index must be in the range
+ // n-history_size < index < n+1
+ // where n is the largest index to have been fetched by some previous call to this function.
+ // The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
+ //
+ CEI get(int index) {
+ int i = index % bufSize_;
+
+ if (index >= firstIx_ && index < limitIx_) {
+ // The request was for an entry already in our buffer.
+ // Just return it.
+ return buf_[i];
}
-
- //targetce = firstce;
-
- while (found && patternceindex < m_pattern_.m_CELength_) {
- firstce = targetce;
- targetce = m_colEIter_.next();
- if (targetce == CollationElementIterator.NULLORDER) {
- found = false;
- break;
- }
- targetce = getCE(targetce);
- if (targetce == CollationElementIterator.IGNORABLE) {
- continue;
- }
-
- found = found && targetce == m_pattern_.m_CE_[patternceindex];
- patternceindex ++;
+
+ // Caller is requesting a new, never accessed before, CE.
+ // Verify that it is the next one in sequence, which is all
+ // that is allowed.
+ if (index != limitIx_) {
+ assert(false);
+ return null;
}
-
- targetce = firstce;
-
- if (!found) {
- textoffset = reverseShift(textoffset, targetce, patternceindex);
- patternceindex = 0;
- continue;
+
+ // Manage the circular CE buffer indexing
+ limitIx_++;
+
+ if (limitIx_ - firstIx_ >= bufSize_) {
+ // The buffer is full, knock out the lowest-indexed entry.
+ firstIx_++;
}
-
- if (checkPreviousExactMatch(textoffset)) {
- return;
+
+ CollationPCE.Range range = new CollationPCE.Range();
+ if (buf_[i] == null) {
+ buf_[i] = new CEI();
}
- textoffset = m_utilBuffer_[0];
+ buf_[i].ce_ = strSearch_.textProcessedIter_.nextProcessed(range);
+ buf_[i].lowIndex_ = range.ixLow_;
+ buf_[i].highIndex_ = range.ixHigh_;
+
+ return buf_[i];
}
- setMatchNotFound();
- }
-
- /**
- * Method that does the previous canonical match
- * @param start the offset to start shifting from and performing the
- * previous canonical match
- */
- private void handlePreviousCanonical(int start)
- {
- boolean hasPatternAccents =
- m_pattern_.m_hasSuffixAccents_ || m_pattern_.m_hasPrefixAccents_;
-
- // shifting it check for setting offset
- // if setOffset is called previously or there was no previous match, we
- // leave the offset as it is.
- int textoffset = reverseShift(start, CollationElementIterator.NULLORDER,
- m_pattern_.m_CELength_);
- m_canonicalPrefixAccents_.delete(0, m_canonicalPrefixAccents_.length());
- m_canonicalSuffixAccents_.delete(0, m_canonicalSuffixAccents_.length());
-
- while (textoffset >= m_textBeginOffset_)
- {
- m_colEIter_.setExactOffset(textoffset);
- int patternceindex = 1;
- int targetce = CollationElementIterator.IGNORABLE;
- boolean found = false;
- int firstce = CollationElementIterator.NULLORDER;
-
- while (true) {
- // finding the first pattern ce match, imagine composite
- // characters. for example: search for pattern \u0300 in text
- // \u00C0, we'll have to skip A first before we get to
- // \u0300 the grave accent
- targetce = m_colEIter_.next();
- if (targetce == CollationElementIterator.NULLORDER) {
- found = false;
- break;
- }
- targetce = getCE(targetce);
- if (firstce == CollationElementIterator.NULLORDER
- || firstce == CollationElementIterator.IGNORABLE) {
- firstce = targetce;
- }
-
- if (targetce == m_pattern_.m_CE_[0]) {
- // the first ce can be a contraction
- found = true;
- break;
- }
- if (m_colEIter_.m_CEBufferOffset_ == -1
- || m_colEIter_.m_CEBufferOffset_
- == m_colEIter_.m_CEBufferSize_) {
- // checking for accents in composite character
- found = false;
- break;
- }
- }
-
- targetce = firstce;
-
- while (found && patternceindex < m_pattern_.m_CELength_) {
- targetce = m_colEIter_.next();
- if (targetce == CollationElementIterator.NULLORDER) {
- found = false;
- break;
- }
- targetce = getCE(targetce);
- if (targetce == CollationElementIterator.IGNORABLE) {
- continue;
- }
-
- found = found && targetce == m_pattern_.m_CE_[patternceindex];
- patternceindex ++;
+
+ // Get the CE with the specified index.
+ // Index must be in the range
+ // n-history_size < index < n+1
+ // where n is the largest index to have been fetched by some previous call to this function.
+ // The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
+ //
+ CEI getPrevious(int index) {
+ int i = index % bufSize_;
+
+ if (index >= firstIx_ && index < limitIx_) {
+ // The request was for an entry already in our buffer.
+ // Just return it.
+ return buf_[i];
}
-
- // initializing the rearranged accent array
- if (hasPatternAccents && !found) {
- found = doPreviousCanonicalMatch(textoffset);
+
+ // Caller is requesting a new, never accessed before, CE.
+ // Verify that it is the next one in sequence, which is all
+ // that is allowed.
+ if (index != limitIx_) {
+ assert(false);
+ return null;
}
-
- if (!found) {
- textoffset = reverseShift(textoffset, targetce, patternceindex);
- patternceindex = 0;
- continue;
+
+ // Manage the circular CE buffer indexing
+ limitIx_++;
+
+ if (limitIx_ - firstIx_ >= bufSize_) {
+ // The buffer is full, knock out the lowest-indexed entry.
+ firstIx_++;
}
-
- if (checkPreviousCanonicalMatch(textoffset)) {
- return;
+
+ CollationPCE.Range range = new CollationPCE.Range();
+ if (buf_[i] == null) {
+ buf_[i] = new CEI();
}
- textoffset = m_utilBuffer_[0];
- }
- setMatchNotFound();
- }
-
- /**
- * Gets a substring out of a CharacterIterator
- * @param text CharacterIterator
- * @param start start offset
- * @param length of substring
- * @return substring from text starting at start and length length
- */
- private static final String getString(CharacterIterator text, int start,
- int length)
- {
- StringBuilder result = new StringBuilder(length);
- int offset = text.getIndex();
- text.setIndex(start);
- for (int i = 0; i < length; i ++) {
- result.append(text.current());
- text.next();
- }
- text.setIndex(offset);
- return result.toString();
- }
-
- /**
- * Getting the mask for collation strength
- * @param strength collation strength
- * @return collation element mask
- */
- private static final int getMask(int strength)
- {
- switch (strength)
- {
- case Collator.PRIMARY:
- return RuleBasedCollator.CE_PRIMARY_MASK_;
- case Collator.SECONDARY:
- return RuleBasedCollator.CE_SECONDARY_MASK_
- | RuleBasedCollator.CE_PRIMARY_MASK_;
- default:
- return RuleBasedCollator.CE_TERTIARY_MASK_
- | RuleBasedCollator.CE_SECONDARY_MASK_
- | RuleBasedCollator.CE_PRIMARY_MASK_;
+ buf_[i].ce_ = strSearch_.textProcessedIter_.previousProcessed(range);
+ buf_[i].lowIndex_ = range.ixLow_;
+ buf_[i].highIndex_ = range.ixHigh_;
+
+ return buf_[i];
}
- }
-
- /**
- * Sets match not found
- */
- private void setMatchNotFound()
- {
- // this method resets the match result regardless of the error status.
- m_matchedIndex_ = DONE;
- setMatchLength(0);
- }
-
- /**
- * Check the boundaries of the match.
- */
- private int checkBreakBoundary(int end) {
- if (!m_charBreakIter_.isBoundary(end)) {
- end = m_charBreakIter_.following(end);
+
+ static boolean MIGHT_BE_JAMO_L(char c) {
+ return (c >= 0x1100 && c <= 0x115E)
+ || (c >= 0x3131 && c <= 0x314E)
+ || (c >= 0x3165 && c <= 0x3186);
}
- return end;
}
}
/*
*******************************************************************************
-* Copyright (C) 2009-2012, International Business Machines
+* Copyright (C) 2009-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
public static final class Hangul {
/* Korean Hangul and Jamo constants */
public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */
+ public static final int JAMO_L_END=0x1112;
public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */
+ public static final int JAMO_V_END=0x1175;
public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */
+ public static final int JAMO_T_END=0x11c2;
public static final int HANGUL_BASE=0xac00;
+ public static final int HANGUL_END=0xd7a3;
public static final int JAMO_L_COUNT=19;
public static final int JAMO_V_COUNT=21;
return load(ICUData.getRequiredStream(name));
}
+ private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) {
+ if(isAlgorithmicNoNo(norm16)) {
+ // Range of code points with same-norm16-value algorithmic decompositions.
+ // They might have different non-zero FCD16 values.
+ do {
+ int fcd16=getFCD16(start);
+ if(fcd16>0xff) { set.add(start); }
+ } while(++start<=end);
+ } else {
+ int fcd16=getFCD16(start);
+ if(fcd16>0xff) { set.add(start, end); }
+ }
+ }
+
+ private void enumNorm16PropertyStartsRange(int start, int end, int value, UnicodeSet set) {
+ /* add the start code point to the USet */
+ set.add(start);
+ if(start!=end && isAlgorithmicNoNo(value)) {
+ // Range of code points with same-norm16-value algorithmic decompositions.
+ // They might have different non-zero FCD16 values.
+ int prevFCD16=getFCD16(start);
+ while(++start<=end) {
+ int fcd16=getFCD16(start);
+ if(fcd16!=prevFCD16) {
+ set.add(start);
+ prevFCD16=fcd16;
+ }
+ }
+ }
+ }
+
+ public void addLcccChars(UnicodeSet set) {
+ /* add the start code point of each same-value range of each trie */
+ Iterator<Trie2.Range> trieIterator=normTrie.iterator();
+ Trie2.Range range;
+ while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
+ enumLcccRange(range.startCodePoint, range.endCodePoint, range.value, set);
+ }
+ }
+
public void addPropertyStarts(UnicodeSet set) {
/* add the start code point of each same-value range of each trie */
Iterator<Trie2.Range> trieIterator=normTrie.iterator();
Trie2.Range range;
while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
- /* add the start code point to the USet */
- set.add(range.startCodePoint);
+ enumNorm16PropertyStartsRange(range.startCodePoint, range.endCodePoint, range.value, set);
}
/* add Hangul LV syllables and LV+1 because of skippables */
return 0; // no
}
}
+ public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; }
public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; }
public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }
// higher-level functionality ------------------------------------------ ***
+ // NFD without an NFD Normalizer2 instance.
+ public Appendable decompose(CharSequence s, StringBuilder dest) {
+ decompose(s, 0, s.length(), dest, s.length());
+ return dest;
+ }
+ /**
+ * Decomposes s[src, limit[ and writes the result to dest.
+ * limit can be NULL if src is NUL-terminated.
+ * destLengthEstimate is the initial dest buffer capacity and can be -1.
+ */
+ public void decompose(CharSequence s, int src, int limit, StringBuilder dest,
+ int destLengthEstimate) {
+ if(destLengthEstimate<0) {
+ destLengthEstimate=limit-src;
+ }
+ dest.setLength(0);
+ ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate);
+ decompose(s, src, limit, buffer);
+ }
+
// Dual functionality:
// buffer!=NULL: normalize
// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
ICU_DATA_VERSION = getInstance(53, 0, 1, 0);
UNICODE_VERSION = UNICODE_6_3;
- UCOL_RUNTIME_VERSION = getInstance(7);
- UCOL_BUILDER_VERSION = getInstance(8);
+ UCOL_RUNTIME_VERSION = getInstance(8);
+ UCOL_BUILDER_VERSION = getInstance(9);
UCOL_TAILORINGS_VERSION = getInstance(1);
}
version https://git-lfs.github.com/spec/v1
-oid sha256:c2759d4ee4ebccae6cd0995f8ae6442228829ac6f3818b206dbebfa16e864895
-size 11002389
+oid sha256:1abc0174ae76c79801fe369dac52cd4c42a09d6c2b92919b1f7736d46ea10e1d
+size 10855096
<!--
*******************************************************************************
-* Copyright (C) 2009-2011, International Business Machines Corporation and *
+* Copyright (C) 2009-2014, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
-->
</unjar>
<copy todir="${bin.dir}">
<fileset dir="${icu4j.collate-tests.dir}/src">
- <include name="com/ibm/icu/dev/data/riwords.txt"/>
+ <include name="com/ibm/icu/dev/data/collationtest.txt"/>
<include name="com/ibm/icu/dev/data/CollationTest_*.txt"/>
- <include name="com/ibm/icu/dev/data/resources/*"/>
+ <include name="com/ibm/icu/dev/data/riwords.txt"/>
</fileset>
</copy>
</target>
# File: CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
-# UCA Version: 6.2.0
-# UCD Version: 6.2.0
-# Generated: 2012-08-15, 21:43:27 GMT [MD]
+# UCA Version: 6.3.0
+# UCD Version: 6.3.0
+# Generated: 2013-09-03 [MS]
# For a description of the format and usage, see CollationAuxiliary.html
0338 0334
20E5 0334
20EA 0334
20EB 0334
+0335 0334
0334 0591
0591 0334
0334 0592
20EF 0334
0334 10A0D
10A0D 0334
+0334 3099
+3099 0334
+0334 309A
+309A 0334
0305 0334
0334 0305
0309 0334
302E 0334
0334 302F
302F 0334
-0334 3099
-3099 0334
-0334 309A
-309A 0334
0334 20D0
20D0 0334
0334 20D1
20E9 0334
0334 101FD
101FD 0334
-0335 0334
10A39 0334
20D2 0334
20D3 0334
0085 0061
0085 0041
0085 0062
-180E 0021
-180E 003F
-180E 0061
-180E 0041
-180E 0062
2028 0021
2028 003F
2028 0061
0618 0021
0619 0021
061A 0021
+061C 0021
0640 0021
06D6 0021
06D7 0021
180B 0021
180C 0021
180D 0021
+180E 0021
1A7F 0021
1B6B 0021
1B6C 0021
2062 0021
2063 0021
2064 0021
+2066 0021
+2067 0021
+2068 0021
+2069 0021
206A 0021
206B 0021
206C 0021
20E2 0021
20E3 0021
20E4 0021
+3099 0021
+FF9E 0021
+309A 0021
+FF9F 0021
+0335 0021
0305 0021
0309 0021
030F 0021
0330 0021
0331 0021
0334 0021
-0335 0021
0339 0021
0345 0021
0358 0021
302D 0021
302E 0021
302F 0021
-3099 0021
-FF9E 0021
-309A 0021
-FF9F 0021
20D0 0021
20D1 0021
20D2 0021
0618 003F
0619 003F
061A 003F
+061C 003F
0640 003F
06D6 003F
06D7 003F
180B 003F
180C 003F
180D 003F
+180E 003F
1A7F 003F
1B6B 003F
1B6C 003F
2062 003F
2063 003F
2064 003F
+2066 003F
+2067 003F
+2068 003F
+2069 003F
206A 003F
206B 003F
206C 003F
20E2 003F
20E3 003F
20E4 003F
+3099 003F
+FF9E 003F
+309A 003F
+FF9F 003F
+0335 003F
0305 003F
0309 003F
030F 003F
0330 003F
0331 003F
0334 003F
-0335 003F
0339 003F
0345 003F
0358 003F
302D 003F
302E 003F
302F 003F
-3099 003F
-FF9E 003F
-309A 003F
-FF9F 003F
20D0 003F
20D1 003F
20D2 003F
2046 0061
2046 0041
2046 0062
+2308 0021
+2308 003F
+2308 0061
+2308 0041
+2308 0062
+2309 0021
+2309 003F
+2309 0061
+2309 0041
+2309 0062
+230A 0021
+230A 003F
+230A 0061
+230A 0041
+230A 0062
+230B 0021
+230B 003F
+230B 0061
+230B 0041
+230B 0062
29FC 0021
29FC 003F
29FC 0061
2307 0061
2307 0041
2307 0062
-2308 0021
-2308 003F
-2308 0061
-2308 0041
-2308 0062
-2309 0021
-2309 003F
-2309 0061
-2309 0041
-2309 0062
-230A 0021
-230A 003F
-230A 0061
-230A 0041
-230A 0062
-230B 0021
-230B 003F
-230B 0061
-230B 0041
-230B 0062
230C 0021
230C 003F
230C 0061
FFFC 0061
FFFC 0041
FFFC 0062
-FFFD 0021
-FFFD 003F
-FFFD 0061
-FFFD 0041
-FFFD 0062
02D0 0021
02D0 003F
02D0 0061
12433 0061
12433 0041
12433 0062
-12456 0021
-12456 003F
-12456 0061
-12456 0041
-12456 0062
-12457 0021
-12457 003F
-12457 0061
-12457 0041
-12457 0062
1245A 0021
1245A 003F
1245A 0061
1F101 0041
1F101 0062
0030 0021
-FF10 0021
-1D7CE 0021
-1D7D8 0021
-1D7E2 0021
-1D7EC 0021
-1D7F6 0021
-24EA 0021
-24FF 0021
-2070 0021
-2080 0021
0660 0021
06F0 0021
07C0 0021
-104A0 0021
0966 0021
09E6 0021
0A66 0021
0C78 0021
0CE6 0021
0D66 0021
-ABF0 0021
-A8D0 0021
-1946 0021
-19D0 0021
-1A80 0021
-1A90 0021
0E50 0021
0ED0 0021
0F20 0021
-0F33 0021
-1C40 0021
-A900 0021
1040 0021
1090 0021
-11136 0021
17E0 0021
17F0 0021
-AA50 0021
+1810 0021
+1946 0021
+19D0 0021
+1A80 0021
+1A90 0021
1B50 0021
-A9D0 0021
1BB0 0021
-1810 0021
+1C40 0021
1C50 0021
-A620 0021
-110F0 0021
3007 0021
+A620 0021
+A8D0 0021
+A900 0021
+A9D0 0021
+AA50 0021
+ABF0 0021
1018A 0021
+104A0 0021
+11066 0021
+110F0 0021
+11136 0021
111D0 0021
116C0 0021
-11066 0021
+FF10 0021
+0F33 0021
+1D7CE 0021
+1D7D8 0021
+1D7E2 0021
+1D7EC 0021
+1D7F6 0021
+24EA 0021
+24FF 0021
+2070 0021
+2080 0021
0030 003F
-FF10 003F
-1D7CE 003F
-1D7D8 003F
-1D7E2 003F
-1D7EC 003F
-1D7F6 003F
-24EA 003F
-24FF 003F
-2070 003F
-2080 003F
0660 003F
06F0 003F
07C0 003F
-104A0 003F
0966 003F
09E6 003F
0A66 003F
0C78 003F
0CE6 003F
0D66 003F
-ABF0 003F
-A8D0 003F
-1946 003F
-19D0 003F
-1A80 003F
-1A90 003F
0E50 003F
0ED0 003F
0F20 003F
-0F33 003F
-1C40 003F
-A900 003F
1040 003F
1090 003F
-11136 003F
17E0 003F
17F0 003F
-AA50 003F
+1810 003F
+1946 003F
+19D0 003F
+1A80 003F
+1A90 003F
1B50 003F
-A9D0 003F
1BB0 003F
-1810 003F
+1C40 003F
1C50 003F
-A620 003F
-110F0 003F
3007 003F
+A620 003F
+A8D0 003F
+A900 003F
+A9D0 003F
+AA50 003F
+ABF0 003F
1018A 003F
+104A0 003F
+11066 003F
+110F0 003F
+11136 003F
111D0 003F
116C0 003F
-11066 003F
+FF10 003F
+0F33 003F
+1D7CE 003F
+1D7D8 003F
+1D7E2 003F
+1D7EC 003F
+1D7F6 003F
+24EA 003F
+24FF 003F
+2070 003F
+2080 003F
1F100 0021
1F100 003F
1F100 0061
2189 0041
2189 0062
0030 0061
-0030 0041
-FF10 0061
-FF10 0041
-1D7CE 0061
-1D7D8 0061
-1D7E2 0061
-1D7EC 0061
-1D7F6 0061
-1D7CE 0041
-1D7D8 0041
-1D7E2 0041
-1D7EC 0041
-1D7F6 0041
-24EA 0061
-24FF 0061
-24EA 0041
-24FF 0041
-2070 0061
-2070 0041
-2080 0061
-2080 0041
0660 0061
-0660 0041
06F0 0061
-06F0 0041
07C0 0061
-07C0 0041
-104A0 0061
-104A0 0041
0966 0061
-0966 0041
09E6 0061
-09E6 0041
0A66 0061
-0A66 0041
0AE6 0061
-0AE6 0041
0B66 0061
-0B66 0041
0BE6 0061
-0BE6 0041
0C66 0061
0C78 0061
-0C66 0041
-0C78 0041
0CE6 0061
-0CE6 0041
0D66 0061
-0D66 0041
-ABF0 0061
-ABF0 0041
-A8D0 0061
-A8D0 0041
+0E50 0061
+0ED0 0061
+0F20 0061
+1040 0061
+1090 0061
+17E0 0061
+17F0 0061
+1810 0061
1946 0061
-1946 0041
19D0 0061
-19D0 0041
1A80 0061
-1A80 0041
1A90 0061
-1A90 0041
-0E50 0061
+1B50 0061
+1BB0 0061
+1C40 0061
+1C50 0061
+3007 0061
+A620 0061
+A8D0 0061
+A900 0061
+A9D0 0061
+AA50 0061
+ABF0 0061
+1018A 0061
+104A0 0061
+11066 0061
+110F0 0061
+11136 0061
+111D0 0061
+116C0 0061
+0030 0041
+0660 0041
+06F0 0041
+07C0 0041
+0966 0041
+09E6 0041
+0A66 0041
+0AE6 0041
+0B66 0041
+0BE6 0041
+0C66 0041
+0C78 0041
+0CE6 0041
+0D66 0041
0E50 0041
-0ED0 0061
0ED0 0041
-0F20 0061
0F20 0041
-0F33 0061
-0F33 0041
-1C40 0061
-1C40 0041
-A900 0061
-A900 0041
-1040 0061
1040 0041
-1090 0061
1090 0041
-11136 0061
-11136 0041
-17E0 0061
17E0 0041
-17F0 0061
17F0 0041
-AA50 0061
-AA50 0041
-1B50 0061
+1810 0041
+1946 0041
+19D0 0041
+1A80 0041
+1A90 0041
1B50 0041
-A9D0 0061
-A9D0 0041
-1BB0 0061
1BB0 0041
-1810 0061
-1810 0041
-1C50 0061
+1C40 0041
1C50 0041
-A620 0061
-A620 0041
-110F0 0061
-110F0 0041
-3007 0061
3007 0041
-1018A 0061
+A620 0041
+A8D0 0041
+A900 0041
+A9D0 0041
+AA50 0041
+ABF0 0041
1018A 0041
-111D0 0061
+104A0 0041
+11066 0041
+110F0 0041
+11136 0041
111D0 0041
-116C0 0061
116C0 0041
-11066 0061
-11066 0041
+FF10 0061
+FF10 0041
+0F33 0061
+0F33 0041
+1D7CE 0061
+1D7D8 0061
+1D7E2 0061
+1D7EC 0061
+1D7F6 0061
+1D7CE 0041
+1D7D8 0041
+1D7E2 0041
+1D7EC 0041
+1D7F6 0041
+24EA 0061
+24FF 0061
+24EA 0041
+24FF 0041
+2070 0061
+2070 0041
+2080 0061
+2080 0041
0030 0062
-FF10 0062
-1D7CE 0062
-1D7D8 0062
-1D7E2 0062
-1D7EC 0062
-1D7F6 0062
-24EA 0062
-24FF 0062
-2070 0062
-2080 0062
0660 0062
06F0 0062
07C0 0062
-104A0 0062
0966 0062
09E6 0062
0A66 0062
0C78 0062
0CE6 0062
0D66 0062
-ABF0 0062
-A8D0 0062
-1946 0062
-19D0 0062
-1A80 0062
-1A90 0062
0E50 0062
0ED0 0062
0F20 0062
-0F33 0062
-1C40 0062
-A900 0062
1040 0062
1090 0062
-11136 0062
17E0 0062
17F0 0062
-AA50 0062
+1810 0062
+1946 0062
+19D0 0062
+1A80 0062
+1A90 0062
1B50 0062
-A9D0 0062
1BB0 0062
-1810 0062
+1C40 0062
1C50 0062
-A620 0062
-110F0 0062
3007 0062
+A620 0062
+A8D0 0062
+A900 0062
+A9D0 0062
+AA50 0062
+ABF0 0062
1018A 0062
+104A0 0062
+11066 0062
+110F0 0062
+11136 0062
111D0 0062
116C0 0062
-11066 0062
+FF10 0062
+0F33 0062
+1D7CE 0062
+1D7D8 0062
+1D7E2 0062
+1D7EC 0062
+1D7F6 0062
+24EA 0062
+24FF 0062
+2070 0062
+2080 0062
3358 0021
3358 003F
3358 0061
1F102 0041
1F102 0062
0031 0021
-FF11 0021
-1D7CF 0021
-1D7D9 0021
-1D7E3 0021
-1D7ED 0021
-1D7F7 0021
-2460 0021
-24F5 0021
-2776 0021
-2780 0021
-278A 0021
-00B9 0021
-2081 0021
0661 0021
06F1 0021
-10E60 0021
07C1 0021
-1369 0021
-104A1 0021
0967 0021
09E7 0021
0A67 0021
0C7C 0021
0CE7 0021
0D67 0021
-ABF1 0021
-A8D1 0021
-1947 0021
-19D1 0021
-19DA 0021
-1A81 0021
-1A91 0021
0E51 0021
0ED1 0021
0F21 0021
-0F2A 0021
-1C41 0021
-A901 0021
1041 0021
1091 0021
-11137 0021
+1369 0021
17E1 0021
17F1 0021
-AA51 0021
+1811 0021
+1947 0021
+19D1 0021
+19DA 0021
+1A81 0021
+1A91 0021
1B51 0021
-A9D1 0021
1BB1 0021
-1811 0021
+1C41 0021
1C51 0021
-A621 0021
-110F1 0021
3021 0021
+A621 0021
+A8D1 0021
+A901 0021
+A9D1 0021
+AA51 0021
+ABF1 0021
10107 0021
10142 0021
10158 0021
1015A 0021
10320 0021
103D1 0021
+104A1 0021
+10858 0021
+10916 0021
+10A40 0021
+10A7D 0021
+10B58 0021
+10B78 0021
+10E60 0021
+11052 0021
+11067 0021
+110F1 0021
+11137 0021
+111D1 0021
+116C1 0021
12415 0021
1241E 0021
1242C 0021
12434 0021
1244F 0021
12458 0021
-10A7D 0021
-10916 0021
-10858 0021
-10B58 0021
-10B78 0021
-111D1 0021
-116C1 0021
-11067 0021
-11052 0021
-10A40 0021
1D360 0021
+FF11 0021
+0F2A 0021
+1D7CF 0021
+1D7D9 0021
+1D7E3 0021
+1D7ED 0021
+1D7F7 0021
+2460 0021
+24F5 0021
+2776 0021
+2780 0021
+278A 0021
+00B9 0021
+2081 0021
0031 003F
-FF11 003F
-1D7CF 003F
-1D7D9 003F
-1D7E3 003F
-1D7ED 003F
-1D7F7 003F
-2460 003F
-24F5 003F
-2776 003F
-2780 003F
-278A 003F
-00B9 003F
-2081 003F
0661 003F
06F1 003F
-10E60 003F
07C1 003F
-1369 003F
-104A1 003F
0967 003F
09E7 003F
0A67 003F
0C7C 003F
0CE7 003F
0D67 003F
-ABF1 003F
-A8D1 003F
-1947 003F
-19D1 003F
-19DA 003F
-1A81 003F
-1A91 003F
0E51 003F
0ED1 003F
0F21 003F
-0F2A 003F
-1C41 003F
-A901 003F
1041 003F
1091 003F
-11137 003F
+1369 003F
17E1 003F
17F1 003F
-AA51 003F
+1811 003F
+1947 003F
+19D1 003F
+19DA 003F
+1A81 003F
+1A91 003F
1B51 003F
-A9D1 003F
1BB1 003F
-1811 003F
+1C41 003F
1C51 003F
-A621 003F
-110F1 003F
3021 003F
+A621 003F
+A8D1 003F
+A901 003F
+A9D1 003F
+AA51 003F
+ABF1 003F
10107 003F
10142 003F
10158 003F
1015A 003F
10320 003F
103D1 003F
+104A1 003F
+10858 003F
+10916 003F
+10A40 003F
+10A7D 003F
+10B58 003F
+10B78 003F
+10E60 003F
+11052 003F
+11067 003F
+110F1 003F
+11137 003F
+111D1 003F
+116C1 003F
12415 003F
1241E 003F
1242C 003F
12434 003F
1244F 003F
12458 003F
-10A7D 003F
-10916 003F
-10858 003F
-10B58 003F
-10B78 003F
-111D1 003F
-116C1 003F
-11067 003F
-11052 003F
-10A40 003F
1D360 003F
+FF11 003F
+0F2A 003F
+1D7CF 003F
+1D7D9 003F
+1D7E3 003F
+1D7ED 003F
+1D7F7 003F
+2460 003F
+24F5 003F
+2776 003F
+2780 003F
+278A 003F
+00B9 003F
+2081 003F
2488 0021
2488 003F
2488 0061
336B 0041
336B 0062
0031 0061
-0031 0041
-FF11 0061
-FF11 0041
-1D7CF 0061
-1D7D9 0061
-1D7E3 0061
-1D7ED 0061
-1D7F7 0061
-1D7CF 0041
-1D7D9 0041
-1D7E3 0041
-1D7ED 0041
-1D7F7 0041
-2460 0061
-24F5 0061
-2776 0061
-2780 0061
-278A 0061
-2460 0041
-24F5 0041
-2776 0041
-2780 0041
-278A 0041
-00B9 0061
-00B9 0041
-2081 0061
-2081 0041
0661 0061
-0661 0041
06F1 0061
-06F1 0041
-10E60 0061
-10E60 0041
07C1 0061
-07C1 0041
-1369 0061
-1369 0041
-104A1 0061
-104A1 0041
0967 0061
-0967 0041
09E7 0061
-09E7 0041
0A67 0061
-0A67 0041
0AE7 0061
-0AE7 0041
0B67 0061
-0B67 0041
0BE7 0061
-0BE7 0041
0C67 0061
0C79 0061
0C7C 0061
-0C67 0041
-0C79 0041
-0C7C 0041
0CE7 0061
-0CE7 0041
0D67 0061
-0D67 0041
-ABF1 0061
-ABF1 0041
-A8D1 0061
-A8D1 0041
-1947 0061
-1947 0041
-19D1 0061
-19DA 0061
-19D1 0041
-19DA 0041
-1A81 0061
-1A81 0041
-1A91 0061
-1A91 0041
0E51 0061
-0E51 0041
0ED1 0061
-0ED1 0041
0F21 0061
-0F21 0041
-0F2A 0061
-0F2A 0041
-1C41 0061
-1C41 0041
-A901 0061
-A901 0041
1041 0061
-1041 0041
1091 0061
-1091 0041
-11137 0061
-11137 0041
+1369 0061
17E1 0061
-17E1 0041
17F1 0061
-17F1 0041
-AA51 0061
-AA51 0041
+1811 0061
+1947 0061
+19D1 0061
+19DA 0061
+1A81 0061
+1A91 0061
1B51 0061
-1B51 0041
-A9D1 0061
-A9D1 0041
1BB1 0061
-1BB1 0041
-1811 0061
-1811 0041
+1C41 0061
1C51 0061
-1C51 0041
-A621 0061
-A621 0041
-110F1 0061
-110F1 0041
3021 0061
-3021 0041
+A621 0061
+A8D1 0061
+A901 0061
+A9D1 0061
+AA51 0061
+ABF1 0061
10107 0061
-10107 0041
10142 0061
10158 0061
10159 0061
1015A 0061
-10142 0041
-10158 0041
-10159 0041
-1015A 0041
10320 0061
-10320 0041
103D1 0061
-103D1 0041
+104A1 0061
+10858 0061
+10916 0061
+10A40 0061
+10A7D 0061
+10B58 0061
+10B78 0061
+10E60 0061
+11052 0061
+11067 0061
+110F1 0061
+11137 0061
+111D1 0061
+116C1 0061
12415 0061
1241E 0061
1242C 0061
12434 0061
1244F 0061
12458 0061
+1D360 0061
+0031 0041
+0661 0041
+06F1 0041
+07C1 0041
+0967 0041
+09E7 0041
+0A67 0041
+0AE7 0041
+0B67 0041
+0BE7 0041
+0C67 0041
+0C79 0041
+0C7C 0041
+0CE7 0041
+0D67 0041
+0E51 0041
+0ED1 0041
+0F21 0041
+1041 0041
+1091 0041
+1369 0041
+17E1 0041
+17F1 0041
+1811 0041
+1947 0041
+19D1 0041
+19DA 0041
+1A81 0041
+1A91 0041
+1B51 0041
+1BB1 0041
+1C41 0041
+1C51 0041
+3021 0041
+A621 0041
+A8D1 0041
+A901 0041
+A9D1 0041
+AA51 0041
+ABF1 0041
+10107 0041
+10142 0041
+10158 0041
+10159 0041
+1015A 0041
+10320 0041
+103D1 0041
+104A1 0041
+10858 0041
+10916 0041
+10A40 0041
+10A7D 0041
+10B58 0041
+10B78 0041
+10E60 0041
+11052 0041
+11067 0041
+110F1 0041
+11137 0041
+111D1 0041
+116C1 0041
12415 0041
1241E 0041
1242C 0041
12434 0041
1244F 0041
12458 0041
-10A7D 0061
-10A7D 0041
-10916 0061
-10916 0041
-10858 0061
-10858 0041
-10B58 0061
-10B58 0041
-10B78 0061
-10B78 0041
-111D1 0061
-111D1 0041
-116C1 0061
-116C1 0041
-11067 0061
-11067 0041
-11052 0061
-11052 0041
-10A40 0061
-10A40 0041
-1D360 0061
1D360 0041
+FF11 0061
+FF11 0041
+0F2A 0061
+0F2A 0041
+1D7CF 0061
+1D7D9 0061
+1D7E3 0061
+1D7ED 0061
+1D7F7 0061
+1D7CF 0041
+1D7D9 0041
+1D7E3 0041
+1D7ED 0041
+1D7F7 0041
+2460 0061
+24F5 0061
+2776 0061
+2780 0061
+278A 0061
+2460 0041
+24F5 0041
+2776 0041
+2780 0041
+278A 0041
+00B9 0061
+00B9 0041
+2081 0061
+2081 0041
0031 0062
-FF11 0062
-1D7CF 0062
-1D7D9 0062
-1D7E3 0062
-1D7ED 0062
-1D7F7 0062
-2460 0062
-24F5 0062
-2776 0062
-2780 0062
-278A 0062
-00B9 0062
-2081 0062
0661 0062
06F1 0062
-10E60 0062
07C1 0062
-1369 0062
-104A1 0062
0967 0062
09E7 0062
0A67 0062
0C7C 0062
0CE7 0062
0D67 0062
-ABF1 0062
-A8D1 0062
-1947 0062
-19D1 0062
-19DA 0062
-1A81 0062
-1A91 0062
0E51 0062
0ED1 0062
0F21 0062
-0F2A 0062
-1C41 0062
-A901 0062
1041 0062
1091 0062
-11137 0062
+1369 0062
17E1 0062
17F1 0062
-AA51 0062
+1811 0062
+1947 0062
+19D1 0062
+19DA 0062
+1A81 0062
+1A91 0062
1B51 0062
-A9D1 0062
1BB1 0062
-1811 0062
+1C41 0062
1C51 0062
-A621 0062
-110F1 0062
3021 0062
+A621 0062
+A8D1 0062
+A901 0062
+A9D1 0062
+AA51 0062
+ABF1 0062
10107 0062
10142 0062
10158 0062
1015A 0062
10320 0062
103D1 0062
+104A1 0062
+10858 0062
+10916 0062
+10A40 0062
+10A7D 0062
+10B58 0062
+10B78 0062
+10E60 0062
+11052 0062
+11067 0062
+110F1 0062
+11137 0062
+111D1 0062
+116C1 0062
12415 0062
1241E 0062
1242C 0062
12434 0062
1244F 0062
12458 0062
-10A7D 0062
-10916 0062
-10858 0062
-10B58 0062
-10B78 0062
-111D1 0062
-116C1 0062
-11067 0062
-11052 0062
-10A40 0062
1D360 0062
+FF11 0062
+0F2A 0062
+1D7CF 0062
+1D7D9 0062
+1D7E3 0062
+1D7ED 0062
+1D7F7 0062
+2460 0062
+24F5 0062
+2776 0062
+2780 0062
+278A 0062
+00B9 0062
+2081 0062
33E0 0021
33E0 003F
33E0 0061
1F103 0041
1F103 0062
0032 0021
-FF12 0021
-1D7D0 0021
-1D7DA 0021
-1D7E4 0021
-1D7EE 0021
-1D7F8 0021
-2461 0021
-24F6 0021
-2777 0021
-2781 0021
-278B 0021
-00B2 0021
-2082 0021
0662 0021
06F2 0021
-10E61 0021
07C2 0021
-136A 0021
-104A2 0021
0968 0021
09E8 0021
0A68 0021
0C7D 0021
0CE8 0021
0D68 0021
-ABF2 0021
-A8D2 0021
-1948 0021
-19D2 0021
-1A82 0021
-1A92 0021
0E52 0021
0ED2 0021
0F22 0021
-0F2B 0021
-1C42 0021
-A902 0021
1042 0021
1092 0021
-11138 0021
+136A 0021
17E2 0021
17F2 0021
-AA52 0021
+1812 0021
+1948 0021
+19D2 0021
+1A82 0021
+1A92 0021
1B52 0021
-A9D2 0021
1BB2 0021
-1812 0021
+1C42 0021
1C52 0021
-A622 0021
-110F2 0021
3022 0021
+A622 0021
+A8D2 0021
+A902 0021
+A9D2 0021
+AA52 0021
+ABF2 0021
10108 0021
1015B 0021
1015C 0021
1015D 0021
1015E 0021
103D2 0021
+104A2 0021
+10859 0021
+1091A 0021
+10A41 0021
+10B59 0021
+10B79 0021
+10E61 0021
+11053 0021
+11068 0021
+110F2 0021
+11138 0021
+111D2 0021
+116C2 0021
12400 0021
12416 0021
1241F 0021
12435 0021
1244A 0021
12450 0021
+12456 0021
12459 0021
-1091A 0021
-10859 0021
-10B59 0021
-10B79 0021
-111D2 0021
-116C2 0021
-11068 0021
-11053 0021
-10A41 0021
1D361 0021
+FF12 0021
+0F2B 0021
+1D7D0 0021
+1D7DA 0021
+1D7E4 0021
+1D7EE 0021
+1D7F8 0021
+2461 0021
+24F6 0021
+2777 0021
+2781 0021
+278B 0021
+00B2 0021
+2082 0021
0032 003F
-FF12 003F
-1D7D0 003F
-1D7DA 003F
-1D7E4 003F
-1D7EE 003F
-1D7F8 003F
-2461 003F
-24F6 003F
-2777 003F
-2781 003F
-278B 003F
-00B2 003F
-2082 003F
0662 003F
06F2 003F
-10E61 003F
07C2 003F
-136A 003F
-104A2 003F
0968 003F
09E8 003F
0A68 003F
0C7D 003F
0CE8 003F
0D68 003F
-ABF2 003F
-A8D2 003F
-1948 003F
-19D2 003F
-1A82 003F
-1A92 003F
0E52 003F
0ED2 003F
0F22 003F
-0F2B 003F
-1C42 003F
-A902 003F
1042 003F
1092 003F
-11138 003F
+136A 003F
17E2 003F
17F2 003F
-AA52 003F
+1812 003F
+1948 003F
+19D2 003F
+1A82 003F
+1A92 003F
1B52 003F
-A9D2 003F
1BB2 003F
-1812 003F
+1C42 003F
1C52 003F
-A622 003F
-110F2 003F
3022 003F
+A622 003F
+A8D2 003F
+A902 003F
+A9D2 003F
+AA52 003F
+ABF2 003F
10108 003F
1015B 003F
1015C 003F
1015D 003F
1015E 003F
103D2 003F
+104A2 003F
+10859 003F
+1091A 003F
+10A41 003F
+10B59 003F
+10B79 003F
+10E61 003F
+11053 003F
+11068 003F
+110F2 003F
+11138 003F
+111D2 003F
+116C2 003F
12400 003F
12416 003F
1241F 003F
12435 003F
1244A 003F
12450 003F
+12456 003F
12459 003F
-1091A 003F
-10859 003F
-10B59 003F
-10B79 003F
-111D2 003F
-116C2 003F
-11068 003F
-11053 003F
-10A41 003F
1D361 003F
+FF12 003F
+0F2B 003F
+1D7D0 003F
+1D7DA 003F
+1D7E4 003F
+1D7EE 003F
+1D7F8 003F
+2461 003F
+24F6 003F
+2777 003F
+2781 003F
+278B 003F
+00B2 003F
+2082 003F
2489 0021
2489 003F
2489 0061
33FC 0041
33FC 0062
0032 0061
-0032 0041
-FF12 0061
-FF12 0041
-1D7D0 0061
-1D7DA 0061
-1D7E4 0061
-1D7EE 0061
-1D7F8 0061
-1D7D0 0041
-1D7DA 0041
-1D7E4 0041
-1D7EE 0041
-1D7F8 0041
-2461 0061
-24F6 0061
-2777 0061
-2781 0061
-278B 0061
-2461 0041
-24F6 0041
-2777 0041
-2781 0041
-278B 0041
-00B2 0061
-00B2 0041
-2082 0061
-2082 0041
0662 0061
-0662 0041
06F2 0061
-06F2 0041
-10E61 0061
-10E61 0041
07C2 0061
-07C2 0041
-136A 0061
-136A 0041
-104A2 0061
-104A2 0041
0968 0061
-0968 0041
09E8 0061
-09E8 0041
0A68 0061
-0A68 0041
0AE8 0061
-0AE8 0041
0B68 0061
-0B68 0041
0BE8 0061
-0BE8 0041
0C68 0061
0C7A 0061
0C7D 0061
-0C68 0041
-0C7A 0041
-0C7D 0041
0CE8 0061
-0CE8 0041
0D68 0061
-0D68 0041
-ABF2 0061
-ABF2 0041
-A8D2 0061
-A8D2 0041
-1948 0061
-1948 0041
-19D2 0061
-19D2 0041
-1A82 0061
-1A82 0041
-1A92 0061
-1A92 0041
0E52 0061
-0E52 0041
0ED2 0061
-0ED2 0041
0F22 0061
-0F22 0041
-0F2B 0061
-0F2B 0041
-1C42 0061
-1C42 0041
-A902 0061
-A902 0041
1042 0061
-1042 0041
1092 0061
-1092 0041
-11138 0061
-11138 0041
+136A 0061
17E2 0061
-17E2 0041
17F2 0061
-17F2 0041
-AA52 0061
-AA52 0041
+1812 0061
+1948 0061
+19D2 0061
+1A82 0061
+1A92 0061
1B52 0061
-1B52 0041
-A9D2 0061
-A9D2 0041
1BB2 0061
-1BB2 0041
-1812 0061
-1812 0041
+1C42 0061
1C52 0061
-1C52 0041
-A622 0061
-A622 0041
-110F2 0061
-110F2 0041
3022 0061
-3022 0041
+A622 0061
+A8D2 0061
+A902 0061
+A9D2 0061
+AA52 0061
+ABF2 0061
10108 0061
-10108 0041
1015B 0061
1015C 0061
1015D 0061
1015E 0061
-1015B 0041
-1015C 0041
-1015D 0041
-1015E 0041
103D2 0061
-103D2 0041
+104A2 0061
+10859 0061
+1091A 0061
+10A41 0061
+10B59 0061
+10B79 0061
+10E61 0061
+11053 0061
+11068 0061
+110F2 0061
+11138 0061
+111D2 0061
+116C2 0061
12400 0061
12416 0061
1241F 0061
12435 0061
1244A 0061
12450 0061
+12456 0061
12459 0061
+1D361 0061
+0032 0041
+0662 0041
+06F2 0041
+07C2 0041
+0968 0041
+09E8 0041
+0A68 0041
+0AE8 0041
+0B68 0041
+0BE8 0041
+0C68 0041
+0C7A 0041
+0C7D 0041
+0CE8 0041
+0D68 0041
+0E52 0041
+0ED2 0041
+0F22 0041
+1042 0041
+1092 0041
+136A 0041
+17E2 0041
+17F2 0041
+1812 0041
+1948 0041
+19D2 0041
+1A82 0041
+1A92 0041
+1B52 0041
+1BB2 0041
+1C42 0041
+1C52 0041
+3022 0041
+A622 0041
+A8D2 0041
+A902 0041
+A9D2 0041
+AA52 0041
+ABF2 0041
+10108 0041
+1015B 0041
+1015C 0041
+1015D 0041
+1015E 0041
+103D2 0041
+104A2 0041
+10859 0041
+1091A 0041
+10A41 0041
+10B59 0041
+10B79 0041
+10E61 0041
+11053 0041
+11068 0041
+110F2 0041
+11138 0041
+111D2 0041
+116C2 0041
12400 0041
12416 0041
1241F 0041
12435 0041
1244A 0041
12450 0041
+12456 0041
12459 0041
-1091A 0061
-1091A 0041
-10859 0061
-10859 0041
-10B59 0061
-10B59 0041
-10B79 0061
-10B79 0041
-111D2 0061
-111D2 0041
-116C2 0061
-116C2 0041
-11068 0061
-11068 0041
-11053 0061
-11053 0041
-10A41 0061
-10A41 0041
-1D361 0061
1D361 0041
+FF12 0061
+FF12 0041
+0F2B 0061
+0F2B 0041
+1D7D0 0061
+1D7DA 0061
+1D7E4 0061
+1D7EE 0061
+1D7F8 0061
+1D7D0 0041
+1D7DA 0041
+1D7E4 0041
+1D7EE 0041
+1D7F8 0041
+2461 0061
+24F6 0061
+2777 0061
+2781 0061
+278B 0061
+2461 0041
+24F6 0041
+2777 0041
+2781 0041
+278B 0041
+00B2 0061
+00B2 0041
+2082 0061
+2082 0041
0032 0062
-FF12 0062
-1D7D0 0062
-1D7DA 0062
-1D7E4 0062
-1D7EE 0062
-1D7F8 0062
-2461 0062
-24F6 0062
-2777 0062
-2781 0062
-278B 0062
-00B2 0062
-2082 0062
0662 0062
06F2 0062
-10E61 0062
07C2 0062
-136A 0062
-104A2 0062
0968 0062
09E8 0062
0A68 0062
0C7D 0062
0CE8 0062
0D68 0062
-ABF2 0062
-A8D2 0062
-1948 0062
-19D2 0062
-1A82 0062
-1A92 0062
0E52 0062
0ED2 0062
0F22 0062
-0F2B 0062
-1C42 0062
-A902 0062
1042 0062
1092 0062
-11138 0062
+136A 0062
17E2 0062
17F2 0062
-AA52 0062
+1812 0062
+1948 0062
+19D2 0062
+1A82 0062
+1A92 0062
1B52 0062
-A9D2 0062
1BB2 0062
-1812 0062
+1C42 0062
1C52 0062
-A622 0062
-110F2 0062
3022 0062
+A622 0062
+A8D2 0062
+A902 0062
+A9D2 0062
+AA52 0062
+ABF2 0062
10108 0062
1015B 0062
1015C 0062
1015D 0062
1015E 0062
103D2 0062
+104A2 0062
+10859 0062
+1091A 0062
+10A41 0062
+10B59 0062
+10B79 0062
+10E61 0062
+11053 0062
+11068 0062
+110F2 0062
+11138 0062
+111D2 0062
+116C2 0062
12400 0062
12416 0062
1241F 0062
12435 0062
1244A 0062
12450 0062
+12456 0062
12459 0062
-1091A 0062
-10859 0062
-10B59 0062
-10B79 0062
-111D2 0062
-116C2 0062
-11068 0062
-11053 0062
-10A41 0062
1D361 0062
+FF12 0062
+0F2B 0062
+1D7D0 0062
+1D7DA 0062
+1D7E4 0062
+1D7EE 0062
+1D7F8 0062
+2461 0062
+24F6 0062
+2777 0062
+2781 0062
+278B 0062
+00B2 0062
+2082 0062
33E1 0021
33E1 003F
33E1 0061
1F104 0041
1F104 0062
0033 0021
-FF13 0021
-1D7D1 0021
-1D7DB 0021
-1D7E5 0021
-1D7EF 0021
-1D7F9 0021
-2462 0021
-24F7 0021
-2778 0021
-2782 0021
-278C 0021
-00B3 0021
-2083 0021
0663 0021
06F3 0021
-10E62 0021
07C3 0021
-136B 0021
-104A3 0021
0969 0021
09E9 0021
0A69 0021
0C7E 0021
0CE9 0021
0D69 0021
-ABF3 0021
-A8D3 0021
-1949 0021
-19D3 0021
-1A83 0021
-1A93 0021
0E53 0021
0ED3 0021
0F23 0021
-0F2C 0021
-1C43 0021
-A903 0021
1043 0021
1093 0021
-11139 0021
+136B 0021
17E3 0021
17F3 0021
-AA53 0021
+1813 0021
+1949 0021
+19D3 0021
+1A83 0021
+1A93 0021
1B53 0021
-A9D3 0021
1BB3 0021
-1813 0021
+1C43 0021
1C53 0021
-A623 0021
-110F3 0021
3023 0021
+A623 0021
+A8D3 0021
+A903 0021
+A9D3 0021
+AA53 0021
+ABF3 0021
10109 0021
+104A3 0021
+1085A 0021
+1091B 0021
+10A42 0021
+10B5A 0021
+10B7A 0021
+10E62 0021
+11054 0021
+11069 0021
+110F3 0021
+11139 0021
+111D3 0021
+116C3 0021
12401 0021
12408 0021
12417 0021
1243B 0021
1244B 0021
12451 0021
-1091B 0021
-1085A 0021
-10B5A 0021
-10B7A 0021
-111D3 0021
-116C3 0021
-11069 0021
-11054 0021
-10A42 0021
+12457 0021
1D362 0021
+FF13 0021
+0F2C 0021
+1D7D1 0021
+1D7DB 0021
+1D7E5 0021
+1D7EF 0021
+1D7F9 0021
+2462 0021
+24F7 0021
+2778 0021
+2782 0021
+278C 0021
+00B3 0021
+2083 0021
0033 003F
-FF13 003F
-1D7D1 003F
-1D7DB 003F
-1D7E5 003F
-1D7EF 003F
-1D7F9 003F
-2462 003F
-24F7 003F
-2778 003F
-2782 003F
-278C 003F
-00B3 003F
-2083 003F
0663 003F
06F3 003F
-10E62 003F
07C3 003F
-136B 003F
-104A3 003F
0969 003F
09E9 003F
0A69 003F
0C7E 003F
0CE9 003F
0D69 003F
-ABF3 003F
-A8D3 003F
-1949 003F
-19D3 003F
-1A83 003F
-1A93 003F
0E53 003F
0ED3 003F
0F23 003F
-0F2C 003F
-1C43 003F
-A903 003F
1043 003F
1093 003F
-11139 003F
+136B 003F
17E3 003F
17F3 003F
-AA53 003F
+1813 003F
+1949 003F
+19D3 003F
+1A83 003F
+1A93 003F
1B53 003F
-A9D3 003F
1BB3 003F
-1813 003F
+1C43 003F
1C53 003F
-A623 003F
-110F3 003F
3023 003F
+A623 003F
+A8D3 003F
+A903 003F
+A9D3 003F
+AA53 003F
+ABF3 003F
10109 003F
+104A3 003F
+1085A 003F
+1091B 003F
+10A42 003F
+10B5A 003F
+10B7A 003F
+10E62 003F
+11054 003F
+11069 003F
+110F3 003F
+11139 003F
+111D3 003F
+116C3 003F
12401 003F
12408 003F
12417 003F
1243B 003F
1244B 003F
12451 003F
-1091B 003F
-1085A 003F
-10B5A 003F
-10B7A 003F
-111D3 003F
-116C3 003F
-11069 003F
-11054 003F
-10A42 003F
+12457 003F
1D362 003F
+FF13 003F
+0F2C 003F
+1D7D1 003F
+1D7DB 003F
+1D7E5 003F
+1D7EF 003F
+1D7F9 003F
+2462 003F
+24F7 003F
+2778 003F
+2782 003F
+278C 003F
+00B3 003F
+2083 003F
248A 0021
248A 003F
248A 0061
32B4 0041
32B4 0062
0033 0061
-0033 0041
-FF13 0061
-FF13 0041
-1D7D1 0061
-1D7DB 0061
-1D7E5 0061
-1D7EF 0061
-1D7F9 0061
-1D7D1 0041
-1D7DB 0041
-1D7E5 0041
-1D7EF 0041
-1D7F9 0041
-2462 0061
-24F7 0061
-2778 0061
-2782 0061
-278C 0061
-2462 0041
-24F7 0041
-2778 0041
-2782 0041
-278C 0041
-00B3 0061
-00B3 0041
-2083 0061
-2083 0041
0663 0061
-0663 0041
06F3 0061
-06F3 0041
-10E62 0061
-10E62 0041
07C3 0061
-07C3 0041
-136B 0061
-136B 0041
-104A3 0061
-104A3 0041
0969 0061
-0969 0041
09E9 0061
-09E9 0041
0A69 0061
-0A69 0041
0AE9 0061
-0AE9 0041
0B69 0061
-0B69 0041
0BE9 0061
-0BE9 0041
0C69 0061
0C7B 0061
0C7E 0061
-0C69 0041
-0C7B 0041
-0C7E 0041
0CE9 0061
-0CE9 0041
0D69 0061
-0D69 0041
-ABF3 0061
-ABF3 0041
-A8D3 0061
-A8D3 0041
-1949 0061
-1949 0041
-19D3 0061
-19D3 0041
-1A83 0061
-1A83 0041
-1A93 0061
-1A93 0041
0E53 0061
-0E53 0041
0ED3 0061
-0ED3 0041
0F23 0061
-0F23 0041
-0F2C 0061
-0F2C 0041
-1C43 0061
-1C43 0041
-A903 0061
-A903 0041
1043 0061
-1043 0041
1093 0061
-1093 0041
-11139 0061
-11139 0041
+136B 0061
17E3 0061
-17E3 0041
17F3 0061
-17F3 0041
-AA53 0061
-AA53 0041
+1813 0061
+1949 0061
+19D3 0061
+1A83 0061
+1A93 0061
1B53 0061
-1B53 0041
-A9D3 0061
-A9D3 0041
1BB3 0061
-1BB3 0041
-1813 0061
-1813 0041
+1C43 0061
1C53 0061
-1C53 0041
-A623 0061
-A623 0041
-110F3 0061
-110F3 0041
3023 0061
-3023 0041
+A623 0061
+A8D3 0061
+A903 0061
+A9D3 0061
+AA53 0061
+ABF3 0061
10109 0061
-10109 0041
+104A3 0061
+1085A 0061
+1091B 0061
+10A42 0061
+10B5A 0061
+10B7A 0061
+10E62 0061
+11054 0061
+11069 0061
+110F3 0061
+11139 0061
+111D3 0061
+116C3 0061
12401 0061
12408 0061
12417 0061
1243B 0061
1244B 0061
12451 0061
+12457 0061
+1D362 0061
+0033 0041
+0663 0041
+06F3 0041
+07C3 0041
+0969 0041
+09E9 0041
+0A69 0041
+0AE9 0041
+0B69 0041
+0BE9 0041
+0C69 0041
+0C7B 0041
+0C7E 0041
+0CE9 0041
+0D69 0041
+0E53 0041
+0ED3 0041
+0F23 0041
+1043 0041
+1093 0041
+136B 0041
+17E3 0041
+17F3 0041
+1813 0041
+1949 0041
+19D3 0041
+1A83 0041
+1A93 0041
+1B53 0041
+1BB3 0041
+1C43 0041
+1C53 0041
+3023 0041
+A623 0041
+A8D3 0041
+A903 0041
+A9D3 0041
+AA53 0041
+ABF3 0041
+10109 0041
+104A3 0041
+1085A 0041
+1091B 0041
+10A42 0041
+10B5A 0041
+10B7A 0041
+10E62 0041
+11054 0041
+11069 0041
+110F3 0041
+11139 0041
+111D3 0041
+116C3 0041
12401 0041
12408 0041
12417 0041
1243B 0041
1244B 0041
12451 0041
-1091B 0061
-1091B 0041
-1085A 0061
-1085A 0041
-10B5A 0061
-10B5A 0041
-10B7A 0061
-10B7A 0041
-111D3 0061
-111D3 0041
-116C3 0061
-116C3 0041
-11069 0061
-11069 0041
-11054 0061
-11054 0041
-10A42 0061
-10A42 0041
-1D362 0061
+12457 0041
1D362 0041
+FF13 0061
+FF13 0041
+0F2C 0061
+0F2C 0041
+1D7D1 0061
+1D7DB 0061
+1D7E5 0061
+1D7EF 0061
+1D7F9 0061
+1D7D1 0041
+1D7DB 0041
+1D7E5 0041
+1D7EF 0041
+1D7F9 0041
+2462 0061
+24F7 0061
+2778 0061
+2782 0061
+278C 0061
+2462 0041
+24F7 0041
+2778 0041
+2782 0041
+278C 0041
+00B3 0061
+00B3 0041
+2083 0061
+2083 0041
0033 0062
-FF13 0062
-1D7D1 0062
-1D7DB 0062
-1D7E5 0062
-1D7EF 0062
-1D7F9 0062
-2462 0062
-24F7 0062
-2778 0062
-2782 0062
-278C 0062
-00B3 0062
-2083 0062
0663 0062
06F3 0062
-10E62 0062
07C3 0062
-136B 0062
-104A3 0062
0969 0062
09E9 0062
0A69 0062
0C7E 0062
0CE9 0062
0D69 0062
-ABF3 0062
-A8D3 0062
-1949 0062
-19D3 0062
-1A83 0062
-1A93 0062
0E53 0062
0ED3 0062
0F23 0062
-0F2C 0062
-1C43 0062
-A903 0062
1043 0062
1093 0062
-11139 0062
+136B 0062
17E3 0062
17F3 0062
-AA53 0062
+1813 0062
+1949 0062
+19D3 0062
+1A83 0062
+1A93 0062
1B53 0062
-A9D3 0062
1BB3 0062
-1813 0062
+1C43 0062
1C53 0062
-A623 0062
-110F3 0062
3023 0062
+A623 0062
+A8D3 0062
+A903 0062
+A9D3 0062
+AA53 0062
+ABF3 0062
10109 0062
+104A3 0062
+1085A 0062
+1091B 0062
+10A42 0062
+10B5A 0062
+10B7A 0062
+10E62 0062
+11054 0062
+11069 0062
+110F3 0062
+11139 0062
+111D3 0062
+116C3 0062
12401 0062
12408 0062
12417 0062
1243B 0062
1244B 0062
12451 0062
-1091B 0062
-1085A 0062
-10B5A 0062
-10B7A 0062
-111D3 0062
-116C3 0062
-11069 0062
-11054 0062
-10A42 0062
+12457 0062
1D362 0062
+FF13 0062
+0F2C 0062
+1D7D1 0062
+1D7DB 0062
+1D7E5 0062
+1D7EF 0062
+1D7F9 0062
+2462 0062
+24F7 0062
+2778 0062
+2782 0062
+278C 0062
+00B3 0062
+2083 0062
33E2 0021
33E2 003F
33E2 0061
1F105 0041
1F105 0062
0034 0021
-FF14 0021
-1D7D2 0021
-1D7DC 0021
-1D7E6 0021
-1D7F0 0021
-1D7FA 0021
-2463 0021
-24F8 0021
-2779 0021
-2783 0021
-278D 0021
-2074 0021
-2084 0021
0664 0021
06F4 0021
-10E63 0021
07C4 0021
-136C 0021
-104A4 0021
096A 0021
09EA 0021
0A6A 0021
0C6A 0021
0CEA 0021
0D6A 0021
-ABF4 0021
-A8D4 0021
-194A 0021
-19D4 0021
-1A84 0021
-1A94 0021
0E54 0021
0ED4 0021
0F24 0021
-0F2D 0021
-1C44 0021
-A904 0021
1044 0021
1094 0021
-1113A 0021
+136C 0021
17E4 0021
17F4 0021
-AA54 0021
+1814 0021
+194A 0021
+19D4 0021
+1A84 0021
+1A94 0021
1B54 0021
-A9D4 0021
1BB4 0021
-1814 0021
+1C44 0021
1C54 0021
-A624 0021
-110F4 0021
3024 0021
+A624 0021
+A8D4 0021
+A904 0021
+A9D4 0021
+AA54 0021
+ABF4 0021
1010A 0021
+104A4 0021
+10A43 0021
+10B5B 0021
+10B7B 0021
+10E63 0021
+11055 0021
+1106A 0021
+110F4 0021
+1113A 0021
+111D4 0021
+116C4 0021
12402 0021
12409 0021
1240F 0021
1244C 0021
12452 0021
12453 0021
-10B5B 0021
-10B7B 0021
-111D4 0021
-116C4 0021
-1106A 0021
-11055 0021
-10A43 0021
1D363 0021
+FF14 0021
+0F2D 0021
+1D7D2 0021
+1D7DC 0021
+1D7E6 0021
+1D7F0 0021
+1D7FA 0021
+2463 0021
+24F8 0021
+2779 0021
+2783 0021
+278D 0021
+2074 0021
+2084 0021
0034 003F
-FF14 003F
-1D7D2 003F
-1D7DC 003F
-1D7E6 003F
-1D7F0 003F
-1D7FA 003F
-2463 003F
-24F8 003F
-2779 003F
-2783 003F
-278D 003F
-2074 003F
-2084 003F
0664 003F
06F4 003F
-10E63 003F
07C4 003F
-136C 003F
-104A4 003F
096A 003F
09EA 003F
0A6A 003F
0C6A 003F
0CEA 003F
0D6A 003F
-ABF4 003F
-A8D4 003F
-194A 003F
-19D4 003F
-1A84 003F
-1A94 003F
0E54 003F
0ED4 003F
0F24 003F
-0F2D 003F
-1C44 003F
-A904 003F
1044 003F
1094 003F
-1113A 003F
+136C 003F
17E4 003F
17F4 003F
-AA54 003F
+1814 003F
+194A 003F
+19D4 003F
+1A84 003F
+1A94 003F
1B54 003F
-A9D4 003F
1BB4 003F
-1814 003F
+1C44 003F
1C54 003F
-A624 003F
-110F4 003F
3024 003F
+A624 003F
+A8D4 003F
+A904 003F
+A9D4 003F
+AA54 003F
+ABF4 003F
1010A 003F
+104A4 003F
+10A43 003F
+10B5B 003F
+10B7B 003F
+10E63 003F
+11055 003F
+1106A 003F
+110F4 003F
+1113A 003F
+111D4 003F
+116C4 003F
12402 003F
12409 003F
1240F 003F
1244C 003F
12452 003F
12453 003F
-10B5B 003F
-10B7B 003F
-111D4 003F
-116C4 003F
-1106A 003F
-11055 003F
-10A43 003F
1D363 003F
+FF14 003F
+0F2D 003F
+1D7D2 003F
+1D7DC 003F
+1D7E6 003F
+1D7F0 003F
+1D7FA 003F
+2463 003F
+24F8 003F
+2779 003F
+2783 003F
+278D 003F
+2074 003F
+2084 003F
248B 0021
248B 003F
248B 0061
32BE 0041
32BE 0062
0034 0061
-0034 0041
-FF14 0061
-FF14 0041
-1D7D2 0061
-1D7DC 0061
-1D7E6 0061
-1D7F0 0061
-1D7FA 0061
-1D7D2 0041
-1D7DC 0041
-1D7E6 0041
-1D7F0 0041
-1D7FA 0041
-2463 0061
-24F8 0061
-2779 0061
-2783 0061
-278D 0061
-2463 0041
-24F8 0041
-2779 0041
-2783 0041
-278D 0041
-2074 0061
-2074 0041
-2084 0061
-2084 0041
0664 0061
-0664 0041
06F4 0061
-06F4 0041
-10E63 0061
-10E63 0041
07C4 0061
-07C4 0041
-136C 0061
-136C 0041
-104A4 0061
-104A4 0041
096A 0061
-096A 0041
09EA 0061
-09EA 0041
0A6A 0061
-0A6A 0041
0AEA 0061
-0AEA 0041
0B6A 0061
-0B6A 0041
0BEA 0061
-0BEA 0041
0C6A 0061
-0C6A 0041
0CEA 0061
-0CEA 0041
0D6A 0061
-0D6A 0041
-ABF4 0061
-ABF4 0041
-A8D4 0061
-A8D4 0041
-194A 0061
-194A 0041
-19D4 0061
-19D4 0041
-1A84 0061
-1A84 0041
-1A94 0061
-1A94 0041
0E54 0061
-0E54 0041
0ED4 0061
-0ED4 0041
0F24 0061
-0F24 0041
-0F2D 0061
-0F2D 0041
-1C44 0061
-1C44 0041
-A904 0061
-A904 0041
1044 0061
-1044 0041
1094 0061
-1094 0041
-1113A 0061
-1113A 0041
+136C 0061
17E4 0061
-17E4 0041
17F4 0061
-17F4 0041
-AA54 0061
-AA54 0041
+1814 0061
+194A 0061
+19D4 0061
+1A84 0061
+1A94 0061
1B54 0061
-1B54 0041
-A9D4 0061
-A9D4 0041
1BB4 0061
-1BB4 0041
-1814 0061
-1814 0041
+1C44 0061
1C54 0061
-1C54 0041
-A624 0061
-A624 0041
-110F4 0061
-110F4 0041
3024 0061
-3024 0041
+A624 0061
+A8D4 0061
+A904 0061
+A9D4 0061
+AA54 0061
+ABF4 0061
1010A 0061
-1010A 0041
+104A4 0061
+10A43 0061
+10B5B 0061
+10B7B 0061
+10E63 0061
+11055 0061
+1106A 0061
+110F4 0061
+1113A 0061
+111D4 0061
+116C4 0061
12402 0061
12409 0061
1240F 0061
1244C 0061
12452 0061
12453 0061
+1D363 0061
+0034 0041
+0664 0041
+06F4 0041
+07C4 0041
+096A 0041
+09EA 0041
+0A6A 0041
+0AEA 0041
+0B6A 0041
+0BEA 0041
+0C6A 0041
+0CEA 0041
+0D6A 0041
+0E54 0041
+0ED4 0041
+0F24 0041
+1044 0041
+1094 0041
+136C 0041
+17E4 0041
+17F4 0041
+1814 0041
+194A 0041
+19D4 0041
+1A84 0041
+1A94 0041
+1B54 0041
+1BB4 0041
+1C44 0041
+1C54 0041
+3024 0041
+A624 0041
+A8D4 0041
+A904 0041
+A9D4 0041
+AA54 0041
+ABF4 0041
+1010A 0041
+104A4 0041
+10A43 0041
+10B5B 0041
+10B7B 0041
+10E63 0041
+11055 0041
+1106A 0041
+110F4 0041
+1113A 0041
+111D4 0041
+116C4 0041
12402 0041
12409 0041
1240F 0041
1244C 0041
12452 0041
12453 0041
-10B5B 0061
-10B5B 0041
-10B7B 0061
-10B7B 0041
-111D4 0061
-111D4 0041
-116C4 0061
-116C4 0041
-1106A 0061
-1106A 0041
-11055 0061
-11055 0041
-10A43 0061
-10A43 0041
-1D363 0061
1D363 0041
+FF14 0061
+FF14 0041
+0F2D 0061
+0F2D 0041
+1D7D2 0061
+1D7DC 0061
+1D7E6 0061
+1D7F0 0061
+1D7FA 0061
+1D7D2 0041
+1D7DC 0041
+1D7E6 0041
+1D7F0 0041
+1D7FA 0041
+2463 0061
+24F8 0061
+2779 0061
+2783 0061
+278D 0061
+2463 0041
+24F8 0041
+2779 0041
+2783 0041
+278D 0041
+2074 0061
+2074 0041
+2084 0061
+2084 0041
0034 0062
-FF14 0062
-1D7D2 0062
-1D7DC 0062
-1D7E6 0062
-1D7F0 0062
-1D7FA 0062
-2463 0062
-24F8 0062
-2779 0062
-2783 0062
-278D 0062
-2074 0062
-2084 0062
0664 0062
06F4 0062
-10E63 0062
07C4 0062
-136C 0062
-104A4 0062
096A 0062
09EA 0062
0A6A 0062
0C6A 0062
0CEA 0062
0D6A 0062
-ABF4 0062
-A8D4 0062
-194A 0062
-19D4 0062
-1A84 0062
-1A94 0062
0E54 0062
0ED4 0062
0F24 0062
-0F2D 0062
-1C44 0062
-A904 0062
1044 0062
1094 0062
-1113A 0062
+136C 0062
17E4 0062
17F4 0062
-AA54 0062
+1814 0062
+194A 0062
+19D4 0062
+1A84 0062
+1A94 0062
1B54 0062
-A9D4 0062
1BB4 0062
-1814 0062
+1C44 0062
1C54 0062
-A624 0062
-110F4 0062
3024 0062
+A624 0062
+A8D4 0062
+A904 0062
+A9D4 0062
+AA54 0062
+ABF4 0062
1010A 0062
+104A4 0062
+10A43 0062
+10B5B 0062
+10B7B 0062
+10E63 0062
+11055 0062
+1106A 0062
+110F4 0062
+1113A 0062
+111D4 0062
+116C4 0062
12402 0062
12409 0062
1240F 0062
1244C 0062
12452 0062
12453 0062
-10B5B 0062
-10B7B 0062
-111D4 0062
-116C4 0062
-1106A 0062
-11055 0062
-10A43 0062
1D363 0062
+FF14 0062
+0F2D 0062
+1D7D2 0062
+1D7DC 0062
+1D7E6 0062
+1D7F0 0062
+1D7FA 0062
+2463 0062
+24F8 0062
+2779 0062
+2783 0062
+278D 0062
+2074 0062
+2084 0062
33E3 0021
33E3 003F
33E3 0061
1F106 0041
1F106 0062
0035 0021
-FF15 0021
-1D7D3 0021
-1D7DD 0021
-1D7E7 0021
-1D7F1 0021
-1D7FB 0021
-2464 0021
-24F9 0021
-277A 0021
-2784 0021
-278E 0021
-2075 0021
-2085 0021
0665 0021
06F5 0021
-10E64 0021
07C5 0021
-136D 0021
-104A5 0021
096B 0021
09EB 0021
0A6B 0021
0C6B 0021
0CEB 0021
0D6B 0021
-ABF5 0021
-A8D5 0021
-194B 0021
-19D5 0021
-1A85 0021
-1A95 0021
0E55 0021
0ED5 0021
0F25 0021
-0F2E 0021
-1C45 0021
-A905 0021
1045 0021
1095 0021
-1113B 0021
+136D 0021
17E5 0021
17F5 0021
-AA55 0021
+1815 0021
+194B 0021
+19D5 0021
+1A85 0021
+1A95 0021
1B55 0021
-A9D5 0021
1BB5 0021
-1815 0021
+1C45 0021
1C55 0021
-A625 0021
-110F5 0021
3025 0021
+A625 0021
+A8D5 0021
+A905 0021
+A9D5 0021
+AA55 0021
+ABF5 0021
1010B 0021
10143 0021
10148 0021
1015F 0021
10173 0021
10321 0021
+104A5 0021
+10E64 0021
+11056 0021
+1106B 0021
+110F5 0021
+1113B 0021
+111D5 0021
+116C5 0021
12403 0021
1240A 0021
12410 0021
1244D 0021
12454 0021
12455 0021
-111D5 0021
-116C5 0021
-1106B 0021
-11056 0021
1D364 0021
+FF15 0021
+0F2E 0021
+1D7D3 0021
+1D7DD 0021
+1D7E7 0021
+1D7F1 0021
+1D7FB 0021
+2464 0021
+24F9 0021
+277A 0021
+2784 0021
+278E 0021
+2075 0021
+2085 0021
0035 003F
-FF15 003F
-1D7D3 003F
-1D7DD 003F
-1D7E7 003F
-1D7F1 003F
-1D7FB 003F
-2464 003F
-24F9 003F
-277A 003F
-2784 003F
-278E 003F
-2075 003F
-2085 003F
0665 003F
06F5 003F
-10E64 003F
07C5 003F
-136D 003F
-104A5 003F
096B 003F
09EB 003F
0A6B 003F
0C6B 003F
0CEB 003F
0D6B 003F
-ABF5 003F
-A8D5 003F
-194B 003F
-19D5 003F
-1A85 003F
-1A95 003F
0E55 003F
0ED5 003F
0F25 003F
-0F2E 003F
-1C45 003F
-A905 003F
1045 003F
1095 003F
-1113B 003F
+136D 003F
17E5 003F
17F5 003F
-AA55 003F
+1815 003F
+194B 003F
+19D5 003F
+1A85 003F
+1A95 003F
1B55 003F
-A9D5 003F
1BB5 003F
-1815 003F
+1C45 003F
1C55 003F
-A625 003F
-110F5 003F
3025 003F
+A625 003F
+A8D5 003F
+A905 003F
+A9D5 003F
+AA55 003F
+ABF5 003F
1010B 003F
10143 003F
10148 003F
1015F 003F
10173 003F
10321 003F
+104A5 003F
+10E64 003F
+11056 003F
+1106B 003F
+110F5 003F
+1113B 003F
+111D5 003F
+116C5 003F
12403 003F
1240A 003F
12410 003F
1244D 003F
12454 003F
12455 003F
-111D5 003F
-116C5 003F
-1106B 003F
-11056 003F
1D364 003F
+FF15 003F
+0F2E 003F
+1D7D3 003F
+1D7DD 003F
+1D7E7 003F
+1D7F1 003F
+1D7FB 003F
+2464 003F
+24F9 003F
+277A 003F
+2784 003F
+278E 003F
+2075 003F
+2085 003F
248C 0021
248C 003F
248C 0061
324C 0062
32BF 0062
0035 0061
-0035 0041
-FF15 0061
-FF15 0041
-1D7D3 0061
-1D7DD 0061
-1D7E7 0061
-1D7F1 0061
-1D7FB 0061
-1D7D3 0041
-1D7DD 0041
-1D7E7 0041
-1D7F1 0041
-1D7FB 0041
-2464 0061
-24F9 0061
-277A 0061
-2784 0061
-278E 0061
-2464 0041
-24F9 0041
-277A 0041
-2784 0041
-278E 0041
-2075 0061
-2075 0041
-2085 0061
-2085 0041
0665 0061
-0665 0041
06F5 0061
-06F5 0041
-10E64 0061
-10E64 0041
07C5 0061
-07C5 0041
-136D 0061
-136D 0041
-104A5 0061
-104A5 0041
096B 0061
-096B 0041
09EB 0061
-09EB 0041
0A6B 0061
-0A6B 0041
0AEB 0061
-0AEB 0041
0B6B 0061
-0B6B 0041
0BEB 0061
-0BEB 0041
0C6B 0061
-0C6B 0041
0CEB 0061
-0CEB 0041
0D6B 0061
-0D6B 0041
-ABF5 0061
-ABF5 0041
-A8D5 0061
-A8D5 0041
-194B 0061
-194B 0041
-19D5 0061
-19D5 0041
-1A85 0061
-1A85 0041
-1A95 0061
-1A95 0041
0E55 0061
-0E55 0041
0ED5 0061
-0ED5 0041
0F25 0061
-0F25 0041
-0F2E 0061
-0F2E 0041
-1C45 0061
-1C45 0041
-A905 0061
-A905 0041
1045 0061
-1045 0041
1095 0061
-1095 0041
-1113B 0061
-1113B 0041
+136D 0061
17E5 0061
-17E5 0041
17F5 0061
-17F5 0041
-AA55 0061
-AA55 0041
+1815 0061
+194B 0061
+19D5 0061
+1A85 0061
+1A95 0061
1B55 0061
-1B55 0041
-A9D5 0061
-A9D5 0041
1BB5 0061
-1BB5 0041
-1815 0061
-1815 0041
+1C45 0061
1C55 0061
-1C55 0041
-A625 0061
-A625 0041
-110F5 0061
-110F5 0041
3025 0061
-3025 0041
+A625 0061
+A8D5 0061
+A905 0061
+A9D5 0061
+AA55 0061
+ABF5 0061
1010B 0061
-1010B 0041
10143 0061
10148 0061
1014F 0061
1015F 0061
10173 0061
-10143 0041
-10148 0041
-1014F 0041
-1015F 0041
-10173 0041
10321 0061
-10321 0041
+104A5 0061
+10E64 0061
+11056 0061
+1106B 0061
+110F5 0061
+1113B 0061
+111D5 0061
+116C5 0061
12403 0061
1240A 0061
12410 0061
1244D 0061
12454 0061
12455 0061
+1D364 0061
+0035 0041
+0665 0041
+06F5 0041
+07C5 0041
+096B 0041
+09EB 0041
+0A6B 0041
+0AEB 0041
+0B6B 0041
+0BEB 0041
+0C6B 0041
+0CEB 0041
+0D6B 0041
+0E55 0041
+0ED5 0041
+0F25 0041
+1045 0041
+1095 0041
+136D 0041
+17E5 0041
+17F5 0041
+1815 0041
+194B 0041
+19D5 0041
+1A85 0041
+1A95 0041
+1B55 0041
+1BB5 0041
+1C45 0041
+1C55 0041
+3025 0041
+A625 0041
+A8D5 0041
+A905 0041
+A9D5 0041
+AA55 0041
+ABF5 0041
+1010B 0041
+10143 0041
+10148 0041
+1014F 0041
+1015F 0041
+10173 0041
+10321 0041
+104A5 0041
+10E64 0041
+11056 0041
+1106B 0041
+110F5 0041
+1113B 0041
+111D5 0041
+116C5 0041
12403 0041
1240A 0041
12410 0041
1244D 0041
12454 0041
12455 0041
-111D5 0061
-111D5 0041
-116C5 0061
-116C5 0041
-1106B 0061
-1106B 0041
-11056 0061
-11056 0041
-1D364 0061
1D364 0041
+FF15 0061
+FF15 0041
+0F2E 0061
+0F2E 0041
+1D7D3 0061
+1D7DD 0061
+1D7E7 0061
+1D7F1 0061
+1D7FB 0061
+1D7D3 0041
+1D7DD 0041
+1D7E7 0041
+1D7F1 0041
+1D7FB 0041
+2464 0061
+24F9 0061
+277A 0061
+2784 0061
+278E 0061
+2464 0041
+24F9 0041
+277A 0041
+2784 0041
+278E 0041
+2075 0061
+2075 0041
+2085 0061
+2085 0041
0035 0062
-FF15 0062
-1D7D3 0062
-1D7DD 0062
-1D7E7 0062
-1D7F1 0062
-1D7FB 0062
-2464 0062
-24F9 0062
-277A 0062
-2784 0062
-278E 0062
-2075 0062
-2085 0062
0665 0062
06F5 0062
-10E64 0062
07C5 0062
-136D 0062
-104A5 0062
096B 0062
09EB 0062
0A6B 0062
0C6B 0062
0CEB 0062
0D6B 0062
-ABF5 0062
-A8D5 0062
-194B 0062
-19D5 0062
-1A85 0062
-1A95 0062
0E55 0062
0ED5 0062
0F25 0062
-0F2E 0062
-1C45 0062
-A905 0062
1045 0062
1095 0062
-1113B 0062
+136D 0062
17E5 0062
17F5 0062
-AA55 0062
+1815 0062
+194B 0062
+19D5 0062
+1A85 0062
+1A95 0062
1B55 0062
-A9D5 0062
1BB5 0062
-1815 0062
+1C45 0062
1C55 0062
-A625 0062
-110F5 0062
3025 0062
+A625 0062
+A8D5 0062
+A905 0062
+A9D5 0062
+AA55 0062
+ABF5 0062
1010B 0062
10143 0062
10148 0062
1015F 0062
10173 0062
10321 0062
+104A5 0062
+10E64 0062
+11056 0062
+1106B 0062
+110F5 0062
+1113B 0062
+111D5 0062
+116C5 0062
12403 0062
1240A 0062
12410 0062
1244D 0062
12454 0062
12455 0062
-111D5 0062
-116C5 0062
-1106B 0062
-11056 0062
1D364 0062
+FF15 0062
+0F2E 0062
+1D7D3 0062
+1D7DD 0062
+1D7E7 0062
+1D7F1 0062
+1D7FB 0062
+2464 0062
+24F9 0062
+277A 0062
+2784 0062
+278E 0062
+2075 0062
+2085 0062
33E4 0021
33E4 003F
33E4 0061
1F107 0041
1F107 0062
0036 0021
-FF16 0021
-1D7D4 0021
-1D7DE 0021
-1D7E8 0021
-1D7F2 0021
-1D7FC 0021
-2465 0021
-24FA 0021
-277B 0021
-2785 0021
-278F 0021
-2076 0021
-2086 0021
0666 0021
06F6 0021
-10E65 0021
07C6 0021
-136E 0021
-104A6 0021
096C 0021
09EC 0021
0A6C 0021
0C6C 0021
0CEC 0021
0D6C 0021
-ABF6 0021
-A8D6 0021
-194C 0021
-19D6 0021
-1A86 0021
-1A96 0021
0E56 0021
0ED6 0021
0F26 0021
-0F2F 0021
-1C46 0021
-A906 0021
1046 0021
1096 0021
-1113C 0021
+136E 0021
17E6 0021
17F6 0021
-AA56 0021
+1816 0021
+194C 0021
+19D6 0021
+1A86 0021
+1A96 0021
1B56 0021
-A9D6 0021
1BB6 0021
-1816 0021
+1C46 0021
1C56 0021
-A626 0021
-110F6 0021
+2185 0021
3026 0021
+A626 0021
+A8D6 0021
+A906 0021
+A9D6 0021
+AA56 0021
+ABF6 0021
1010C 0021
-2185 0021
+104A6 0021
+10E65 0021
+11057 0021
+1106C 0021
+110F6 0021
+1113C 0021
+111D6 0021
+116C6 0021
12404 0021
1240B 0021
12411 0021
12428 0021
12440 0021
1244E 0021
-111D6 0021
-116C6 0021
-1106C 0021
-11057 0021
1D365 0021
+FF16 0021
+0F2F 0021
+1D7D4 0021
+1D7DE 0021
+1D7E8 0021
+1D7F2 0021
+1D7FC 0021
+2465 0021
+24FA 0021
+277B 0021
+2785 0021
+278F 0021
+2076 0021
+2086 0021
0036 003F
-FF16 003F
-1D7D4 003F
-1D7DE 003F
-1D7E8 003F
-1D7F2 003F
-1D7FC 003F
-2465 003F
-24FA 003F
-277B 003F
-2785 003F
-278F 003F
-2076 003F
-2086 003F
0666 003F
06F6 003F
-10E65 003F
07C6 003F
-136E 003F
-104A6 003F
096C 003F
09EC 003F
0A6C 003F
0C6C 003F
0CEC 003F
0D6C 003F
-ABF6 003F
-A8D6 003F
-194C 003F
-19D6 003F
-1A86 003F
-1A96 003F
0E56 003F
0ED6 003F
0F26 003F
-0F2F 003F
-1C46 003F
-A906 003F
1046 003F
1096 003F
-1113C 003F
+136E 003F
17E6 003F
17F6 003F
-AA56 003F
+1816 003F
+194C 003F
+19D6 003F
+1A86 003F
+1A96 003F
1B56 003F
-A9D6 003F
1BB6 003F
-1816 003F
+1C46 003F
1C56 003F
-A626 003F
-110F6 003F
+2185 003F
3026 003F
+A626 003F
+A8D6 003F
+A906 003F
+A9D6 003F
+AA56 003F
+ABF6 003F
1010C 003F
-2185 003F
+104A6 003F
+10E65 003F
+11057 003F
+1106C 003F
+110F6 003F
+1113C 003F
+111D6 003F
+116C6 003F
12404 003F
1240B 003F
12411 003F
12428 003F
12440 003F
1244E 003F
-111D6 003F
-116C6 003F
-1106C 003F
-11057 003F
1D365 003F
+FF16 003F
+0F2F 003F
+1D7D4 003F
+1D7DE 003F
+1D7E8 003F
+1D7F2 003F
+1D7FC 003F
+2465 003F
+24FA 003F
+277B 003F
+2785 003F
+278F 003F
+2076 003F
+2086 003F
248D 0021
248D 003F
248D 0061
324D 0041
324D 0062
0036 0061
-0036 0041
-FF16 0061
-FF16 0041
-1D7D4 0061
-1D7DE 0061
-1D7E8 0061
-1D7F2 0061
-1D7FC 0061
-1D7D4 0041
-1D7DE 0041
-1D7E8 0041
-1D7F2 0041
-1D7FC 0041
-2465 0061
-24FA 0061
-277B 0061
-2785 0061
-278F 0061
-2465 0041
-24FA 0041
-277B 0041
-2785 0041
-278F 0041
-2076 0061
-2076 0041
-2086 0061
-2086 0041
0666 0061
-0666 0041
06F6 0061
-06F6 0041
-10E65 0061
-10E65 0041
07C6 0061
-07C6 0041
-136E 0061
-136E 0041
-104A6 0061
-104A6 0041
096C 0061
-096C 0041
09EC 0061
-09EC 0041
0A6C 0061
-0A6C 0041
0AEC 0061
-0AEC 0041
0B6C 0061
-0B6C 0041
0BEC 0061
-0BEC 0041
0C6C 0061
-0C6C 0041
0CEC 0061
-0CEC 0041
0D6C 0061
-0D6C 0041
-ABF6 0061
-ABF6 0041
-A8D6 0061
-A8D6 0041
-194C 0061
-194C 0041
-19D6 0061
-19D6 0041
-1A86 0061
-1A86 0041
-1A96 0061
-1A96 0041
0E56 0061
-0E56 0041
0ED6 0061
-0ED6 0041
0F26 0061
-0F26 0041
-0F2F 0061
-0F2F 0041
-1C46 0061
-1C46 0041
-A906 0061
-A906 0041
1046 0061
-1046 0041
1096 0061
-1096 0041
-1113C 0061
-1113C 0041
+136E 0061
17E6 0061
-17E6 0041
17F6 0061
-17F6 0041
-AA56 0061
-AA56 0041
+1816 0061
+194C 0061
+19D6 0061
+1A86 0061
+1A96 0061
1B56 0061
-1B56 0041
-A9D6 0061
-A9D6 0041
1BB6 0061
-1BB6 0041
-1816 0061
-1816 0041
+1C46 0061
1C56 0061
-1C56 0041
-A626 0061
-A626 0041
-110F6 0061
-110F6 0041
+2185 0061
3026 0061
-3026 0041
+A626 0061
+A8D6 0061
+A906 0061
+A9D6 0061
+AA56 0061
+ABF6 0061
1010C 0061
-1010C 0041
-2185 0061
-2185 0041
+104A6 0061
+10E65 0061
+11057 0061
+1106C 0061
+110F6 0061
+1113C 0061
+111D6 0061
+116C6 0061
12404 0061
1240B 0061
12411 0061
12428 0061
12440 0061
1244E 0061
+1D365 0061
+0036 0041
+0666 0041
+06F6 0041
+07C6 0041
+096C 0041
+09EC 0041
+0A6C 0041
+0AEC 0041
+0B6C 0041
+0BEC 0041
+0C6C 0041
+0CEC 0041
+0D6C 0041
+0E56 0041
+0ED6 0041
+0F26 0041
+1046 0041
+1096 0041
+136E 0041
+17E6 0041
+17F6 0041
+1816 0041
+194C 0041
+19D6 0041
+1A86 0041
+1A96 0041
+1B56 0041
+1BB6 0041
+1C46 0041
+1C56 0041
+2185 0041
+3026 0041
+A626 0041
+A8D6 0041
+A906 0041
+A9D6 0041
+AA56 0041
+ABF6 0041
+1010C 0041
+104A6 0041
+10E65 0041
+11057 0041
+1106C 0041
+110F6 0041
+1113C 0041
+111D6 0041
+116C6 0041
12404 0041
1240B 0041
12411 0041
12428 0041
12440 0041
1244E 0041
-111D6 0061
-111D6 0041
-116C6 0061
-116C6 0041
-1106C 0061
-1106C 0041
-11057 0061
-11057 0041
-1D365 0061
1D365 0041
+FF16 0061
+FF16 0041
+0F2F 0061
+0F2F 0041
+1D7D4 0061
+1D7DE 0061
+1D7E8 0061
+1D7F2 0061
+1D7FC 0061
+1D7D4 0041
+1D7DE 0041
+1D7E8 0041
+1D7F2 0041
+1D7FC 0041
+2465 0061
+24FA 0061
+277B 0061
+2785 0061
+278F 0061
+2465 0041
+24FA 0041
+277B 0041
+2785 0041
+278F 0041
+2076 0061
+2076 0041
+2086 0061
+2086 0041
0036 0062
-FF16 0062
-1D7D4 0062
-1D7DE 0062
-1D7E8 0062
-1D7F2 0062
-1D7FC 0062
-2465 0062
-24FA 0062
-277B 0062
-2785 0062
-278F 0062
-2076 0062
-2086 0062
0666 0062
06F6 0062
-10E65 0062
07C6 0062
-136E 0062
-104A6 0062
096C 0062
09EC 0062
0A6C 0062
0C6C 0062
0CEC 0062
0D6C 0062
-ABF6 0062
-A8D6 0062
-194C 0062
-19D6 0062
-1A86 0062
-1A96 0062
0E56 0062
0ED6 0062
0F26 0062
-0F2F 0062
-1C46 0062
-A906 0062
1046 0062
1096 0062
-1113C 0062
+136E 0062
17E6 0062
17F6 0062
-AA56 0062
+1816 0062
+194C 0062
+19D6 0062
+1A86 0062
+1A96 0062
1B56 0062
-A9D6 0062
1BB6 0062
-1816 0062
+1C46 0062
1C56 0062
-A626 0062
-110F6 0062
+2185 0062
3026 0062
+A626 0062
+A8D6 0062
+A906 0062
+A9D6 0062
+AA56 0062
+ABF6 0062
1010C 0062
-2185 0062
+104A6 0062
+10E65 0062
+11057 0062
+1106C 0062
+110F6 0062
+1113C 0062
+111D6 0062
+116C6 0062
12404 0062
1240B 0062
12411 0062
12428 0062
12440 0062
1244E 0062
-111D6 0062
-116C6 0062
-1106C 0062
-11057 0062
1D365 0062
+FF16 0062
+0F2F 0062
+1D7D4 0062
+1D7DE 0062
+1D7E8 0062
+1D7F2 0062
+1D7FC 0062
+2465 0062
+24FA 0062
+277B 0062
+2785 0062
+278F 0062
+2076 0062
+2086 0062
33E5 0021
33E5 003F
33E5 0061
1F108 0041
1F108 0062
0037 0021
-FF17 0021
-1D7D5 0021
-1D7DF 0021
-1D7E9 0021
-1D7F3 0021
-1D7FD 0021
-2466 0021
-24FB 0021
-277C 0021
-2786 0021
-2790 0021
-2077 0021
-2087 0021
0667 0021
06F7 0021
-10E66 0021
07C7 0021
-136F 0021
-104A7 0021
096D 0021
09ED 0021
0A6D 0021
0C6D 0021
0CED 0021
0D6D 0021
-ABF7 0021
-A8D7 0021
-194D 0021
-19D7 0021
-1A87 0021
-1A97 0021
0E57 0021
0ED7 0021
0F27 0021
-0F30 0021
-1C47 0021
-A907 0021
1047 0021
1097 0021
-1113D 0021
+136F 0021
17E7 0021
17F7 0021
-AA57 0021
+1817 0021
+194D 0021
+19D7 0021
+1A87 0021
+1A97 0021
1B57 0021
-A9D7 0021
1BB7 0021
-1817 0021
+1C47 0021
1C57 0021
-A627 0021
-110F7 0021
3027 0021
+A627 0021
+A8D7 0021
+A907 0021
+A9D7 0021
+AA57 0021
+ABF7 0021
1010D 0021
+104A7 0021
+10E66 0021
+11058 0021
+1106D 0021
+110F7 0021
+1113D 0021
+111D7 0021
+116C7 0021
12405 0021
1240C 0021
12412 0021
12441 0021
12442 0021
12443 0021
-111D7 0021
-116C7 0021
-1106D 0021
-11058 0021
1D366 0021
+FF17 0021
+0F30 0021
+1D7D5 0021
+1D7DF 0021
+1D7E9 0021
+1D7F3 0021
+1D7FD 0021
+2466 0021
+24FB 0021
+277C 0021
+2786 0021
+2790 0021
+2077 0021
+2087 0021
0037 003F
-FF17 003F
-1D7D5 003F
-1D7DF 003F
-1D7E9 003F
-1D7F3 003F
-1D7FD 003F
-2466 003F
-24FB 003F
-277C 003F
-2786 003F
-2790 003F
-2077 003F
-2087 003F
0667 003F
06F7 003F
-10E66 003F
07C7 003F
-136F 003F
-104A7 003F
096D 003F
09ED 003F
0A6D 003F
0C6D 003F
0CED 003F
0D6D 003F
-ABF7 003F
-A8D7 003F
-194D 003F
-19D7 003F
-1A87 003F
-1A97 003F
0E57 003F
0ED7 003F
0F27 003F
-0F30 003F
-1C47 003F
-A907 003F
1047 003F
1097 003F
-1113D 003F
+136F 003F
17E7 003F
17F7 003F
-AA57 003F
+1817 003F
+194D 003F
+19D7 003F
+1A87 003F
+1A97 003F
1B57 003F
-A9D7 003F
1BB7 003F
-1817 003F
+1C47 003F
1C57 003F
-A627 003F
-110F7 003F
3027 003F
+A627 003F
+A8D7 003F
+A907 003F
+A9D7 003F
+AA57 003F
+ABF7 003F
1010D 003F
+104A7 003F
+10E66 003F
+11058 003F
+1106D 003F
+110F7 003F
+1113D 003F
+111D7 003F
+116C7 003F
12405 003F
1240C 003F
12412 003F
12441 003F
12442 003F
12443 003F
-111D7 003F
-116C7 003F
-1106D 003F
-11058 003F
1D366 003F
+FF17 003F
+0F30 003F
+1D7D5 003F
+1D7DF 003F
+1D7E9 003F
+1D7F3 003F
+1D7FD 003F
+2466 003F
+24FB 003F
+277C 003F
+2786 003F
+2790 003F
+2077 003F
+2087 003F
248E 0021
248E 003F
248E 0061
324E 0041
324E 0062
0037 0061
-0037 0041
-FF17 0061
-FF17 0041
-1D7D5 0061
-1D7DF 0061
-1D7E9 0061
-1D7F3 0061
-1D7FD 0061
-1D7D5 0041
-1D7DF 0041
-1D7E9 0041
-1D7F3 0041
-1D7FD 0041
-2466 0061
-24FB 0061
-277C 0061
-2786 0061
-2790 0061
-2466 0041
-24FB 0041
-277C 0041
-2786 0041
-2790 0041
-2077 0061
-2077 0041
-2087 0061
-2087 0041
0667 0061
-0667 0041
06F7 0061
-06F7 0041
-10E66 0061
-10E66 0041
07C7 0061
-07C7 0041
-136F 0061
-136F 0041
-104A7 0061
-104A7 0041
096D 0061
-096D 0041
09ED 0061
-09ED 0041
0A6D 0061
-0A6D 0041
0AED 0061
-0AED 0041
0B6D 0061
-0B6D 0041
0BED 0061
-0BED 0041
0C6D 0061
-0C6D 0041
0CED 0061
-0CED 0041
0D6D 0061
-0D6D 0041
-ABF7 0061
-ABF7 0041
-A8D7 0061
-A8D7 0041
-194D 0061
-194D 0041
-19D7 0061
-19D7 0041
-1A87 0061
-1A87 0041
-1A97 0061
-1A97 0041
0E57 0061
-0E57 0041
0ED7 0061
-0ED7 0041
0F27 0061
-0F27 0041
-0F30 0061
-0F30 0041
-1C47 0061
-1C47 0041
-A907 0061
-A907 0041
1047 0061
-1047 0041
1097 0061
-1097 0041
-1113D 0061
-1113D 0041
+136F 0061
17E7 0061
-17E7 0041
17F7 0061
-17F7 0041
-AA57 0061
-AA57 0041
+1817 0061
+194D 0061
+19D7 0061
+1A87 0061
+1A97 0061
1B57 0061
-1B57 0041
-A9D7 0061
-A9D7 0041
1BB7 0061
-1BB7 0041
-1817 0061
-1817 0041
+1C47 0061
1C57 0061
-1C57 0041
-A627 0061
-A627 0041
-110F7 0061
-110F7 0041
3027 0061
-3027 0041
+A627 0061
+A8D7 0061
+A907 0061
+A9D7 0061
+AA57 0061
+ABF7 0061
1010D 0061
-1010D 0041
+104A7 0061
+10E66 0061
+11058 0061
+1106D 0061
+110F7 0061
+1113D 0061
+111D7 0061
+116C7 0061
12405 0061
1240C 0061
12412 0061
12441 0061
12442 0061
12443 0061
+1D366 0061
+0037 0041
+0667 0041
+06F7 0041
+07C7 0041
+096D 0041
+09ED 0041
+0A6D 0041
+0AED 0041
+0B6D 0041
+0BED 0041
+0C6D 0041
+0CED 0041
+0D6D 0041
+0E57 0041
+0ED7 0041
+0F27 0041
+1047 0041
+1097 0041
+136F 0041
+17E7 0041
+17F7 0041
+1817 0041
+194D 0041
+19D7 0041
+1A87 0041
+1A97 0041
+1B57 0041
+1BB7 0041
+1C47 0041
+1C57 0041
+3027 0041
+A627 0041
+A8D7 0041
+A907 0041
+A9D7 0041
+AA57 0041
+ABF7 0041
+1010D 0041
+104A7 0041
+10E66 0041
+11058 0041
+1106D 0041
+110F7 0041
+1113D 0041
+111D7 0041
+116C7 0041
12405 0041
1240C 0041
12412 0041
12441 0041
12442 0041
12443 0041
-111D7 0061
-111D7 0041
-116C7 0061
-116C7 0041
-1106D 0061
-1106D 0041
-11058 0061
-11058 0041
-1D366 0061
1D366 0041
+FF17 0061
+FF17 0041
+0F30 0061
+0F30 0041
+1D7D5 0061
+1D7DF 0061
+1D7E9 0061
+1D7F3 0061
+1D7FD 0061
+1D7D5 0041
+1D7DF 0041
+1D7E9 0041
+1D7F3 0041
+1D7FD 0041
+2466 0061
+24FB 0061
+277C 0061
+2786 0061
+2790 0061
+2466 0041
+24FB 0041
+277C 0041
+2786 0041
+2790 0041
+2077 0061
+2077 0041
+2087 0061
+2087 0041
0037 0062
-FF17 0062
-1D7D5 0062
-1D7DF 0062
-1D7E9 0062
-1D7F3 0062
-1D7FD 0062
-2466 0062
-24FB 0062
-277C 0062
-2786 0062
-2790 0062
-2077 0062
-2087 0062
0667 0062
06F7 0062
-10E66 0062
07C7 0062
-136F 0062
-104A7 0062
096D 0062
09ED 0062
0A6D 0062
0C6D 0062
0CED 0062
0D6D 0062
-ABF7 0062
-A8D7 0062
-194D 0062
-19D7 0062
-1A87 0062
-1A97 0062
0E57 0062
0ED7 0062
0F27 0062
-0F30 0062
-1C47 0062
-A907 0062
1047 0062
1097 0062
-1113D 0062
+136F 0062
17E7 0062
17F7 0062
-AA57 0062
+1817 0062
+194D 0062
+19D7 0062
+1A87 0062
+1A97 0062
1B57 0062
-A9D7 0062
1BB7 0062
-1817 0062
+1C47 0062
1C57 0062
-A627 0062
-110F7 0062
3027 0062
+A627 0062
+A8D7 0062
+A907 0062
+A9D7 0062
+AA57 0062
+ABF7 0062
1010D 0062
+104A7 0062
+10E66 0062
+11058 0062
+1106D 0062
+110F7 0062
+1113D 0062
+111D7 0062
+116C7 0062
12405 0062
1240C 0062
12412 0062
12441 0062
12442 0062
12443 0062
-111D7 0062
-116C7 0062
-1106D 0062
-11058 0062
1D366 0062
+FF17 0062
+0F30 0062
+1D7D5 0062
+1D7DF 0062
+1D7E9 0062
+1D7F3 0062
+1D7FD 0062
+2466 0062
+24FB 0062
+277C 0062
+2786 0062
+2790 0062
+2077 0062
+2087 0062
33E6 0021
33E6 003F
33E6 0061
1F109 0041
1F109 0062
0038 0021
-FF18 0021
-1D7D6 0021
-1D7E0 0021
-1D7EA 0021
-1D7F4 0021
-1D7FE 0021
-2467 0021
-24FC 0021
-277D 0021
-2787 0021
-2791 0021
-2078 0021
-2088 0021
0668 0021
06F8 0021
-10E67 0021
07C8 0021
-1370 0021
-104A8 0021
096E 0021
09EE 0021
0A6E 0021
0C6E 0021
0CEE 0021
0D6E 0021
-ABF8 0021
-A8D8 0021
-194E 0021
-19D8 0021
-1A88 0021
-1A98 0021
0E58 0021
0ED8 0021
0F28 0021
-0F31 0021
-1C48 0021
-A908 0021
1048 0021
1098 0021
-1113E 0021
+1370 0021
17E8 0021
17F8 0021
-AA58 0021
+1818 0021
+194E 0021
+19D8 0021
+1A88 0021
+1A98 0021
1B58 0021
-A9D8 0021
1BB8 0021
-1818 0021
+1C48 0021
1C58 0021
-A628 0021
-110F8 0021
3028 0021
+A628 0021
+A8D8 0021
+A908 0021
+A9D8 0021
+AA58 0021
+ABF8 0021
1010E 0021
+104A8 0021
+10E67 0021
+11059 0021
+1106E 0021
+110F8 0021
+1113E 0021
+111D8 0021
+116C8 0021
12406 0021
1240D 0021
12413 0021
1242A 0021
12444 0021
12445 0021
-111D8 0021
-116C8 0021
-1106E 0021
-11059 0021
1D367 0021
+FF18 0021
+0F31 0021
+1D7D6 0021
+1D7E0 0021
+1D7EA 0021
+1D7F4 0021
+1D7FE 0021
+2467 0021
+24FC 0021
+277D 0021
+2787 0021
+2791 0021
+2078 0021
+2088 0021
0038 003F
-FF18 003F
-1D7D6 003F
-1D7E0 003F
-1D7EA 003F
-1D7F4 003F
-1D7FE 003F
-2467 003F
-24FC 003F
-277D 003F
-2787 003F
-2791 003F
-2078 003F
-2088 003F
0668 003F
06F8 003F
-10E67 003F
07C8 003F
-1370 003F
-104A8 003F
096E 003F
09EE 003F
0A6E 003F
0C6E 003F
0CEE 003F
0D6E 003F
-ABF8 003F
-A8D8 003F
-194E 003F
-19D8 003F
-1A88 003F
-1A98 003F
0E58 003F
0ED8 003F
0F28 003F
-0F31 003F
-1C48 003F
-A908 003F
1048 003F
1098 003F
-1113E 003F
+1370 003F
17E8 003F
17F8 003F
-AA58 003F
+1818 003F
+194E 003F
+19D8 003F
+1A88 003F
+1A98 003F
1B58 003F
-A9D8 003F
1BB8 003F
-1818 003F
+1C48 003F
1C58 003F
-A628 003F
-110F8 003F
3028 003F
+A628 003F
+A8D8 003F
+A908 003F
+A9D8 003F
+AA58 003F
+ABF8 003F
1010E 003F
+104A8 003F
+10E67 003F
+11059 003F
+1106E 003F
+110F8 003F
+1113E 003F
+111D8 003F
+116C8 003F
12406 003F
1240D 003F
12413 003F
1242A 003F
12444 003F
12445 003F
-111D8 003F
-116C8 003F
-1106E 003F
-11059 003F
1D367 003F
+FF18 003F
+0F31 003F
+1D7D6 003F
+1D7E0 003F
+1D7EA 003F
+1D7F4 003F
+1D7FE 003F
+2467 003F
+24FC 003F
+277D 003F
+2787 003F
+2791 003F
+2078 003F
+2088 003F
248F 0021
248F 003F
248F 0061
324F 0041
324F 0062
0038 0061
-0038 0041
-FF18 0061
-FF18 0041
-1D7D6 0061
-1D7E0 0061
-1D7EA 0061
-1D7F4 0061
-1D7FE 0061
-1D7D6 0041
-1D7E0 0041
-1D7EA 0041
-1D7F4 0041
-1D7FE 0041
-2467 0061
-24FC 0061
-277D 0061
-2787 0061
-2791 0061
-2467 0041
-24FC 0041
-277D 0041
-2787 0041
-2791 0041
-2078 0061
-2078 0041
-2088 0061
-2088 0041
0668 0061
-0668 0041
06F8 0061
-06F8 0041
-10E67 0061
-10E67 0041
07C8 0061
-07C8 0041
-1370 0061
-1370 0041
-104A8 0061
-104A8 0041
096E 0061
-096E 0041
09EE 0061
-09EE 0041
0A6E 0061
-0A6E 0041
0AEE 0061
-0AEE 0041
0B6E 0061
-0B6E 0041
0BEE 0061
-0BEE 0041
0C6E 0061
-0C6E 0041
0CEE 0061
-0CEE 0041
0D6E 0061
-0D6E 0041
-ABF8 0061
-ABF8 0041
-A8D8 0061
-A8D8 0041
-194E 0061
-194E 0041
-19D8 0061
-19D8 0041
-1A88 0061
-1A88 0041
-1A98 0061
-1A98 0041
0E58 0061
-0E58 0041
0ED8 0061
-0ED8 0041
0F28 0061
-0F28 0041
-0F31 0061
-0F31 0041
-1C48 0061
-1C48 0041
-A908 0061
-A908 0041
1048 0061
-1048 0041
1098 0061
-1098 0041
-1113E 0061
-1113E 0041
+1370 0061
17E8 0061
-17E8 0041
17F8 0061
-17F8 0041
-AA58 0061
-AA58 0041
+1818 0061
+194E 0061
+19D8 0061
+1A88 0061
+1A98 0061
1B58 0061
-1B58 0041
-A9D8 0061
-A9D8 0041
1BB8 0061
-1BB8 0041
-1818 0061
-1818 0041
+1C48 0061
1C58 0061
-1C58 0041
-A628 0061
-A628 0041
-110F8 0061
-110F8 0041
3028 0061
-3028 0041
+A628 0061
+A8D8 0061
+A908 0061
+A9D8 0061
+AA58 0061
+ABF8 0061
1010E 0061
-1010E 0041
+104A8 0061
+10E67 0061
+11059 0061
+1106E 0061
+110F8 0061
+1113E 0061
+111D8 0061
+116C8 0061
12406 0061
1240D 0061
12413 0061
1242A 0061
12444 0061
12445 0061
+1D367 0061
+0038 0041
+0668 0041
+06F8 0041
+07C8 0041
+096E 0041
+09EE 0041
+0A6E 0041
+0AEE 0041
+0B6E 0041
+0BEE 0041
+0C6E 0041
+0CEE 0041
+0D6E 0041
+0E58 0041
+0ED8 0041
+0F28 0041
+1048 0041
+1098 0041
+1370 0041
+17E8 0041
+17F8 0041
+1818 0041
+194E 0041
+19D8 0041
+1A88 0041
+1A98 0041
+1B58 0041
+1BB8 0041
+1C48 0041
+1C58 0041
+3028 0041
+A628 0041
+A8D8 0041
+A908 0041
+A9D8 0041
+AA58 0041
+ABF8 0041
+1010E 0041
+104A8 0041
+10E67 0041
+11059 0041
+1106E 0041
+110F8 0041
+1113E 0041
+111D8 0041
+116C8 0041
12406 0041
1240D 0041
12413 0041
1242A 0041
12444 0041
12445 0041
-111D8 0061
-111D8 0041
-116C8 0061
-116C8 0041
-1106E 0061
-1106E 0041
-11059 0061
-11059 0041
-1D367 0061
1D367 0041
+FF18 0061
+FF18 0041
+0F31 0061
+0F31 0041
+1D7D6 0061
+1D7E0 0061
+1D7EA 0061
+1D7F4 0061
+1D7FE 0061
+1D7D6 0041
+1D7E0 0041
+1D7EA 0041
+1D7F4 0041
+1D7FE 0041
+2467 0061
+24FC 0061
+277D 0061
+2787 0061
+2791 0061
+2467 0041
+24FC 0041
+277D 0041
+2787 0041
+2791 0041
+2078 0061
+2078 0041
+2088 0061
+2088 0041
0038 0062
-FF18 0062
-1D7D6 0062
-1D7E0 0062
-1D7EA 0062
-1D7F4 0062
-1D7FE 0062
-2467 0062
-24FC 0062
-277D 0062
-2787 0062
-2791 0062
-2078 0062
-2088 0062
0668 0062
06F8 0062
-10E67 0062
07C8 0062
-1370 0062
-104A8 0062
096E 0062
09EE 0062
0A6E 0062
0C6E 0062
0CEE 0062
0D6E 0062
-ABF8 0062
-A8D8 0062
-194E 0062
-19D8 0062
-1A88 0062
-1A98 0062
0E58 0062
0ED8 0062
0F28 0062
-0F31 0062
-1C48 0062
-A908 0062
1048 0062
1098 0062
-1113E 0062
+1370 0062
17E8 0062
17F8 0062
-AA58 0062
+1818 0062
+194E 0062
+19D8 0062
+1A88 0062
+1A98 0062
1B58 0062
-A9D8 0062
1BB8 0062
-1818 0062
+1C48 0062
1C58 0062
-A628 0062
-110F8 0062
3028 0062
+A628 0062
+A8D8 0062
+A908 0062
+A9D8 0062
+AA58 0062
+ABF8 0062
1010E 0062
+104A8 0062
+10E67 0062
+11059 0062
+1106E 0062
+110F8 0062
+1113E 0062
+111D8 0062
+116C8 0062
12406 0062
1240D 0062
12413 0062
1242A 0062
12444 0062
12445 0062
-111D8 0062
-116C8 0062
-1106E 0062
-11059 0062
1D367 0062
+FF18 0062
+0F31 0062
+1D7D6 0062
+1D7E0 0062
+1D7EA 0062
+1D7F4 0062
+1D7FE 0062
+2467 0062
+24FC 0062
+277D 0062
+2787 0062
+2791 0062
+2078 0062
+2088 0062
33E7 0021
33E7 003F
33E7 0061
1F10A 0041
1F10A 0062
0039 0021
-FF19 0021
-1D7D7 0021
-1D7E1 0021
-1D7EB 0021
-1D7F5 0021
-1D7FF 0021
-2468 0021
-24FD 0021
-277E 0021
-2788 0021
-2792 0021
-2079 0021
-2089 0021
0669 0021
06F9 0021
-10E68 0021
07C9 0021
-1371 0021
-104A9 0021
096F 0021
09EF 0021
0A6F 0021
0C6F 0021
0CEF 0021
0D6F 0021
-ABF9 0021
-A8D9 0021
-194F 0021
-19D9 0021
-1A89 0021
-1A99 0021
0E59 0021
0ED9 0021
0F29 0021
-0F32 0021
-1C49 0021
-A909 0021
1049 0021
1099 0021
-1113F 0021
+1371 0021
17E9 0021
17F9 0021
-AA59 0021
+1819 0021
+194F 0021
+19D9 0021
+1A89 0021
+1A99 0021
1B59 0021
-A9D9 0021
1BB9 0021
-1819 0021
+1C49 0021
1C59 0021
-A629 0021
-110F9 0021
3029 0021
+A629 0021
+A8D9 0021
+A909 0021
+A9D9 0021
+AA59 0021
+ABF9 0021
1010F 0021
+104A9 0021
+10E68 0021
+1105A 0021
+1106F 0021
+110F9 0021
+1113F 0021
+111D9 0021
+116C9 0021
12407 0021
1240E 0021
12414 0021
12447 0021
12448 0021
12449 0021
-111D9 0021
-116C9 0021
-1106F 0021
-1105A 0021
1D368 0021
+FF19 0021
+0F32 0021
+1D7D7 0021
+1D7E1 0021
+1D7EB 0021
+1D7F5 0021
+1D7FF 0021
+2468 0021
+24FD 0021
+277E 0021
+2788 0021
+2792 0021
+2079 0021
+2089 0021
0039 003F
-FF19 003F
-1D7D7 003F
-1D7E1 003F
-1D7EB 003F
-1D7F5 003F
-1D7FF 003F
-2468 003F
-24FD 003F
-277E 003F
-2788 003F
-2792 003F
-2079 003F
-2089 003F
0669 003F
06F9 003F
-10E68 003F
07C9 003F
-1371 003F
-104A9 003F
096F 003F
09EF 003F
0A6F 003F
0C6F 003F
0CEF 003F
0D6F 003F
-ABF9 003F
-A8D9 003F
-194F 003F
-19D9 003F
-1A89 003F
-1A99 003F
0E59 003F
0ED9 003F
0F29 003F
-0F32 003F
-1C49 003F
-A909 003F
1049 003F
1099 003F
-1113F 003F
+1371 003F
17E9 003F
17F9 003F
-AA59 003F
+1819 003F
+194F 003F
+19D9 003F
+1A89 003F
+1A99 003F
1B59 003F
-A9D9 003F
1BB9 003F
-1819 003F
+1C49 003F
1C59 003F
-A629 003F
-110F9 003F
3029 003F
+A629 003F
+A8D9 003F
+A909 003F
+A9D9 003F
+AA59 003F
+ABF9 003F
1010F 003F
+104A9 003F
+10E68 003F
+1105A 003F
+1106F 003F
+110F9 003F
+1113F 003F
+111D9 003F
+116C9 003F
12407 003F
1240E 003F
12414 003F
12447 003F
12448 003F
12449 003F
-111D9 003F
-116C9 003F
-1106F 003F
-1105A 003F
1D368 003F
+FF19 003F
+0F32 003F
+1D7D7 003F
+1D7E1 003F
+1D7EB 003F
+1D7F5 003F
+1D7FF 003F
+2468 003F
+24FD 003F
+277E 003F
+2788 003F
+2792 003F
+2079 003F
+2089 003F
2490 0021
2490 003F
2490 0061
2490 0041
2490 0062
0039 0061
+0669 0061
+06F9 0061
+07C9 0061
+096F 0061
+09EF 0061
+0A6F 0061
+0AEF 0061
+0B6F 0061
+0BEF 0061
+0C6F 0061
+0CEF 0061
+0D6F 0061
+0E59 0061
+0ED9 0061
+0F29 0061
+1049 0061
+1099 0061
+1371 0061
+17E9 0061
+17F9 0061
+1819 0061
+194F 0061
+19D9 0061
+1A89 0061
+1A99 0061
+1B59 0061
+1BB9 0061
+1C49 0061
+1C59 0061
+3029 0061
+A629 0061
+A8D9 0061
+A909 0061
+A9D9 0061
+AA59 0061
+ABF9 0061
+1010F 0061
+104A9 0061
+10E68 0061
+1105A 0061
+1106F 0061
+110F9 0061
+1113F 0061
+111D9 0061
+116C9 0061
+12407 0061
+1240E 0061
+12414 0061
+1241D 0061
+1242B 0061
+12446 0061
+12447 0061
+12448 0061
+12449 0061
+1D368 0061
0039 0041
+0669 0041
+06F9 0041
+07C9 0041
+096F 0041
+09EF 0041
+0A6F 0041
+0AEF 0041
+0B6F 0041
+0BEF 0041
+0C6F 0041
+0CEF 0041
+0D6F 0041
+0E59 0041
+0ED9 0041
+0F29 0041
+1049 0041
+1099 0041
+1371 0041
+17E9 0041
+17F9 0041
+1819 0041
+194F 0041
+19D9 0041
+1A89 0041
+1A99 0041
+1B59 0041
+1BB9 0041
+1C49 0041
+1C59 0041
+3029 0041
+A629 0041
+A8D9 0041
+A909 0041
+A9D9 0041
+AA59 0041
+ABF9 0041
+1010F 0041
+104A9 0041
+10E68 0041
+1105A 0041
+1106F 0041
+110F9 0041
+1113F 0041
+111D9 0041
+116C9 0041
+12407 0041
+1240E 0041
+12414 0041
+1241D 0041
+1242B 0041
+12446 0041
+12447 0041
+12448 0041
+12449 0041
+1D368 0041
FF19 0061
FF19 0041
+0F32 0061
+0F32 0041
1D7D7 0061
1D7E1 0061
1D7EB 0061
2079 0041
2089 0061
2089 0041
-0669 0061
-0669 0041
-06F9 0061
-06F9 0041
-10E68 0061
-10E68 0041
-07C9 0061
-07C9 0041
-1371 0061
-1371 0041
-104A9 0061
-104A9 0041
-096F 0061
-096F 0041
-09EF 0061
-09EF 0041
-0A6F 0061
-0A6F 0041
-0AEF 0061
-0AEF 0041
-0B6F 0061
-0B6F 0041
-0BEF 0061
-0BEF 0041
-0C6F 0061
-0C6F 0041
-0CEF 0061
-0CEF 0041
-0D6F 0061
-0D6F 0041
-ABF9 0061
-ABF9 0041
-A8D9 0061
-A8D9 0041
-194F 0061
-194F 0041
-19D9 0061
-19D9 0041
-1A89 0061
-1A89 0041
-1A99 0061
-1A99 0041
-0E59 0061
-0E59 0041
-0ED9 0061
-0ED9 0041
-0F29 0061
-0F29 0041
-0F32 0061
-0F32 0041
-1C49 0061
-1C49 0041
-A909 0061
-A909 0041
-1049 0061
-1049 0041
-1099 0061
-1099 0041
-1113F 0061
-1113F 0041
-17E9 0061
-17E9 0041
-17F9 0061
-17F9 0041
-AA59 0061
-AA59 0041
-1B59 0061
-1B59 0041
-A9D9 0061
-A9D9 0041
-1BB9 0061
-1BB9 0041
-1819 0061
-1819 0041
-1C59 0061
-1C59 0041
-A629 0061
-A629 0041
-110F9 0061
-110F9 0041
-3029 0061
-3029 0041
-1010F 0061
-1010F 0041
-12407 0061
-1240E 0061
-12414 0061
-1241D 0061
-1242B 0061
-12446 0061
-12447 0061
-12448 0061
-12449 0061
-12407 0041
-1240E 0041
-12414 0041
-1241D 0041
-1242B 0041
-12446 0041
-12447 0041
-12448 0041
-12449 0041
-111D9 0061
-111D9 0041
-116C9 0061
-116C9 0041
-1106F 0061
-1106F 0041
-1105A 0061
-1105A 0041
-1D368 0061
-1D368 0041
0039 0062
-FF19 0062
-1D7D7 0062
-1D7E1 0062
-1D7EB 0062
-1D7F5 0062
-1D7FF 0062
-2468 0062
-24FD 0062
-277E 0062
-2788 0062
-2792 0062
-2079 0062
-2089 0062
0669 0062
06F9 0062
-10E68 0062
07C9 0062
-1371 0062
-104A9 0062
096F 0062
09EF 0062
0A6F 0062
0C6F 0062
0CEF 0062
0D6F 0062
-ABF9 0062
-A8D9 0062
-194F 0062
-19D9 0062
-1A89 0062
-1A99 0062
0E59 0062
0ED9 0062
0F29 0062
-0F32 0062
-1C49 0062
-A909 0062
1049 0062
1099 0062
-1113F 0062
+1371 0062
17E9 0062
17F9 0062
-AA59 0062
+1819 0062
+194F 0062
+19D9 0062
+1A89 0062
+1A99 0062
1B59 0062
-A9D9 0062
1BB9 0062
-1819 0062
+1C49 0062
1C59 0062
-A629 0062
-110F9 0062
3029 0062
+A629 0062
+A8D9 0062
+A909 0062
+A9D9 0062
+AA59 0062
+ABF9 0062
1010F 0062
+104A9 0062
+10E68 0062
+1105A 0062
+1106F 0062
+110F9 0062
+1113F 0062
+111D9 0062
+116C9 0062
12407 0062
1240E 0062
12414 0062
12447 0062
12448 0062
12449 0062
-111D9 0062
-116C9 0062
-1106F 0062
-1105A 0062
1D368 0062
+FF19 0062
+0F32 0062
+1D7D7 0062
+1D7E1 0062
+1D7EB 0062
+1D7F5 0062
+1D7FF 0062
+2468 0062
+24FD 0062
+277E 0062
+2788 0062
+2792 0062
+2079 0062
+2089 0062
33E8 0021
33E8 003F
33E8 0061
0618 0061
0619 0061
061A 0061
+061C 0061
0640 0061
06D6 0061
06D7 0061
180B 0061
180C 0061
180D 0061
+180E 0061
1A7F 0061
1B6B 0061
1B6C 0061
2062 0061
2063 0061
2064 0061
+2066 0061
+2067 0061
+2068 0061
+2069 0061
206A 0061
206B 0061
206C 0061
0618 0041
0619 0041
061A 0041
+061C 0041
0640 0041
06D6 0041
06D7 0041
180B 0041
180C 0041
180D 0041
+180E 0041
1A7F 0041
1B6B 0041
1B6C 0041
2062 0041
2063 0041
2064 0041
+2066 0041
+2067 0041
+2068 0041
+2069 0041
206A 0041
206B 0041
206C 0041
20E2 0041
20E3 0041
20E4 0041
+3099 0061
+3099 0041
+FF9E 0061
+FF9E 0041
+309A 0061
+309A 0041
+FF9F 0061
+FF9F 0041
+0335 0061
+0335 0041
0305 0061
0305 0041
0309 0061
0334 0041
0334 1DD3
1DD3 0334
-0335 0061
-0335 0041
0339 0061
0339 0041
0345 0061
302E 0041
302F 0061
302F 0041
-3099 0061
-3099 0041
-FF9E 0061
-FF9E 0041
-309A 0061
-309A 0041
-FF9F 0061
-FF9F 0041
20D0 0061
20D0 0041
20D1 0061
0618 0062
0619 0062
061A 0062
+061C 0062
0640 0062
06D6 0062
06D7 0062
180B 0062
180C 0062
180D 0062
+180E 0062
1A7F 0062
1B6B 0062
1B6C 0062
2062 0062
2063 0062
2064 0062
+2066 0062
+2067 0062
+2068 0062
+2069 0062
206A 0062
206B 0062
206C 0062
20E2 0062
20E3 0062
20E4 0062
+3099 0062
+FF9E 0062
+309A 0062
+FF9F 0062
+0335 0062
0305 0062
0309 0062
030F 0062
0330 0062
0331 0062
0334 0062
-0335 0062
0339 0062
0345 0062
0358 0062
302D 0062
302E 0062
302F 0062
-3099 0062
-FF9E 0062
-309A 0062
-FF9F 0062
20D0 0062
20D1 0062
20D2 0062
1E0A 0021
1E11 0021
1E10 0021
+0111 0021
+0110 0021
1E0D 0021
1E0C 0021
1E13 0021
1E12 0021
1E0F 0021
1E0E 0021
-0111 0021
-0110 0021
00F0 0021
1DD9 0021
00D0 0021
1E0A 003F
1E11 003F
1E10 003F
+0111 003F
+0110 003F
1E0D 003F
1E0C 003F
1E13 003F
1E12 003F
1E0F 003F
1E0E 003F
-0111 003F
-0110 003F
00F0 003F
1DD9 003F
00D0 003F
1E11 0041
1E10 0061
1E10 0041
+0111 0061
+0111 0041
+0110 0061
+0110 0041
1E0D 0061
1E0D 0041
1E0C 0061
1E0F 0041
1E0E 0061
1E0E 0041
-0111 0061
-0111 0041
-0110 0061
-0110 0041
00F0 0061
1DD9 0061
00F0 0041
1E0A 0062
1E11 0062
1E10 0062
+0111 0062
+0110 0062
1E0D 0062
1E0C 0062
1E13 0062
1E12 0062
1E0F 0062
1E0E 0062
-0111 0062
-0110 0062
00F0 0062
1DD9 0062
00D0 0062
1E22 0021
1E29 0021
1E28 0021
+0127 0021
+210F 0021
+0126 0021
+A7F8 0021
1E25 0021
1E24 0021
1E2B 0021
1E2A 0021
1E96 0021
-0127 0021
-210F 0021
-0126 0021
-A7F8 0021
0068 003F
FF48 003F
036A 003F
1E22 003F
1E29 003F
1E28 003F
+0127 003F
+210F 003F
+0126 003F
+A7F8 003F
1E25 003F
1E24 003F
1E2B 003F
1E2A 003F
1E96 003F
-0127 003F
-210F 003F
-0126 003F
-A7F8 003F
0068 0061
0068 0041
FF48 0061
1E29 0041
1E28 0061
1E28 0041
+0127 0061
+210F 0061
+0127 0041
+210F 0041
+0126 0061
+0126 0041
+A7F8 0061
+A7F8 0041
1E25 0061
1E25 0041
1E24 0061
1E2A 0041
1E96 0061
1E96 0041
-0127 0061
-210F 0061
-0127 0041
-210F 0041
-0126 0061
-0126 0041
-A7F8 0061
-A7F8 0041
33CA 0021
33CA 003F
33CA 0061
1E22 0062
1E29 0062
1E28 0062
+0127 0062
+210F 0062
+0126 0062
+A7F8 0062
1E25 0062
1E24 0062
1E2B 0062
1E2A 0062
1E96 0062
-0127 0062
-210F 0062
-0126 0062
-A7F8 0062
32CC 0021
32CC 003F
32CC 0061
0069 0308 0301 0334
0069 0308 0334 0341
00EF 0301 0334
-1E2F 0334
+00EF 0334 0341
0049 0308 0334 0301
0049 0308 0341 0334
00CF 0334 0301
013D 0021
013C 0021
013B 0021
+0142 0021
+0141 0021
1E37 0021
1E36 0021
1E39 0021
1E3C 0021
1E3B 0021
1E3A 0021
-0142 0021
-0141 0021
006C 00B7 0021
006C 0387 0021
0140 0021
013D 003F
013C 003F
013B 003F
+0142 003F
+0141 003F
1E37 003F
1E36 003F
1E39 003F
1E3C 003F
1E3B 003F
1E3A 003F
-0142 003F
-0141 003F
006C 00B7 003F
006C 0387 003F
0140 003F
013C 0041
013B 0061
013B 0041
+0142 0061
+0142 0041
+0141 0061
+0141 0041
1E37 0061
1E37 0041
1E36 0061
1E3B 0041
1E3A 0061
1E3A 0041
-0142 0061
-0142 0041
-0141 0061
-0141 0041
006C 00B7 0061
006C 0387 0061
0140 0061
013D 0062
013C 0062
013B 0062
+0142 0062
+0141 0062
1E37 0062
1E36 0062
1E39 0062
1E3C 0062
1E3B 0062
1E3A 0062
-0142 0062
-0141 0062
006C 00B7 0062
006C 0387 0062
0140 0062
0075 0334 0344
0075 0344 0334
01D8 0334
-0055 0308 0341 0334
+0055 0308 0301 0334
0055 0334 0308 0301
-0055 0334 0308 0341
00DC 0301 0334
+00DC 0334 0301
0075 0308 0340 0334
0075 0334 0308 0340
00FC 0300 0334
0391 0334 0313 0340
1F08 0300 0334
1F0A 0334
-03B1 0313 0300 0334 0345
03B1 0343 0300 0345 0334
03B1 0343 0345 0334 0340
03B1 0345 0313 0300 0334
+1F00 0345 0340 0334
0391 0343 0334 0345 0340
0391 0345 0313 0334 0340
1F08 0300 0345 0334
1F09 0300 0334
1F09 0340 0334
03B1 0314 0334 0300 0345
-03B1 0334 0314 0345 0300
-03B1 0345 0314 0300 0334
-03B1 0345 0334 0314 0340
+03B1 0314 0345 0340 0334
+1F81 0340 0334
+1F83 0334
0391 0334 0345 0314 0300
1F09 0334 0345 0300
-1F09 0345 0300 0334
1F0B 0345 0334
+1FBC 0314 0300 0334
03B1 0314 0334 0342
03B1 0314 0342 0334
03B1 0334 0314 0342
0397 0334 0343
1F28 0334
03B7 0334 0313 0341
-03B7 0334 0343 0301
03B7 0334 0343 0341
-1F20 0334 0341
+03B7 0343 0341 0334
+1F20 0341 0334
0397 0313 0334 0301
0397 0313 0341 0334
0397 0334 0313 0301
0397 0343 0300 0334
03B7 0313 0345 0334 0300
03B7 0313 0345 0340 0334
-03B7 0343 0340 0334 0345
03B7 0343 0345 0340 0334
+1FC3 0334 0313 0300
0397 0313 0334 0300 0345
0397 0343 0334 0345 0340
0397 0343 0340 0345 0334
1F29 0334 0301
1F2D 0334
03B7 0314 0301 0345 0334
+03B7 0314 0334 0345 0301
03B7 0314 0334 0345 0341
-1F21 0334 0341 0345
-1F21 0345 0301 0334
+03B7 0314 0345 0334 0301
0397 0314 0334 0345 0341
0397 0334 0314 0345 0301
1F29 0334 0301 0345
03C5 0334 0343 0341
1F50 0334 0301
1F50 0334 0341
-03C5 0334 0313 0340
+03C5 0313 0340 0334
03C5 0334 0343 0340
1F50 0334 0340
1F52 0334
03D2 0308 0334
03D2 0334 0308
03D4 0334
+03B0 0334
03C5 0308 0341 0334
03C5 0344 0334
-03CB 0334 0341
03CB 0341 0334
03C5 0308 0300 0334
03C5 0308 0340 0334
2126 0334 0314 0301
2126 0334 0314 0341
03C9 0314 0334 0345 0301
-1F61 0334 0301 0345
-1F61 0341 0345 0334
+03C9 0345 0314 0334 0301
+1F61 0301 0345 0334
1F65 0345 0334
03A9 0314 0301 0345 0334
03A9 0345 0334 0314 0301
2126 0314 0334 0340
2126 0334 0314 0340
03C9 0314 0334 0345 0300
-03C9 0314 0340 0334 0345
03C9 0314 0345 0300 0334
-03C9 0334 0314 0340 0345
+03C9 0345 0314 0300 0334
+03C9 0345 0314 0334 0300
03A9 0314 0300 0345 0334
03A9 0345 0334 0314 0300
1F6B 0345 0334
12262 0061
12262 0041
12262 0062
+122D4 0021
+122D4 003F
+122D4 0061
+122D4 0041
+122D4 0062
+122D5 0021
+122D5 003F
+122D5 0061
+122D5 0041
+122D5 0062
12263 0021
12263 003F
12263 0061
122D3 0061
122D3 0041
122D3 0062
-122D4 0021
-122D4 003F
-122D4 0061
-122D4 0041
-122D4 0062
-122D5 0021
-122D5 003F
-122D5 0061
-122D5 0041
-122D5 0062
122D6 0021
122D6 003F
122D6 0061
10FFFF 0061
10FFFF 0041
10FFFF 0062
+FFFD 0021
+FFFD 003F
+FFFD 0061
+FFFD 0041
+FFFD 0062
FFFF 0021
FFFF 003F
FFFF 0061
# File: CollationTest_CLDR_SHIFTED_SHORT.txt
-# UCA Version: 6.2.0
-# UCD Version: 6.2.0
-# Generated: 2012-08-15, 21:43:28 GMT [MD]
+# UCA Version: 6.3.0
+# UCD Version: 6.3.0
+# Generated: 2013-09-03 [MS]
# For a description of the format and usage, see CollationAuxiliary.html
0009 0021
000D 003F
0085 0021
0085 003F
-180E 0021
-180E 003F
2028 0021
2028 003F
2029 0021
0618 0021
0619 0021
061A 0021
+061C 0021
0640 0021
06D6 0021
06D7 0021
180B 0021
180C 0021
180D 0021
+180E 0021
1A7F 0021
1B6B 0021
1B6C 0021
2062 0021
2063 0021
2064 0021
+2066 0021
+2067 0021
+2068 0021
+2069 0021
206A 0021
206B 0021
206C 0021
0618 003F
0619 003F
061A 003F
+061C 003F
0640 003F
06D6 003F
06D7 003F
180B 003F
180C 003F
180D 003F
+180E 003F
1A7F 003F
1B6B 003F
1B6C 003F
2062 003F
2063 003F
2064 003F
+2066 003F
+2067 003F
+2068 003F
+2069 003F
206A 003F
206B 003F
206C 003F
2045 003F
2046 0021
2046 003F
+2308 0021
+2308 003F
+2309 0021
+2309 003F
+230A 0021
+230A 003F
+230B 0021
+230B 003F
29FC 0021
29FC 003F
29FD 0021
20E2 003F
20E3 003F
20E4 003F
+3099 0021
+3099 003F
+FF9E 0021
+FF9E 003F
+309A 0021
+309A 003F
+FF9F 0021
+FF9F 003F
+0335 0021
+0335 003F
+0335 0334
0305 0021
0305 003F
0309 0021
20EF 0334
0334 10A0D
10A0D 0334
+0334 3099
+3099 0334
+0334 309A
+309A 0334
0305 0334
0334 0305
0309 0334
302E 0334
0334 302F
302F 0334
-0334 3099
-3099 0334
-0334 309A
-309A 0334
0334 20D0
20D0 0334
0334 20D1
20E9 0334
0334 101FD
101FD 0334
-0335 0021
-0335 003F
-0335 0334
0339 0021
0339 003F
0345 0021
302E 003F
302F 0021
302F 003F
-3099 0021
-3099 003F
-FF9E 0021
-FF9E 003F
-309A 0021
-309A 003F
-FF9F 0021
-FF9F 003F
20D0 0021
20D0 003F
20D1 0021
003D 003F
FF1D 0021
FF1D 003F
+2A74 0021
+2A74 003F
FE66 0021
FE66 003F
207C 0021
207C 003F
208C 0021
208C 003F
-2A74 0021
-2A74 003F
2260 0021
2260 003F
003D 0338 0334
003D 0041
FF1D 0061
FF1D 0041
+2A74 0061
+2A74 0041
FE66 0061
FE66 0041
207C 0061
207C 0041
208C 0061
208C 0041
-2A74 0061
-2A74 0041
2260 0061
2260 0041
003D 0062
FF1D 0062
+2A74 0062
FE66 0062
207C 0062
208C 0062
-2A74 0062
2260 0062
003E 0021
003E 003F
2307 0061
2307 0041
2307 0062
-2308 0021
-2308 003F
-2308 0061
-2308 0041
-2308 0062
-2309 0021
-2309 003F
-2309 0061
-2309 0041
-2309 0062
-230A 0021
-230A 003F
-230A 0061
-230A 0041
-230A 0062
-230B 0021
-230B 003F
-230B 0061
-230B 0041
-230B 0062
230C 0021
230C 003F
230C 0061
FFFC 0061
FFFC 0041
FFFC 0062
-FFFD 0021
-FFFD 003F
-FFFD 0061
-FFFD 0041
-FFFD 0062
02D0 0021
02D0 003F
02D0 0061
3035 0062
309D 0021
309D 003F
+309E 0021
+309E 003F
309D 0334 3099
309D 3099 0334
309E 0334
-309E 0021
-309E 003F
309D 0061
309D 0041
309E 0061
FF70 0062
30FD 0021
30FD 003F
+30FE 0021
+30FE 003F
30FD 0334 3099
30FD 3099 0334
30FE 0334
-30FE 0021
-30FE 003F
30FD 0061
30FD 0041
30FE 0061
12433 0061
12433 0041
12433 0062
-12456 0021
-12456 003F
-12456 0334
-12456 0061
-12456 0041
-12456 0062
-12457 0021
-12457 003F
-12457 0334
-12457 0061
-12457 0041
-12457 0062
1245A 0021
1245A 003F
1245A 0334
1D371 0041
1D371 0062
0030 0021
-0030 003F
-FF10 0021
-FF10 003F
-1F101 0334
-1F101 0021
-1F101 003F
-1F100 0334
-1F100 0021
-1F100 003F
-1D7CE 0021
-1D7D8 0021
-1D7E2 0021
-1D7EC 0021
-1D7F6 0021
-1D7CE 003F
-1D7D8 003F
-1D7E2 003F
-1D7EC 003F
-1D7F6 003F
-24EA 0021
-24FF 0021
-24EA 003F
-24FF 003F
-2070 0021
-2070 003F
-2080 0021
-2080 003F
-1D7CE 0334
-1D7D8 0334
-1D7E2 0334
-1D7EC 0334
-1D7F6 0334
0660 0021
-0660 003F
06F0 0021
-06F0 003F
07C0 0021
-07C0 003F
-104A0 0021
-104A0 003F
-104A0 0334
0966 0021
-0966 003F
09E6 0021
-09E6 003F
0A66 0021
-0A66 003F
0AE6 0021
-0AE6 003F
0B66 0021
-0B66 003F
0BE6 0021
-0BE6 003F
0C66 0021
0C78 0021
-0C66 003F
-0C78 003F
0CE6 0021
-0CE6 003F
0D66 0021
-0D66 003F
-ABF0 0021
-ABF0 003F
-A8D0 0021
-A8D0 003F
+0E50 0021
+0ED0 0021
+0F20 0021
+1040 0021
+1090 0021
+17E0 0021
+17F0 0021
+1810 0021
1946 0021
-1946 003F
19D0 0021
-19D0 003F
1A80 0021
-1A80 003F
1A90 0021
-1A90 003F
-0E50 0021
+1B50 0021
+1BB0 0021
+1C40 0021
+1C50 0021
+3007 0021
+A620 0021
+A8D0 0021
+A900 0021
+A9D0 0021
+AA50 0021
+ABF0 0021
+1018A 0021
+104A0 0021
+11066 0021
+110F0 0021
+11136 0021
+111D0 0021
+116C0 0021
+0030 003F
+0660 003F
+06F0 003F
+07C0 003F
+0966 003F
+09E6 003F
+0A66 003F
+0AE6 003F
+0B66 003F
+0BE6 003F
+0C66 003F
+0C78 003F
+0CE6 003F
+0D66 003F
0E50 003F
-0ED0 0021
0ED0 003F
-0F20 0021
0F20 003F
-0F33 0021
-0F33 003F
-1C40 0021
-1C40 003F
-A900 0021
-A900 003F
-1040 0021
1040 003F
-1090 0021
1090 003F
-11136 0021
-11136 003F
-11136 0334
-17E0 0021
17E0 003F
-17F0 0021
17F0 003F
-AA50 0021
-AA50 003F
-1B50 0021
+1810 003F
+1946 003F
+19D0 003F
+1A80 003F
+1A90 003F
1B50 003F
-A9D0 0021
-A9D0 003F
-1BB0 0021
1BB0 003F
-1810 0021
-1810 003F
-1C50 0021
+1C40 003F
1C50 003F
-A620 0021
-A620 003F
-110F0 0021
-110F0 003F
-110F0 0334
-3007 0021
3007 003F
-1018A 0021
+A620 003F
+A8D0 003F
+A900 003F
+A9D0 003F
+AA50 003F
+ABF0 003F
1018A 003F
-1018A 0334
-111D0 0021
+104A0 003F
+11066 003F
+110F0 003F
+11136 003F
111D0 003F
-111D0 0334
-116C0 0021
116C0 003F
-116C0 0334
-11066 0021
-11066 003F
+FF10 0021
+FF10 003F
+1F101 0334
+1F101 0021
+1F101 003F
+0F33 0021
+0F33 003F
+1F100 0334
+1F100 0021
+1F100 003F
+1D7CE 0021
+1D7D8 0021
+1D7E2 0021
+1D7EC 0021
+1D7F6 0021
+1D7CE 003F
+1D7D8 003F
+1D7E2 003F
+1D7EC 003F
+1D7F6 003F
+24EA 0021
+24FF 0021
+24EA 003F
+24FF 003F
+2070 0021
+2070 003F
+2080 0021
+2080 003F
+1018A 0334
+104A0 0334
11066 0334
+110F0 0334
+11136 0334
+111D0 0334
+116C0 0334
+1D7CE 0334
+1D7D8 0334
+1D7E2 0334
+1D7EC 0334
+1D7F6 0334
2189 0021
2189 003F
2189 0061
2189 0041
2189 0062
0030 0061
-0030 0041
-FF10 0061
-FF10 0041
-1F101 0061
-1F100 0061
-1F101 0041
-1F100 0041
-1D7CE 0061
-1D7D8 0061
-1D7E2 0061
-1D7EC 0061
-1D7F6 0061
-1D7CE 0041
-1D7D8 0041
-1D7E2 0041
-1D7EC 0041
-1D7F6 0041
-24EA 0061
-24FF 0061
-24EA 0041
-24FF 0041
-2070 0061
-2070 0041
-2080 0061
-2080 0041
0660 0061
-0660 0041
06F0 0061
-06F0 0041
07C0 0061
-07C0 0041
-104A0 0061
-104A0 0041
0966 0061
-0966 0041
09E6 0061
-09E6 0041
0A66 0061
-0A66 0041
0AE6 0061
-0AE6 0041
0B66 0061
-0B66 0041
0BE6 0061
-0BE6 0041
0C66 0061
0C78 0061
-0C66 0041
-0C78 0041
0CE6 0061
-0CE6 0041
0D66 0061
-0D66 0041
-ABF0 0061
-ABF0 0041
-A8D0 0061
-A8D0 0041
+0E50 0061
+0ED0 0061
+0F20 0061
+1040 0061
+1090 0061
+17E0 0061
+17F0 0061
+1810 0061
1946 0061
-1946 0041
19D0 0061
-19D0 0041
1A80 0061
-1A80 0041
1A90 0061
-1A90 0041
-0E50 0061
+1B50 0061
+1BB0 0061
+1C40 0061
+1C50 0061
+3007 0061
+A620 0061
+A8D0 0061
+A900 0061
+A9D0 0061
+AA50 0061
+ABF0 0061
+1018A 0061
+104A0 0061
+11066 0061
+110F0 0061
+11136 0061
+111D0 0061
+116C0 0061
+0030 0041
+0660 0041
+06F0 0041
+07C0 0041
+0966 0041
+09E6 0041
+0A66 0041
+0AE6 0041
+0B66 0041
+0BE6 0041
+0C66 0041
+0C78 0041
+0CE6 0041
+0D66 0041
0E50 0041
-0ED0 0061
0ED0 0041
-0F20 0061
0F20 0041
-0F33 0061
-0F33 0041
-1C40 0061
-1C40 0041
-A900 0061
-A900 0041
-1040 0061
1040 0041
-1090 0061
1090 0041
-11136 0061
-11136 0041
-17E0 0061
17E0 0041
-17F0 0061
17F0 0041
-AA50 0061
-AA50 0041
-1B50 0061
+1810 0041
+1946 0041
+19D0 0041
+1A80 0041
+1A90 0041
1B50 0041
-A9D0 0061
-A9D0 0041
-1BB0 0061
1BB0 0041
-1810 0061
-1810 0041
-1C50 0061
+1C40 0041
1C50 0041
-A620 0061
-A620 0041
-110F0 0061
-110F0 0041
-3007 0061
3007 0041
-1018A 0061
+A620 0041
+A8D0 0041
+A900 0041
+A9D0 0041
+AA50 0041
+ABF0 0041
1018A 0041
-111D0 0061
+104A0 0041
+11066 0041
+110F0 0041
+11136 0041
111D0 0041
-116C0 0061
116C0 0041
-11066 0061
-11066 0041
+FF10 0061
+FF10 0041
+1F101 0061
+1F100 0061
+0F33 0061
+1F101 0041
+1F100 0041
+0F33 0041
+1D7CE 0061
+1D7D8 0061
+1D7E2 0061
+1D7EC 0061
+1D7F6 0061
+1D7CE 0041
+1D7D8 0041
+1D7E2 0041
+1D7EC 0041
+1D7F6 0041
+24EA 0061
+24FF 0061
+24EA 0041
+24FF 0041
+2070 0061
+2070 0041
+2080 0061
+2080 0041
0030 0062
-FF10 0062
-1F101 0062
-1F100 0062
-1D7CE 0062
-1D7D8 0062
-1D7E2 0062
-1D7EC 0062
-1D7F6 0062
-24EA 0062
-24FF 0062
-2070 0062
-2080 0062
0660 0062
06F0 0062
07C0 0062
-104A0 0062
0966 0062
09E6 0062
0A66 0062
0C78 0062
0CE6 0062
0D66 0062
-ABF0 0062
-A8D0 0062
-1946 0062
-19D0 0062
-1A80 0062
-1A90 0062
0E50 0062
0ED0 0062
0F20 0062
-0F33 0062
-1C40 0062
-A900 0062
1040 0062
1090 0062
-11136 0062
17E0 0062
17F0 0062
-AA50 0062
+1810 0062
+1946 0062
+19D0 0062
+1A80 0062
+1A90 0062
1B50 0062
-A9D0 0062
1BB0 0062
-1810 0062
+1C40 0062
1C50 0062
-A620 0062
-110F0 0062
3007 0062
+A620 0062
+A8D0 0062
+A900 0062
+A9D0 0062
+AA50 0062
+ABF0 0062
1018A 0062
+104A0 0062
+11066 0062
+110F0 0062
+11136 0062
111D0 0062
116C0 0062
-11066 0062
+FF10 0062
+1F101 0062
+1F100 0062
+0F33 0062
+1D7CE 0062
+1D7D8 0062
+1D7E2 0062
+1D7EC 0062
+1D7F6 0062
+24EA 0062
+24FF 0062
+2070 0062
+2080 0062
3358 0021
3358 003F
3358 0061
3358 0041
3358 0062
0031 0021
+0661 0021
+06F1 0021
+07C1 0021
+0967 0021
+09E7 0021
+0A67 0021
+0AE7 0021
+0B67 0021
+0BE7 0021
+0C67 0021
+0C79 0021
+0C7C 0021
+0CE7 0021
+0D67 0021
+0E51 0021
+0ED1 0021
+0F21 0021
+1041 0021
+1091 0021
+1369 0021
+17E1 0021
+17F1 0021
+1811 0021
+1947 0021
+19D1 0021
+19DA 0021
+1A81 0021
+1A91 0021
+1B51 0021
+1BB1 0021
+1C41 0021
+1C51 0021
+3021 0021
+A621 0021
+A8D1 0021
+A901 0021
+A9D1 0021
+AA51 0021
+ABF1 0021
+10107 0021
+10142 0021
+10158 0021
+10159 0021
+1015A 0021
+10320 0021
+103D1 0021
+104A1 0021
+10858 0021
+10916 0021
+10A40 0021
+10A7D 0021
+10B58 0021
+10B78 0021
+10E60 0021
+11052 0021
+11067 0021
+110F1 0021
+11137 0021
+111D1 0021
+116C1 0021
+12415 0021
+1241E 0021
+1242C 0021
+12434 0021
+1244F 0021
+12458 0021
+1D360 0021
0031 003F
+0661 003F
+06F1 003F
+07C1 003F
+0967 003F
+09E7 003F
+0A67 003F
+0AE7 003F
+0B67 003F
+0BE7 003F
+0C67 003F
+0C79 003F
+0C7C 003F
+0CE7 003F
+0D67 003F
+0E51 003F
+0ED1 003F
+0F21 003F
+1041 003F
+1091 003F
+1369 003F
+17E1 003F
+17F1 003F
+1811 003F
+1947 003F
+19D1 003F
+19DA 003F
+1A81 003F
+1A91 003F
+1B51 003F
+1BB1 003F
+1C41 003F
+1C51 003F
+3021 003F
+A621 003F
+A8D1 003F
+A901 003F
+A9D1 003F
+AA51 003F
+ABF1 003F
+10107 003F
+10142 003F
+10158 003F
+10159 003F
+1015A 003F
+10320 003F
+103D1 003F
+104A1 003F
+10858 003F
+10916 003F
+10A40 003F
+10A7D 003F
+10B58 003F
+10B78 003F
+10E60 003F
+11052 003F
+11067 003F
+110F1 003F
+11137 003F
+111D1 003F
+116C1 003F
+12415 003F
+1241E 003F
+1242C 003F
+12434 003F
+1244F 003F
+12458 003F
+1D360 003F
FF11 0021
FF11 003F
2474 0021
1F102 0334
1F102 0021
1F102 003F
+0F2A 0021
+0F2A 003F
2488 0021
2488 003F
1D7CF 0021
00B9 003F
2081 0021
2081 003F
-1D7CF 0334
-1D7D9 0334
-1D7E3 0334
-1D7ED 0334
-1D7F7 0334
-0661 0021
-0661 003F
-06F1 0021
-06F1 003F
-10E60 0021
-10E60 003F
-10E60 0334
-07C1 0021
-07C1 003F
-1369 0021
-1369 003F
-104A1 0021
-104A1 003F
-104A1 0334
-0967 0021
-0967 003F
-09E7 0021
-09E7 003F
-0A67 0021
-0A67 003F
-0AE7 0021
-0AE7 003F
-0B67 0021
-0B67 003F
-0BE7 0021
-0BE7 003F
-0C67 0021
-0C79 0021
-0C7C 0021
-0C67 003F
-0C79 003F
-0C7C 003F
-0CE7 0021
-0CE7 003F
-0D67 0021
-0D67 003F
-ABF1 0021
-ABF1 003F
-A8D1 0021
-A8D1 003F
-1947 0021
-1947 003F
-19D1 0021
-19DA 0021
-19D1 003F
-19DA 003F
-1A81 0021
-1A81 003F
-1A91 0021
-1A91 003F
-0E51 0021
-0E51 003F
-0ED1 0021
-0ED1 003F
-0F21 0021
-0F21 003F
-0F2A 0021
-0F2A 003F
-1C41 0021
-1C41 003F
-A901 0021
-A901 003F
-1041 0021
-1041 003F
-1091 0021
-1091 003F
-11137 0021
-11137 003F
-11137 0334
-17E1 0021
-17E1 003F
-17F1 0021
-17F1 003F
-AA51 0021
-AA51 003F
-1B51 0021
-1B51 003F
-A9D1 0021
-A9D1 003F
-1BB1 0021
-1BB1 003F
-1811 0021
-1811 003F
-1C51 0021
-1C51 003F
-A621 0021
-A621 003F
-110F1 0021
-110F1 003F
-110F1 0334
-3021 0021
-3021 003F
-10107 0021
-10107 003F
10107 0334
-10142 0021
-10158 0021
-10159 0021
-1015A 0021
-10142 003F
-10158 003F
-10159 003F
-1015A 003F
10142 0334
10158 0334
10159 0334
1015A 0334
-10320 0021
-10320 003F
10320 0334
-103D1 0021
-103D1 003F
103D1 0334
-12415 0021
-1241E 0021
-1242C 0021
-12434 0021
-1244F 0021
-12458 0021
-12415 003F
-1241E 003F
-1242C 003F
-12434 003F
-1244F 003F
-12458 003F
+104A1 0334
+10858 0334
+10916 0334
+10A40 0334
+10A7D 0334
+10B58 0334
+10B78 0334
+10E60 0334
+11052 0334
+11067 0334
+110F1 0334
+11137 0334
+111D1 0334
+116C1 0334
12415 0334
1241E 0334
1242C 0334
12434 0334
1244F 0334
12458 0334
-10A7D 0021
-10A7D 003F
-10A7D 0334
-10916 0021
-10916 003F
-10916 0334
-10858 0021
-10858 003F
-10858 0334
-10B58 0021
-10B58 003F
-10B58 0334
-10B78 0021
-10B78 003F
-10B78 0334
-111D1 0021
-111D1 003F
-111D1 0334
-116C1 0021
-116C1 003F
-116C1 0334
-11067 0021
-11067 003F
-11067 0334
-11052 0021
-11052 003F
-11052 0334
-10A40 0021
-10A40 003F
-10A40 0334
-1D360 0021
-1D360 003F
1D360 0334
+1D7CF 0334
+1D7D9 0334
+1D7E3 0334
+1D7ED 0334
+1D7F7 0334
215F 0021
215F 003F
2152 0021
336B 0041
336B 0062
0031 0061
-0031 0041
-FF11 0061
-FF11 0041
-2474 0061
-1F102 0061
-2488 0061
-2474 0041
-1F102 0041
-2488 0041
-1D7CF 0061
-1D7D9 0061
-1D7E3 0061
-1D7ED 0061
-1D7F7 0061
-1D7CF 0041
-1D7D9 0041
-1D7E3 0041
-1D7ED 0041
-1D7F7 0041
-2460 0061
-24F5 0061
-2776 0061
-2780 0061
-278A 0061
-2460 0041
-24F5 0041
-2776 0041
-2780 0041
-278A 0041
-00B9 0061
-00B9 0041
-2081 0061
-2081 0041
0661 0061
-0661 0041
06F1 0061
-06F1 0041
-10E60 0061
-10E60 0041
07C1 0061
-07C1 0041
-1369 0061
-1369 0041
-104A1 0061
-104A1 0041
0967 0061
-0967 0041
09E7 0061
-09E7 0041
0A67 0061
-0A67 0041
0AE7 0061
-0AE7 0041
0B67 0061
-0B67 0041
0BE7 0061
-0BE7 0041
0C67 0061
0C79 0061
0C7C 0061
-0C67 0041
-0C79 0041
-0C7C 0041
0CE7 0061
-0CE7 0041
0D67 0061
-0D67 0041
-ABF1 0061
-ABF1 0041
-A8D1 0061
-A8D1 0041
-1947 0061
-1947 0041
-19D1 0061
-19DA 0061
-19D1 0041
-19DA 0041
-1A81 0061
-1A81 0041
-1A91 0061
-1A91 0041
0E51 0061
-0E51 0041
0ED1 0061
-0ED1 0041
0F21 0061
-0F21 0041
-0F2A 0061
-0F2A 0041
-1C41 0061
-1C41 0041
-A901 0061
-A901 0041
1041 0061
-1041 0041
1091 0061
-1091 0041
-11137 0061
-11137 0041
+1369 0061
17E1 0061
-17E1 0041
17F1 0061
-17F1 0041
-AA51 0061
-AA51 0041
+1811 0061
+1947 0061
+19D1 0061
+19DA 0061
+1A81 0061
+1A91 0061
1B51 0061
-1B51 0041
-A9D1 0061
-A9D1 0041
1BB1 0061
-1BB1 0041
-1811 0061
-1811 0041
+1C41 0061
1C51 0061
-1C51 0041
-A621 0061
-A621 0041
-110F1 0061
-110F1 0041
3021 0061
-3021 0041
+A621 0061
+A8D1 0061
+A901 0061
+A9D1 0061
+AA51 0061
+ABF1 0061
10107 0061
-10107 0041
10142 0061
10158 0061
10159 0061
1015A 0061
-10142 0041
-10158 0041
-10159 0041
-1015A 0041
10320 0061
-10320 0041
103D1 0061
-103D1 0041
+104A1 0061
+10858 0061
+10916 0061
+10A40 0061
+10A7D 0061
+10B58 0061
+10B78 0061
+10E60 0061
+11052 0061
+11067 0061
+110F1 0061
+11137 0061
+111D1 0061
+116C1 0061
12415 0061
1241E 0061
1242C 0061
12434 0061
1244F 0061
12458 0061
+1D360 0061
+0031 0041
+0661 0041
+06F1 0041
+07C1 0041
+0967 0041
+09E7 0041
+0A67 0041
+0AE7 0041
+0B67 0041
+0BE7 0041
+0C67 0041
+0C79 0041
+0C7C 0041
+0CE7 0041
+0D67 0041
+0E51 0041
+0ED1 0041
+0F21 0041
+1041 0041
+1091 0041
+1369 0041
+17E1 0041
+17F1 0041
+1811 0041
+1947 0041
+19D1 0041
+19DA 0041
+1A81 0041
+1A91 0041
+1B51 0041
+1BB1 0041
+1C41 0041
+1C51 0041
+3021 0041
+A621 0041
+A8D1 0041
+A901 0041
+A9D1 0041
+AA51 0041
+ABF1 0041
+10107 0041
+10142 0041
+10158 0041
+10159 0041
+1015A 0041
+10320 0041
+103D1 0041
+104A1 0041
+10858 0041
+10916 0041
+10A40 0041
+10A7D 0041
+10B58 0041
+10B78 0041
+10E60 0041
+11052 0041
+11067 0041
+110F1 0041
+11137 0041
+111D1 0041
+116C1 0041
12415 0041
1241E 0041
1242C 0041
12434 0041
1244F 0041
12458 0041
-10A7D 0061
-10A7D 0041
-10916 0061
-10916 0041
-10858 0061
-10858 0041
-10B58 0061
-10B58 0041
-10B78 0061
-10B78 0041
-111D1 0061
-111D1 0041
-116C1 0061
-116C1 0041
-11067 0061
-11067 0041
-11052 0061
-11052 0041
-10A40 0061
-10A40 0041
-1D360 0061
1D360 0041
+FF11 0061
+FF11 0041
+2474 0061
+1F102 0061
+2488 0061
+0F2A 0061
+2474 0041
+1F102 0041
+2488 0041
+0F2A 0041
+1D7CF 0061
+1D7D9 0061
+1D7E3 0061
+1D7ED 0061
+1D7F7 0061
+1D7CF 0041
+1D7D9 0041
+1D7E3 0041
+1D7ED 0041
+1D7F7 0041
+2460 0061
+24F5 0061
+2776 0061
+2780 0061
+278A 0061
+2460 0041
+24F5 0041
+2776 0041
+2780 0041
+278A 0041
+00B9 0061
+00B9 0041
+2081 0061
+2081 0041
0031 0062
-FF11 0062
-2474 0062
-1F102 0062
-2488 0062
-1D7CF 0062
-1D7D9 0062
-1D7E3 0062
-1D7ED 0062
-1D7F7 0062
-2460 0062
-24F5 0062
-2776 0062
-2780 0062
-278A 0062
-00B9 0062
-2081 0062
0661 0062
06F1 0062
-10E60 0062
07C1 0062
-1369 0062
-104A1 0062
0967 0062
09E7 0062
0A67 0062
0C7C 0062
0CE7 0062
0D67 0062
-ABF1 0062
-A8D1 0062
-1947 0062
-19D1 0062
-19DA 0062
-1A81 0062
-1A91 0062
0E51 0062
0ED1 0062
0F21 0062
-0F2A 0062
-1C41 0062
-A901 0062
1041 0062
1091 0062
-11137 0062
+1369 0062
17E1 0062
17F1 0062
-AA51 0062
+1811 0062
+1947 0062
+19D1 0062
+19DA 0062
+1A81 0062
+1A91 0062
1B51 0062
-A9D1 0062
1BB1 0062
-1811 0062
+1C41 0062
1C51 0062
-A621 0062
-110F1 0062
3021 0062
+A621 0062
+A8D1 0062
+A901 0062
+A9D1 0062
+AA51 0062
+ABF1 0062
10107 0062
10142 0062
10158 0062
1015A 0062
10320 0062
103D1 0062
+104A1 0062
+10858 0062
+10916 0062
+10A40 0062
+10A7D 0062
+10B58 0062
+10B78 0062
+10E60 0062
+11052 0062
+11067 0062
+110F1 0062
+11137 0062
+111D1 0062
+116C1 0062
12415 0062
1241E 0062
1242C 0062
12434 0062
1244F 0062
12458 0062
-10A7D 0062
-10916 0062
-10858 0062
-10B58 0062
-10B78 0062
-111D1 0062
-116C1 0062
-11067 0062
-11052 0062
-10A40 0062
1D360 0062
+FF11 0062
+2474 0062
+1F102 0062
+2488 0062
+0F2A 0062
+1D7CF 0062
+1D7D9 0062
+1D7E3 0062
+1D7ED 0062
+1D7F7 0062
+2460 0062
+24F5 0062
+2776 0062
+2780 0062
+278A 0062
+00B9 0062
+2081 0062
33E0 0021
33E0 003F
33E0 0061
3359 0041
3359 0062
0032 0021
-0032 003F
-FF12 0021
-FF12 003F
-2475 0021
-2475 003F
-1F103 0334
-1F103 0021
-1F103 003F
-2489 0021
-2489 003F
-1D7D0 0021
-1D7DA 0021
-1D7E4 0021
-1D7EE 0021
-1D7F8 0021
-1D7D0 003F
-1D7DA 003F
-1D7E4 003F
-1D7EE 003F
-1D7F8 003F
-2461 0021
-24F6 0021
-2777 0021
-2781 0021
-278B 0021
-2461 003F
-24F6 003F
-2777 003F
-2781 003F
-278B 003F
-00B2 0021
-00B2 003F
-2082 0021
-2082 003F
-1D7D0 0334
-1D7DA 0334
-1D7E4 0334
-1D7EE 0334
-1D7F8 0334
0662 0021
-0662 003F
06F2 0021
-06F2 003F
-10E61 0021
-10E61 003F
-10E61 0334
07C2 0021
-07C2 003F
-136A 0021
-136A 003F
-104A2 0021
-104A2 003F
-104A2 0334
0968 0021
-0968 003F
09E8 0021
-09E8 003F
0A68 0021
-0A68 003F
0AE8 0021
-0AE8 003F
0B68 0021
-0B68 003F
0BE8 0021
-0BE8 003F
0C68 0021
0C7A 0021
0C7D 0021
-0C68 003F
-0C7A 003F
-0C7D 003F
0CE8 0021
-0CE8 003F
0D68 0021
-0D68 003F
-ABF2 0021
-ABF2 003F
-A8D2 0021
-A8D2 003F
-1948 0021
-1948 003F
-19D2 0021
-19D2 003F
-1A82 0021
-1A82 003F
-1A92 0021
-1A92 003F
0E52 0021
-0E52 003F
0ED2 0021
-0ED2 003F
0F22 0021
-0F22 003F
-0F2B 0021
-0F2B 003F
-1C42 0021
-1C42 003F
-A902 0021
-A902 003F
1042 0021
-1042 003F
1092 0021
-1092 003F
-11138 0021
-11138 003F
-11138 0334
+136A 0021
17E2 0021
-17E2 003F
17F2 0021
-17F2 003F
-AA52 0021
-AA52 003F
+1812 0021
+1948 0021
+19D2 0021
+1A82 0021
+1A92 0021
1B52 0021
-1B52 003F
-A9D2 0021
-A9D2 003F
1BB2 0021
-1BB2 003F
-1812 0021
-1812 003F
+1C42 0021
1C52 0021
-1C52 003F
-A622 0021
-A622 003F
-110F2 0021
-110F2 003F
-110F2 0334
3022 0021
-3022 003F
+A622 0021
+A8D2 0021
+A902 0021
+A9D2 0021
+AA52 0021
+ABF2 0021
10108 0021
-10108 003F
-10108 0334
1015B 0021
1015C 0021
1015D 0021
1015E 0021
-1015B 003F
-1015C 003F
-1015D 003F
-1015E 003F
-1015B 0334
-1015C 0334
-1015D 0334
-1015E 0334
103D2 0021
-103D2 003F
-103D2 0334
+104A2 0021
+10859 0021
+1091A 0021
+10A41 0021
+10B59 0021
+10B79 0021
+10E61 0021
+11053 0021
+11068 0021
+110F2 0021
+11138 0021
+111D2 0021
+116C2 0021
12400 0021
12416 0021
1241F 0021
12435 0021
1244A 0021
12450 0021
+12456 0021
12459 0021
+1D361 0021
+0032 003F
+0662 003F
+06F2 003F
+07C2 003F
+0968 003F
+09E8 003F
+0A68 003F
+0AE8 003F
+0B68 003F
+0BE8 003F
+0C68 003F
+0C7A 003F
+0C7D 003F
+0CE8 003F
+0D68 003F
+0E52 003F
+0ED2 003F
+0F22 003F
+1042 003F
+1092 003F
+136A 003F
+17E2 003F
+17F2 003F
+1812 003F
+1948 003F
+19D2 003F
+1A82 003F
+1A92 003F
+1B52 003F
+1BB2 003F
+1C42 003F
+1C52 003F
+3022 003F
+A622 003F
+A8D2 003F
+A902 003F
+A9D2 003F
+AA52 003F
+ABF2 003F
+10108 003F
+1015B 003F
+1015C 003F
+1015D 003F
+1015E 003F
+103D2 003F
+104A2 003F
+10859 003F
+1091A 003F
+10A41 003F
+10B59 003F
+10B79 003F
+10E61 003F
+11053 003F
+11068 003F
+110F2 003F
+11138 003F
+111D2 003F
+116C2 003F
12400 003F
12416 003F
1241F 003F
12435 003F
1244A 003F
12450 003F
+12456 003F
12459 003F
+1D361 003F
+FF12 0021
+FF12 003F
+2475 0021
+2475 003F
+1F103 0334
+1F103 0021
+1F103 003F
+0F2B 0021
+0F2B 003F
+2489 0021
+2489 003F
+1D7D0 0021
+1D7DA 0021
+1D7E4 0021
+1D7EE 0021
+1D7F8 0021
+1D7D0 003F
+1D7DA 003F
+1D7E4 003F
+1D7EE 003F
+1D7F8 003F
+2461 0021
+24F6 0021
+2777 0021
+2781 0021
+278B 0021
+2461 003F
+24F6 003F
+2777 003F
+2781 003F
+278B 003F
+00B2 0021
+00B2 003F
+2082 0021
+2082 003F
+10108 0334
+1015B 0334
+1015C 0334
+1015D 0334
+1015E 0334
+103D2 0334
+104A2 0334
+10859 0334
+1091A 0334
+10A41 0334
+10B59 0334
+10B79 0334
+10E61 0334
+11053 0334
+11068 0334
+110F2 0334
+11138 0334
+111D2 0334
+116C2 0334
12400 0334
12416 0334
1241F 0334
12435 0334
1244A 0334
12450 0334
+12456 0334
12459 0334
-1091A 0021
-1091A 003F
-1091A 0334
-10859 0021
-10859 003F
-10859 0334
-10B59 0021
-10B59 003F
-10B59 0334
-10B79 0021
-10B79 003F
-10B79 0334
-111D2 0021
-111D2 003F
-111D2 0334
-116C2 0021
-116C2 003F
-116C2 0334
-11068 0021
-11068 003F
-11068 0334
-11053 0021
-11053 003F
-11053 0334
-10A41 0021
-10A41 003F
-10A41 0334
-1D361 0021
-1D361 003F
1D361 0334
+1D7D0 0334
+1D7DA 0334
+1D7E4 0334
+1D7EE 0334
+1D7F8 0334
2154 0021
2154 003F
2154 0061
33FC 0041
33FC 0062
0032 0061
-0032 0041
-FF12 0061
-FF12 0041
-2475 0061
-1F103 0061
-2489 0061
-2475 0041
-1F103 0041
-2489 0041
-1D7D0 0061
-1D7DA 0061
-1D7E4 0061
-1D7EE 0061
-1D7F8 0061
-1D7D0 0041
-1D7DA 0041
-1D7E4 0041
-1D7EE 0041
-1D7F8 0041
-2461 0061
-24F6 0061
-2777 0061
-2781 0061
-278B 0061
-2461 0041
-24F6 0041
-2777 0041
-2781 0041
-278B 0041
-00B2 0061
-00B2 0041
-2082 0061
-2082 0041
0662 0061
-0662 0041
06F2 0061
-06F2 0041
-10E61 0061
-10E61 0041
07C2 0061
-07C2 0041
-136A 0061
-136A 0041
-104A2 0061
-104A2 0041
0968 0061
-0968 0041
09E8 0061
-09E8 0041
0A68 0061
-0A68 0041
0AE8 0061
-0AE8 0041
0B68 0061
-0B68 0041
0BE8 0061
-0BE8 0041
0C68 0061
0C7A 0061
0C7D 0061
-0C68 0041
-0C7A 0041
-0C7D 0041
0CE8 0061
-0CE8 0041
0D68 0061
-0D68 0041
-ABF2 0061
-ABF2 0041
-A8D2 0061
-A8D2 0041
-1948 0061
-1948 0041
-19D2 0061
-19D2 0041
-1A82 0061
-1A82 0041
-1A92 0061
-1A92 0041
0E52 0061
-0E52 0041
0ED2 0061
-0ED2 0041
0F22 0061
-0F22 0041
-0F2B 0061
-0F2B 0041
-1C42 0061
-1C42 0041
-A902 0061
-A902 0041
1042 0061
-1042 0041
1092 0061
-1092 0041
-11138 0061
-11138 0041
+136A 0061
17E2 0061
-17E2 0041
17F2 0061
-17F2 0041
-AA52 0061
-AA52 0041
+1812 0061
+1948 0061
+19D2 0061
+1A82 0061
+1A92 0061
1B52 0061
-1B52 0041
-A9D2 0061
-A9D2 0041
1BB2 0061
-1BB2 0041
-1812 0061
-1812 0041
+1C42 0061
1C52 0061
-1C52 0041
-A622 0061
-A622 0041
-110F2 0061
-110F2 0041
3022 0061
-3022 0041
+A622 0061
+A8D2 0061
+A902 0061
+A9D2 0061
+AA52 0061
+ABF2 0061
10108 0061
-10108 0041
1015B 0061
1015C 0061
1015D 0061
1015E 0061
-1015B 0041
-1015C 0041
-1015D 0041
-1015E 0041
103D2 0061
-103D2 0041
+104A2 0061
+10859 0061
+1091A 0061
+10A41 0061
+10B59 0061
+10B79 0061
+10E61 0061
+11053 0061
+11068 0061
+110F2 0061
+11138 0061
+111D2 0061
+116C2 0061
12400 0061
12416 0061
1241F 0061
12435 0061
1244A 0061
12450 0061
+12456 0061
12459 0061
+1D361 0061
+0032 0041
+0662 0041
+06F2 0041
+07C2 0041
+0968 0041
+09E8 0041
+0A68 0041
+0AE8 0041
+0B68 0041
+0BE8 0041
+0C68 0041
+0C7A 0041
+0C7D 0041
+0CE8 0041
+0D68 0041
+0E52 0041
+0ED2 0041
+0F22 0041
+1042 0041
+1092 0041
+136A 0041
+17E2 0041
+17F2 0041
+1812 0041
+1948 0041
+19D2 0041
+1A82 0041
+1A92 0041
+1B52 0041
+1BB2 0041
+1C42 0041
+1C52 0041
+3022 0041
+A622 0041
+A8D2 0041
+A902 0041
+A9D2 0041
+AA52 0041
+ABF2 0041
+10108 0041
+1015B 0041
+1015C 0041
+1015D 0041
+1015E 0041
+103D2 0041
+104A2 0041
+10859 0041
+1091A 0041
+10A41 0041
+10B59 0041
+10B79 0041
+10E61 0041
+11053 0041
+11068 0041
+110F2 0041
+11138 0041
+111D2 0041
+116C2 0041
12400 0041
12416 0041
1241F 0041
12435 0041
1244A 0041
12450 0041
+12456 0041
12459 0041
-1091A 0061
-1091A 0041
-10859 0061
-10859 0041
-10B59 0061
-10B59 0041
-10B79 0061
-10B79 0041
-111D2 0061
-111D2 0041
-116C2 0061
-116C2 0041
-11068 0061
-11068 0041
-11053 0061
-11053 0041
-10A41 0061
-10A41 0041
-1D361 0061
1D361 0041
-0032 0062
-FF12 0062
-2475 0062
-1F103 0062
-2489 0062
-1D7D0 0062
-1D7DA 0062
-1D7E4 0062
-1D7EE 0062
-1D7F8 0062
-2461 0062
-24F6 0062
-2777 0062
-2781 0062
-278B 0062
-00B2 0062
-2082 0062
+FF12 0061
+FF12 0041
+2475 0061
+1F103 0061
+2489 0061
+0F2B 0061
+2475 0041
+1F103 0041
+2489 0041
+0F2B 0041
+1D7D0 0061
+1D7DA 0061
+1D7E4 0061
+1D7EE 0061
+1D7F8 0061
+1D7D0 0041
+1D7DA 0041
+1D7E4 0041
+1D7EE 0041
+1D7F8 0041
+2461 0061
+24F6 0061
+2777 0061
+2781 0061
+278B 0061
+2461 0041
+24F6 0041
+2777 0041
+2781 0041
+278B 0041
+00B2 0061
+00B2 0041
+2082 0061
+2082 0041
+0032 0062
0662 0062
06F2 0062
-10E61 0062
07C2 0062
-136A 0062
-104A2 0062
0968 0062
09E8 0062
0A68 0062
0C7D 0062
0CE8 0062
0D68 0062
-ABF2 0062
-A8D2 0062
-1948 0062
-19D2 0062
-1A82 0062
-1A92 0062
0E52 0062
0ED2 0062
0F22 0062
-0F2B 0062
-1C42 0062
-A902 0062
1042 0062
1092 0062
-11138 0062
+136A 0062
17E2 0062
17F2 0062
-AA52 0062
+1812 0062
+1948 0062
+19D2 0062
+1A82 0062
+1A92 0062
1B52 0062
-A9D2 0062
1BB2 0062
-1812 0062
+1C42 0062
1C52 0062
-A622 0062
-110F2 0062
3022 0062
+A622 0062
+A8D2 0062
+A902 0062
+A9D2 0062
+AA52 0062
+ABF2 0062
10108 0062
1015B 0062
1015C 0062
1015D 0062
1015E 0062
103D2 0062
+104A2 0062
+10859 0062
+1091A 0062
+10A41 0062
+10B59 0062
+10B79 0062
+10E61 0062
+11053 0062
+11068 0062
+110F2 0062
+11138 0062
+111D2 0062
+116C2 0062
12400 0062
12416 0062
1241F 0062
12435 0062
1244A 0062
12450 0062
+12456 0062
12459 0062
-1091A 0062
-10859 0062
-10B59 0062
-10B79 0062
-111D2 0062
-116C2 0062
-11068 0062
-11053 0062
-10A41 0062
1D361 0062
+FF12 0062
+2475 0062
+1F103 0062
+2489 0062
+0F2B 0062
+1D7D0 0062
+1D7DA 0062
+1D7E4 0062
+1D7EE 0062
+1D7F8 0062
+2461 0062
+24F6 0062
+2777 0062
+2781 0062
+278B 0062
+00B2 0062
+2082 0062
33E1 0021
33E1 003F
33E1 0061
335A 0041
335A 0062
0033 0021
-0033 003F
-FF13 0021
-FF13 003F
-2476 0021
-2476 003F
-1F104 0334
-1F104 0021
-1F104 003F
-248A 0021
-248A 003F
-1D7D1 0021
-1D7DB 0021
-1D7E5 0021
-1D7EF 0021
-1D7F9 0021
-1D7D1 003F
-1D7DB 003F
-1D7E5 003F
-1D7EF 003F
-1D7F9 003F
-2462 0021
-24F7 0021
-2778 0021
-2782 0021
-278C 0021
-2462 003F
-24F7 003F
-2778 003F
-2782 003F
-278C 003F
-00B3 0021
-00B3 003F
-2083 0021
-2083 003F
-1D7D1 0334
-1D7DB 0334
-1D7E5 0334
-1D7EF 0334
-1D7F9 0334
0663 0021
-0663 003F
06F3 0021
-06F3 003F
-10E62 0021
-10E62 003F
-10E62 0334
07C3 0021
-07C3 003F
-136B 0021
-136B 003F
-104A3 0021
-104A3 003F
-104A3 0334
0969 0021
-0969 003F
09E9 0021
-09E9 003F
0A69 0021
-0A69 003F
0AE9 0021
-0AE9 003F
0B69 0021
-0B69 003F
0BE9 0021
-0BE9 003F
0C69 0021
0C7B 0021
0C7E 0021
-0C69 003F
-0C7B 003F
-0C7E 003F
0CE9 0021
-0CE9 003F
0D69 0021
-0D69 003F
-ABF3 0021
-ABF3 003F
-A8D3 0021
-A8D3 003F
-1949 0021
-1949 003F
-19D3 0021
-19D3 003F
-1A83 0021
-1A83 003F
-1A93 0021
-1A93 003F
0E53 0021
-0E53 003F
0ED3 0021
-0ED3 003F
0F23 0021
-0F23 003F
-0F2C 0021
-0F2C 003F
-1C43 0021
-1C43 003F
-A903 0021
-A903 003F
1043 0021
-1043 003F
1093 0021
-1093 003F
-11139 0021
-11139 003F
-11139 0334
+136B 0021
17E3 0021
-17E3 003F
17F3 0021
-17F3 003F
-AA53 0021
-AA53 003F
+1813 0021
+1949 0021
+19D3 0021
+1A83 0021
+1A93 0021
1B53 0021
-1B53 003F
-A9D3 0021
-A9D3 003F
1BB3 0021
-1BB3 003F
-1813 0021
-1813 003F
+1C43 0021
1C53 0021
-1C53 003F
-A623 0021
-A623 003F
-110F3 0021
-110F3 003F
-110F3 0334
3023 0021
-3023 003F
+A623 0021
+A8D3 0021
+A903 0021
+A9D3 0021
+AA53 0021
+ABF3 0021
10109 0021
-10109 003F
-10109 0334
+104A3 0021
+1085A 0021
+1091B 0021
+10A42 0021
+10B5A 0021
+10B7A 0021
+10E62 0021
+11054 0021
+11069 0021
+110F3 0021
+11139 0021
+111D3 0021
+116C3 0021
12401 0021
12408 0021
12417 0021
1243B 0021
1244B 0021
12451 0021
+12457 0021
+1D362 0021
+0033 003F
+0663 003F
+06F3 003F
+07C3 003F
+0969 003F
+09E9 003F
+0A69 003F
+0AE9 003F
+0B69 003F
+0BE9 003F
+0C69 003F
+0C7B 003F
+0C7E 003F
+0CE9 003F
+0D69 003F
+0E53 003F
+0ED3 003F
+0F23 003F
+1043 003F
+1093 003F
+136B 003F
+17E3 003F
+17F3 003F
+1813 003F
+1949 003F
+19D3 003F
+1A83 003F
+1A93 003F
+1B53 003F
+1BB3 003F
+1C43 003F
+1C53 003F
+3023 003F
+A623 003F
+A8D3 003F
+A903 003F
+A9D3 003F
+AA53 003F
+ABF3 003F
+10109 003F
+104A3 003F
+1085A 003F
+1091B 003F
+10A42 003F
+10B5A 003F
+10B7A 003F
+10E62 003F
+11054 003F
+11069 003F
+110F3 003F
+11139 003F
+111D3 003F
+116C3 003F
12401 003F
12408 003F
12417 003F
1243B 003F
1244B 003F
12451 003F
+12457 003F
+1D362 003F
+FF13 0021
+FF13 003F
+2476 0021
+2476 003F
+1F104 0334
+1F104 0021
+1F104 003F
+0F2C 0021
+0F2C 003F
+248A 0021
+248A 003F
+1D7D1 0021
+1D7DB 0021
+1D7E5 0021
+1D7EF 0021
+1D7F9 0021
+1D7D1 003F
+1D7DB 003F
+1D7E5 003F
+1D7EF 003F
+1D7F9 003F
+2462 0021
+24F7 0021
+2778 0021
+2782 0021
+278C 0021
+2462 003F
+24F7 003F
+2778 003F
+2782 003F
+278C 003F
+00B3 0021
+00B3 003F
+2083 0021
+2083 003F
+10109 0334
+104A3 0334
+1085A 0334
+1091B 0334
+10A42 0334
+10B5A 0334
+10B7A 0334
+10E62 0334
+11054 0334
+11069 0334
+110F3 0334
+11139 0334
+111D3 0334
+116C3 0334
12401 0334
12408 0334
12417 0334
1243B 0334
1244B 0334
12451 0334
-1091B 0021
-1091B 003F
-1091B 0334
-1085A 0021
-1085A 003F
-1085A 0334
-10B5A 0021
-10B5A 003F
-10B5A 0334
-10B7A 0021
-10B7A 003F
-10B7A 0334
-111D3 0021
-111D3 003F
-111D3 0334
-116C3 0021
-116C3 003F
-116C3 0334
-11069 0021
-11069 003F
-11069 0334
-11054 0021
-11054 003F
-11054 0334
-10A42 0021
-10A42 003F
-10A42 0334
-1D362 0021
-1D362 003F
+12457 0334
1D362 0334
+1D7D1 0334
+1D7DB 0334
+1D7E5 0334
+1D7EF 0334
+1D7F9 0334
00BE 0021
00BE 003F
00BE 0061
32B4 0041
32B4 0062
0033 0061
-0033 0041
-FF13 0061
-FF13 0041
-2476 0061
-1F104 0061
-248A 0061
-2476 0041
-1F104 0041
-248A 0041
-1D7D1 0061
-1D7DB 0061
-1D7E5 0061
-1D7EF 0061
-1D7F9 0061
-1D7D1 0041
-1D7DB 0041
-1D7E5 0041
-1D7EF 0041
-1D7F9 0041
-2462 0061
-24F7 0061
-2778 0061
-2782 0061
-278C 0061
-2462 0041
-24F7 0041
-2778 0041
-2782 0041
-278C 0041
-00B3 0061
-00B3 0041
-2083 0061
-2083 0041
0663 0061
-0663 0041
06F3 0061
-06F3 0041
-10E62 0061
-10E62 0041
07C3 0061
-07C3 0041
-136B 0061
-136B 0041
-104A3 0061
-104A3 0041
0969 0061
-0969 0041
09E9 0061
-09E9 0041
0A69 0061
-0A69 0041
0AE9 0061
-0AE9 0041
0B69 0061
-0B69 0041
0BE9 0061
-0BE9 0041
0C69 0061
0C7B 0061
0C7E 0061
-0C69 0041
-0C7B 0041
-0C7E 0041
0CE9 0061
-0CE9 0041
0D69 0061
-0D69 0041
-ABF3 0061
-ABF3 0041
-A8D3 0061
-A8D3 0041
-1949 0061
-1949 0041
-19D3 0061
-19D3 0041
-1A83 0061
-1A83 0041
-1A93 0061
-1A93 0041
0E53 0061
-0E53 0041
0ED3 0061
-0ED3 0041
0F23 0061
-0F23 0041
-0F2C 0061
-0F2C 0041
-1C43 0061
-1C43 0041
-A903 0061
-A903 0041
1043 0061
-1043 0041
1093 0061
-1093 0041
-11139 0061
-11139 0041
+136B 0061
17E3 0061
-17E3 0041
17F3 0061
-17F3 0041
-AA53 0061
-AA53 0041
+1813 0061
+1949 0061
+19D3 0061
+1A83 0061
+1A93 0061
1B53 0061
-1B53 0041
-A9D3 0061
-A9D3 0041
1BB3 0061
-1BB3 0041
-1813 0061
-1813 0041
+1C43 0061
1C53 0061
-1C53 0041
-A623 0061
-A623 0041
-110F3 0061
-110F3 0041
3023 0061
-3023 0041
+A623 0061
+A8D3 0061
+A903 0061
+A9D3 0061
+AA53 0061
+ABF3 0061
10109 0061
-10109 0041
+104A3 0061
+1085A 0061
+1091B 0061
+10A42 0061
+10B5A 0061
+10B7A 0061
+10E62 0061
+11054 0061
+11069 0061
+110F3 0061
+11139 0061
+111D3 0061
+116C3 0061
12401 0061
12408 0061
12417 0061
1243B 0061
1244B 0061
12451 0061
+12457 0061
+1D362 0061
+0033 0041
+0663 0041
+06F3 0041
+07C3 0041
+0969 0041
+09E9 0041
+0A69 0041
+0AE9 0041
+0B69 0041
+0BE9 0041
+0C69 0041
+0C7B 0041
+0C7E 0041
+0CE9 0041
+0D69 0041
+0E53 0041
+0ED3 0041
+0F23 0041
+1043 0041
+1093 0041
+136B 0041
+17E3 0041
+17F3 0041
+1813 0041
+1949 0041
+19D3 0041
+1A83 0041
+1A93 0041
+1B53 0041
+1BB3 0041
+1C43 0041
+1C53 0041
+3023 0041
+A623 0041
+A8D3 0041
+A903 0041
+A9D3 0041
+AA53 0041
+ABF3 0041
+10109 0041
+104A3 0041
+1085A 0041
+1091B 0041
+10A42 0041
+10B5A 0041
+10B7A 0041
+10E62 0041
+11054 0041
+11069 0041
+110F3 0041
+11139 0041
+111D3 0041
+116C3 0041
12401 0041
12408 0041
12417 0041
1243B 0041
1244B 0041
12451 0041
-1091B 0061
-1091B 0041
-1085A 0061
-1085A 0041
-10B5A 0061
-10B5A 0041
-10B7A 0061
-10B7A 0041
-111D3 0061
-111D3 0041
-116C3 0061
-116C3 0041
-11069 0061
-11069 0041
-11054 0061
-11054 0041
-10A42 0061
-10A42 0041
-1D362 0061
+12457 0041
1D362 0041
+FF13 0061
+FF13 0041
+2476 0061
+1F104 0061
+248A 0061
+0F2C 0061
+2476 0041
+1F104 0041
+248A 0041
+0F2C 0041
+1D7D1 0061
+1D7DB 0061
+1D7E5 0061
+1D7EF 0061
+1D7F9 0061
+1D7D1 0041
+1D7DB 0041
+1D7E5 0041
+1D7EF 0041
+1D7F9 0041
+2462 0061
+24F7 0061
+2778 0061
+2782 0061
+278C 0061
+2462 0041
+24F7 0041
+2778 0041
+2782 0041
+278C 0041
+00B3 0061
+00B3 0041
+2083 0061
+2083 0041
0033 0062
-FF13 0062
-2476 0062
-1F104 0062
-248A 0062
-1D7D1 0062
-1D7DB 0062
-1D7E5 0062
-1D7EF 0062
-1D7F9 0062
-2462 0062
-24F7 0062
-2778 0062
-2782 0062
-278C 0062
-00B3 0062
-2083 0062
0663 0062
06F3 0062
-10E62 0062
07C3 0062
-136B 0062
-104A3 0062
0969 0062
09E9 0062
0A69 0062
0C7E 0062
0CE9 0062
0D69 0062
-ABF3 0062
-A8D3 0062
-1949 0062
-19D3 0062
-1A83 0062
-1A93 0062
0E53 0062
0ED3 0062
0F23 0062
-0F2C 0062
-1C43 0062
-A903 0062
1043 0062
1093 0062
-11139 0062
+136B 0062
17E3 0062
17F3 0062
-AA53 0062
+1813 0062
+1949 0062
+19D3 0062
+1A83 0062
+1A93 0062
1B53 0062
-A9D3 0062
1BB3 0062
-1813 0062
+1C43 0062
1C53 0062
-A623 0062
-110F3 0062
3023 0062
+A623 0062
+A8D3 0062
+A903 0062
+A9D3 0062
+AA53 0062
+ABF3 0062
10109 0062
+104A3 0062
+1085A 0062
+1091B 0062
+10A42 0062
+10B5A 0062
+10B7A 0062
+10E62 0062
+11054 0062
+11069 0062
+110F3 0062
+11139 0062
+111D3 0062
+116C3 0062
12401 0062
12408 0062
12417 0062
1243B 0062
1244B 0062
12451 0062
-1091B 0062
-1085A 0062
-10B5A 0062
-10B7A 0062
-111D3 0062
-116C3 0062
-11069 0062
-11054 0062
-10A42 0062
+12457 0062
1D362 0062
+FF13 0062
+2476 0062
+1F104 0062
+248A 0062
+0F2C 0062
+1D7D1 0062
+1D7DB 0062
+1D7E5 0062
+1D7EF 0062
+1D7F9 0062
+2462 0062
+24F7 0062
+2778 0062
+2782 0062
+278C 0062
+00B3 0062
+2083 0062
33E2 0021
33E2 003F
33E2 0061
335B 0041
335B 0062
0034 0021
-0034 003F
-FF14 0021
-FF14 003F
-2477 0021
-2477 003F
-1F105 0334
-1F105 0021
-1F105 003F
-248B 0021
-248B 003F
-1D7D2 0021
-1D7DC 0021
-1D7E6 0021
-1D7F0 0021
-1D7FA 0021
-1D7D2 003F
-1D7DC 003F
-1D7E6 003F
-1D7F0 003F
-1D7FA 003F
-2463 0021
-24F8 0021
-2779 0021
-2783 0021
-278D 0021
-2463 003F
-24F8 003F
-2779 003F
-2783 003F
-278D 003F
-2074 0021
-2074 003F
-2084 0021
-2084 003F
-1D7D2 0334
-1D7DC 0334
-1D7E6 0334
-1D7F0 0334
-1D7FA 0334
0664 0021
-0664 003F
06F4 0021
-06F4 003F
-10E63 0021
-10E63 003F
-10E63 0334
07C4 0021
-07C4 003F
-136C 0021
-136C 003F
-104A4 0021
-104A4 003F
-104A4 0334
096A 0021
-096A 003F
09EA 0021
-09EA 003F
0A6A 0021
-0A6A 003F
0AEA 0021
-0AEA 003F
0B6A 0021
-0B6A 003F
0BEA 0021
-0BEA 003F
0C6A 0021
-0C6A 003F
0CEA 0021
-0CEA 003F
0D6A 0021
-0D6A 003F
-ABF4 0021
-ABF4 003F
-A8D4 0021
-A8D4 003F
-194A 0021
-194A 003F
-19D4 0021
-19D4 003F
-1A84 0021
-1A84 003F
-1A94 0021
-1A94 003F
0E54 0021
-0E54 003F
0ED4 0021
-0ED4 003F
0F24 0021
-0F24 003F
-0F2D 0021
-0F2D 003F
-1C44 0021
-1C44 003F
-A904 0021
-A904 003F
1044 0021
-1044 003F
1094 0021
-1094 003F
-1113A 0021
-1113A 003F
-1113A 0334
+136C 0021
17E4 0021
-17E4 003F
17F4 0021
-17F4 003F
-AA54 0021
-AA54 003F
+1814 0021
+194A 0021
+19D4 0021
+1A84 0021
+1A94 0021
1B54 0021
-1B54 003F
-A9D4 0021
-A9D4 003F
1BB4 0021
-1BB4 003F
-1814 0021
-1814 003F
+1C44 0021
1C54 0021
-1C54 003F
-A624 0021
-A624 003F
-110F4 0021
-110F4 003F
-110F4 0334
3024 0021
-3024 003F
+A624 0021
+A8D4 0021
+A904 0021
+A9D4 0021
+AA54 0021
+ABF4 0021
1010A 0021
-1010A 003F
-1010A 0334
+104A4 0021
+10A43 0021
+10B5B 0021
+10B7B 0021
+10E63 0021
+11055 0021
+1106A 0021
+110F4 0021
+1113A 0021
+111D4 0021
+116C4 0021
12402 0021
12409 0021
1240F 0021
1244C 0021
12452 0021
12453 0021
+1D363 0021
+0034 003F
+0664 003F
+06F4 003F
+07C4 003F
+096A 003F
+09EA 003F
+0A6A 003F
+0AEA 003F
+0B6A 003F
+0BEA 003F
+0C6A 003F
+0CEA 003F
+0D6A 003F
+0E54 003F
+0ED4 003F
+0F24 003F
+1044 003F
+1094 003F
+136C 003F
+17E4 003F
+17F4 003F
+1814 003F
+194A 003F
+19D4 003F
+1A84 003F
+1A94 003F
+1B54 003F
+1BB4 003F
+1C44 003F
+1C54 003F
+3024 003F
+A624 003F
+A8D4 003F
+A904 003F
+A9D4 003F
+AA54 003F
+ABF4 003F
+1010A 003F
+104A4 003F
+10A43 003F
+10B5B 003F
+10B7B 003F
+10E63 003F
+11055 003F
+1106A 003F
+110F4 003F
+1113A 003F
+111D4 003F
+116C4 003F
12402 003F
12409 003F
1240F 003F
1244C 003F
12452 003F
12453 003F
+1D363 003F
+FF14 0021
+FF14 003F
+2477 0021
+2477 003F
+1F105 0334
+1F105 0021
+1F105 003F
+0F2D 0021
+0F2D 003F
+248B 0021
+248B 003F
+1D7D2 0021
+1D7DC 0021
+1D7E6 0021
+1D7F0 0021
+1D7FA 0021
+1D7D2 003F
+1D7DC 003F
+1D7E6 003F
+1D7F0 003F
+1D7FA 003F
+2463 0021
+24F8 0021
+2779 0021
+2783 0021
+278D 0021
+2463 003F
+24F8 003F
+2779 003F
+2783 003F
+278D 003F
+2074 0021
+2074 003F
+2084 0021
+2084 003F
+1010A 0334
+104A4 0334
+10A43 0334
+10B5B 0334
+10B7B 0334
+10E63 0334
+11055 0334
+1106A 0334
+110F4 0334
+1113A 0334
+111D4 0334
+116C4 0334
12402 0334
12409 0334
1240F 0334
1244C 0334
12452 0334
12453 0334
-10B5B 0021
-10B5B 003F
-10B5B 0334
-10B7B 0021
-10B7B 003F
-10B7B 0334
-111D4 0021
-111D4 003F
-111D4 0334
-116C4 0021
-116C4 003F
-116C4 0334
-1106A 0021
-1106A 003F
-1106A 0334
-11055 0021
-11055 003F
-11055 0334
-10A43 0021
-10A43 003F
-10A43 0334
-1D363 0021
-1D363 003F
1D363 0334
+1D7D2 0334
+1D7DC 0334
+1D7E6 0334
+1D7F0 0334
+1D7FA 0334
2158 0021
2158 003F
2158 0061
32BE 0041
32BE 0062
0034 0061
-0034 0041
-FF14 0061
-FF14 0041
-2477 0061
-1F105 0061
-248B 0061
-2477 0041
-1F105 0041
-248B 0041
-1D7D2 0061
-1D7DC 0061
-1D7E6 0061
-1D7F0 0061
-1D7FA 0061
-1D7D2 0041
-1D7DC 0041
-1D7E6 0041
-1D7F0 0041
-1D7FA 0041
-2463 0061
-24F8 0061
-2779 0061
-2783 0061
-278D 0061
-2463 0041
-24F8 0041
-2779 0041
-2783 0041
-278D 0041
-2074 0061
-2074 0041
-2084 0061
-2084 0041
0664 0061
-0664 0041
06F4 0061
-06F4 0041
-10E63 0061
-10E63 0041
07C4 0061
-07C4 0041
-136C 0061
-136C 0041
-104A4 0061
-104A4 0041
096A 0061
-096A 0041
09EA 0061
-09EA 0041
0A6A 0061
-0A6A 0041
0AEA 0061
-0AEA 0041
0B6A 0061
-0B6A 0041
0BEA 0061
-0BEA 0041
0C6A 0061
-0C6A 0041
0CEA 0061
-0CEA 0041
0D6A 0061
-0D6A 0041
-ABF4 0061
-ABF4 0041
-A8D4 0061
-A8D4 0041
+0E54 0061
+0ED4 0061
+0F24 0061
+1044 0061
+1094 0061
+136C 0061
+17E4 0061
+17F4 0061
+1814 0061
194A 0061
-194A 0041
19D4 0061
-19D4 0041
1A84 0061
-1A84 0041
1A94 0061
-1A94 0041
-0E54 0061
-0E54 0041
-0ED4 0061
-0ED4 0041
-0F24 0061
-0F24 0041
-0F2D 0061
-0F2D 0041
-1C44 0061
-1C44 0041
-A904 0061
-A904 0041
-1044 0061
-1044 0041
-1094 0061
-1094 0041
-1113A 0061
-1113A 0041
-17E4 0061
-17E4 0041
-17F4 0061
-17F4 0041
-AA54 0061
-AA54 0041
1B54 0061
-1B54 0041
-A9D4 0061
-A9D4 0041
1BB4 0061
-1BB4 0041
-1814 0061
-1814 0041
+1C44 0061
1C54 0061
-1C54 0041
-A624 0061
-A624 0041
-110F4 0061
-110F4 0041
3024 0061
-3024 0041
+A624 0061
+A8D4 0061
+A904 0061
+A9D4 0061
+AA54 0061
+ABF4 0061
1010A 0061
-1010A 0041
+104A4 0061
+10A43 0061
+10B5B 0061
+10B7B 0061
+10E63 0061
+11055 0061
+1106A 0061
+110F4 0061
+1113A 0061
+111D4 0061
+116C4 0061
12402 0061
12409 0061
1240F 0061
1244C 0061
12452 0061
12453 0061
+1D363 0061
+0034 0041
+0664 0041
+06F4 0041
+07C4 0041
+096A 0041
+09EA 0041
+0A6A 0041
+0AEA 0041
+0B6A 0041
+0BEA 0041
+0C6A 0041
+0CEA 0041
+0D6A 0041
+0E54 0041
+0ED4 0041
+0F24 0041
+1044 0041
+1094 0041
+136C 0041
+17E4 0041
+17F4 0041
+1814 0041
+194A 0041
+19D4 0041
+1A84 0041
+1A94 0041
+1B54 0041
+1BB4 0041
+1C44 0041
+1C54 0041
+3024 0041
+A624 0041
+A8D4 0041
+A904 0041
+A9D4 0041
+AA54 0041
+ABF4 0041
+1010A 0041
+104A4 0041
+10A43 0041
+10B5B 0041
+10B7B 0041
+10E63 0041
+11055 0041
+1106A 0041
+110F4 0041
+1113A 0041
+111D4 0041
+116C4 0041
12402 0041
12409 0041
1240F 0041
1244C 0041
12452 0041
12453 0041
-10B5B 0061
-10B5B 0041
-10B7B 0061
-10B7B 0041
-111D4 0061
-111D4 0041
-116C4 0061
-116C4 0041
-1106A 0061
-1106A 0041
-11055 0061
-11055 0041
-10A43 0061
-10A43 0041
-1D363 0061
1D363 0041
+FF14 0061
+FF14 0041
+2477 0061
+1F105 0061
+248B 0061
+0F2D 0061
+2477 0041
+1F105 0041
+248B 0041
+0F2D 0041
+1D7D2 0061
+1D7DC 0061
+1D7E6 0061
+1D7F0 0061
+1D7FA 0061
+1D7D2 0041
+1D7DC 0041
+1D7E6 0041
+1D7F0 0041
+1D7FA 0041
+2463 0061
+24F8 0061
+2779 0061
+2783 0061
+278D 0061
+2463 0041
+24F8 0041
+2779 0041
+2783 0041
+278D 0041
+2074 0061
+2074 0041
+2084 0061
+2084 0041
0034 0062
-FF14 0062
-2477 0062
-1F105 0062
-248B 0062
-1D7D2 0062
-1D7DC 0062
-1D7E6 0062
-1D7F0 0062
-1D7FA 0062
-2463 0062
-24F8 0062
-2779 0062
-2783 0062
-278D 0062
-2074 0062
-2084 0062
0664 0062
06F4 0062
-10E63 0062
07C4 0062
-136C 0062
-104A4 0062
096A 0062
09EA 0062
0A6A 0062
0C6A 0062
0CEA 0062
0D6A 0062
-ABF4 0062
-A8D4 0062
-194A 0062
-19D4 0062
-1A84 0062
-1A94 0062
0E54 0062
0ED4 0062
0F24 0062
-0F2D 0062
-1C44 0062
-A904 0062
1044 0062
1094 0062
-1113A 0062
+136C 0062
17E4 0062
17F4 0062
-AA54 0062
+1814 0062
+194A 0062
+19D4 0062
+1A84 0062
+1A94 0062
1B54 0062
-A9D4 0062
1BB4 0062
-1814 0062
+1C44 0062
1C54 0062
-A624 0062
-110F4 0062
3024 0062
+A624 0062
+A8D4 0062
+A904 0062
+A9D4 0062
+AA54 0062
+ABF4 0062
1010A 0062
+104A4 0062
+10A43 0062
+10B5B 0062
+10B7B 0062
+10E63 0062
+11055 0062
+1106A 0062
+110F4 0062
+1113A 0062
+111D4 0062
+116C4 0062
12402 0062
12409 0062
1240F 0062
1244C 0062
12452 0062
12453 0062
-10B5B 0062
-10B7B 0062
-111D4 0062
-116C4 0062
-1106A 0062
-11055 0062
-10A43 0062
1D363 0062
+FF14 0062
+2477 0062
+1F105 0062
+248B 0062
+0F2D 0062
+1D7D2 0062
+1D7DC 0062
+1D7E6 0062
+1D7F0 0062
+1D7FA 0062
+2463 0062
+24F8 0062
+2779 0062
+2783 0062
+278D 0062
+2074 0062
+2084 0062
33E3 0021
33E3 003F
33E3 0061
335C 0041
335C 0062
0035 0021
-0035 003F
-FF15 0021
-FF15 003F
-2478 0021
-2478 003F
-1F106 0334
-1F106 0021
-1F106 003F
-248C 0021
-248C 003F
-1D7D3 0021
-1D7DD 0021
-1D7E7 0021
-1D7F1 0021
-1D7FB 0021
-1D7D3 003F
-1D7DD 003F
-1D7E7 003F
-1D7F1 003F
-1D7FB 003F
-2464 0021
-24F9 0021
-277A 0021
-2784 0021
-278E 0021
-2464 003F
-24F9 003F
-277A 003F
-2784 003F
-278E 003F
-2075 0021
-2075 003F
-2085 0021
-2085 003F
-1D7D3 0334
-1D7DD 0334
-1D7E7 0334
-1D7F1 0334
-1D7FB 0334
0665 0021
-0665 003F
06F5 0021
-06F5 003F
-10E64 0021
-10E64 003F
-10E64 0334
07C5 0021
-07C5 003F
-136D 0021
-136D 003F
-104A5 0021
-104A5 003F
-104A5 0334
096B 0021
-096B 003F
09EB 0021
-09EB 003F
0A6B 0021
-0A6B 003F
0AEB 0021
-0AEB 003F
0B6B 0021
-0B6B 003F
0BEB 0021
-0BEB 003F
0C6B 0021
-0C6B 003F
0CEB 0021
-0CEB 003F
0D6B 0021
-0D6B 003F
-ABF5 0021
-ABF5 003F
-A8D5 0021
-A8D5 003F
-194B 0021
-194B 003F
-19D5 0021
-19D5 003F
-1A85 0021
-1A85 003F
-1A95 0021
-1A95 003F
0E55 0021
-0E55 003F
0ED5 0021
-0ED5 003F
0F25 0021
-0F25 003F
-0F2E 0021
-0F2E 003F
-1C45 0021
-1C45 003F
-A905 0021
-A905 003F
1045 0021
-1045 003F
1095 0021
-1095 003F
-1113B 0021
-1113B 003F
-1113B 0334
+136D 0021
17E5 0021
-17E5 003F
17F5 0021
-17F5 003F
-AA55 0021
-AA55 003F
+1815 0021
+194B 0021
+19D5 0021
+1A85 0021
+1A95 0021
1B55 0021
-1B55 003F
-A9D5 0021
-A9D5 003F
1BB5 0021
-1BB5 003F
-1815 0021
-1815 003F
+1C45 0021
1C55 0021
-1C55 003F
-A625 0021
-A625 003F
-110F5 0021
-110F5 003F
-110F5 0334
3025 0021
-3025 003F
+A625 0021
+A8D5 0021
+A905 0021
+A9D5 0021
+AA55 0021
+ABF5 0021
1010B 0021
-1010B 003F
-1010B 0334
10143 0021
10148 0021
1014F 0021
1015F 0021
10173 0021
-10143 003F
-10148 003F
-1014F 003F
-1015F 003F
-10173 003F
-10143 0334
-10148 0334
-1014F 0334
-1015F 0334
-10173 0334
10321 0021
-10321 003F
-10321 0334
+104A5 0021
+10E64 0021
+11056 0021
+1106B 0021
+110F5 0021
+1113B 0021
+111D5 0021
+116C5 0021
12403 0021
1240A 0021
12410 0021
1244D 0021
12454 0021
12455 0021
+1D364 0021
+0035 003F
+0665 003F
+06F5 003F
+07C5 003F
+096B 003F
+09EB 003F
+0A6B 003F
+0AEB 003F
+0B6B 003F
+0BEB 003F
+0C6B 003F
+0CEB 003F
+0D6B 003F
+0E55 003F
+0ED5 003F
+0F25 003F
+1045 003F
+1095 003F
+136D 003F
+17E5 003F
+17F5 003F
+1815 003F
+194B 003F
+19D5 003F
+1A85 003F
+1A95 003F
+1B55 003F
+1BB5 003F
+1C45 003F
+1C55 003F
+3025 003F
+A625 003F
+A8D5 003F
+A905 003F
+A9D5 003F
+AA55 003F
+ABF5 003F
+1010B 003F
+10143 003F
+10148 003F
+1014F 003F
+1015F 003F
+10173 003F
+10321 003F
+104A5 003F
+10E64 003F
+11056 003F
+1106B 003F
+110F5 003F
+1113B 003F
+111D5 003F
+116C5 003F
12403 003F
1240A 003F
12410 003F
1244D 003F
12454 003F
12455 003F
+1D364 003F
+FF15 0021
+FF15 003F
+2478 0021
+2478 003F
+1F106 0334
+1F106 0021
+1F106 003F
+0F2E 0021
+0F2E 003F
+248C 0021
+248C 003F
+1D7D3 0021
+1D7DD 0021
+1D7E7 0021
+1D7F1 0021
+1D7FB 0021
+1D7D3 003F
+1D7DD 003F
+1D7E7 003F
+1D7F1 003F
+1D7FB 003F
+2464 0021
+24F9 0021
+277A 0021
+2784 0021
+278E 0021
+2464 003F
+24F9 003F
+277A 003F
+2784 003F
+278E 003F
+2075 0021
+2075 003F
+2085 0021
+2085 003F
+1010B 0334
+10143 0334
+10148 0334
+1014F 0334
+1015F 0334
+10173 0334
+10321 0334
+104A5 0334
+10E64 0334
+11056 0334
+1106B 0334
+110F5 0334
+1113B 0334
+111D5 0334
+116C5 0334
12403 0334
1240A 0334
12410 0334
1244D 0334
12454 0334
12455 0334
-111D5 0021
-111D5 003F
-111D5 0334
-116C5 0021
-116C5 003F
-116C5 0334
-1106B 0021
-1106B 003F
-1106B 0334
-11056 0021
-11056 003F
-11056 0334
-1D364 0021
-1D364 003F
1D364 0334
+1D7D3 0334
+1D7DD 0334
+1D7E7 0334
+1D7F1 0334
+1D7FB 0334
215A 0021
215A 003F
215A 0061
324C 0062
32BF 0062
0035 0061
-0035 0041
-FF15 0061
-FF15 0041
-2478 0061
-1F106 0061
-248C 0061
-2478 0041
-1F106 0041
-248C 0041
-1D7D3 0061
-1D7DD 0061
-1D7E7 0061
-1D7F1 0061
-1D7FB 0061
-1D7D3 0041
-1D7DD 0041
-1D7E7 0041
-1D7F1 0041
-1D7FB 0041
-2464 0061
-24F9 0061
-277A 0061
-2784 0061
-278E 0061
-2464 0041
-24F9 0041
-277A 0041
-2784 0041
-278E 0041
-2075 0061
-2075 0041
-2085 0061
-2085 0041
0665 0061
-0665 0041
06F5 0061
-06F5 0041
-10E64 0061
-10E64 0041
07C5 0061
-07C5 0041
-136D 0061
-136D 0041
-104A5 0061
-104A5 0041
096B 0061
-096B 0041
09EB 0061
-09EB 0041
0A6B 0061
-0A6B 0041
0AEB 0061
-0AEB 0041
0B6B 0061
-0B6B 0041
0BEB 0061
-0BEB 0041
0C6B 0061
-0C6B 0041
0CEB 0061
-0CEB 0041
0D6B 0061
-0D6B 0041
-ABF5 0061
-ABF5 0041
-A8D5 0061
-A8D5 0041
-194B 0061
-194B 0041
-19D5 0061
-19D5 0041
-1A85 0061
-1A85 0041
-1A95 0061
-1A95 0041
0E55 0061
-0E55 0041
0ED5 0061
-0ED5 0041
0F25 0061
-0F25 0041
-0F2E 0061
-0F2E 0041
-1C45 0061
-1C45 0041
-A905 0061
-A905 0041
1045 0061
-1045 0041
1095 0061
-1095 0041
-1113B 0061
-1113B 0041
+136D 0061
17E5 0061
-17E5 0041
17F5 0061
-17F5 0041
-AA55 0061
-AA55 0041
+1815 0061
+194B 0061
+19D5 0061
+1A85 0061
+1A95 0061
1B55 0061
-1B55 0041
-A9D5 0061
-A9D5 0041
1BB5 0061
-1BB5 0041
-1815 0061
-1815 0041
+1C45 0061
1C55 0061
-1C55 0041
-A625 0061
-A625 0041
-110F5 0061
-110F5 0041
3025 0061
-3025 0041
+A625 0061
+A8D5 0061
+A905 0061
+A9D5 0061
+AA55 0061
+ABF5 0061
1010B 0061
-1010B 0041
10143 0061
10148 0061
1014F 0061
1015F 0061
10173 0061
-10143 0041
-10148 0041
-1014F 0041
-1015F 0041
-10173 0041
10321 0061
-10321 0041
+104A5 0061
+10E64 0061
+11056 0061
+1106B 0061
+110F5 0061
+1113B 0061
+111D5 0061
+116C5 0061
12403 0061
1240A 0061
12410 0061
1244D 0061
12454 0061
12455 0061
+1D364 0061
+0035 0041
+0665 0041
+06F5 0041
+07C5 0041
+096B 0041
+09EB 0041
+0A6B 0041
+0AEB 0041
+0B6B 0041
+0BEB 0041
+0C6B 0041
+0CEB 0041
+0D6B 0041
+0E55 0041
+0ED5 0041
+0F25 0041
+1045 0041
+1095 0041
+136D 0041
+17E5 0041
+17F5 0041
+1815 0041
+194B 0041
+19D5 0041
+1A85 0041
+1A95 0041
+1B55 0041
+1BB5 0041
+1C45 0041
+1C55 0041
+3025 0041
+A625 0041
+A8D5 0041
+A905 0041
+A9D5 0041
+AA55 0041
+ABF5 0041
+1010B 0041
+10143 0041
+10148 0041
+1014F 0041
+1015F 0041
+10173 0041
+10321 0041
+104A5 0041
+10E64 0041
+11056 0041
+1106B 0041
+110F5 0041
+1113B 0041
+111D5 0041
+116C5 0041
12403 0041
1240A 0041
12410 0041
1244D 0041
12454 0041
12455 0041
-111D5 0061
-111D5 0041
-116C5 0061
-116C5 0041
-1106B 0061
-1106B 0041
-11056 0061
-11056 0041
-1D364 0061
1D364 0041
+FF15 0061
+FF15 0041
+2478 0061
+1F106 0061
+248C 0061
+0F2E 0061
+2478 0041
+1F106 0041
+248C 0041
+0F2E 0041
+1D7D3 0061
+1D7DD 0061
+1D7E7 0061
+1D7F1 0061
+1D7FB 0061
+1D7D3 0041
+1D7DD 0041
+1D7E7 0041
+1D7F1 0041
+1D7FB 0041
+2464 0061
+24F9 0061
+277A 0061
+2784 0061
+278E 0061
+2464 0041
+24F9 0041
+277A 0041
+2784 0041
+278E 0041
+2075 0061
+2075 0041
+2085 0061
+2085 0041
0035 0062
-FF15 0062
-2478 0062
-1F106 0062
-248C 0062
-1D7D3 0062
-1D7DD 0062
-1D7E7 0062
-1D7F1 0062
-1D7FB 0062
-2464 0062
-24F9 0062
-277A 0062
-2784 0062
-278E 0062
-2075 0062
-2085 0062
0665 0062
06F5 0062
-10E64 0062
07C5 0062
-136D 0062
-104A5 0062
096B 0062
09EB 0062
0A6B 0062
0C6B 0062
0CEB 0062
0D6B 0062
-ABF5 0062
-A8D5 0062
-194B 0062
-19D5 0062
-1A85 0062
-1A95 0062
0E55 0062
0ED5 0062
0F25 0062
-0F2E 0062
-1C45 0062
-A905 0062
1045 0062
1095 0062
-1113B 0062
+136D 0062
17E5 0062
17F5 0062
-AA55 0062
+1815 0062
+194B 0062
+19D5 0062
+1A85 0062
+1A95 0062
1B55 0062
-A9D5 0062
1BB5 0062
-1815 0062
+1C45 0062
1C55 0062
-A625 0062
-110F5 0062
3025 0062
+A625 0062
+A8D5 0062
+A905 0062
+A9D5 0062
+AA55 0062
+ABF5 0062
1010B 0062
10143 0062
10148 0062
1015F 0062
10173 0062
10321 0062
+104A5 0062
+10E64 0062
+11056 0062
+1106B 0062
+110F5 0062
+1113B 0062
+111D5 0062
+116C5 0062
12403 0062
1240A 0062
12410 0062
1244D 0062
12454 0062
12455 0062
-111D5 0062
-116C5 0062
-1106B 0062
-11056 0062
1D364 0062
+FF15 0062
+2478 0062
+1F106 0062
+248C 0062
+0F2E 0062
+1D7D3 0062
+1D7DD 0062
+1D7E7 0062
+1D7F1 0062
+1D7FB 0062
+2464 0062
+24F9 0062
+277A 0062
+2784 0062
+278E 0062
+2075 0062
+2085 0062
33E4 0021
33E4 003F
33E4 0061
335D 0041
335D 0062
0036 0021
-0036 003F
-FF16 0021
-FF16 003F
-2479 0021
-2479 003F
-1F107 0334
-1F107 0021
-1F107 003F
-248D 0021
-248D 003F
-1D7D4 0021
-1D7DE 0021
-1D7E8 0021
-1D7F2 0021
-1D7FC 0021
-1D7D4 003F
-1D7DE 003F
-1D7E8 003F
-1D7F2 003F
-1D7FC 003F
-2465 0021
-24FA 0021
-277B 0021
-2785 0021
-278F 0021
-2465 003F
-24FA 003F
-277B 003F
-2785 003F
-278F 003F
-2076 0021
-2076 003F
-2086 0021
-2086 003F
-1D7D4 0334
-1D7DE 0334
-1D7E8 0334
-1D7F2 0334
-1D7FC 0334
0666 0021
-0666 003F
06F6 0021
-06F6 003F
-10E65 0021
-10E65 003F
-10E65 0334
07C6 0021
-07C6 003F
-136E 0021
-136E 003F
-104A6 0021
-104A6 003F
-104A6 0334
096C 0021
-096C 003F
09EC 0021
-09EC 003F
0A6C 0021
-0A6C 003F
0AEC 0021
-0AEC 003F
0B6C 0021
-0B6C 003F
0BEC 0021
-0BEC 003F
0C6C 0021
-0C6C 003F
0CEC 0021
-0CEC 003F
0D6C 0021
-0D6C 003F
-ABF6 0021
-ABF6 003F
-A8D6 0021
-A8D6 003F
-194C 0021
-194C 003F
-19D6 0021
-19D6 003F
-1A86 0021
-1A86 003F
-1A96 0021
-1A96 003F
0E56 0021
-0E56 003F
0ED6 0021
-0ED6 003F
0F26 0021
-0F26 003F
-0F2F 0021
-0F2F 003F
-1C46 0021
-1C46 003F
-A906 0021
-A906 003F
1046 0021
-1046 003F
1096 0021
-1096 003F
-1113C 0021
-1113C 003F
-1113C 0334
+136E 0021
17E6 0021
-17E6 003F
17F6 0021
-17F6 003F
-AA56 0021
-AA56 003F
+1816 0021
+194C 0021
+19D6 0021
+1A86 0021
+1A96 0021
1B56 0021
-1B56 003F
-A9D6 0021
-A9D6 003F
1BB6 0021
-1BB6 003F
-1816 0021
-1816 003F
+1C46 0021
1C56 0021
-1C56 003F
-A626 0021
-A626 003F
-110F6 0021
-110F6 003F
-110F6 0334
+2185 0021
3026 0021
-3026 003F
+A626 0021
+A8D6 0021
+A906 0021
+A9D6 0021
+AA56 0021
+ABF6 0021
1010C 0021
-1010C 003F
-1010C 0334
-2185 0021
-2185 003F
+104A6 0021
+10E65 0021
+11057 0021
+1106C 0021
+110F6 0021
+1113C 0021
+111D6 0021
+116C6 0021
12404 0021
1240B 0021
12411 0021
12428 0021
12440 0021
1244E 0021
+1D365 0021
+0036 003F
+0666 003F
+06F6 003F
+07C6 003F
+096C 003F
+09EC 003F
+0A6C 003F
+0AEC 003F
+0B6C 003F
+0BEC 003F
+0C6C 003F
+0CEC 003F
+0D6C 003F
+0E56 003F
+0ED6 003F
+0F26 003F
+1046 003F
+1096 003F
+136E 003F
+17E6 003F
+17F6 003F
+1816 003F
+194C 003F
+19D6 003F
+1A86 003F
+1A96 003F
+1B56 003F
+1BB6 003F
+1C46 003F
+1C56 003F
+2185 003F
+3026 003F
+A626 003F
+A8D6 003F
+A906 003F
+A9D6 003F
+AA56 003F
+ABF6 003F
+1010C 003F
+104A6 003F
+10E65 003F
+11057 003F
+1106C 003F
+110F6 003F
+1113C 003F
+111D6 003F
+116C6 003F
12404 003F
1240B 003F
12411 003F
12428 003F
12440 003F
1244E 003F
+1D365 003F
+FF16 0021
+FF16 003F
+2479 0021
+2479 003F
+1F107 0334
+1F107 0021
+1F107 003F
+0F2F 0021
+0F2F 003F
+248D 0021
+248D 003F
+1D7D4 0021
+1D7DE 0021
+1D7E8 0021
+1D7F2 0021
+1D7FC 0021
+1D7D4 003F
+1D7DE 003F
+1D7E8 003F
+1D7F2 003F
+1D7FC 003F
+2465 0021
+24FA 0021
+277B 0021
+2785 0021
+278F 0021
+2465 003F
+24FA 003F
+277B 003F
+2785 003F
+278F 003F
+2076 0021
+2076 003F
+2086 0021
+2086 003F
+1010C 0334
+104A6 0334
+10E65 0334
+11057 0334
+1106C 0334
+110F6 0334
+1113C 0334
+111D6 0334
+116C6 0334
12404 0334
1240B 0334
12411 0334
12428 0334
12440 0334
1244E 0334
-111D6 0021
-111D6 003F
-111D6 0334
-116C6 0021
-116C6 003F
-116C6 0334
-1106C 0021
-1106C 003F
-1106C 0334
-11057 0021
-11057 003F
-11057 0334
-1D365 0021
-1D365 003F
1D365 0334
-324D 0021
-324D 003F
-324D 0061
+1D7D4 0334
+1D7DE 0334
+1D7E8 0334
+1D7F2 0334
+1D7FC 0334
+324D 0021
+324D 003F
+324D 0061
324D 0041
324D 0062
0036 0061
-0036 0041
-FF16 0061
-FF16 0041
-2479 0061
-1F107 0061
-248D 0061
-2479 0041
-1F107 0041
-248D 0041
-1D7D4 0061
-1D7DE 0061
-1D7E8 0061
-1D7F2 0061
-1D7FC 0061
-1D7D4 0041
-1D7DE 0041
-1D7E8 0041
-1D7F2 0041
-1D7FC 0041
-2465 0061
-24FA 0061
-277B 0061
-2785 0061
-278F 0061
-2465 0041
-24FA 0041
-277B 0041
-2785 0041
-278F 0041
-2076 0061
-2076 0041
-2086 0061
-2086 0041
0666 0061
-0666 0041
06F6 0061
-06F6 0041
-10E65 0061
-10E65 0041
07C6 0061
-07C6 0041
-136E 0061
-136E 0041
-104A6 0061
-104A6 0041
096C 0061
-096C 0041
09EC 0061
-09EC 0041
0A6C 0061
-0A6C 0041
0AEC 0061
-0AEC 0041
0B6C 0061
-0B6C 0041
0BEC 0061
-0BEC 0041
0C6C 0061
-0C6C 0041
0CEC 0061
-0CEC 0041
0D6C 0061
-0D6C 0041
-ABF6 0061
-ABF6 0041
-A8D6 0061
-A8D6 0041
-194C 0061
-194C 0041
-19D6 0061
-19D6 0041
-1A86 0061
-1A86 0041
-1A96 0061
-1A96 0041
0E56 0061
-0E56 0041
0ED6 0061
-0ED6 0041
0F26 0061
-0F26 0041
-0F2F 0061
-0F2F 0041
-1C46 0061
-1C46 0041
-A906 0061
-A906 0041
1046 0061
-1046 0041
1096 0061
-1096 0041
-1113C 0061
-1113C 0041
+136E 0061
17E6 0061
-17E6 0041
17F6 0061
-17F6 0041
-AA56 0061
-AA56 0041
+1816 0061
+194C 0061
+19D6 0061
+1A86 0061
+1A96 0061
1B56 0061
-1B56 0041
-A9D6 0061
-A9D6 0041
1BB6 0061
-1BB6 0041
-1816 0061
-1816 0041
+1C46 0061
1C56 0061
-1C56 0041
-A626 0061
-A626 0041
-110F6 0061
-110F6 0041
+2185 0061
3026 0061
-3026 0041
+A626 0061
+A8D6 0061
+A906 0061
+A9D6 0061
+AA56 0061
+ABF6 0061
1010C 0061
-1010C 0041
-2185 0061
-2185 0041
+104A6 0061
+10E65 0061
+11057 0061
+1106C 0061
+110F6 0061
+1113C 0061
+111D6 0061
+116C6 0061
12404 0061
1240B 0061
12411 0061
12428 0061
12440 0061
1244E 0061
+1D365 0061
+0036 0041
+0666 0041
+06F6 0041
+07C6 0041
+096C 0041
+09EC 0041
+0A6C 0041
+0AEC 0041
+0B6C 0041
+0BEC 0041
+0C6C 0041
+0CEC 0041
+0D6C 0041
+0E56 0041
+0ED6 0041
+0F26 0041
+1046 0041
+1096 0041
+136E 0041
+17E6 0041
+17F6 0041
+1816 0041
+194C 0041
+19D6 0041
+1A86 0041
+1A96 0041
+1B56 0041
+1BB6 0041
+1C46 0041
+1C56 0041
+2185 0041
+3026 0041
+A626 0041
+A8D6 0041
+A906 0041
+A9D6 0041
+AA56 0041
+ABF6 0041
+1010C 0041
+104A6 0041
+10E65 0041
+11057 0041
+1106C 0041
+110F6 0041
+1113C 0041
+111D6 0041
+116C6 0041
12404 0041
1240B 0041
12411 0041
12428 0041
12440 0041
1244E 0041
-111D6 0061
-111D6 0041
-116C6 0061
-116C6 0041
-1106C 0061
-1106C 0041
-11057 0061
-11057 0041
-1D365 0061
1D365 0041
+FF16 0061
+FF16 0041
+2479 0061
+1F107 0061
+248D 0061
+0F2F 0061
+2479 0041
+1F107 0041
+248D 0041
+0F2F 0041
+1D7D4 0061
+1D7DE 0061
+1D7E8 0061
+1D7F2 0061
+1D7FC 0061
+1D7D4 0041
+1D7DE 0041
+1D7E8 0041
+1D7F2 0041
+1D7FC 0041
+2465 0061
+24FA 0061
+277B 0061
+2785 0061
+278F 0061
+2465 0041
+24FA 0041
+277B 0041
+2785 0041
+278F 0041
+2076 0061
+2076 0041
+2086 0061
+2086 0041
0036 0062
-FF16 0062
-2479 0062
-1F107 0062
-248D 0062
-1D7D4 0062
-1D7DE 0062
-1D7E8 0062
-1D7F2 0062
-1D7FC 0062
-2465 0062
-24FA 0062
-277B 0062
-2785 0062
-278F 0062
-2076 0062
-2086 0062
0666 0062
06F6 0062
-10E65 0062
07C6 0062
-136E 0062
-104A6 0062
096C 0062
09EC 0062
0A6C 0062
0C6C 0062
0CEC 0062
0D6C 0062
-ABF6 0062
-A8D6 0062
-194C 0062
-19D6 0062
-1A86 0062
-1A96 0062
0E56 0062
0ED6 0062
0F26 0062
-0F2F 0062
-1C46 0062
-A906 0062
1046 0062
1096 0062
-1113C 0062
+136E 0062
17E6 0062
17F6 0062
-AA56 0062
+1816 0062
+194C 0062
+19D6 0062
+1A86 0062
+1A96 0062
1B56 0062
-A9D6 0062
1BB6 0062
-1816 0062
+1C46 0062
1C56 0062
-A626 0062
-110F6 0062
+2185 0062
3026 0062
+A626 0062
+A8D6 0062
+A906 0062
+A9D6 0062
+AA56 0062
+ABF6 0062
1010C 0062
-2185 0062
+104A6 0062
+10E65 0062
+11057 0062
+1106C 0062
+110F6 0062
+1113C 0062
+111D6 0062
+116C6 0062
12404 0062
1240B 0062
12411 0062
12428 0062
12440 0062
1244E 0062
-111D6 0062
-116C6 0062
-1106C 0062
-11057 0062
1D365 0062
+FF16 0062
+2479 0062
+1F107 0062
+248D 0062
+0F2F 0062
+1D7D4 0062
+1D7DE 0062
+1D7E8 0062
+1D7F2 0062
+1D7FC 0062
+2465 0062
+24FA 0062
+277B 0062
+2785 0062
+278F 0062
+2076 0062
+2086 0062
33E5 0021
33E5 003F
33E5 0061
335E 0041
335E 0062
0037 0021
-0037 003F
-FF17 0021
-FF17 003F
-247A 0021
-247A 003F
-1F108 0334
-1F108 0021
-1F108 003F
-248E 0021
-248E 003F
-1D7D5 0021
-1D7DF 0021
-1D7E9 0021
-1D7F3 0021
-1D7FD 0021
-1D7D5 003F
-1D7DF 003F
-1D7E9 003F
-1D7F3 003F
-1D7FD 003F
-2466 0021
-24FB 0021
-277C 0021
-2786 0021
-2790 0021
-2466 003F
-24FB 003F
-277C 003F
-2786 003F
-2790 003F
-2077 0021
-2077 003F
-2087 0021
-2087 003F
-1D7D5 0334
-1D7DF 0334
-1D7E9 0334
-1D7F3 0334
-1D7FD 0334
0667 0021
-0667 003F
06F7 0021
-06F7 003F
-10E66 0021
-10E66 003F
-10E66 0334
07C7 0021
-07C7 003F
-136F 0021
-136F 003F
-104A7 0021
-104A7 003F
-104A7 0334
096D 0021
-096D 003F
09ED 0021
-09ED 003F
0A6D 0021
-0A6D 003F
0AED 0021
-0AED 003F
0B6D 0021
-0B6D 003F
0BED 0021
-0BED 003F
0C6D 0021
-0C6D 003F
0CED 0021
-0CED 003F
0D6D 0021
-0D6D 003F
-ABF7 0021
-ABF7 003F
-A8D7 0021
-A8D7 003F
-194D 0021
-194D 003F
-19D7 0021
-19D7 003F
-1A87 0021
-1A87 003F
-1A97 0021
-1A97 003F
0E57 0021
-0E57 003F
0ED7 0021
-0ED7 003F
0F27 0021
-0F27 003F
-0F30 0021
-0F30 003F
-1C47 0021
-1C47 003F
-A907 0021
-A907 003F
1047 0021
-1047 003F
1097 0021
-1097 003F
-1113D 0021
-1113D 003F
-1113D 0334
+136F 0021
17E7 0021
-17E7 003F
17F7 0021
-17F7 003F
-AA57 0021
-AA57 003F
+1817 0021
+194D 0021
+19D7 0021
+1A87 0021
+1A97 0021
1B57 0021
-1B57 003F
-A9D7 0021
-A9D7 003F
1BB7 0021
-1BB7 003F
-1817 0021
-1817 003F
+1C47 0021
1C57 0021
-1C57 003F
-A627 0021
-A627 003F
-110F7 0021
-110F7 003F
-110F7 0334
3027 0021
-3027 003F
+A627 0021
+A8D7 0021
+A907 0021
+A9D7 0021
+AA57 0021
+ABF7 0021
1010D 0021
-1010D 003F
-1010D 0334
+104A7 0021
+10E66 0021
+11058 0021
+1106D 0021
+110F7 0021
+1113D 0021
+111D7 0021
+116C7 0021
12405 0021
1240C 0021
12412 0021
12441 0021
12442 0021
12443 0021
+1D366 0021
+0037 003F
+0667 003F
+06F7 003F
+07C7 003F
+096D 003F
+09ED 003F
+0A6D 003F
+0AED 003F
+0B6D 003F
+0BED 003F
+0C6D 003F
+0CED 003F
+0D6D 003F
+0E57 003F
+0ED7 003F
+0F27 003F
+1047 003F
+1097 003F
+136F 003F
+17E7 003F
+17F7 003F
+1817 003F
+194D 003F
+19D7 003F
+1A87 003F
+1A97 003F
+1B57 003F
+1BB7 003F
+1C47 003F
+1C57 003F
+3027 003F
+A627 003F
+A8D7 003F
+A907 003F
+A9D7 003F
+AA57 003F
+ABF7 003F
+1010D 003F
+104A7 003F
+10E66 003F
+11058 003F
+1106D 003F
+110F7 003F
+1113D 003F
+111D7 003F
+116C7 003F
12405 003F
1240C 003F
12412 003F
12441 003F
12442 003F
12443 003F
+1D366 003F
+FF17 0021
+FF17 003F
+247A 0021
+247A 003F
+1F108 0334
+1F108 0021
+1F108 003F
+0F30 0021
+0F30 003F
+248E 0021
+248E 003F
+1D7D5 0021
+1D7DF 0021
+1D7E9 0021
+1D7F3 0021
+1D7FD 0021
+1D7D5 003F
+1D7DF 003F
+1D7E9 003F
+1D7F3 003F
+1D7FD 003F
+2466 0021
+24FB 0021
+277C 0021
+2786 0021
+2790 0021
+2466 003F
+24FB 003F
+277C 003F
+2786 003F
+2790 003F
+2077 0021
+2077 003F
+2087 0021
+2087 003F
+1010D 0334
+104A7 0334
+10E66 0334
+11058 0334
+1106D 0334
+110F7 0334
+1113D 0334
+111D7 0334
+116C7 0334
12405 0334
1240C 0334
12412 0334
12441 0334
12442 0334
12443 0334
-111D7 0021
-111D7 003F
-111D7 0334
-116C7 0021
-116C7 003F
-116C7 0334
-1106D 0021
-1106D 003F
-1106D 0334
-11058 0021
-11058 003F
-11058 0334
-1D366 0021
-1D366 003F
1D366 0334
+1D7D5 0334
+1D7DF 0334
+1D7E9 0334
+1D7F3 0334
+1D7FD 0334
215E 0021
215E 003F
215E 0061
324E 0041
324E 0062
0037 0061
-0037 0041
-FF17 0061
-FF17 0041
-247A 0061
-1F108 0061
-248E 0061
-247A 0041
-1F108 0041
-248E 0041
-1D7D5 0061
-1D7DF 0061
-1D7E9 0061
-1D7F3 0061
-1D7FD 0061
-1D7D5 0041
-1D7DF 0041
-1D7E9 0041
-1D7F3 0041
-1D7FD 0041
-2466 0061
-24FB 0061
-277C 0061
-2786 0061
-2790 0061
-2466 0041
-24FB 0041
-277C 0041
-2786 0041
-2790 0041
-2077 0061
-2077 0041
-2087 0061
-2087 0041
0667 0061
-0667 0041
06F7 0061
-06F7 0041
-10E66 0061
-10E66 0041
07C7 0061
-07C7 0041
-136F 0061
-136F 0041
-104A7 0061
-104A7 0041
096D 0061
-096D 0041
09ED 0061
-09ED 0041
0A6D 0061
-0A6D 0041
0AED 0061
-0AED 0041
0B6D 0061
-0B6D 0041
0BED 0061
-0BED 0041
0C6D 0061
-0C6D 0041
0CED 0061
-0CED 0041
0D6D 0061
-0D6D 0041
-ABF7 0061
-ABF7 0041
-A8D7 0061
-A8D7 0041
-194D 0061
-194D 0041
-19D7 0061
-19D7 0041
-1A87 0061
-1A87 0041
-1A97 0061
-1A97 0041
0E57 0061
-0E57 0041
0ED7 0061
-0ED7 0041
0F27 0061
-0F27 0041
-0F30 0061
-0F30 0041
-1C47 0061
-1C47 0041
-A907 0061
-A907 0041
1047 0061
-1047 0041
1097 0061
-1097 0041
-1113D 0061
-1113D 0041
+136F 0061
17E7 0061
-17E7 0041
17F7 0061
-17F7 0041
-AA57 0061
-AA57 0041
+1817 0061
+194D 0061
+19D7 0061
+1A87 0061
+1A97 0061
1B57 0061
-1B57 0041
-A9D7 0061
-A9D7 0041
1BB7 0061
-1BB7 0041
-1817 0061
-1817 0041
+1C47 0061
1C57 0061
-1C57 0041
-A627 0061
-A627 0041
-110F7 0061
-110F7 0041
3027 0061
-3027 0041
+A627 0061
+A8D7 0061
+A907 0061
+A9D7 0061
+AA57 0061
+ABF7 0061
1010D 0061
-1010D 0041
+104A7 0061
+10E66 0061
+11058 0061
+1106D 0061
+110F7 0061
+1113D 0061
+111D7 0061
+116C7 0061
12405 0061
1240C 0061
12412 0061
12441 0061
12442 0061
12443 0061
+1D366 0061
+0037 0041
+0667 0041
+06F7 0041
+07C7 0041
+096D 0041
+09ED 0041
+0A6D 0041
+0AED 0041
+0B6D 0041
+0BED 0041
+0C6D 0041
+0CED 0041
+0D6D 0041
+0E57 0041
+0ED7 0041
+0F27 0041
+1047 0041
+1097 0041
+136F 0041
+17E7 0041
+17F7 0041
+1817 0041
+194D 0041
+19D7 0041
+1A87 0041
+1A97 0041
+1B57 0041
+1BB7 0041
+1C47 0041
+1C57 0041
+3027 0041
+A627 0041
+A8D7 0041
+A907 0041
+A9D7 0041
+AA57 0041
+ABF7 0041
+1010D 0041
+104A7 0041
+10E66 0041
+11058 0041
+1106D 0041
+110F7 0041
+1113D 0041
+111D7 0041
+116C7 0041
12405 0041
1240C 0041
12412 0041
12441 0041
12442 0041
12443 0041
-111D7 0061
-111D7 0041
-116C7 0061
-116C7 0041
-1106D 0061
-1106D 0041
-11058 0061
-11058 0041
-1D366 0061
1D366 0041
+FF17 0061
+FF17 0041
+247A 0061
+1F108 0061
+248E 0061
+0F30 0061
+247A 0041
+1F108 0041
+248E 0041
+0F30 0041
+1D7D5 0061
+1D7DF 0061
+1D7E9 0061
+1D7F3 0061
+1D7FD 0061
+1D7D5 0041
+1D7DF 0041
+1D7E9 0041
+1D7F3 0041
+1D7FD 0041
+2466 0061
+24FB 0061
+277C 0061
+2786 0061
+2790 0061
+2466 0041
+24FB 0041
+277C 0041
+2786 0041
+2790 0041
+2077 0061
+2077 0041
+2087 0061
+2087 0041
0037 0062
-FF17 0062
-247A 0062
-1F108 0062
-248E 0062
-1D7D5 0062
-1D7DF 0062
-1D7E9 0062
-1D7F3 0062
-1D7FD 0062
-2466 0062
-24FB 0062
-277C 0062
-2786 0062
-2790 0062
-2077 0062
-2087 0062
0667 0062
06F7 0062
-10E66 0062
07C7 0062
-136F 0062
-104A7 0062
096D 0062
09ED 0062
0A6D 0062
0C6D 0062
0CED 0062
0D6D 0062
-ABF7 0062
-A8D7 0062
-194D 0062
-19D7 0062
-1A87 0062
-1A97 0062
0E57 0062
0ED7 0062
0F27 0062
-0F30 0062
-1C47 0062
-A907 0062
1047 0062
1097 0062
-1113D 0062
+136F 0062
17E7 0062
17F7 0062
-AA57 0062
+1817 0062
+194D 0062
+19D7 0062
+1A87 0062
+1A97 0062
1B57 0062
-A9D7 0062
1BB7 0062
-1817 0062
+1C47 0062
1C57 0062
-A627 0062
-110F7 0062
3027 0062
+A627 0062
+A8D7 0062
+A907 0062
+A9D7 0062
+AA57 0062
+ABF7 0062
1010D 0062
+104A7 0062
+10E66 0062
+11058 0062
+1106D 0062
+110F7 0062
+1113D 0062
+111D7 0062
+116C7 0062
12405 0062
1240C 0062
12412 0062
12441 0062
12442 0062
12443 0062
-111D7 0062
-116C7 0062
-1106D 0062
-11058 0062
1D366 0062
+FF17 0062
+247A 0062
+1F108 0062
+248E 0062
+0F30 0062
+1D7D5 0062
+1D7DF 0062
+1D7E9 0062
+1D7F3 0062
+1D7FD 0062
+2466 0062
+24FB 0062
+277C 0062
+2786 0062
+2790 0062
+2077 0062
+2087 0062
33E6 0021
33E6 003F
33E6 0061
335F 0041
335F 0062
0038 0021
-0038 003F
-FF18 0021
-FF18 003F
-247B 0021
-247B 003F
-1F109 0334
-1F109 0021
-1F109 003F
-248F 0021
-248F 003F
-1D7D6 0021
-1D7E0 0021
-1D7EA 0021
-1D7F4 0021
-1D7FE 0021
-1D7D6 003F
-1D7E0 003F
-1D7EA 003F
-1D7F4 003F
-1D7FE 003F
-2467 0021
-24FC 0021
-277D 0021
-2787 0021
-2791 0021
-2467 003F
-24FC 003F
-277D 003F
-2787 003F
-2791 003F
-2078 0021
-2078 003F
-2088 0021
-2088 003F
-1D7D6 0334
-1D7E0 0334
-1D7EA 0334
-1D7F4 0334
-1D7FE 0334
0668 0021
-0668 003F
06F8 0021
-06F8 003F
-10E67 0021
-10E67 003F
-10E67 0334
07C8 0021
-07C8 003F
-1370 0021
-1370 003F
-104A8 0021
-104A8 003F
-104A8 0334
096E 0021
-096E 003F
09EE 0021
-09EE 003F
0A6E 0021
-0A6E 003F
0AEE 0021
-0AEE 003F
0B6E 0021
-0B6E 003F
0BEE 0021
-0BEE 003F
0C6E 0021
-0C6E 003F
0CEE 0021
-0CEE 003F
0D6E 0021
-0D6E 003F
-ABF8 0021
-ABF8 003F
-A8D8 0021
-A8D8 003F
-194E 0021
-194E 003F
-19D8 0021
-19D8 003F
-1A88 0021
-1A88 003F
-1A98 0021
-1A98 003F
0E58 0021
-0E58 003F
0ED8 0021
-0ED8 003F
0F28 0021
-0F28 003F
-0F31 0021
-0F31 003F
-1C48 0021
-1C48 003F
-A908 0021
-A908 003F
1048 0021
-1048 003F
1098 0021
-1098 003F
-1113E 0021
-1113E 003F
-1113E 0334
+1370 0021
17E8 0021
-17E8 003F
17F8 0021
-17F8 003F
-AA58 0021
-AA58 003F
+1818 0021
+194E 0021
+19D8 0021
+1A88 0021
+1A98 0021
1B58 0021
-1B58 003F
-A9D8 0021
-A9D8 003F
1BB8 0021
-1BB8 003F
-1818 0021
-1818 003F
+1C48 0021
1C58 0021
-1C58 003F
-A628 0021
-A628 003F
-110F8 0021
-110F8 003F
-110F8 0334
3028 0021
-3028 003F
+A628 0021
+A8D8 0021
+A908 0021
+A9D8 0021
+AA58 0021
+ABF8 0021
1010E 0021
-1010E 003F
-1010E 0334
+104A8 0021
+10E67 0021
+11059 0021
+1106E 0021
+110F8 0021
+1113E 0021
+111D8 0021
+116C8 0021
12406 0021
1240D 0021
12413 0021
1242A 0021
12444 0021
12445 0021
+1D367 0021
+0038 003F
+0668 003F
+06F8 003F
+07C8 003F
+096E 003F
+09EE 003F
+0A6E 003F
+0AEE 003F
+0B6E 003F
+0BEE 003F
+0C6E 003F
+0CEE 003F
+0D6E 003F
+0E58 003F
+0ED8 003F
+0F28 003F
+1048 003F
+1098 003F
+1370 003F
+17E8 003F
+17F8 003F
+1818 003F
+194E 003F
+19D8 003F
+1A88 003F
+1A98 003F
+1B58 003F
+1BB8 003F
+1C48 003F
+1C58 003F
+3028 003F
+A628 003F
+A8D8 003F
+A908 003F
+A9D8 003F
+AA58 003F
+ABF8 003F
+1010E 003F
+104A8 003F
+10E67 003F
+11059 003F
+1106E 003F
+110F8 003F
+1113E 003F
+111D8 003F
+116C8 003F
12406 003F
1240D 003F
12413 003F
1242A 003F
12444 003F
12445 003F
+1D367 003F
+FF18 0021
+FF18 003F
+247B 0021
+247B 003F
+1F109 0334
+1F109 0021
+1F109 003F
+0F31 0021
+0F31 003F
+248F 0021
+248F 003F
+1D7D6 0021
+1D7E0 0021
+1D7EA 0021
+1D7F4 0021
+1D7FE 0021
+1D7D6 003F
+1D7E0 003F
+1D7EA 003F
+1D7F4 003F
+1D7FE 003F
+2467 0021
+24FC 0021
+277D 0021
+2787 0021
+2791 0021
+2467 003F
+24FC 003F
+277D 003F
+2787 003F
+2791 003F
+2078 0021
+2078 003F
+2088 0021
+2088 003F
+1010E 0334
+104A8 0334
+10E67 0334
+11059 0334
+1106E 0334
+110F8 0334
+1113E 0334
+111D8 0334
+116C8 0334
12406 0334
1240D 0334
12413 0334
1242A 0334
12444 0334
12445 0334
-111D8 0021
-111D8 003F
-111D8 0334
-116C8 0021
-116C8 003F
-116C8 0334
-1106E 0021
-1106E 003F
-1106E 0334
-11059 0021
-11059 003F
-11059 0334
-1D367 0021
-1D367 003F
1D367 0334
+1D7D6 0334
+1D7E0 0334
+1D7EA 0334
+1D7F4 0334
+1D7FE 0334
324F 0021
324F 003F
324F 0061
324F 0041
324F 0062
0038 0061
-0038 0041
-FF18 0061
-FF18 0041
-247B 0061
-1F109 0061
-248F 0061
-247B 0041
-1F109 0041
-248F 0041
-1D7D6 0061
-1D7E0 0061
-1D7EA 0061
-1D7F4 0061
-1D7FE 0061
-1D7D6 0041
-1D7E0 0041
-1D7EA 0041
-1D7F4 0041
-1D7FE 0041
-2467 0061
-24FC 0061
-277D 0061
-2787 0061
-2791 0061
-2467 0041
-24FC 0041
-277D 0041
-2787 0041
-2791 0041
-2078 0061
-2078 0041
-2088 0061
-2088 0041
0668 0061
-0668 0041
06F8 0061
-06F8 0041
-10E67 0061
-10E67 0041
07C8 0061
-07C8 0041
-1370 0061
-1370 0041
-104A8 0061
-104A8 0041
096E 0061
-096E 0041
09EE 0061
-09EE 0041
0A6E 0061
-0A6E 0041
0AEE 0061
-0AEE 0041
0B6E 0061
-0B6E 0041
0BEE 0061
-0BEE 0041
0C6E 0061
-0C6E 0041
0CEE 0061
-0CEE 0041
0D6E 0061
-0D6E 0041
-ABF8 0061
-ABF8 0041
-A8D8 0061
-A8D8 0041
-194E 0061
-194E 0041
-19D8 0061
-19D8 0041
-1A88 0061
-1A88 0041
-1A98 0061
-1A98 0041
0E58 0061
-0E58 0041
0ED8 0061
-0ED8 0041
0F28 0061
-0F28 0041
-0F31 0061
-0F31 0041
-1C48 0061
-1C48 0041
-A908 0061
-A908 0041
1048 0061
-1048 0041
1098 0061
-1098 0041
-1113E 0061
-1113E 0041
+1370 0061
17E8 0061
-17E8 0041
17F8 0061
-17F8 0041
-AA58 0061
-AA58 0041
+1818 0061
+194E 0061
+19D8 0061
+1A88 0061
+1A98 0061
1B58 0061
-1B58 0041
-A9D8 0061
-A9D8 0041
1BB8 0061
-1BB8 0041
-1818 0061
-1818 0041
+1C48 0061
1C58 0061
-1C58 0041
-A628 0061
-A628 0041
-110F8 0061
-110F8 0041
3028 0061
-3028 0041
+A628 0061
+A8D8 0061
+A908 0061
+A9D8 0061
+AA58 0061
+ABF8 0061
1010E 0061
-1010E 0041
+104A8 0061
+10E67 0061
+11059 0061
+1106E 0061
+110F8 0061
+1113E 0061
+111D8 0061
+116C8 0061
12406 0061
1240D 0061
12413 0061
1242A 0061
12444 0061
12445 0061
+1D367 0061
+0038 0041
+0668 0041
+06F8 0041
+07C8 0041
+096E 0041
+09EE 0041
+0A6E 0041
+0AEE 0041
+0B6E 0041
+0BEE 0041
+0C6E 0041
+0CEE 0041
+0D6E 0041
+0E58 0041
+0ED8 0041
+0F28 0041
+1048 0041
+1098 0041
+1370 0041
+17E8 0041
+17F8 0041
+1818 0041
+194E 0041
+19D8 0041
+1A88 0041
+1A98 0041
+1B58 0041
+1BB8 0041
+1C48 0041
+1C58 0041
+3028 0041
+A628 0041
+A8D8 0041
+A908 0041
+A9D8 0041
+AA58 0041
+ABF8 0041
+1010E 0041
+104A8 0041
+10E67 0041
+11059 0041
+1106E 0041
+110F8 0041
+1113E 0041
+111D8 0041
+116C8 0041
12406 0041
1240D 0041
12413 0041
1242A 0041
12444 0041
12445 0041
-111D8 0061
-111D8 0041
-116C8 0061
-116C8 0041
-1106E 0061
-1106E 0041
-11059 0061
-11059 0041
-1D367 0061
1D367 0041
+FF18 0061
+FF18 0041
+247B 0061
+1F109 0061
+248F 0061
+0F31 0061
+247B 0041
+1F109 0041
+248F 0041
+0F31 0041
+1D7D6 0061
+1D7E0 0061
+1D7EA 0061
+1D7F4 0061
+1D7FE 0061
+1D7D6 0041
+1D7E0 0041
+1D7EA 0041
+1D7F4 0041
+1D7FE 0041
+2467 0061
+24FC 0061
+277D 0061
+2787 0061
+2791 0061
+2467 0041
+24FC 0041
+277D 0041
+2787 0041
+2791 0041
+2078 0061
+2078 0041
+2088 0061
+2088 0041
0038 0062
-FF18 0062
-247B 0062
-1F109 0062
-248F 0062
-1D7D6 0062
-1D7E0 0062
-1D7EA 0062
-1D7F4 0062
-1D7FE 0062
-2467 0062
-24FC 0062
-277D 0062
-2787 0062
-2791 0062
-2078 0062
-2088 0062
0668 0062
06F8 0062
-10E67 0062
07C8 0062
-1370 0062
-104A8 0062
096E 0062
09EE 0062
0A6E 0062
0C6E 0062
0CEE 0062
0D6E 0062
-ABF8 0062
-A8D8 0062
-194E 0062
-19D8 0062
-1A88 0062
-1A98 0062
0E58 0062
0ED8 0062
0F28 0062
-0F31 0062
-1C48 0062
-A908 0062
1048 0062
1098 0062
-1113E 0062
+1370 0062
17E8 0062
17F8 0062
-AA58 0062
+1818 0062
+194E 0062
+19D8 0062
+1A88 0062
+1A98 0062
1B58 0062
-A9D8 0062
1BB8 0062
-1818 0062
+1C48 0062
1C58 0062
-A628 0062
-110F8 0062
3028 0062
+A628 0062
+A8D8 0062
+A908 0062
+A9D8 0062
+AA58 0062
+ABF8 0062
1010E 0062
+104A8 0062
+10E67 0062
+11059 0062
+1106E 0062
+110F8 0062
+1113E 0062
+111D8 0062
+116C8 0062
12406 0062
1240D 0062
12413 0062
1242A 0062
12444 0062
12445 0062
-111D8 0062
-116C8 0062
-1106E 0062
-11059 0062
1D367 0062
+FF18 0062
+247B 0062
+1F109 0062
+248F 0062
+0F31 0062
+1D7D6 0062
+1D7E0 0062
+1D7EA 0062
+1D7F4 0062
+1D7FE 0062
+2467 0062
+24FC 0062
+277D 0062
+2787 0062
+2791 0062
+2078 0062
+2088 0062
33E7 0021
33E7 003F
33E7 0061
3360 0041
3360 0062
0039 0021
-0039 003F
-FF19 0021
-FF19 003F
-247C 0021
-247C 003F
-1F10A 0334
-1F10A 0021
-1F10A 003F
-2490 0021
-2490 003F
-1D7D7 0021
-1D7E1 0021
-1D7EB 0021
-1D7F5 0021
-1D7FF 0021
-1D7D7 003F
-1D7E1 003F
-1D7EB 003F
-1D7F5 003F
-1D7FF 003F
-2468 0021
-24FD 0021
-277E 0021
-2788 0021
-2792 0021
-2468 003F
-24FD 003F
-277E 003F
-2788 003F
-2792 003F
-2079 0021
-2079 003F
-2089 0021
-2089 003F
-1D7D7 0334
-1D7E1 0334
-1D7EB 0334
-1D7F5 0334
-1D7FF 0334
0669 0021
-0669 003F
06F9 0021
-06F9 003F
-10E68 0021
-10E68 003F
-10E68 0334
07C9 0021
-07C9 003F
-1371 0021
-1371 003F
-104A9 0021
-104A9 003F
-104A9 0334
096F 0021
-096F 003F
09EF 0021
-09EF 003F
0A6F 0021
-0A6F 003F
0AEF 0021
-0AEF 003F
0B6F 0021
-0B6F 003F
0BEF 0021
-0BEF 003F
0C6F 0021
-0C6F 003F
0CEF 0021
-0CEF 003F
0D6F 0021
-0D6F 003F
-ABF9 0021
-ABF9 003F
-A8D9 0021
-A8D9 003F
-194F 0021
-194F 003F
-19D9 0021
-19D9 003F
-1A89 0021
-1A89 003F
-1A99 0021
-1A99 003F
0E59 0021
-0E59 003F
0ED9 0021
-0ED9 003F
0F29 0021
-0F29 003F
-0F32 0021
-0F32 003F
-1C49 0021
-1C49 003F
-A909 0021
-A909 003F
1049 0021
-1049 003F
1099 0021
-1099 003F
-1113F 0021
-1113F 003F
-1113F 0334
+1371 0021
17E9 0021
-17E9 003F
17F9 0021
-17F9 003F
-AA59 0021
-AA59 003F
+1819 0021
+194F 0021
+19D9 0021
+1A89 0021
+1A99 0021
1B59 0021
-1B59 003F
-A9D9 0021
-A9D9 003F
1BB9 0021
-1BB9 003F
-1819 0021
-1819 003F
+1C49 0021
1C59 0021
-1C59 003F
-A629 0021
-A629 003F
-110F9 0021
-110F9 003F
-110F9 0334
3029 0021
-3029 003F
+A629 0021
+A8D9 0021
+A909 0021
+A9D9 0021
+AA59 0021
+ABF9 0021
1010F 0021
-1010F 003F
-1010F 0334
+104A9 0021
+10E68 0021
+1105A 0021
+1106F 0021
+110F9 0021
+1113F 0021
+111D9 0021
+116C9 0021
12407 0021
1240E 0021
12414 0021
12447 0021
12448 0021
12449 0021
+1D368 0021
+0039 003F
+0669 003F
+06F9 003F
+07C9 003F
+096F 003F
+09EF 003F
+0A6F 003F
+0AEF 003F
+0B6F 003F
+0BEF 003F
+0C6F 003F
+0CEF 003F
+0D6F 003F
+0E59 003F
+0ED9 003F
+0F29 003F
+1049 003F
+1099 003F
+1371 003F
+17E9 003F
+17F9 003F
+1819 003F
+194F 003F
+19D9 003F
+1A89 003F
+1A99 003F
+1B59 003F
+1BB9 003F
+1C49 003F
+1C59 003F
+3029 003F
+A629 003F
+A8D9 003F
+A909 003F
+A9D9 003F
+AA59 003F
+ABF9 003F
+1010F 003F
+104A9 003F
+10E68 003F
+1105A 003F
+1106F 003F
+110F9 003F
+1113F 003F
+111D9 003F
+116C9 003F
12407 003F
1240E 003F
12414 003F
12447 003F
12448 003F
12449 003F
+1D368 003F
+FF19 0021
+FF19 003F
+247C 0021
+247C 003F
+1F10A 0334
+1F10A 0021
+1F10A 003F
+0F32 0021
+0F32 003F
+2490 0021
+2490 003F
+1D7D7 0021
+1D7E1 0021
+1D7EB 0021
+1D7F5 0021
+1D7FF 0021
+1D7D7 003F
+1D7E1 003F
+1D7EB 003F
+1D7F5 003F
+1D7FF 003F
+2468 0021
+24FD 0021
+277E 0021
+2788 0021
+2792 0021
+2468 003F
+24FD 003F
+277E 003F
+2788 003F
+2792 003F
+2079 0021
+2079 003F
+2089 0021
+2089 003F
+1010F 0334
+104A9 0334
+10E68 0334
+1105A 0334
+1106F 0334
+110F9 0334
+1113F 0334
+111D9 0334
+116C9 0334
12407 0334
1240E 0334
12414 0334
12447 0334
12448 0334
12449 0334
-111D9 0021
-111D9 003F
-111D9 0334
-116C9 0021
-116C9 003F
-116C9 0334
-1106F 0021
-1106F 003F
-1106F 0334
-1105A 0021
-1105A 003F
-1105A 0334
-1D368 0021
-1D368 003F
1D368 0334
+1D7D7 0334
+1D7E1 0334
+1D7EB 0334
+1D7F5 0334
+1D7FF 0334
0039 0061
+0669 0061
+06F9 0061
+07C9 0061
+096F 0061
+09EF 0061
+0A6F 0061
+0AEF 0061
+0B6F 0061
+0BEF 0061
+0C6F 0061
+0CEF 0061
+0D6F 0061
+0E59 0061
+0ED9 0061
+0F29 0061
+1049 0061
+1099 0061
+1371 0061
+17E9 0061
+17F9 0061
+1819 0061
+194F 0061
+19D9 0061
+1A89 0061
+1A99 0061
+1B59 0061
+1BB9 0061
+1C49 0061
+1C59 0061
+3029 0061
+A629 0061
+A8D9 0061
+A909 0061
+A9D9 0061
+AA59 0061
+ABF9 0061
+1010F 0061
+104A9 0061
+10E68 0061
+1105A 0061
+1106F 0061
+110F9 0061
+1113F 0061
+111D9 0061
+116C9 0061
+12407 0061
+1240E 0061
+12414 0061
+1241D 0061
+1242B 0061
+12446 0061
+12447 0061
+12448 0061
+12449 0061
+1D368 0061
0039 0041
+0669 0041
+06F9 0041
+07C9 0041
+096F 0041
+09EF 0041
+0A6F 0041
+0AEF 0041
+0B6F 0041
+0BEF 0041
+0C6F 0041
+0CEF 0041
+0D6F 0041
+0E59 0041
+0ED9 0041
+0F29 0041
+1049 0041
+1099 0041
+1371 0041
+17E9 0041
+17F9 0041
+1819 0041
+194F 0041
+19D9 0041
+1A89 0041
+1A99 0041
+1B59 0041
+1BB9 0041
+1C49 0041
+1C59 0041
+3029 0041
+A629 0041
+A8D9 0041
+A909 0041
+A9D9 0041
+AA59 0041
+ABF9 0041
+1010F 0041
+104A9 0041
+10E68 0041
+1105A 0041
+1106F 0041
+110F9 0041
+1113F 0041
+111D9 0041
+116C9 0041
+12407 0041
+1240E 0041
+12414 0041
+1241D 0041
+1242B 0041
+12446 0041
+12447 0041
+12448 0041
+12449 0041
+1D368 0041
FF19 0061
FF19 0041
247C 0061
1F10A 0061
2490 0061
+0F32 0061
247C 0041
1F10A 0041
2490 0041
+0F32 0041
1D7D7 0061
1D7E1 0061
1D7EB 0061
2079 0041
2089 0061
2089 0041
-0669 0061
-0669 0041
-06F9 0061
-06F9 0041
-10E68 0061
-10E68 0041
-07C9 0061
-07C9 0041
-1371 0061
-1371 0041
-104A9 0061
-104A9 0041
-096F 0061
-096F 0041
-09EF 0061
-09EF 0041
-0A6F 0061
-0A6F 0041
-0AEF 0061
-0AEF 0041
-0B6F 0061
-0B6F 0041
-0BEF 0061
-0BEF 0041
-0C6F 0061
-0C6F 0041
-0CEF 0061
-0CEF 0041
-0D6F 0061
-0D6F 0041
-ABF9 0061
-ABF9 0041
-A8D9 0061
-A8D9 0041
-194F 0061
-194F 0041
-19D9 0061
-19D9 0041
-1A89 0061
-1A89 0041
-1A99 0061
-1A99 0041
-0E59 0061
-0E59 0041
-0ED9 0061
-0ED9 0041
-0F29 0061
-0F29 0041
-0F32 0061
-0F32 0041
-1C49 0061
-1C49 0041
-A909 0061
-A909 0041
-1049 0061
-1049 0041
-1099 0061
-1099 0041
-1113F 0061
-1113F 0041
-17E9 0061
-17E9 0041
-17F9 0061
-17F9 0041
-AA59 0061
-AA59 0041
-1B59 0061
-1B59 0041
-A9D9 0061
-A9D9 0041
-1BB9 0061
-1BB9 0041
-1819 0061
-1819 0041
-1C59 0061
-1C59 0041
-A629 0061
-A629 0041
-110F9 0061
-110F9 0041
-3029 0061
-3029 0041
-1010F 0061
-1010F 0041
-12407 0061
-1240E 0061
-12414 0061
-1241D 0061
-1242B 0061
-12446 0061
-12447 0061
-12448 0061
-12449 0061
-12407 0041
-1240E 0041
-12414 0041
-1241D 0041
-1242B 0041
-12446 0041
-12447 0041
-12448 0041
-12449 0041
-111D9 0061
-111D9 0041
-116C9 0061
-116C9 0041
-1106F 0061
-1106F 0041
-1105A 0061
-1105A 0041
-1D368 0061
-1D368 0041
0039 0062
-FF19 0062
-247C 0062
-1F10A 0062
-2490 0062
-1D7D7 0062
-1D7E1 0062
-1D7EB 0062
-1D7F5 0062
-1D7FF 0062
-2468 0062
-24FD 0062
-277E 0062
-2788 0062
-2792 0062
-2079 0062
-2089 0062
0669 0062
06F9 0062
-10E68 0062
07C9 0062
-1371 0062
-104A9 0062
096F 0062
09EF 0062
0A6F 0062
0C6F 0062
0CEF 0062
0D6F 0062
-ABF9 0062
-A8D9 0062
-194F 0062
-19D9 0062
-1A89 0062
-1A99 0062
0E59 0062
0ED9 0062
0F29 0062
-0F32 0062
-1C49 0062
-A909 0062
1049 0062
1099 0062
-1113F 0062
+1371 0062
17E9 0062
17F9 0062
-AA59 0062
+1819 0062
+194F 0062
+19D9 0062
+1A89 0062
+1A99 0062
1B59 0062
-A9D9 0062
1BB9 0062
-1819 0062
+1C49 0062
1C59 0062
-A629 0062
-110F9 0062
3029 0062
+A629 0062
+A8D9 0062
+A909 0062
+A9D9 0062
+AA59 0062
+ABF9 0062
1010F 0062
+104A9 0062
+10E68 0062
+1105A 0062
+1106F 0062
+110F9 0062
+1113F 0062
+111D9 0062
+116C9 0062
12407 0062
1240E 0062
12414 0062
12447 0062
12448 0062
12449 0062
-111D9 0062
-116C9 0062
-1106F 0062
-1105A 0062
1D368 0062
+FF19 0062
+247C 0062
+1F10A 0062
+2490 0062
+0F32 0062
+1D7D7 0062
+1D7E1 0062
+1D7EB 0062
+1D7F5 0062
+1D7FF 0062
+2468 0062
+24FD 0062
+277E 0062
+2788 0062
+2792 0062
+2079 0062
+2089 0062
33E8 0021
33E8 003F
33E8 0061
000C 0061
000D 0061
0085 0061
-180E 0061
2028 0061
2029 0061
0020 0061
169C 0061
2045 0061
2046 0061
+2308 0061
+2309 0061
+230A 0061
+230B 0061
29FC 0061
29FD 0061
2983 0061
0618 0061
0619 0061
061A 0061
+061C 0061
0640 0061
06D6 0061
06D7 0061
180B 0061
180C 0061
180D 0061
+180E 0061
1A7F 0061
1B6B 0061
1B6C 0061
2062 0061
2063 0061
2064 0061
+2066 0061
+2067 0061
+2068 0061
+2069 0061
206A 0061
206B 0061
206C 0061
000C 0041
000D 0041
0085 0041
-180E 0041
2028 0041
2029 0041
0020 0041
169C 0041
2045 0041
2046 0041
+2308 0041
+2309 0041
+230A 0041
+230B 0041
29FC 0041
29FD 0041
2983 0041
0618 0041
0619 0041
061A 0041
+061C 0041
0640 0041
06D6 0041
06D7 0041
180B 0041
180C 0041
180D 0041
+180E 0041
1A7F 0041
1B6B 0041
1B6C 0041
2062 0041
2063 0041
2064 0041
+2066 0041
+2067 0041
+2068 0041
+2069 0041
206A 0041
206B 0041
206C 0041
20E2 0041
20E3 0041
20E4 0041
+3099 0061
+3099 0041
+FF9E 0061
+FF9E 0041
+309A 0061
+309A 0041
+FF9F 0061
+FF9F 0041
+0335 0061
+0335 0041
0305 0061
0305 0041
0309 0061
0334 0041
0334 1DD3
1DD3 0334
-0335 0061
-0335 0041
0339 0061
0339 0041
0345 0061
302E 0041
302F 0061
302F 0041
-3099 0061
-3099 0041
-FF9E 0061
-FF9E 0041
-309A 0061
-309A 0041
-FF9F 0061
-FF9F 0041
20D0 0061
20D0 0041
20D1 0061
000C 0062
000D 0062
0085 0062
-180E 0062
2028 0062
2029 0062
0020 0062
169C 0062
2045 0062
2046 0062
+2308 0062
+2309 0062
+230A 0062
+230B 0062
29FC 0062
29FD 0062
2983 0062
0618 0062
0619 0062
061A 0062
+061C 0062
0640 0062
06D6 0062
06D7 0062
180B 0062
180C 0062
180D 0062
+180E 0062
1A7F 0062
1B6B 0062
1B6C 0062
2062 0062
2063 0062
2064 0062
+2066 0062
+2067 0062
+2068 0062
+2069 0062
206A 0062
206B 0062
206C 0062
20E2 0062
20E3 0062
20E4 0062
+3099 0062
+FF9E 0062
+309A 0062
+FF9F 0062
+0335 0062
0305 0062
0309 0062
030F 0062
0330 0062
0331 0062
0334 0062
-0335 0062
0339 0062
0345 0062
0358 0062
302D 0062
302E 0062
302F 0062
-3099 0062
-FF9E 0062
-309A 0062
-FF9F 0062
20D0 0062
20D1 0062
20D2 0062
1E11 003F
1E10 0021
1E10 003F
+0111 0021
+0111 003F
+0110 0021
+0110 003F
1E0D 0021
1E0D 003F
1E0C 0021
0044 0331 0334
0044 0334 0331
1E0E 0334
-0111 0021
-0111 003F
-0110 0021
-0110 003F
00F0 0021
1DD9 0021
00F0 003F
1E11 0041
1E10 0061
1E10 0041
+0111 0061
+0111 0041
+0110 0061
+0110 0041
1E0D 0061
1E0D 0041
1E0C 0061
1E0F 0041
1E0E 0061
1E0E 0041
-0111 0061
-0111 0041
-0110 0061
-0110 0041
00F0 0061
1DD9 0061
00F0 0041
1E0A 0062
1E11 0062
1E10 0062
+0111 0062
+0110 0062
1E0D 0062
1E0C 0062
1E13 0062
1E12 0062
1E0F 0062
1E0E 0062
-0111 0062
-0110 0062
00F0 0062
1DD9 0062
00D0 0062
1E29 003F
1E28 0021
1E28 003F
+0127 0021
+210F 0021
+0127 003F
+210F 003F
+0126 0021
+0126 003F
+A7F8 0021
+A7F8 003F
1E25 0021
1E25 003F
1E24 0021
0068 0331 0334
0068 0334 0331
1E96 0334
-0127 0021
-210F 0021
-0127 003F
-210F 003F
-0126 0021
-0126 003F
-A7F8 0021
-A7F8 003F
0334 036A
036A 0334
0068 0061
1E29 0041
1E28 0061
1E28 0041
+0127 0061
+210F 0061
+0127 0041
+210F 0041
+0126 0061
+0126 0041
+A7F8 0061
+A7F8 0041
1E25 0061
1E25 0041
1E24 0061
1E2A 0041
1E96 0061
1E96 0041
-0127 0061
-210F 0061
-0127 0041
-210F 0041
-0126 0061
-0126 0041
-A7F8 0061
-A7F8 0041
33CA 0061
33CA 0041
33CA 0062
1E22 0062
1E29 0062
1E28 0062
+0127 0062
+210F 0062
+0126 0062
+A7F8 0062
1E25 0062
1E24 0062
1E2B 0062
1E2A 0062
1E96 0062
-0127 0062
-210F 0062
-0126 0062
-A7F8 0062
32CC 0021
32CC 003F
32CC 0061
0069 0308 0301 0334
0069 0308 0334 0341
00EF 0301 0334
-1E2F 0334
+00EF 0334 0341
0049 0308 0334 0301
0049 0308 0341 0334
00CF 0334 0301
013C 003F
013B 0021
013B 003F
+0142 0021
+0142 003F
+0141 0021
+0141 003F
1E37 0021
1E37 003F
1E36 0021
004C 0331 0334
004C 0334 0331
1E3A 0334
-0142 0021
-0142 003F
-0141 0021
-0141 003F
006C 00B7 0021
006C 0387 0021
0140 0021
013C 0041
013B 0061
013B 0041
+0142 0061
+0142 0041
+0141 0061
+0141 0041
1E37 0061
1E37 0041
1E36 0061
1E3B 0041
1E3A 0061
1E3A 0041
-0142 0061
-0142 0041
-0141 0061
-0141 0041
006C 00B7 0061
006C 0387 0061
0140 0061
013D 0062
013C 0062
013B 0062
+0142 0062
+0141 0062
1E37 0062
1E36 0062
1E39 0062
1E3C 0062
1E3B 0062
1E3A 0062
-0142 0062
-0141 0062
006C 00B7 0062
006C 0387 0062
0140 0062
0075 0334 0344
0075 0344 0334
01D8 0334
-0055 0308 0341 0334
+0055 0308 0301 0334
0055 0334 0308 0301
-0055 0334 0308 0341
00DC 0301 0334
+00DC 0334 0301
0075 0308 0340 0334
0075 0334 0308 0340
00FC 0300 0334
0391 0334 0313 0340
1F08 0300 0334
1F0A 0334
-03B1 0313 0300 0334 0345
03B1 0343 0300 0345 0334
03B1 0343 0345 0334 0340
03B1 0345 0313 0300 0334
+1F00 0345 0340 0334
0391 0343 0334 0345 0340
0391 0345 0313 0334 0340
1F08 0300 0345 0334
1F09 0300 0334
1F09 0340 0334
03B1 0314 0334 0300 0345
-03B1 0334 0314 0345 0300
-03B1 0345 0314 0300 0334
-03B1 0345 0334 0314 0340
+03B1 0314 0345 0340 0334
+1F81 0340 0334
+1F83 0334
0391 0334 0345 0314 0300
1F09 0334 0345 0300
-1F09 0345 0300 0334
1F0B 0345 0334
+1FBC 0314 0300 0334
03B1 0314 0334 0342
03B1 0314 0342 0334
03B1 0334 0314 0342
0397 0334 0343
1F28 0334
03B7 0334 0313 0341
-03B7 0334 0343 0301
03B7 0334 0343 0341
-1F20 0334 0341
+03B7 0343 0341 0334
+1F20 0341 0334
0397 0313 0334 0301
0397 0313 0341 0334
0397 0334 0313 0301
0397 0343 0300 0334
03B7 0313 0345 0334 0300
03B7 0313 0345 0340 0334
-03B7 0343 0340 0334 0345
03B7 0343 0345 0340 0334
+1FC3 0334 0313 0300
0397 0313 0334 0300 0345
0397 0343 0334 0345 0340
0397 0343 0340 0345 0334
1F29 0334 0301
1F2D 0334
03B7 0314 0301 0345 0334
+03B7 0314 0334 0345 0301
03B7 0314 0334 0345 0341
-1F21 0334 0341 0345
-1F21 0345 0301 0334
+03B7 0314 0345 0334 0301
0397 0314 0334 0345 0341
0397 0334 0314 0345 0301
1F29 0334 0301 0345
03C5 0334 0343 0341
1F50 0334 0301
1F50 0334 0341
-03C5 0334 0313 0340
+03C5 0313 0340 0334
03C5 0334 0343 0340
1F50 0334 0340
1F52 0334
03D2 0308 0334
03D2 0334 0308
03D4 0334
+03B0 0334
03C5 0308 0341 0334
03C5 0344 0334
-03CB 0334 0341
03CB 0341 0334
03C5 0308 0300 0334
03C5 0308 0340 0334
2126 0334 0314 0301
2126 0334 0314 0341
03C9 0314 0334 0345 0301
-1F61 0334 0301 0345
-1F61 0341 0345 0334
+03C9 0345 0314 0334 0301
+1F61 0301 0345 0334
1F65 0345 0334
03A9 0314 0301 0345 0334
03A9 0345 0334 0314 0301
2126 0314 0334 0340
2126 0334 0314 0340
03C9 0314 0334 0345 0300
-03C9 0314 0340 0334 0345
03C9 0314 0345 0300 0334
-03C9 0334 0314 0340 0345
+03C9 0345 0314 0300 0334
+03C9 0345 0314 0334 0300
03A9 0314 0300 0345 0334
03A9 0345 0334 0314 0300
1F6B 0345 0334
FF73 003F
32D2 0021
32D2 003F
+3094 0021
+3094 003F
+30F4 0021
+30F4 003F
3046 0334 3099
3046 3099 0334
3094 0334
30A6 0334 3099
30A6 3099 0334
30F4 0334
-3094 0021
-3094 003F
-30F4 0021
-30F4 003F
3045 0061
3045 0041
3046 0061
FF76 003F
32D5 0021
32D5 003F
+304C 0021
+304C 003F
+30AC 0021
+30AC 003F
304B 0334 3099
304B 3099 0334
304C 0334
30AB 0334 3099
30AB 3099 0334
30AC 0334
-304C 0021
-304C 003F
-30AC 0021
-30AC 003F
3095 0061
3095 0041
304B 0061
FF77 003F
32D6 0021
32D6 003F
+304E 0021
+304E 003F
+30AE 0021
+30AE 003F
304D 0334 3099
304D 3099 0334
304E 0334
30AD 0334 3099
30AD 3099 0334
30AE 0334
-304E 0021
-304E 003F
-30AE 0021
-30AE 003F
304D 0061
304D 0041
30AD 0061
FF78 003F
32D7 0021
32D7 003F
+3050 0021
+3050 003F
+30B0 0021
+30B0 003F
304F 0334 3099
304F 3099 0334
3050 0334
30AF 0334 3099
30AF 3099 0334
30B0 0334
-3050 0021
-3050 003F
-30B0 0021
-30B0 003F
304F 0061
304F 0041
31F0 0061
FF79 003F
32D8 0021
32D8 003F
+3052 0021
+3052 003F
+30B2 0021
+30B2 003F
3051 0334 3099
3051 3099 0334
3052 0334
30B1 0334 3099
30B1 3099 0334
30B2 0334
-3052 0021
-3052 003F
-30B2 0021
-30B2 003F
331C 0021
331C 003F
331C 0061
FF7A 003F
32D9 0021
32D9 003F
+3054 0021
+3054 003F
+30B4 0021
+30B4 003F
3053 0334 3099
3053 3099 0334
3054 0334
30B3 0334 3099
30B3 3099 0334
30B4 0334
-3054 0021
-3054 003F
-30B4 0021
-30B4 003F
331E 0021
331E 003F
331E 0061
32DA 003F
1F202 0021
1F202 003F
+3056 0021
+3056 003F
+30B6 0021
+30B6 003F
1F202 0334
3055 0334 3099
3055 3099 0334
30B5 0334 3099
30B5 3099 0334
30B6 0334
-3056 0021
-3056 003F
-30B6 0021
-30B6 003F
3055 0061
3055 0041
30B5 0061
FF7C 003F
32DB 0021
32DB 003F
+3058 0021
+3058 003F
+30B8 0021
+30B8 003F
3057 0334 3099
3057 3099 0334
3058 0334
30B7 0334 3099
30B7 3099 0334
30B8 0334
-3058 0021
-3058 003F
-30B8 0021
-30B8 003F
3057 0061
3057 0041
31F1 0061
FF7D 003F
32DC 0021
32DC 003F
+305A 0021
+305A 003F
+30BA 0021
+30BA 003F
3059 0334 3099
3059 3099 0334
305A 0334
30B9 0334 3099
30B9 3099 0334
30BA 0334
-305A 0021
-305A 003F
-30BA 0021
-30BA 003F
3059 0061
3059 0041
31F2 0061
FF7E 003F
32DD 0021
32DD 003F
+305C 0021
+305C 003F
+30BC 0021
+30BC 003F
305B 0334 3099
305B 3099 0334
305C 0334
30BB 0334 3099
30BB 3099 0334
30BC 0334
-305C 0021
-305C 003F
-30BC 0021
-30BC 003F
305B 0061
305B 0041
30BB 0061
FF7F 003F
32DE 0021
32DE 003F
+305E 0021
+305E 003F
+30BE 0021
+30BE 003F
305D 0334 3099
305D 3099 0334
305E 0334
30BD 0334 3099
30BD 3099 0334
30BE 0334
-305E 0021
-305E 003F
-30BE 0021
-30BE 003F
305D 0061
305D 0041
30BD 0061
FF80 003F
32DF 0021
32DF 003F
+3060 0021
+3060 003F
+30C0 0021
+30C0 003F
305F 0334 3099
305F 3099 0334
3060 0334
30BF 0334 3099
30BF 3099 0334
30C0 0334
-3060 0021
-3060 003F
-30C0 0021
-30C0 003F
3324 0021
3324 003F
3324 0061
FF81 003F
32E0 0021
32E0 003F
+3062 0021
+3062 003F
+30C2 0021
+30C2 003F
3061 0334 3099
3061 3099 0334
3062 0334
30C1 0334 3099
30C1 3099 0334
30C2 0334
-3062 0021
-3062 003F
-30C2 0021
-30C2 003F
3061 0061
3061 0041
30C1 0061
FF82 003F
32E1 0021
32E1 003F
+3065 0021
+3065 003F
+30C5 0021
+30C5 003F
3064 0334 3099
3064 3099 0334
3065 0334
30C4 0334 3099
30C4 3099 0334
30C5 0334
-3065 0021
-3065 003F
-30C5 0021
-30C5 003F
3063 0061
3063 0041
3064 0061
FF83 003F
32E2 0021
32E2 003F
-3066 0334 3099
-3066 3099 0334
-3067 0334
-30C6 0334 3099
-30C6 3099 0334
-30C7 0334
3067 0021
3067 003F
30C7 0021
1F213 0021
1F213 003F
1F213 0334
+3066 0334 3099
+3066 3099 0334
+3067 0334
+30C6 0334 3099
+30C6 3099 0334
+30C7 0334
3066 0061
3066 0041
30C6 0061
FF84 003F
32E3 0021
32E3 003F
+3069 0021
+3069 003F
+30C9 0021
+30C9 003F
3068 0334 3099
3068 3099 0334
3069 0334
30C8 0334 3099
30C8 3099 0334
30C9 0334
-3069 0021
-3069 003F
-30C9 0021
-30C9 003F
3068 0061
3068 0041
31F3 0061
FF8A 003F
32E9 0021
32E9 003F
+3070 0021
+3070 003F
+30D0 0021
+30D0 003F
+3071 0021
+3071 003F
+30D1 0021
+30D1 003F
306F 0334 3099
306F 3099 0334
3070 0334
30CF 0334 309A
30CF 309A 0334
30D1 0334
-3070 0021
-3070 003F
-30D0 0021
-30D0 003F
-3071 0021
-3071 003F
-30D1 0021
-30D1 003F
332B 0021
332B 003F
332B 0061
FF8B 003F
32EA 0021
32EA 003F
+3073 0021
+3073 003F
+30D3 0021
+30D3 003F
+3074 0021
+3074 003F
+30D4 0021
+30D4 003F
3072 0334 3099
3072 3099 0334
3073 0334
30D2 0334 309A
30D2 309A 0334
30D4 0334
-3073 0021
-3073 003F
-30D3 0021
-30D3 003F
-3074 0021
-3074 003F
-30D4 0021
-30D4 003F
3072 0061
3072 0041
31F6 0061
FF8C 003F
32EB 0021
32EB 003F
+3076 0021
+3076 003F
+30D6 0021
+30D6 003F
+3077 0021
+3077 003F
+30D7 0021
+30D7 003F
3075 0334 3099
3075 3099 0334
3076 0334
30D5 0334 309A
30D5 309A 0334
30D7 0334
-3076 0021
-3076 003F
-30D6 0021
-30D6 003F
-3077 0021
-3077 003F
-30D7 0021
-30D7 003F
3075 0061
3075 0041
31F7 0061
FF8D 003F
32EC 0021
32EC 003F
+3079 0021
+3079 003F
+30D9 0021
+30D9 003F
+307A 0021
+307A 003F
+30DA 0021
+30DA 003F
3078 0334 3099
3078 3099 0334
3079 0334
30D8 0334 309A
30D8 309A 0334
30DA 0334
-3079 0021
-3079 003F
-30D9 0021
-30D9 003F
-307A 0021
-307A 003F
-30DA 0021
-30DA 003F
333B 0021
333B 003F
333B 0061
FF8E 003F
32ED 0021
32ED 003F
+307C 0021
+307C 003F
+30DC 0021
+30DC 003F
+307D 0021
+307D 003F
+30DD 0021
+30DD 003F
307B 0334 3099
307B 3099 0334
307C 0334
30DB 0334 309A
30DB 309A 0334
30DD 0334
-307C 0021
-307C 003F
-30DC 0021
-30DC 003F
-307D 0021
-307D 003F
-30DD 0021
-30DD 003F
3341 0021
3341 003F
3341 0061
FF9C 003F
32FB 0021
32FB 003F
+30F7 0021
+30F7 003F
30EF 0334 3099
30EF 3099 0334
30F7 0334
-30F7 0021
-30F7 003F
308E 0061
308E 0041
308F 0061
30F0 003F
32FC 0021
32FC 003F
+30F8 0021
+30F8 003F
30F0 0334 3099
30F0 3099 0334
30F8 0334
-30F8 0021
-30F8 003F
3090 0061
3090 0041
30F0 0061
30F1 003F
32FD 0021
32FD 003F
+30F9 0021
+30F9 003F
30F1 0334 3099
30F1 3099 0334
30F9 0334
-30F9 0021
-30F9 003F
3091 0061
3091 0041
30F1 0061
FF66 003F
32FE 0021
32FE 003F
+30FA 0021
+30FA 003F
30F2 0334 3099
30F2 3099 0334
30FA 0334
-30FA 0021
-30FA 003F
3092 0061
3092 0041
30F2 0061
12262 0061
12262 0041
12262 0062
+122D4 0021
+122D4 003F
+122D4 0334
+122D4 0061
+122D4 0041
+122D4 0062
+122D5 0021
+122D5 003F
+122D5 0334
+122D5 0061
+122D5 0041
+122D5 0062
12263 0021
12263 003F
12263 0334
122D3 0061
122D3 0041
122D3 0062
-122D4 0021
-122D4 003F
-122D4 0334
-122D4 0061
-122D4 0041
-122D4 0062
-122D5 0021
-122D5 003F
-122D5 0334
-122D5 0061
-122D5 0041
-122D5 0062
122D6 0021
122D6 003F
122D6 0334
10FFFF 0061
10FFFF 0041
10FFFF 0062
+FFFD 0021
+FFFD 003F
+FFFD 0061
+FFFD 0041
+FFFD 0062
FFFF 0021
FFFF 003F
FFFF 0061
--- /dev/null
+# Copyright (c) 2012-2014 International Business Machines
+# Corporation and others. All Rights Reserved.
+#
+# This file should be in UTF-8 with a signature byte sequence ("BOM").
+#
+# collationtest.txt: Collation test data.
+#
+# created on: 2012apr13
+# created by: Markus W. Scherer
+
+# A line with "** test: description" is used for verbose and error output.
+
+# A collator can be set with "@ root" or "@ locale language-tag",
+# for example "@ locale de-u-co-phonebk".
+
+# A collator can be built with "@ rules".
+# An "@ rules" line is followed by one or more lines with the tailoring rules.
+
+# A collator can be modified with "% attribute=value".
+
+# "* compare" tests the order (= or <) of the following strings.
+# The relation can be "=" or "<" (the level of the difference is not specified)
+# or "<1", "<2", "<c", "<3", "<4" (indicating the level of the difference).
+
+# Test sections ("* compare") are terminated by
+# definitions of new collators, changing attributes, or new test sections.
+
+** test: simple CEs & expansions
+# Many types of mappings are tested elsewhere, including via the UCA conformance tests.
+# Here we mostly cover a few unusual mappings.
+@ rules
+&\x01 # most control codes are ignorable
+<<<\u0300 # tertiary CE
+&9<\x00 # NUL not ignorable
+&\uA00A\uA00B=\uA002 # two long-primary CEs
+&\uA00A\uA00B\u00050005=\uA003 # three CEs, require 64 bits
+
+* compare
+= \x01
+= \x02
+<3 \u0300
+<1 9
+<1 \x00
+= \x01\x00\x02
+<1 a
+<3 a\u0300
+<2 a\u0308
+= ä
+<1 b
+<1 か # Hiragana Ka (U+304B)
+<2 か\u3099 # plus voiced sound mark
+= が # Hiragana Ga (U+304C)
+<1 \uA00A\uA00B
+= \uA002
+<1 \uA00A\uA00B\u00050004
+<1 \uA00A\uA00B\u00050005
+= \uA003
+<1 \uA00A\uA00B\u00050006
+
+** test: contractions
+# Create some interesting mappings, and map some normalization-inert characters
+# (which are not subject to canonical reordering)
+# to some of the same CEs to check the sequence of CEs.
+@ rules
+
+# Contractions starting with 'a' should not continue with any character < U+0300
+# so that we can test a shortcut for that.
+&a=ⓐ
+&b<bz=ⓑ
+&d<dz\u0301=ⓓ # d+z+acute
+&z
+<a\u0301=Ⓐ # a+acute sorts after z
+<a\u0301\u0301=Ⓑ # a+acute+acute
+<a\u0301\u0301\u0358=Ⓒ # a+acute+acute+dot above right
+<a\u030a=Ⓓ # a+ring
+<a\u0323=Ⓔ # a+dot below
+<a\u0323\u0358=Ⓕ # a+dot below+dot above right
+<a\u0327\u0323\u030a=Ⓖ # a+cedilla+dot below+ring
+<a\u0327\u0323bz=Ⓗ # a+cedilla+dot below+b+z
+
+&\U0001D158=⁰ # musical notehead black (has a symbol primary)
+<\U0001D158\U0001D165=¼ # musical quarter note
+
+# deliberately missing prefix contractions:
+# dz
+# a\u0327
+# a\u0327\u0323
+# a\u0327\u0323b
+
+&\x01
+<<<\U0001D165=¹ # musical stem (ccc=216)
+<<<\U0001D16D=² # musical augmentation dot (ccc=226)
+<<<\U0001D165\U0001D16D=³ # stem+dot (ccc=216 226)
+&\u0301=❶ # acute (ccc=230)
+&\u030a=❷ # ring (ccc=230)
+&\u0308=❸ # diaeresis (ccc=230)
+<<\u0308\u0301=❹ # diaeresis+acute (=dialytika tonos) (ccc=230 230)
+&\u0327=❺ # cedilla (ccc=202)
+&\u0323=❻ # dot below (ccc=220)
+&\u0331=❼ # macron below (ccc=220)
+<<\u0331\u0358=❽ # macron below+dot above right (ccc=220 232)
+&\u0334=❾ # tilde overlay (ccc=1)
+&\u0358=❿ # dot above right (ccc=232)
+
+&\u0f71=① # tibetan vowel sign aa
+&\u0f72=② # tibetan vowel sign i
+# \u0f71\u0f72 # tibetan vowel sign aa + i = ii = U+0F73
+&\u0f73=③ # tibetan vowel sign ii (ccc=0 but lccc=129)
+
+** test: simple contractions
+
+# Some strings are chosen to cause incremental contiguous contraction matching to
+# go into partial matches for prefixes of contractions
+# (where the prefixes are deliberately not also contractions).
+# When there is no complete match, then the matching code must back out of those
+# so that discontiguous contractions work as specified.
+
+* compare
+# contraction starter with no following text, or mismatch, or blocked
+<1 a
+= ⓐ
+<1 aa
+= ⓐⓐ
+<1 ab
+= ⓐb
+<1 az
+= ⓐz
+
+* compare
+<1 a
+<2 a\u0308\u030a # ring blocked by diaeresis
+= ⓐ❸❷
+<2 a\u0327
+= ⓐ❺
+
+* compare
+<2 \u0308
+= ❸
+<2 \u0308\u030a\u0301 # acute blocked by ring
+= ❸❷❶
+
+* compare
+<1 \U0001D158
+= ⁰
+<1 \U0001D158\U0001D165
+= ¼
+
+# no discontiguous contraction because of missing prefix contraction d+z,
+# and a starter ('z') after the 'd'
+* compare
+<1 dz\u0323\u0301
+= dz❻❶
+
+# contiguous contractions
+* compare
+<1 abz
+= ⓐⓑ
+<1 abzz
+= ⓐⓑz
+
+* compare
+<1 a
+<1 z
+<1 a\u0301
+= Ⓐ
+<1 a\u0301\u0301
+= Ⓑ
+<1 a\u0301\u0301\u0358
+= Ⓒ
+<1 a\u030a
+= Ⓓ
+<1 a\u0323\u0358
+= Ⓕ
+<1 a\u0327\u0323\u030a # match despite missing prefix
+= Ⓖ
+<1 a\u0327\u0323bz
+= Ⓗ
+
+* compare
+<2 \u0308\u0308\u0301 # acute blocked from first diaeresis, contracts with second
+= ❸❹
+
+* compare
+<1 \U0001D158\U0001D165
+= ¼
+
+* compare
+<3 \U0001D165\U0001D16D
+= ³
+
+** test: discontiguous contractions
+* compare
+<1 a\u0327\u030a # a+ring skips cedilla
+= Ⓓ❺
+<2 a\u0327\u0327\u030a # a+ring skips 2 cedillas
+= Ⓓ❺❺
+<2 a\u0327\u0327\u0327\u030a # a+ring skips 3 cedillas
+= Ⓓ❺❺❺
+<2 a\u0334\u0327\u0327\u030a # a+ring skips tilde overlay & 2 cedillas
+= Ⓓ❾❺❺
+<1 a\u0327\u0323 # a+dot below skips cedilla
+= Ⓔ❺
+<1 a\u0323\u0301\u0358 # a+dot below+dot ab.r.: 2-char match, then skips acute
+= Ⓕ❶
+<2 a\u0334\u0323\u0358 # a+dot below skips tilde overlay
+= Ⓕ❾
+
+* compare
+<2 \u0331\u0331\u0358 # macron below+dot ab.r. skips the second macron below
+= ❽❼
+
+* compare
+<1 a\u0327\u0331\u0323\u030a # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
+= Ⓓ❺❼❻
+<1 a\u0327\u0323\U0001D16D\u030a # a+dot below skips cedilla
+= Ⓔ❺²❷
+<2 a\u0327\u0327\u0323\u030a # a+dot below skips 2 cedillas
+= Ⓔ❺❺❷
+<2 a\u0327\u0323\u0323\u030a # a+dot below skips cedilla
+= Ⓔ❺❻❷
+<2 a\u0334\u0327\u0323\u030a # a+dot below skips tilde overlay & cedilla
+= Ⓔ❾❺❷
+
+* compare
+<1 \U0001D158\u0327\U0001D165 # quarter note skips cedilla
+= ¼❺
+<1 a\U0001D165\u0323 # a+dot below skips stem
+= Ⓔ¹
+
+# partial contiguous match, backs up, matches discontiguous contraction
+<1 a\u0327\u0323b
+= Ⓔ❺b
+<1 a\u0327\u0323ba
+= Ⓔ❺bⓐ
+
+# a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
+* compare
+<1 a\u0327\u0301\u0301\u0358
+= Ⓒ❺
+
+# FCD but not NFD
+* compare
+<1 a\u0f73\u0301 # a+acute skips tibetan ii
+= Ⓐ③
+
+# FCD but the 0f71 inside the 0f73 must be skipped
+# to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
+* compare
+<1 \u0f71\u0f73 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
+= ③①
+
+** test: discontiguous contractions with nested contractions
+* compare
+<1 a\u0323\u0308\u0301\u0358
+= Ⓕ❹
+<2 a\u0323\u0308\u0301\u0308\u0301\u0358
+= Ⓕ❹❹
+
+** test: discontiguous contractions with interleaved contractions
+* compare
+# a+ring & cedilla & macron below+dot above right
+<1 a\u0327\u0331\u030a\u0358
+= Ⓓ❺❽
+
+# a+ring & 1x..3x macron below+dot above right
+<2 a\u0331\u030a\u0358
+= Ⓓ❽
+<2 a\u0331\u0331\u030a\u0358\u0358
+= Ⓓ❽❽
+# also skips acute
+<2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
+= Ⓓ❽❽❽❶
+
+# a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
+<1 a\U0001D165\u0323\U0001D16Ddz\u0301
+= Ⓔ³ⓓ
+
+** test: some simple string comparisons
+@ root
+* compare
+# first string compares against ""
+= \u0000
+< a
+<1 b
+<3 B
+= \u0000B\u0000
+
+** test: compare with strength=primary
+% strength=primary
+* compare
+<1 a
+<1 b
+= B
+
+** test: compare with strength=secondary
+% strength=secondary
+* compare
+<1 a
+<1 b
+= B
+
+** test: compare with strength=tertiary
+% strength=tertiary
+* compare
+<1 a
+<1 b
+<3 B
+
+** test: compare with strength=quaternary
+% strength=quaternary
+* compare
+<1 a
+<1 b
+<3 B
+
+** test: compare with strength=identical
+% strength=identical
+* compare
+<1 a
+<1 b
+<3 B
+
+** test: côté with forwards secondary
+@ root
+* compare
+<1 cote
+<2 coté
+<2 côte
+<2 côté
+
+** test: côté with forwards secondary vs. U+FFFE merge separator
+# Merged sort keys: On each level, any difference in the first segment
+# must trump any further difference.
+* compare
+<1 cote\uFFFEcôté
+<2 coté\uFFFEcôte
+<2 côte\uFFFEcoté
+<2 côté\uFFFEcote
+
+** test: côté with backwards secondary
+% backwards=on
+* compare
+<1 cote
+<2 côte
+<2 coté
+<2 côté
+
+** test: côté with backwards secondary vs. U+FFFE merge separator
+# Merged sort keys: On each level, any difference in the first segment
+# must trump any further difference.
+* compare
+<1 cote\uFFFEcôté
+<2 côte\uFFFEcoté
+<2 coté\uFFFEcôte
+<2 côté\uFFFEcote
+
+** test: U+FFFE on identical level
+@ root
+% strength=identical
+* compare
+# All of these control codes are completely-ignorable, so that
+# their low code points are compared with the merge separator.
+# The merge separator must compare less than any other character.
+<1 \uFFFE\u0001\u0002\u0003
+<i \u0001\uFFFE\u0002\u0003
+<i \u0001\u0002\uFFFE\u0003
+<i \u0001\u0002\u0003\uFFFE
+
+* compare
+# The merge separator must even compare less than U+0000.
+<1 \uFFFE\u0000\u0000
+<i \u0000\uFFFE\u0000
+<i \u0000\u0000\uFFFE
+
+** test: Hani < surrogates < U+FFFD
+# Note: compareUTF8() treats unpaired surrogates like U+FFFD,
+# so with that the strings with surrogates will compare equal to each other
+# and equal to the string with U+FFFD.
+@ root
+% strength=identical
+* compare
+<1 abz
+<1 a\u4e00z
+<1 a\U00020000z
+<1 a\ud800z
+<1 a\udbffz
+<1 a\udc00z
+<1 a\udfffz
+<1 a\ufffdz
+
+** test: script reordering
+@ root
+% reorder Hani Zzzz digit
+* compare
+<1 ?
+<1 +
+<1 丂
+<1 a
+<1 α
+<1 5
+
+% reorder default
+* compare
+<1 ?
+<1 +
+<1 5
+<1 a
+<1 α
+<1 丂
+
+** test: empty rules
+@ rules
+* compare
+<1 a
+<2 ä
+<3 Ä
+<1 b
+
+** test: very simple rules
+@ rules
+&a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
+% strength=quaternary
+* compare
+<1 a
+= e
+<4 q
+<4 r
+<1 x
+<3 X
+<2 y
+<3 Y
+<2 z
+<3 Z
+
+** test: tailoring twice before a root position: primary
+@ rules
+&[before 1]b<p
+&[before 1]b<q
+* compare
+<1 a
+<1 p
+<1 q
+<1 b
+
+** test: tailoring twice before a root position: secondary
+@ rules
+&[before 2]ſ<<p
+&[before 2]ſ<<q
+* compare
+<1 s
+<2 p
+<2 q
+<2 ſ
+
+# secondary-before common weight
+@ rules
+&[before 2]b<<p
+&[before 2]b<<q
+* compare
+<1 a
+<1 p
+<2 q
+<2 b
+
+** test: tailoring twice before a root position: tertiary
+@ rules
+&[before 3]B<<<p
+&[before 3]B<<<q
+* compare
+<1 b
+<3 p
+<3 q
+<3 B
+
+# tertiary-before common weight
+@ rules
+&[before 3]b<<<p
+&[before 3]b<<<q
+* compare
+<1 a
+<1 p
+<3 q
+<3 b
+
+@ rules
+&[before 2]b<<s
+&[before 3]s<<<p
+&[before 3]s<<<q
+* compare
+<1 a
+<1 p
+<3 q
+<3 s
+<2 b
+
+** test: tailor after completely ignorable
+@ rules
+&\x00<<<x<<y
+* compare
+= \x00
+= \x1F
+<3 x
+<2 y
+
+** test: secondary tailoring gaps, ICU ticket 9362
+@ rules
+&[before 2]s<<'_'
+&s<<r # secondary between s and ſ (long s)
+&ſ<<*a-q # more than 15 between ſ and secondary CE boundary
+&[before 2][first primary ignorable]<<u<<v # between secondary CE boundary & lowest secondary CE
+&[last primary ignorable]<<y<<z
+
+* compare
+<2 u
+<2 v
+<2 \u0332 # lowest secondary CE
+<2 \u0308
+<2 y
+<2 z
+<1 s_
+<2 ss
+<2 sr
+<2 sſ
+<2 sa
+<2 sb
+<2 sp
+<2 sq
+<2 sus
+<2 svs
+<2 rs
+
+** test: tertiary tailoring gaps, ICU ticket 9362
+@ rules
+&[before 3]t<<<'_'
+&t<<<r # tertiary between t and fullwidth t
+&ᵀ<<<*a-q # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
+&[before 3][first secondary ignorable]<<<u<<<v # between tertiary CE boundary & lowest tertiary CE
+&[last secondary ignorable]<<<y<<<z
+
+* compare
+<3 u
+<3 v
+# Note: The root collator currently does not map any characters to tertiary CEs.
+<3 y
+<3 z
+<1 t_
+<3 tt
+<3 tr
+<3 tt
+<3 tᵀ
+<3 ta
+<3 tb
+<3 tp
+<3 tq
+<3 tut
+<3 tvt
+<3 rt
+
+** test: secondary & tertiary around root character
+@ rules
+&[before 2]m<<r
+&m<<s
+&[before 3]m<<<u
+&m<<<v
+* compare
+<1 l
+<1 r
+<2 u
+<3 m
+<3 v
+<2 s
+<1 n
+
+** test: secondary & tertiary around tailored item
+@ rules
+&m<x
+&[before 2]x<<r
+&x<<s
+&[before 3]x<<<u
+&x<<<v
+* compare
+<1 m
+<1 r
+<2 u
+<3 x
+<3 v
+<2 s
+<1 n
+
+** test: more nesting of secondary & tertiary before
+@ rules
+&[before 3]m<<<u
+&[before 2]m<<r
+&[before 3]r<<<q
+&m<<<w
+&m<<t
+&[before 3]w<<<v
+&w<<<x
+&w<<s
+* compare
+<1 l
+<1 q
+<3 r
+<2 u
+<3 m
+<3 v
+<3 w
+<3 x
+<2 s
+<2 t
+<1 n
+
+** test: case bits
+@ rules
+&w<x # tailored CE getting case bits
+ =uv=uV=Uv=UV # 2 chars -> 1 CE
+&ae=ch=cH=Ch=CH # 2 chars -> 2 CEs
+&rst=yz=yZ=Yz=YZ # 2 chars -> 3 CEs
+% caseFirst=lower
+* compare
+<1 ae
+= ch
+<3 cH
+<3 Ch
+<3 CH
+<1 rst
+= yz
+<3 yZ
+<3 Yz
+<3 YZ
+<1 w
+<1 x
+= uv
+<3 uV
+= Uv # mixed case on single CE cannot distinguish variations
+<3 UV
+
+** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
+@ rules
+&\u0001<<<t<<<T # tertiary CEs
+% caseFirst=lower
+* compare
+<1 aa
+<3 aat
+<3 aaT
+<3 aA
+<3 aAt
+<3 ata
+<3 aTa
+
+** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
+% caseFirst=upper
+* compare
+<1 aA
+<3 aAt
+<3 aa
+<3 aat
+<3 aaT
+<3 ata
+<3 aTa
+
+** test: reset on expansion, ICU tickets 9415 & 9593
+@ rules
+&æ<x # tailor the last primary CE so that x sorts between ae and af
+&æb=bæ # copy all reset CEs to make bæ sort the same
+&각<h # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
+&⒀<<y # copy/tailor 4 CEs to make y sort with only a secondary difference
+&l·=z # handle the pre-context for · when fetching reset CEs
+ <<u # copy/tailor 2 CEs
+
+* compare
+<1 ae
+<2 æ
+<1 x
+<1 af
+
+* compare
+<1 aeb
+<2 æb
+= bæ
+
+* compare
+<1 각
+<1 h
+<1 갂
+<1 갃
+
+* compare
+<1 · # by itself: primary CE
+<1 l
+<2 l· # l+middle dot has only a secondary difference from l
+= z
+<2 u
+
+* compare
+<1 (13)
+<3 ⒀ # DUCET sets special tertiary weights in all CEs
+<2 y
+<1 (13[
+
+% alternate=shifted
+* compare
+<1 (13)
+= 13
+<3 ⒀
+= y # alternate=shifted removes the tailoring difference on the last CE
+<1 14
+
+** test: contraction inside extension, ICU ticket 9378
+@ rules
+&а<<х/й # all letters are Cyrillic
+* compare
+<1 ай
+<2 х
+
+** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
+@ rules
+&t<x &ᵀ<y # same primary weights
+&q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
+* compare
+<1 q
+<1 u
+<1 v
+<1 ꝗ
+<1 t
+<3 ᵀ
+<1 y
+<1 x
+
+# Principle: Each rule builds on the state of preceding rules and ignores following rules.
+
+** test: later rule does not affect earlier reset position, ICU ticket 10105
+@ rules
+&a < u < v < w &ov < x &b < v
+* compare
+<1 oa
+<1 ou
+<1 x # CE(o) followed by CE between u and w
+<1 ow
+<1 ob
+<1 ov
+
+** test: later rule does not affect earlier extension (1), ICU ticket 10105
+@ rules
+&a=x/b &v=b
+% strength=secondary
+* compare
+<1 B
+<1 c
+<1 v
+= b
+* compare
+<1 AB
+= x
+<1 ac
+<1 av
+= ab
+
+** test: later rule does not affect earlier extension (2), ICU ticket 10105
+@ rules
+&a <<< c / e &g <<< e / l
+% strength=secondary
+* compare
+<1 AE
+= c
+<2 æ
+<1 agl
+= ae
+
+** test: later rule does not affect earlier extension (3), ICU ticket 10105
+@ rules
+&a = b / c &d = c / e
+% strength=secondary
+* compare
+<1 AC # C is still only tertiary different from the original c
+= b
+<1 ade
+= ac
+
+** test: extension contains tailored character, ICU ticket 10105
+@ rules
+&a=e &b=u/e
+* compare
+<1 a
+= e
+<1 ba
+= be
+= u
+
+** test: add simple mappings for characters with root context
+@ rules
+&z=· # middle dot has a prefix mapping in the CLDR root
+&n=и # и (U+0438) has contractions in the root
+* compare
+<1 l
+<2 l· # root mapping for l|· still works
+<1 z
+= ·
+* compare
+<1 n
+= и
+<1 И
+<1 и\u0306 # root mapping for й=и\u0306 still works
+= й
+<3 Й
+
+** test: add context mappings around characters with root context
+@ rules
+&z=·h # middle dot has a prefix mapping in the CLDR root
+&n=ә|и # и (U+0438) has contractions in the root
+* compare
+<1 l
+<2 l· # root mapping for l|· still works
+<1 z
+= ·h
+* compare
+<1 и
+<3 И
+<1 и\u0306 # root mapping for й=и\u0306 still works
+= й
+* compare
+<1 әn
+= әи
+<1 әo
+
+** test: many secondary CEs at the top of their range
+@ rules
+&[last primary ignorable]<<*\u2801-\u28ff
+* compare
+<2 \u0308
+<2 \u2801
+<2 \u2802
+<2 \u2803
+<2 \u2804
+<2 \u28fd
+<2 \u28fe
+<2 \u28ff
+<1 \x20
+
+** test: many tertiary CEs at the top of their range
+@ rules
+&[last secondary ignorable]<<<*a-z
+* compare
+<3 a
+<3 b
+<3 c
+<3 d
+# e..w
+<3 x
+<3 y
+<3 z
+<2 \u0308
+
+** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
+@ rules
+&a=p|x &b=px &c=op
+* compare
+<1 b
+= px
+<3 B
+<1 c
+= op
+<3 C
+* compare
+<1 ca
+= opx # first contraction op, then prefix p|x
+<3 cA
+<3 Ca
+
+** test: reset position with prefix (pre-context), ICU ticket 10102
+@ rules
+&a=p|x &px=y
+* compare
+<1 pa
+= px
+= y
+<3 pA
+<1 q
+<1 x
+
+** test: prefix+contraction together (1), ICU ticket 10071
+@ rules
+&x=a|bc
+* compare
+<1 ab
+<1 Abc
+<1 abd
+<1 ac
+<1 aw
+<1 ax
+= abc
+<3 aX
+<3 Ax
+<1 b
+<1 bb
+<1 bc
+<3 bC
+<3 Bc
+<1 bd
+
+** test: prefix+contraction together (2), ICU ticket 10071
+@ rules
+&w=bc &x=a|b
+* compare
+<1 w
+= bc
+<3 W
+* compare
+<1 aw
+<1 ax
+= ab
+<3 aX
+<1 axb
+<1 axc
+= abc # prefix match a|b takes precedence over contraction match bc
+<3 abC
+<1 abd
+<1 ay
+
+** test: prefix+contraction together (3), ICU ticket 10071
+@ rules
+&x=a|b &w=bc # reverse order of rules as previous test, order should not matter here
+* compare # same "compare" sequences as previous test
+<1 w
+= bc
+<3 W
+* compare
+<1 aw
+<1 ax
+= ab
+<3 aX
+<1 axb
+<1 axc
+= abc # prefix match a|b takes precedence over contraction match bc
+<3 abC
+<1 abd
+<1 ay
+
+** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
+@ rules
+&d=ch &v=p|ci
+* compare
+<1 pc
+<3 pC
+<1 pcH
+<1 pcI
+<1 pd
+= pch # no-prefix contraction ch matches
+<3 pD
+<1 pv
+= pci # prefix+contraction p|ci matches
+<3 pV
+
+** test: tailor in & around compact ranges of root primaries
+# The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
+# which should be reliably encoded as one range in the root elements data.
+@ rules
+&[before 1]ᚁ<a
+&ᚁ<b
+&[before 1]ᚂ<c
+&ᚂ<d
+&[before 1]ᚚ<y
+&ᚚ<z
+&[before 2]ᚁ<<r
+&ᚁ<<s
+&[before 3]ᚚ<<<t
+&ᚚ<<<u
+* compare
+<1 ᣵ # U+18F5 last Canadian Aboriginal
+<1 a
+<1 r
+<2 ᚁ
+<2 s
+<1 b
+<1 c
+<1 ᚂ
+<1 d
+<1 ᚃ
+<1 ᚙ
+<1 y
+<1 t
+<3 ᚚ
+<3 u
+<1 z
+<1 ᚠ # U+16A0 first Runic
+
+** test: suppressContractions
+@ rules
+&z<ch<әж [suppressContractions [·cә]]
+* compare
+<1 ch
+<3 cH # ch was suppressed
+<1 l
+<1 l· # primary difference, not secondary, because l|· was suppressed
+<1 ә
+<2 ә\u0308 # secondary difference, not primary, because contractions for ә were suppressed
+<1 әж
+<3 әЖ
+
+** test: Hangul & Jamo
+@ rules
+&L=\u1100 # first Jamo L
+&V=\u1161 # first Jamo V
+&T=\u11A8 # first Jamo T
+&\uAC01<<*\u4E00-\u4EFF # first Hangul LVT syllable & lots of secondary diffs
+* compare
+<1 Lv
+<3 LV
+= \u1100\u1161
+= \uAC00
+<1 LVt
+<3 LVT
+= \u1100\u1161\u11A8
+= \uAC00\u11A8
+= \uAC01
+<2 LVT\u0308
+<2 \u4E00
+<2 \u4E01
+<2 \u4E80
+<2 \u4EFF
+<2 LV\u0308T
+<1 \uAC02
+
+** test: adjust special reset positions according to previous rules, CLDR ticket 6070
+@ rules
+&[last variable]<x
+[maxVariable space] # has effect only after building, no effect on following rules
+&[last variable]<y
+&[before 1][first regular]<z
+* compare
+<1 ? # some punctuation
+<1 x
+<1 y
+<1 z
+<1 $ # some symbol
+
+@ rules
+&[last primary ignorable]<<x<<<y
+&[last primary ignorable]<<z
+* compare
+<2 \u0358
+<2 x
+<3 y
+<2 z
+<1 \x20
+
+@ rules
+&[last secondary ignorable]<<<x
+&[last secondary ignorable]<<<y
+* compare
+<3 x
+<3 y
+<2 \u0358
+
+@ rules
+&[before 2][first variable]<<z
+&[before 2][first variable]<<y
+&[before 3][first variable]<<<x
+&[before 3][first variable]<<<w
+&[before 1][first variable]<v
+&[before 2][first variable]<<u
+&[before 3][first variable]<<<t
+&[before 2]\uFDD1\xA0<<s # FractionalUCA.txt: FDD1 00A0, SPACE first primary
+* compare
+<2 \u0358
+<1 s
+<2 \uFDD1\xA0
+<1 t
+<3 u
+<2 v
+<1 w
+<3 x
+<3 y
+<2 z
+<2 \t
+
+@ rules
+&[before 2][first regular]<<z
+&[before 3][first regular]<<<y
+&[before 1][first regular]<x
+&[before 3][first regular]<<<w
+&[before 2]\uFDD1\u263A<<v # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
+&[before 3][first regular]<<<u
+&[before 1][first regular]<p # primary before the boundary: becomes variable
+&[before 3][first regular]<<<t # not affected by p
+&[last variable]<q # after p!
+* compare
+<1 ?
+<1 p
+<1 q
+<1 t
+<3 u
+<3 v
+<1 w
+<3 x
+<1 y
+<3 z
+<1 $
+
+# check that p & q are indeed variable
+% alternate=shifted
+* compare
+= ?
+= p
+= q
+<1 t
+<3 u
+<3 v
+<1 w
+<3 x
+<1 y
+<3 z
+<1 $
+
+@ rules
+&[before 2][first trailing]<<z
+&[before 1][first trailing]<y
+&[before 3][first trailing]<<<x
+* compare
+<1 \u4E00 # first Han, first implicit
+<1 \uFDD1\uFDD0 # FractionalUCA.txt: unassigned first primary
+# Note: The root collator currently does not map any characters to the trailing first boundary primary.
+<1 x
+<3 y
+<1 z
+<2 \uFFFD # The root collator currently maps U+FFFD to the first real trailing primary.
+
+@ rules
+&[before 2][first primary ignorable]<<z
+&[before 2][first primary ignorable]<<y
+&[before 3][first primary ignorable]<<<x
+&[before 3][first primary ignorable]<<<w
+* compare
+= \x01
+<2 w
+<3 x
+<3 y
+<2 z
+<2 \u0301
+
+@ rules
+&[before 3][first secondary ignorable]<<<y
+&[before 3][first secondary ignorable]<<<x
+* compare
+= \x01
+<3 x
+<3 y
+<2 \u0301
+
+** test: canonical closure
+@ rules
+&X=A &U=Â
+* compare
+<1 U
+= Â
+= A\u0302
+<2 Ú # U with acute
+= U\u0301
+= Ấ # A with circumflex & acute
+= Â\u0301
+= A\u0302\u0301
+<1 X
+= A
+<2 X\u030A # with ring above
+= Å
+= A\u030A
+= \u212B # Angstrom sign
+
+@ rules
+&x=\u5140\u55C0
+* compare
+<1 x
+= \u5140\u55C0
+= \u5140\uFA0D
+= \uFA0C\u55C0
+= \uFA0C\uFA0D # CJK compatibility characters
+<3 X
+
+# canonical closure on prefix rules, ICU ticket 9444
+@ rules
+&x=ä|ŝ
+* compare
+<1 äs # not tailored
+<1 äx
+= äŝ
+= a\u0308s\u0302
+= a\u0308ŝ
+= äs\u0302
+<3 äX
+
+** test: conjoining Jamo map to expansions
+@ rules
+&gg=\u1101 # Jamo Lead consonant GG
+&nj=\u11AC # Jamo Trail consonant NJ
+* compare
+<1 gg\u1161nj
+= \u1101\u1161\u11AC
+= \uAE4C\u11AC
+= \uAE51
+<3 gg\u1161nJ
+<1 \u1100\u1100
+
+** test: canonical tail closure, ICU ticket 5913
+@ rules
+&a<â
+* compare
+<1 a
+<1 â # tailored
+= a\u0302
+<2 a\u0323\u0302 # discontiguous contraction
+= ạ\u0302 # equivalent
+= ậ # equivalent
+<1 b
+
+@ rules
+&a<ạ
+* compare
+<1 a
+<1 ạ # tailored
+= a\u0323
+<2 a\u0323\u0302 # contiguous contraction plus extra diacritic
+= ạ\u0302 # equivalent
+= ậ # equivalent
+<1 b
+
+# Tail closure should work even if there is a prefix and/or contraction.
+@ rules
+&a<\u5140|câ
+# In order to find discontiguous contractions for \u5140|câ
+# there must exist a mapping for \u5140|ca, regardless of what it maps to.
+# (This follows from the UCA spec.)
+&x=\u5140|ca
+* compare
+<1 \u5140a
+= \uFA0Ca
+<1 \u5140câ # tailored
+= \uFA0Ccâ
+= \u5140ca\u0302
+= \uFA0Cca\u0302
+<2 \u5140ca\u0323\u0302 # discontiguous contraction
+= \uFA0Cca\u0323\u0302
+= \u5140cạ\u0302
+= \uFA0Ccạ\u0302
+= \u5140cậ
+= \uFA0Ccậ
+<1 \u5140b
+= \uFA0Cb
+<1 \u5140x
+= \u5140ca
+
+# Double-check that without the extra mapping there will be no discontiguous match.
+@ rules
+&a<\u5140|câ
+* compare
+<1 \u5140a
+= \uFA0Ca
+<1 \u5140câ # tailored
+= \uFA0Ccâ
+= \u5140ca\u0302
+= \uFA0Cca\u0302
+<1 \u5140b
+= \uFA0Cb
+<1 \u5140ca\u0323\u0302 # no discontiguous contraction
+= \uFA0Cca\u0323\u0302
+= \u5140cạ\u0302
+= \uFA0Ccạ\u0302
+= \u5140cậ
+= \uFA0Ccậ
+
+@ rules
+&a<cạ
+* compare
+<1 a
+<1 cạ # tailored
+= ca\u0323
+<2 ca\u0323\u0302 # contiguous contraction plus extra diacritic
+= cạ\u0302 # equivalent
+= cậ # equivalent
+<1 b
+
+# ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+# = 03C9 0313 0300 0345
+# ccc = 0, 230, 230, 240
+@ rules
+&δ=αῳ
+# In order to find discontiguous contractions for αῳ
+# there must exist a mapping for αω, regardless of what it maps to.
+# (This follows from the UCA spec.)
+&ε=αω
+* compare
+<1 δ
+= αῳ
+= αω\u0345
+<2 αω\u0313\u0300\u0345 # discontiguous contraction
+= αὠ\u0300\u0345
+= αὢ\u0345
+= αᾢ
+<2 αω\u0300\u0313\u0345
+= αὼ\u0313\u0345
+= αῲ\u0313 # not FCD
+<1 ε
+= αω
+
+# Double-check that without the extra mapping there will be no discontiguous match.
+@ rules
+&δ=αῳ
+* compare
+<1 αω\u0313\u0300\u0345 # no discontiguous contraction
+= αὠ\u0300\u0345
+= αὢ\u0345
+= αᾢ
+<2 αω\u0300\u0313\u0345
+= αὼ\u0313\u0345
+= αῲ\u0313 # not FCD
+<1 δ
+= αῳ
+= αω\u0345
+
+# Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
+# Tests code paths where the tailored string has a combining mark
+# that does not occur in any composite's decomposition.
+@ rules
+&δ=αὼ\u0315
+* compare
+<1 αω\u0313\u0300\u0315 # Not tailored: The grave accent blocks the comma above.
+= αὠ\u0300\u0315
+= αὢ\u0315
+<1 δ
+= αὼ\u0315
+= αω\u0300\u0315
+<2 αω\u0300\u0315\u0345
+= αὼ\u0315\u0345
+= αῲ\u0315 # not FCD
+
+** test: danish a+a vs. a-umlaut, ICU ticket 9319
+@ rules
+&z<aa
+* compare
+<1 z
+<1 aa
+<2 aa\u0308
+= aä
+
+** test: Jamo L with and in prefix
+# Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
+@ rules
+# Jamo Lead consonant G after G or GG
+&[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
+# Jamo Lead consonant GG sorts like G+G
+&\u1100\u1100=\u1101
+# Note: Making G|GG and GG|GG sort the same as G|G+G
+# would require the ability to reset on G|G+G,
+# or we could make G-after-G equal to some secondary-CE character,
+# and reset on a pair of those.
+# (It does not matter much if there are at most two G in a row in real text.)
+* compare
+<1 \u1100
+<2 \u1100\u1100 # only one primary from a sequence of G lead consonants
+= \u1101
+<2 \u1100\u1100\u1100
+= \u1101\u1100
+# but not = \u1100\u1101, see above
+<1 \u1100\u1161
+= \uAC00
+<2 \u1100\u1100\u1161
+= \u1100\uAC00 # prefix match from the L of the LV syllable
+= \u1101\u1161
+= \uAE4C
+
+** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
+@ rules
+# Low secondary CEs for Jamo V & T.
+# Note: T should sort before V for proper syllable order.
+&\u0332 # COMBINING LOW LINE (first primary ignorable)
+<<\u1161<<\u1162
+
+# Korean Jamo lead consonant search rules, part 2:
+# Make modern compound L jamo primary equivalent to non-compound forms.
+
+# Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
+&\u0313 # COMBINING COMMA ABOVE (second primary ignorable)
+=\u1100|\u1100
+=\u1103|\u1103
+=\u1107|\u1107
+=\u1109|\u1109
+=\u110C|\u110C
+
+# Compound L Jamo map to equivalent expansions of primary+secondary CE.
+&\u1100\u0313=\u1101<<<\u3132 # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
+&\u1103\u0313=\u1104<<<\u3138 # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
+&\u1107\u0313=\u1108<<<\u3143 # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
+&\u1109\u0313=\u110A<<<\u3146 # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
+&\u110C\u0313=\u110D<<<\u3149 # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC
+
+* compare
+<1 \u1100\u1161
+= \uAC00
+<2 \u1100\u1162
+= \uAC1C
+<2 \u1100\u1100\u1161
+= \u1100\uAC00
+= \u1101\u1161
+= \uAE4C
+<3 \u3132\u1161
+
+** test: Hangul syllables in prefix & in the interior of a contraction
+@ rules
+&x=\u1100\u1161|a\u1102\u1162z
+* compare
+<1 \u1100\u1161x
+= \u1100\u1161a\u1102\u1162z
+= \u1100\u1161a\uB0B4z
+= \uAC00a\u1102\u1162z
+= \uAC00a\uB0B4z
+
+** test: digits are unsafe-backwards when numeric=on
+@ root
+% numeric=on
+* compare
+# If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
+# We need to back up before the identical prefix "1" and compare the full numbers.
+<1 11b
+<1 101a
+
+** test: simple locale data test
+@ locale de
+* compare
+<1 a
+<2 ä
+<1 ae
+<2 æ
+
+@ locale de-u-co-phonebk
+* compare
+<1 a
+<1 ae
+<2 ä
+<2 æ
+
+# The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.
+
+** test: DataDrivenCollationTest/TestMorePinyin
+# Testing the primary strength.
+@ locale zh
+% strength=primary
+* compare
+< lā
+= lĀ
+= Lā
+= LĀ
+< lān
+= lĀn
+< lē
+= lĒ
+= Lē
+= LĒ
+< lēn
+= lĒn
+
+** test: DataDrivenCollationTest/TestLithuanian
+# Lithuanian sort order.
+@ locale lt
+* compare
+< cz
+< č
+< d
+< iz
+< j
+< sz
+< š
+< t
+< zz
+< ž
+
+** test: DataDrivenCollationTest/TestLatvian
+# Latvian sort order.
+@ locale lv
+* compare
+< cz
+< č
+< d
+< gz
+< ģ
+< h
+< iz
+< j
+< kz
+< ķ
+< l
+< lz
+< ļ
+< m
+< nz
+< ņ
+< o
+< rz
+< ŗ
+< s
+< sz
+< š
+< t
+< zz
+< ž
+
+** test: DataDrivenCollationTest/TestEstonian
+# Estonian sort order.
+@ locale et
+* compare
+< sy
+< š
+< šy
+< z
+< zy
+< ž
+< v
+< w
+< va
+< õ
+< õy
+< ä
+< äy
+< ö
+< öy
+< ü
+< üy
+< x
+
+** test: DataDrivenCollationTest/TestAlbanian
+# Albanian sort order.
+@ locale sq
+* compare
+< cz
+< ç
+< d
+< dz
+< dh
+< e
+< ez
+< ë
+< f
+< gz
+< gj
+< h
+< lz
+< ll
+< m
+< nz
+< nj
+< o
+< rz
+< rr
+< s
+< sz
+< sh
+< t
+< tz
+< th
+< u
+< xz
+< xh
+< y
+< zz
+< zh
+
+** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
+# Sorted file has different order.
+@ root
+# normalization=on turned on & off automatically.
+* compare
+< \u5F20
+< \u5F20\u4E00\u8E3F
+
+** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
+# This pretty much crashes.
+@ root
+* compare
+< \u0f71\u0f72\u0f80\u0f71\u0f72
+< \u0f80
+
+** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
+# These are examples of strings that caused trouble in partial sort key testing.
+@ locale th-TH
+* compare
+< \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
+< \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
+* compare
+< \u0E01\u0E07\u0E01\u0E32\u0E23
+< \u0E01\u0E07\u0E42\u0E01\u0E49
+* compare
+< \u0E01\u0E23\u0E19\u0E17\u0E32
+< \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
+* compare
+< \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
+< \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
+* compare
+< \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
+< \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32
+
+** test: DataDrivenCollationTest/TestJavaStyleRule
+# java.text allows rules to start as '<<<x<<<y...'
+# we emulate this by assuming a &[first tertiary ignorable] in this case.
+@ rules
+&\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
+* compare
+= a
+= equal
+< z
+< x
+= b # x had become the new first primary ignorable
+< w
+
+** test: DataDrivenCollationTest/TestShiftedIgnorable
+# The UCA states that primary ignorables should be completely
+# ignorable when following a shifted code point.
+@ root
+% alternate=shifted
+% strength=quaternary
+* compare
+< a\u0020b
+= a\u0020\u0300b
+= a\u0020\u0301b
+< a_b
+= a_\u0300b
+= a_\u0301b
+< A\u0020b
+= A\u0020\u0300b
+= A\u0020\u0301b
+< A_b
+= A_\u0300b
+= A_\u0301b
+< a\u0301b
+< A\u0301b
+< a\u0300b
+< A\u0300b
+
+** test: DataDrivenCollationTest/TestNShiftedIgnorable
+# The UCA states that primary ignorables should be completely
+# ignorable when following a shifted code point.
+@ root
+% alternate=non-ignorable
+% strength=tertiary
+* compare
+< a\u0020b
+< A\u0020b
+< a\u0020\u0301b
+< A\u0020\u0301b
+< a\u0020\u0300b
+< A\u0020\u0300b
+< a_b
+< A_b
+< a_\u0301b
+< A_\u0301b
+< a_\u0300b
+< A_\u0300b
+< a\u0301b
+< A\u0301b
+< a\u0300b
+< A\u0300b
+
+** test: DataDrivenCollationTest/TestSafeSurrogates
+# It turned out that surrogates were not skipped properly
+# when iterating backwards if they were in the middle of a
+# contraction. This test assures that this is fixed.
+@ rules
+&a < x\ud800\udc00b
+* compare
+< a
+< x\ud800\udc00b
+
+** test: DataDrivenCollationTest/da_TestPrimary
+# This test goes through primary strength cases
+@ locale da
+% strength=primary
+* compare
+< Lvi
+< Lwi
+* compare
+< L\u00e4vi
+< L\u00f6wi
+* compare
+< L\u00fcbeck
+= Lybeck
+
+** test: DataDrivenCollationTest/da_TestTertiary
+# This test goes through tertiary strength cases
+@ locale da
+% strength=tertiary
+* compare
+< Luc
+< luck
+* compare
+< luck
+< L\u00fcbeck
+* compare
+< lybeck
+< L\u00fcbeck
+* compare
+< L\u00e4vi
+< L\u00f6we
+* compare
+< L\u00f6ww
+< mast
+
+* compare
+< A/S
+< ANDRE
+< ANDR\u00c9
+< ANDREAS
+< AS
+< CA
+< \u00c7A
+< CB
+< \u00c7C
+< D.S.B.
+< DA
+< \u00d0A
+< DB
+< \u00d0C
+< DSB
+< DSC
+< EKSTRA_ARBEJDE
+< EKSTRABUD0
+< H\u00d8ST
+< HAAG
+< H\u00c5NDBOG
+< HAANDV\u00c6RKSBANKEN
+< Karl
+< karl
+< NIELS\u0020J\u00d8RGEN
+< NIELS-J\u00d8RGEN
+< NIELSEN
+< R\u00c9E,\u0020A
+< REE,\u0020B
+< R\u00c9E,\u0020L
+< REE,\u0020V
+< SCHYTT,\u0020B
+< SCHYTT,\u0020H
+< SCH\u00dcTT,\u0020H
+< SCHYTT,\u0020L
+< SCH\u00dcTT,\u0020M
+< SS
+< \u00df
+< SSA
+< STORE\u0020VILDMOSE
+< STOREK\u00c6R0
+< STORM\u0020PETERSEN
+< STORMLY
+< THORVALD
+< THORVARDUR
+< \u00feORVAR\u00d0UR
+< THYGESEN
+< VESTERG\u00c5RD,\u0020A
+< VESTERGAARD,\u0020A
+< VESTERG\u00c5RD,\u0020B
+< \u00c6BLE
+< \u00c4BLE
+< \u00d8BERG
+< \u00d6BERG
+
+* compare
+< andere
+< chaque
+< chemin
+< cote
+< cot\u00e9
+< c\u00f4te
+< c\u00f4t\u00e9
+< \u010du\u010d\u0113t
+< Czech
+< hi\u0161a
+< irdisch
+< lie
+< lire
+< llama
+< l\u00f5ug
+< l\u00f2za
+< lu\u010d
+< luck
+< L\u00fcbeck
+< lye
+< l\u00e4vi
+< L\u00f6wen
+< m\u00e0\u0161ta
+< m\u00eer
+< myndig
+< M\u00e4nner
+< m\u00f6chten
+< pi\u00f1a
+< pint
+< pylon
+< \u0161\u00e0ran
+< savoir
+< \u0160erb\u016bra
+< Sietla
+< \u015blub
+< subtle
+< symbol
+< s\u00e4mtlich
+< verkehrt
+< vox
+< v\u00e4ga
+< waffle
+< wood
+< yen
+< yuan
+< yucca
+< \u017eal
+< \u017eena
+< \u017den\u0113va
+< zoo0
+< Zviedrija
+< Z\u00fcrich
+< zysk0
+< \u00e4ndere
+
+** test: DataDrivenCollationTest/hi_TestNewRules
+# This test goes through new rules and tests against old rules
+@ locale hi
+* compare
+< कॐ
+< कं
+< कँ
+< कः
+
+** test: DataDrivenCollationTest/ro_TestNewRules
+# This test goes through new rules and tests against old rules
+@ locale ro
+* compare
+< xAx
+< xă
+< xĂ
+< Xă
+< XĂ
+< xăx
+< xĂx
+< xâ
+< xÂ
+< Xâ
+< XÂ
+< xâx
+< xÂx
+< xb
+< xIx
+< xî
+< xÎ
+< Xî
+< XÎ
+< xîx
+< xÎx
+< xj
+< xSx
+< xș
+= xş
+< xȘ
+= xŞ
+< Xș
+= Xş
+< XȘ
+= XŞ
+< xșx
+= xşx
+< xȘx
+= xŞx
+< xT
+< xTx
+< xț
+= xţ
+< xȚ
+= xŢ
+< Xț
+= Xţ
+< XȚ
+= XŢ
+< xțx
+= xţx
+< xȚx
+= xŢx
+< xU
+
+** test: DataDrivenCollationTest/testOffsets
+# This tests cases where forwards and backwards iteration get different offsets
+@ locale en
+% strength=tertiary
+* compare
+< a\uD800\uDC00\uDC00
+< b\uD800\uDC00\uDC00
+* compare
+< \u0301A\u0301\u0301
+< \u0301B\u0301\u0301
+* compare
+< abcd\r\u0301
+< abce\r\u0301
+# TODO: test offsets in new CollationTest
+
+# End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.
+
+** test: was ICU 52 cmsccoll/TestRedundantRules
+@ rules
+& a < b < c < d& [before 1] c < m
+* compare
+<1 a
+<1 b
+<1 m
+<1 c
+<1 d
+
+@ rules
+& a < b <<< c << d <<< e& [before 3] e <<< x
+* compare
+<1 a
+<1 b
+<3 c
+<2 d
+<3 x
+<3 e
+
+@ rules
+& a < b <<< c << d <<< e <<< f < g& [before 1] g < x
+* compare
+<1 a
+<1 b
+<3 c
+<2 d
+<3 e
+<3 f
+<1 x
+<1 g
+
+@ rules
+& a <<< b << c < d& a < m
+* compare
+<1 a
+<3 b
+<2 c
+<1 m
+<1 d
+
+@ rules
+&a<b<<b\u0301 &z<b
+* compare
+<1 a
+<1 b\u0301
+<1 z
+<1 b
+
+@ rules
+&z<m<<<q<<<m
+* compare
+<1 z
+<1 q
+<3 m
+
+@ rules
+&z<<<m<q<<<m
+* compare
+<1 z
+<1 q
+<3 m
+
+@ rules
+& a < b < c < d& r < c
+* compare
+<1 a
+<1 b
+<1 d
+<1 r
+<1 c
+
+@ rules
+& a < b < c < d& c < m
+* compare
+<1 a
+<1 b
+<1 c
+<1 m
+<1 d
+
+@ rules
+& a < b < c < d& a < m
+* compare
+<1 a
+<1 m
+<1 b
+<1 c
+<1 d
+
+** test: was ICU 52 cmsccoll/TestExpansionSyntax
+# The following two rules should sort the particular list of strings the same.
+@ rules
+&AE <<< a << b <<< c &d <<< f
+* compare
+<1 AE
+<3 a
+<2 b
+<3 c
+<1 d
+<3 f
+
+@ rules
+&A <<< a / E << b / E <<< c /E &d <<< f
+* compare
+<1 AE
+<3 a
+<2 b
+<3 c
+<1 d
+<3 f
+
+# The following two rules should sort the particular list of strings the same.
+@ rules
+&AE <<< a <<< b << c << d < e < f <<< g
+* compare
+<1 AE
+<3 a
+<3 b
+<2 c
+<2 d
+<1 e
+<1 f
+<3 g
+
+@ rules
+&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
+* compare
+<1 AE
+<3 a
+<3 b
+<2 c
+<2 d
+<1 e
+<1 f
+<3 g
+
+# The following two rules should sort the particular list of strings the same.
+@ rules
+&AE <<< B <<< C / D <<< F
+* compare
+<1 AE
+<3 B
+<3 F
+<1 AED
+<3 C
+
+@ rules
+&A <<< B / E <<< C / ED <<< F / E
+* compare
+<1 AE
+<3 B
+<3 F
+<1 AED
+<3 C
+
+** test: never reorder trailing primaries
+@ root
+% reorder Zzzz Grek
+* compare
+<1 L
+<1 字
+<1 Ω
+<1 \uFFFD
+<1 \uFFFF
+
+** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
+@ rules
+&u=ab|cd
+&v=b|ce
+* compare
+<1 abc
+<1 abcc
+<1 abcf
+<1 abcd
+= abu
+<1 abce
+= abv
+
+# With the following rules, there is only one prefix per composite ĉ or ç,
+# but both prefixes apply to just c in NFD form.
+# We would get different results for composed vs. NFD input
+# if we fell back directly from longest-prefix mappings to no-prefix mappings.
+@ rules
+&x=op|ĉ
+&y=p|ç
+* compare
+<1 opc
+<2 opć
+<1 opcz
+<1 opd
+<1 opĉ
+= opc\u0302
+= opx
+<1 opç
+= opc\u0327
+= opy
+
+# The mapping is used which has the longest matching prefix for which
+# there is also a suffix match, with the longest suffix match among several for that prefix.
+@ rules
+&❶=d
+&❷=de
+&❸=def
+&①=c|d
+&②=c|de
+&③=c|def
+&④=bc|d
+&⑤=bc|de
+&⑥=bc|def
+&⑦=abc|d
+&⑧=abc|de
+&⑨=abc|def
+* compare
+<1 9aadzz
+= 9aa❶zz
+<1 9aadez
+= 9aa❷z
+<1 9aadef
+= 9aa❸
+<1 9acdzz
+= 9ac①zz
+<1 9acdez
+= 9ac②z
+<1 9acdef
+= 9ac③
+<1 9bcdzz
+= 9bc④zz
+<1 9bcdez
+= 9bc⑤z
+<1 9bcdef
+= 9bc⑥
+<1 abcdzz
+= abc⑦zz
+<1 abcdez
+= abc⑧z
+<1 abcdef
+= abc⑨
+
+** test: prefix + discontiguous contraction with missing prefix contraction
+# Unfortunate terminology: The first "prefix" here is the pre-context,
+# the second "prefix" refers to the contraction/relation string that is
+# one shorter than the one being tested.
+@ rules
+&x=p|e
+&y=p|ê
+&z=op|ê
+# No mapping for op|e:
+# Discontiguous contraction matching should not match op|ê in opệ
+# because it would have to skip the dot below and extend a match on op|e by the circumflex,
+# but there is no match on op|e.
+* compare
+<1 oPe
+<1 ope
+= opx
+<1 opệ
+= opy\u0323 # y not z
+<1 opê
+= opz
+
+# We cannot test for fallback by whether the contraction default CE32
+# is for another contraction. With the following rules, there is no mapping for op|e,
+# and the fallback to prefix p has no contractions.
+@ rules
+&x=p|e
+&z=op|ê
+* compare
+<1 oPe
+<1 ope
+= opx
+<2 opệ
+= opx\u0323\u0302 # x not z
+<1 opê
+= opz
+
+# One more variation: Fallback to the simple code point, no shorter non-empty prefix.
+@ rules
+&x=e
+&z=op|ê
+* compare
+<1 ope
+= opx
+<3 oPe
+= oPx
+<2 opệ
+= opx\u0323\u0302 # x not z
+<1 opê
+= opz
+
+** test: maxVariable via rules
+@ rules
+[maxVariable space][alternate shifted]
+* compare
+= \u0020
+= \u000A
+<1 .
+<1 ° # degree sign
+<1 $
+<1 0
+
+** test: maxVariable via setting
+@ root
+% maxVariable=currency
+% alternate=shifted
+* compare
+= \u0020
+= \u000A
+= .
+= ° # degree sign
+= $
+<1 0
+
+** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
+# This tests canonical closure, but it also tests that CollationFastLatin
+# bails out properly for contractions with combining marks.
+# For that we need pairs of strings that remain in the Latin fastpath
+# long enough, hence the extra "= b" lines.
+@ rules
+&b=\u00e4\u00e4
+* compare
+<1 b
+= \u00e4\u00e4
+= b
+= a\u0308a\u0308
+= b
+= \u00e4a\u0308
+= b
+= a\u0308\u00e4
+
+** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
+@ rules
+&b=\u00C5
+* compare
+<1 b
+= \u00C5
+= b
+= A\u030A
+= b
+= \u212B
+
+** test: reset-before on already-tailored characters, ICU ticket 10108
+@ rules
+&a<w<<x &[before 2]x<<y
+* compare
+<1 a
+<1 w
+<2 y
+<2 x
+
+@ rules
+&a<<w<<<x &[before 2]x<<y
+* compare
+<1 a
+<2 y
+<2 w
+<3 x
+
+@ rules
+&a<w<x &[before 2]x<<y
+* compare
+<1 a
+<1 w
+<1 y
+<2 x
+
+@ rules
+&a<w<<<x &[before 2]x<<y
+* compare
+<1 a
+<1 y
+<2 w
+<3 x
+
+** test: numeric collation with other settings, ICU ticket 9092
+@ root
+% strength=identical
+% caseFirst=upper
+% numeric=on
+* compare
+<1 100\u0020a
+<1 101
+
+** test: collation type fallback from unsupported type, ICU ticket 10149
+@ locale fr-CA-u-co-phonebk
+# Expect the same result as with fr-CA, using backwards-secondary order.
+# That is, we should fall back from the unsupported collation type
+# to the locale's default collation type.
+* compare
+<1 cote
+<2 côte
+<2 coté
+<2 côté
+
+** test: @ is equivalent to [backwards 2], ICU ticket 9956
+@ rules
+&b<a @ &v<<w
+* compare
+<1 b
+<1 a
+<1 cote
+<2 côte
+<2 coté
+<2 côté
+<1 v
+<2 w
+<1 x
+
+** test: shifted+reordering, ICU ticket 9507
+@ root
+% reorder Grek punct space
+% alternate=shifted
+% strength=quaternary
+# Which primaries are "variable" should be determined without script reordering,
+# and then primaries should be reordered whether they are shifted to quaternary or not.
+* compare
+<4 ( # punctuation
+<4 )
+<4 \u0020 # space
+<1 ` # symbol
+<1 ^
+<1 $ # currency symbol
+<1 €
+<1 0 # numbers
+<1 ε # Greek
+<1 e # Latin
+<1 e(e
+<4 e)e
+<4 e\u0020e
+<4 ee
+<3 e(E
+<4 e)E
+<4 e\u0020E
+<4 eE
+
+** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
+@ rules
+&\u0001<<<b<<<B
+% caseFirst=upper
+* compare
+<1 aaa
+<3 aaaB
+
+** test: secondary+case ignores secondary ignorables, ICU ticket 9355
+@ rules
+&\u0001<<<b<<<B
+% strength=secondary
+% caseLevel=on
+* compare
+<1 a
+= ab
+= aB
+
+** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
+@ rules
+&[before 2] ൌ << ൗ # U+0D57 << U+0D4C == 0D46+0D57
+* compare
+<1 ൗx
+<2 ൌx
+<1 ൗy
+<2 ൌy
+
+** test: quoted apostrophe in compact syntax, ICU ticket 8204
+@ rules
+&q<<*a''c
+* compare
+<1 d
+<1 p
+<1 q
+<2 a
+<2 \u0027
+<2 c
+<1 r
/*
*******************************************************************************
- * Copyright (C) 2008-2013, International Business Machines Corporation and
+ * Copyright (C) 2008-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.RuleBasedCollator;
+import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
AlphabeticIndex alphabeticIndex = new AlphabeticIndex(Locale.ENGLISH);
RuleBasedCollator collator = alphabeticIndex.getCollator();
collator.setStrength(Collator.IDENTICAL);
- Collection<String> firsts = AlphabeticIndex.getFirstCharactersInScripts();
+ Collection<String> firsts = alphabeticIndex.getFirstCharactersInScripts();
// Verify that each script is represented exactly once.
UnicodeSet missingScripts = new UnicodeSet("[^[:sc=inherited:][:sc=unknown:][:sc=common:][:Script=Braille:]]");
String last = "";
for (String index : firsts) {
- if (index.equals("\uFFFF")) {
- continue;
- }
if (collator.compare(last,index) >= 0) {
errln("Characters not in order: " + last + " !< " + index);
}
- int script = UScript.getScript(index.codePointAt(0)); // we actually look at just the first char
+ int script = getFirstRealScript(index);
+ if (script == UScript.UNKNOWN) { continue; }
UnicodeSet s = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script);
if (missingScripts.containsNone(s)) {
errln("2nd character in script: " + index + "\t" + new UnicodeSet(missingScripts).retainAll(s).toPattern(false));
}
}
+ private static final int getFirstRealScript(CharSequence s) {
+ for (int i = 0; i < s.length();) {
+ int c = Character.codePointAt(s, i);
+ int script = UScript.getScript(c);
+ if (script != UScript.UNKNOWN && script != UScript.INHERITED && script != UScript.COMMON) {
+ return script;
+ }
+ i += Character.charCount(c);
+ }
+ return UScript.UNKNOWN;
+ }
+
public void TestBuckets() {
ULocale additionalLocale = ULocale.ENGLISH;
}
public void TestFirstScriptCharacters() {
- Collection<String> firstCharacters = AlphabeticIndex.getFirstCharactersInScripts();
+ Collection<String> firstCharacters =
+ new AlphabeticIndex(ULocale.ENGLISH).getFirstCharactersInScripts();
Collection<String> expectedFirstCharacters = firstStringsInScript((RuleBasedCollator) Collator.getInstance(ULocale.ROOT));
Collection<String> diff = new TreeSet<String>(firstCharacters);
diff.removeAll(expectedFirstCharacters);
private static Collection<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
String[] results = new String[UScript.CODE_LIMIT];
for (String current : TO_TRY) {
- if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
- // symbols.
+ if (ruleBasedCollator.compare(current, "a") < 0) { // we only want "real" script characters, not symbols.
continue;
}
int script = UScript.getScript(current.codePointAt(0));
if (extras.size() != 0) {
Normalizer2 normalizer = Normalizer2.getNFKCInstance();
for (String current : extras) {
- if (!TO_TRY.containsAll(current))
- continue;
- if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
+ if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "9") <= 0) {
continue;
}
- int script = UScript.getScript(current.codePointAt(0));
+ int script = getFirstRealScript(current);
+ if (script == UScript.UNKNOWN && !isUnassignedBoundary(current)) { continue; }
if (results[script] == null) {
results[script] = current;
} else if (ruleBasedCollator.compare(current, results[script]) < 0) {
} catch (Exception e) {
} // why have a checked exception???
- results[UScript.LATIN] = "A"; // See comment about en_US_POSIX in the implementation.
// TODO: We should not test that we get the same strings, but that we
// get strings that sort primary-equal to those from the implementation.
- // This whole test becomes obsolete when the root collator adds script-first-primary mappings
- // and the AlphabeticIndex implementation starts using them.
Collection<String> result = new ArrayList<String>();
for (int i = 0; i < results.length; ++i) {
result.add(results[i]);
}
}
- // AlphabeticIndex also has a boundary string for the ultimate overflow bucket,
- // for unassigned code points and trailing/special primary weights.
- result.add("\uFFFF");
return result;
}
+ private static final boolean isUnassignedBoundary(CharSequence s) {
+ // The root collator provides a script-first-primary boundary contraction
+ // for the unassigned-implicit range.
+ return s.charAt(0) == 0xfdd1 &&
+ UScript.getScript(Character.codePointAt(s, 1)) == UScript.UNKNOWN;
+ }
public void TestZZZ() {
// int x = 3;
assertEquals("getBucketIndex(i)", 9, bucketIndex);
bucketIndex = index.getBucketIndex("\u03B1");
assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
- // TODO: Test with an unassigned code point (not just U+FFFF)
- // when unassigned code points are not in the Hani reordering group any more.
- // String unassigned = UTF16.valueOf(0x50005);
+ // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group.
+ bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005));
+ assertEquals("getBucketIndex(U+50005)", 27, bucketIndex);
bucketIndex = index.getBucketIndex("\uFFFF");
assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
}
RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.CHINESE);
coll.setReorderCodes(UScript.HAN);
AlphabeticIndex index = new AlphabeticIndex(coll);
- assertEquals("getBucketCount()", 1, index.getBucketCount()); // ... (underflow only)
+ assertEquals("getBucketCount()", 28, index.getBucketCount()); // ... A-Z ...
index.addLabels(ULocale.CHINESE);
assertEquals("getBucketCount()", 28, index.getBucketCount()); // ... A-Z ...
int bucketIndex = index.getBucketIndex("\u897f");
assertEquals("getBucketIndex(i)", 9, bucketIndex);
bucketIndex = index.getBucketIndex("\u03B1");
assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
- // TODO: Test with an unassigned code point (not just U+FFFF)
- // when unassigned code points are not in the Hani reordering group any more.
- // String unassigned = UTF16.valueOf(0x50005);
+ // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group.
+ bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005));
+ assertEquals("getBucketIndex(U+50005)", 27, bucketIndex);
bucketIndex = index.getBucketIndex("\uFFFF");
assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
}
assertEquals("label 4", "ㄈ", immIndex.getBucket(4).getLabel());
assertEquals("label 5", "ㄉ", immIndex.getBucket(5).getLabel());
}
+
+ public void TestJapaneseKanji() {
+ AlphabeticIndex index = new AlphabeticIndex(ULocale.JAPANESE);
+ AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex();
+ // There are no index characters for Kanji in the Japanese standard collator.
+ // They should all go into the overflow bucket.
+ final int[] kanji = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 };
+ int overflowIndex = immIndex.getBucketCount() - 1;
+ for(int i = 0; i < kanji.length; ++i) {
+ String msg = String.format("kanji[%d]=U+%04X in overflow bucket", i, kanji[i]);
+ assertEquals(msg, overflowIndex, immIndex.getBucketIndex(UTF16.valueOf(kanji[i])));
+ }
+ }
+
+ public void TestFrozenCollator() {
+ // Ticket #9472
+ RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale("da"));
+ coll.setStrength(Collator.IDENTICAL);
+ coll.freeze();
+ // The AlphabeticIndex constructor used to throw an exception
+ // because it cloned the collator (which preserves frozenness)
+ // and set the clone's strength to PRIMARY.
+ AlphabeticIndex index = new AlphabeticIndex(coll);
+ assertEquals("same strength as input Collator",
+ Collator.IDENTICAL, index.getCollator().getStrength());
+ }
}
/*
*******************************************************************************
- * Copyright (C) 2002-2013, International Business Machines Corporation and
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
byte[] bytes = sortk1.toByteArray();
doAssert(bytes.length == 3 && bytes[0] == 1 && bytes[1] == 1
&& bytes[2] == 0,
- "Empty string should return an empty collation key");
+ "Empty string should return a collation key with empty levels");
+
+ // Most control codes and CGJ are completely ignorable.
+ // A string with only completely ignorables must compare equal to an empty string.
+ CollationKey sortkIgnorable = col.getCollationKey("\u0001\u034f");
+ doAssert(sortkIgnorable != null && sortkIgnorable.toByteArray().length == 3,
+ "Completely ignorable string should return a collation key with empty levels");
+ doAssert(sortkIgnorable.compareTo(sortk1) == 0,
+ "Completely ignorable string should compare equal to empty string");
+
// bogus key returned here
sortk1 = col.getCollationKey(null);
doAssert(sortk1 == null, "Error code should return bogus collation key");
// Collator col2 = (Collator)col1.clone();
// doAssert(col1.equals(col2), "Cloned object is not equal to the orginal");
- String ruleset = "< a, A < b, B < c, C < d, D, e, E";
+ String ruleset = "&9 < a, A < b, B < c, C < d, D, e, E";
RuleBasedCollator col3 = null;
try {
col3 = new RuleBasedCollator(ruleset);
} catch (Exception e) {
- errln("Failure creating RuleBasedCollator with rule:" + ruleset);
+ errln("Failure creating RuleBasedCollator with rule: \"" + ruleset + "\"\n" + e);
return;
}
doAssert(!col1.equals(col3), "Cloned object is equal to some dummy");
order1 = iterator1.next();
doAssert(!(iterator1.equals(iterator2)), "The first iterator advance failed");
order2 = iterator2.next();
-
- doAssert((iterator1.equals(iterator2)), "The second iterator advance failed");
+
+ // In ICU 52 and earlier we had iterator1.equals(iterator2)
+ // but in ICU 53 this fails because the iterators differ (String vs. CharacterIterator).
+ // doAssert((iterator1.equals(iterator2)), "The second iterator advance failed");
+ doAssert(iterator1.getOffset() == iterator2.getOffset(), "The second iterator advance failed");
doAssert((order1 == order2), "The order result should be the same");
order3 = iterator3.next();
doAssert(!(iterator1.equals(iterator2)), "The first iterator advance failed");
order2 = iterator2.next();
-
- doAssert((iterator1.equals(iterator2)), "The second iterator advance failed");
+
+ // In ICU 52 and earlier we had iterator1.equals(iterator2)
+ // but in ICU 53 this fails because the iterators differ (String vs. CharacterIterator).
+ // doAssert((iterator1.equals(iterator2)), "The second iterator advance failed");
+ doAssert(iterator1.getOffset() == iterator2.getOffset(), "The second iterator advance failed");
doAssert((order1 == order2), "The order result should be the same");
order3 = iterator3.next();
// rather than hardcoding (and updating each time) a particular UCA version.
VersionInfo ucdVersion = UCharacter.getUnicodeVersion();
VersionInfo ucaVersion = col.getUCAVersion();
- doAssert(logKnownIssue("9101", "update to collv2 & UCA 6.3") ?
- ucdVersion.getMajor() == 6 && ucdVersion.getMinor() == 3 :
- ucaVersion.equals(ucdVersion),
+ doAssert(ucaVersion.equals(ucdVersion),
"Expected UCA version "+ucdVersion.toString()+" got "+col.getUCAVersion().toString());
doAssert((col.compare("ab", "abc") < 0), "ab < abc comparison failed");
coll = new RuleBasedCollator(rules[i]);
set = coll.getTailoredSet();
logln("Got set: "+set.toPattern(true));
- if(set.size() != data[i].length) {
- errln("Tailored set size different ("+set.size()+") than expected ("+data[i].length+")");
+ if(set.size() < data[i].length) {
+ errln("Tailored set size smaller ("+set.size()+") than expected ("+data[i].length+")");
}
for(j = 0; j < data[i].length; j++) {
logln("Checking to see whether "+data[i][j]+" is in set");
{
class TestCollator extends Collator
{
+ @Override
public boolean equals(Object that) {
return this == that;
}
+ @Override
public int hashCode() {
return 0;
}
+ @Override
public int compare(String source, String target) {
return source.compareTo(target);
}
+ @Override
public CollationKey getCollationKey(String source)
{ return new CollationKey(source,
getRawCollationKey(source, new RawCollationKey()));
}
+ @Override
public RawCollationKey getRawCollationKey(String source,
RawCollationKey key)
{
return key;
}
+ @Override
public void setVariableTop(int ce)
{
if (isFrozen()) {
}
}
+ @Override
public int setVariableTop(String str)
{
if (isFrozen()) {
return 0;
}
+ @Override
public int getVariableTop()
{
return 0;
}
+ @Override
public VersionInfo getVersion()
{
return VersionInfo.getInstance(0);
}
+ @Override
public VersionInfo getUCAVersion()
{
return VersionInfo.getInstance(0);
errln("Error getting default tailored set");
}
}
-
- /**
- * Simple test the collator setter and getters
+
+ /**
+ * Simple test the collator setter and getters.
+ * Similar to C++ apicoll.cpp TestAttribute().
*/
public void TestSetGet()
{
errln("Setting case first handling default failed");
}
}
-
+
+ public void TestVariableTopSetting() {
+ RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance();
+
+ int oldVarTop = coll.getVariableTop();
+
+ // ICU 53+: The character must be in a supported reordering group,
+ // and the variable top is pinned to the end of that group.
+ try {
+ coll.setVariableTop("A");
+ errln("setVariableTop(letter) did not detect illegal argument");
+ } catch(IllegalArgumentException expected) {
+ }
+
+ // dollar sign (currency symbol)
+ int newVarTop = coll.setVariableTop("$");
+
+ if(newVarTop != coll.getVariableTop()) {
+ errln("setVariableTop(dollar sign) != following getVariableTop()");
+ }
+
+ String dollar = "$";
+ String euro = "\u20AC";
+ int newVarTop2 = coll.setVariableTop(euro);
+ assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
+ newVarTop2, coll.getVariableTop());
+ assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
+ newVarTop2, newVarTop);
+
+ coll.setAlternateHandlingShifted(true);
+ assertEquals("empty==dollar", 0, coll.compare("", dollar)); // UCOL_EQUAL
+ assertEquals("empty==euro", 0, coll.compare("", euro)); // UCOL_EQUAL
+ assertEquals("dollar<zero", -1, coll.compare(dollar, "0")); // UCOL_LESS
+
+ coll.setVariableTop(oldVarTop);
+
+ int newerVarTop = coll.setVariableTop("$");
+
+ if(newVarTop != newerVarTop) {
+ errln("Didn't set vartop properly from String!\n");
+ }
+ }
+
+ public void TestMaxVariable() {
+ RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(ULocale.ROOT);
+
+ try {
+ coll.setMaxVariable(Collator.ReorderCodes.OTHERS);
+ errln("setMaxVariable(others) did not detect illegal argument");
+ } catch(IllegalArgumentException expected) {
+ }
+
+ coll.setMaxVariable(Collator.ReorderCodes.CURRENCY);
+
+ if(Collator.ReorderCodes.CURRENCY != coll.getMaxVariable()) {
+ errln("setMaxVariable(currency) != following getMaxVariable()");
+ }
+
+ coll.setAlternateHandlingShifted(true);
+ assertEquals("empty==dollar", 0, coll.compare("", "$")); // UCOL_EQUAL
+ assertEquals("empty==euro", 0, coll.compare("", "\u20AC")); // UCOL_EQUAL
+ assertEquals("dollar<zero", -1, coll.compare("$", "0")); // UCOL_LESS
+ }
+
+ public void TestGetLocale() {
+ String rules = "&a<x<y<z";
+
+ Collator coll = Collator.getInstance(new ULocale("root"));
+ ULocale locale = coll.getLocale(ULocale.ACTUAL_LOCALE);
+ if(!locale.equals(ULocale.ROOT)) {
+ errln("Collator.getInstance(\"root\").getLocale(actual) != ULocale.ROOT; " +
+ "getLocale().getName() = \"" + locale.getName() + "\"");
+ }
+
+ coll = Collator.getInstance(new ULocale(""));
+ locale = coll.getLocale(ULocale.ACTUAL_LOCALE);
+ if(!locale.equals(ULocale.ROOT)) {
+ errln("Collator.getInstance(\"\").getLocale(actual) != ULocale.ROOT; " +
+ "getLocale().getName() = \"" + locale.getName() + "\"");
+ }
+
+ int i = 0;
+
+ String[][] testStruct = {
+ // requestedLocale, validLocale, actualLocale
+ // Note: ULocale.ROOT.getName() == "" not "root".
+ { "de_DE", "de_DE", "" },
+ { "sr_RS", "sr_Cyrl_RS", "sr" },
+ { "en_US_CALIFORNIA", "en_US", "" },
+ { "fr_FR_NONEXISTANT", "fr_FR", "" },
+ // pinyin is the default, therefore suppressed.
+ { "zh_CN", "zh_Hans_CN", "zh" },
+ // zh_Hant has default=stroke but the data is in zh.
+ { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
+ { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
+ { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" }
+ };
+
+ /* test opening collators for different locales */
+ for(i = 0; i<testStruct.length; i++) {
+ String requestedLocale = testStruct[i][0];
+ String validLocale = testStruct[i][1];
+ String actualLocale = testStruct[i][2];
+ try {
+ coll = Collator.getInstance(new ULocale(requestedLocale));
+ } catch(Exception e) {
+ errln(String.format("Failed to open collator for %s with %s", requestedLocale, e));
+ continue;
+ }
+ // Note: C++ getLocale() recognizes ULOC_REQUESTED_LOCALE
+ // which does not exist in Java.
+ locale = coll.getLocale(ULocale.VALID_LOCALE);
+ if(!locale.equals(new ULocale(validLocale))) {
+ errln(String.format("[Coll %s]: Error in valid locale, expected %s, got %s",
+ requestedLocale, validLocale, locale.getName()));
+ }
+ locale = coll.getLocale(ULocale.ACTUAL_LOCALE);
+ if(!locale.equals(new ULocale(actualLocale))) {
+ errln(String.format("[Coll %s]: Error in actual locale, expected %s, got %s",
+ requestedLocale, actualLocale, locale.getName()));
+ }
+ // If we open a collator for the actual locale, we should get an equivalent one again.
+ Collator coll2;
+ try {
+ coll2 = Collator.getInstance(locale);
+ } catch(Exception e) {
+ errln(String.format("Failed to open collator for actual locale \"%s\" with %s",
+ locale.getName(), e));
+ continue;
+ }
+ ULocale actual2 = coll2.getLocale(ULocale.ACTUAL_LOCALE);
+ if(!actual2.equals(locale)) {
+ errln(String.format("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
+ locale.getName(), actual2.getName()));
+ }
+ if(!coll2.equals(coll)) {
+ errln(String.format("[Coll actual \"%s\"]: Got different collator than before",
+ locale.getName()));
+ }
+ }
+
+ /* completely non-existent locale for collator should get a default collator */
+ {
+ Collator defaultColl = Collator.getInstance();
+ try {
+ coll = Collator.getInstance(new ULocale("blahaha"));
+ } catch(Exception e) {
+ errln("Failed to open collator with " + e);
+ return;
+ }
+ if(!coll.getLocale(ULocale.VALID_LOCALE).equals(
+ defaultColl.getLocale(ULocale.VALID_LOCALE))) {
+ errln("Valid locale for nonexisting locale locale collator differs " +
+ "from valid locale for default collator");
+ }
+ if(!coll.getLocale(ULocale.ACTUAL_LOCALE).equals(
+ defaultColl.getLocale(ULocale.ACTUAL_LOCALE))) {
+ errln("Actual locale for nonexisting locale locale collator differs " +
+ "from actual locale for default collator");
+ }
+ }
+
+ /* collator instantiated from rules should have all locales null */
+ try {
+ coll = new RuleBasedCollator(rules);
+ } catch (Exception e) {
+ errln("RuleBasedCollator(" + rules + ") failed: " + e);
+ return;
+ }
+ locale = coll.getLocale(ULocale.VALID_LOCALE);
+ if(locale != null) {
+ errln(String.format("For collator instantiated from rules, valid locale %s is not bogus",
+ locale.getName()));
+ }
+ locale = coll.getLocale(ULocale.ACTUAL_LOCALE);
+ if(locale != null) {
+ errln(String.format("For collator instantiated from rules, actual locale %s is not bogus",
+ locale.getName()));
+ }
+ }
+
public void TestBounds()
{
Collator coll = Collator.getInstance(new Locale("sh", ""));
}
return ok;
}
-
- public void TestGetContractions()throws Exception {
+
+ // capitst.c/TestGetContractionsAndUnsafes()
+ public void TestGetContractions() throws Exception {
/* static struct {
const char* locale;
const char* inConts;
"[jabv]"
},
{ "ja",
- "[{\u3053\u3099\u309D}{\u3053\u3099\u309D\u3099}{\u3053\u3099\u309E}{\u3053\u3099\u30FC}{\u3053\u309D}{\u3053\u309D\u3099}{\u3053\u309E}{\u3053\u30FC}{\u30B3\u3099\u30FC}{\u30B3\u3099\u30FD}{\u30B3\u3099\u30FD\u3099}{\u30B3\u3099\u30FE}{\u30B3\u30FC}{\u30B3\u30FD}{\u30B3\u30FD\u3099}{\u30B3\u30FE}]",
+ /*
+ * The "collv2" builder omits mappings if the collator maps their
+ * character sequences to the same CEs.
+ * For example, it omits Japanese contractions for NFD forms
+ * of the voiced iteration mark (U+309E = U+309D + U+3099), such as
+ * {\u3053\u3099\u309D\u3099}{\u3053\u309D\u3099}
+ * {\u30B3\u3099\u30FD\u3099}{\u30B3\u30FD\u3099}.
+ * It does add mappings for the precomposed forms.
+ */
+ "[{\u3053\u3099\u309D}{\u3053\u3099\u309E}{\u3053\u3099\u30FC}" +
+ "{\u3053\u309D}{\u3053\u309E}{\u3053\u30FC}" +
+ "{\u30B3\u3099\u30FC}{\u30B3\u3099\u30FD}{\u30B3\u3099\u30FE}" +
+ "{\u30B3\u30FC}{\u30B3\u30FD}{\u30B3\u30FE}]",
"[{\u30FD\u3099}{\u309D\u3099}{\u3053\u3099}{\u30B3\u3099}{lj}{nj}]",
"[\u30FE\u00e6]",
"[a]",
"[]"
}
};
-
-
-
-
+
RuleBasedCollator coll = null;
int i = 0;
UnicodeSet conts = new UnicodeSet();
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
StringBuilder msg = new StringBuilder();
msg.append("With ").append(localeName).append(" collator\n and input string: ").append(string1).append('\n');
msg.append(" failed to produce identical keys on both collators\n");
- msg.append(" localeCollator key: ").append(CollationMiscTest.prettify(k1)).append('\n');
- msg.append(" ruleCollator key: ").append(CollationMiscTest.prettify(k2)).append('\n');
+ msg.append(" localeCollator key: ").append(CollationTest.prettify(k1)).append('\n');
+ msg.append(" ruleCollator key: ").append(CollationTest.prettify(k2)).append('\n');
errln(msg.toString());
}
}
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target){
- if (result == -1) {
- target += "LESS";
- } else if (result == 0) {
- target += "EQUAL";
- } else if (result == 1) {
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
-}
\ No newline at end of file
+}
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
public class CollationDummyTest extends TestFmwk {
public static void main(String[] args) throws Exception {
new CollationDummyTest().run(args);
- // new CollationDummyTest().TestVariableTop();
}
//testSourceCases[][] and testTargetCases[][], testCases[][] are ported from the file callcoll.c in icu4c
errln("Failed : non-tailored supplementary characters should have the same value\n");
}
}
-
+
+ private static final boolean SUPPORT_VARIABLE_TOP_RELATION = false;
//TestVariableTop() is ported from cintltst/callcoll.c
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* a primary ce of 0.
*/
public void TestVariableTop() {
+ /*
+ * Starting with ICU 53, setting the variable top via a pseudo relation string
+ * is not supported any more.
+ * It was replaced by the [maxVariable symbol] setting.
+ * See ICU tickets #9958 and #8032.
+ */
+ if(!SUPPORT_VARIABLE_TOP_RELATION) { return; }
String rule = "&z = [variable top]";
Collator myColl;
Collator enColl;
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target){
- if (result == -1) //LESS
- {
- target += "LESS";
- }
- else if (result == 0) //EQUAL
- {
- target += "EQUAL";
- }
- else if (result == 1) //GREATER
- {
- target += "GREATER";
- }
- else
- {
- String huh = "?";
-
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
- }
\ No newline at end of file
+ }
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target) {
- if (result == -1) {
- target += "LESS";
- } else if (result == 0) {
- target += "EQUAL";
- } else if (result == 1) {
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
}
-
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target) {
- if (result == -1) {
- target += "LESS";
- } else if (result == 0) {
- target += "EQUAL";
- } else if (result == 1) {
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
}
/*
*******************************************************************************
- * Copyright (C) 2011, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2011-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String msg4 = "; expected ";
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + src + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + src + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + src + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + src + msg2 + target + msg3 + sResult);
} else {
}
}
}
-
- String appendCompareResult(int result, String target) {
- if (result == -1) { //LESS
- target += "LESS";
- } else if (result == 0) { //EQUAL
- target += "EQUAL";
- } else if (result == 1) { //GREATER
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
}
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target){
- if (result == -1) //LESS
- {
- target += "LESS";
- }
- else if (result == 0) //EQUAL
- {
- target += "EQUAL";
- }
- else if (result == 1) //GREATER
- {
- target += "GREATER";
- }
- else
- {
- String huh = "?";
-
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
-}
\ No newline at end of file
+}
/*
*******************************************************************************
- * Copyright (C) 2002-2011, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
}
/**
- * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
- * normalization on AND jamo tailoring, among other things.
- */
+ * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
+ * normalization on AND jamo tailoring, among other things.
+ *
+ * Note: This test is sensitive to changes of the root collator,
+ * for example whether the ae-ligature maps to three CEs (as in the DUCET)
+ * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
+ * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
+ * For example, the DUCET's artificial secondary CE in the ae-ligature
+ * may map to two 32-bit iterator CEs (as it did until ICU 52).
+ */
public void TestSearchCollatorElements()
{
String tsceText =
12, 13,14,15,
16, 17,18,19,
20, 21,22,23,
- 24, 25,26,26,26,
+ 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
26, 27,28,28,
28,
29
12, 13,14,15,
16, 17,18,19,20,
20, 21,22,22,23,23,23,24,
- 24, 25,26,26,26,
+ 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
26, 27,28,28,
28,
29
do {
offset = uce.getOffset();
element = uce.next();
+ logln(String.format("(%s) offset=%2d ce=%08x\n", tsceItem.localeString, offset, element));
if (element == 0) {
errln("Error: in locale " + localeString + ", CEIterator next() returned element 0");
}
if ( ioff < noff ) {
errln("Error: in locale " + localeString + ", CEIterator next() returned fewer elements than expected");
}
-
- /*
- // Skip the backwards test until ticket #8382 is fixed
+
+ // backwards test
uce.setOffset(tsceText.length());
ioff = noff;
do {
if ( ioff > 0 ) {
errln("Error: in locale " + localeString + ", CEIterator previous() returned fewer elements than expected");
}
- */
}
}
}
/*
*******************************************************************************
- * Copyright (C) 2002-2012, International Business Machines Corporation and
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target){
- if (result == -1) {
- target += "LESS";
- } else if (result == 0) {
- target += "EQUAL";
- } else if (result == 1) {
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
-}
\ No newline at end of file
+}
/*
*******************************************************************************
- * Copyright (C) 2002-2012, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.ImplicitCEGenerator;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
// have a code point associated to it anymore
// "&[before 3][last primary ignorable]<<<k",
// - all befores here amount to zero
+ /* "you cannot go before ...": The parser now sets an error for such nonsensical rules.
"&[before 3][first tertiary ignorable]<<<a",
- "&[before 3][last tertiary ignorable]<<<a",
+ "&[before 3][last tertiary ignorable]<<<a", */
+ /*
+ * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
+ * and it *is* possible to "go before" that.
+ */
"&[before 3][first secondary ignorable]<<<a",
"&[before 3][last secondary ignorable]<<<a",
// 'normal' befores
- "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
+ /*
+ * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
+ * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
+ * because there is no tailoring space before that boundary.
+ * Made the tests work by tailoring to a space instead.
+ */
+ "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
// we don't have a code point that corresponds to the last primary
// ignorable
- "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
+ "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
"&[before 3][first variable]<<<c<<<b &[first variable]<a",
"&[last variable]<a &[before 3][last variable]<<<c<<<b ",
"&[first regular]<a &[before 1][first regular]<b",
"&[before 1][last regular]<b &[last regular]<a",
"&[before 1][first implicit]<b &[first implicit]<a",
- "&[before 1][last implicit]<b &[last implicit]<a",
- "&[last variable]<z&[last primary ignorable]<x&[last secondary ignorable]<<y&[last tertiary ignorable]<<<w&[top]<u",
+ /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity).
+ "&[before 1][last implicit]<b &[last implicit]<a", */
+ "&[last variable]<z" +
+ "&' '<x" + /* was &[last primary ignorable]<x, see above */
+ "&[last secondary ignorable]<<y&[last tertiary ignorable]<<<w&[top]<u",
};
String[][] data = {
// {"k", "\u20e3"},
+ /* "you cannot go before ...": The parser now sets an error for such nonsensical rules.
{"\\u0000", "a"}, // you cannot go before first tertiary ignorable
- {"\\u0000", "a"}, // you cannot go before last tertiary ignorable
- {"\\u0000", "a"}, // you cannot go before first secondary ignorable
- {"\\u0000", "a"}, // you cannot go before first secondary ignorable
+ {"\\u0000", "a"}, // you cannot go before last tertiary ignorable */
+ /*
+ * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
+ * and it *is* possible to "go before" that.
+ */
+ {"\\u0000", "a"},
+ {"\\u0000", "a"},
+ /*
+ * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
+ * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
+ * because there is no tailoring space before that boundary.
+ * Made the tests work by tailoring to a space instead.
+ */
{"c", "b", "\\u0332", "a"},
{"\\u0332", "\\u20e3", "c", "b", "a"},
{"c", "b", "\\u0009", "a", "\\u000a"},
// [last regular [CE 27, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
{LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00"},
{"b", "\\u4e00", "a", "\\u4e01"},
- {"b", "\\U0010FFFD", "a"},
+ /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity).
+ {"b", "\\U0010FFFD", "a"}, */
{"\ufffb", "w", "y", "\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"},
};
// logln("Rules starter for " + rules);
genericOrderingTestWithResult(coll, s, result);
} catch (Exception e) {
- warnln("Unable to open collator with rules " + rules);
+ warnln("Unable to open collator with rules " + rules + ": " + e);
}
}
String msg4 = "; expected ";
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
// logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
// logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
// logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
}
}
- String appendCompareResult(int result, String target) {
- if (result == -1) { //LESS
- target += "LESS";
- } else if (result == 0) { //EQUAL
- target += "EQUAL";
- } else if (result == 1) { //GREATER
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- static String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- StringBuilder target = new StringBuilder("[");
-
- for (i = 0; i < bytes.length; i++) {
- String numStr = Integer.toHexString(bytes[i] & 0xff);
- if (numStr.length() < 2) {
- target.append('0');
- }
- target.append(numStr).append(' ');
- }
- target.append(']');
- return target.toString();
- }
-
public void TestBeforePrefixFailure() {
String[] rules = {
"&g <<< a&[before 3]\uff41 <<< x",
}
public void TestContractionClosure() {
+ // Note: This was also ported to the data-driven test, see collationtest.txt.
String[] rules = {
"&b=\u00e4\u00e4",
"&b=\u00C5",
};
// logln("mixed case test");
// logln("lower first, case level off");
- genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst);
+ genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst);
// logln("upper first, case level off");
- genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst);
+ genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst);
// logln("lower first, case level on");
- genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst);
+ genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst);
// logln("upper first, case level on");
- genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst);
+ genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst);
}
}
}
public void TestImplicitTailoring() {
- String rules[] = { "&[before 1]\u4e00 < b < c &[before 1]\u4e00 < d < e",
- "&\u4e00 < a <<< A < b <<< B",
- "&[before 1]\u4e00 < \u4e01 < \u4e02",
- "&[before 1]\u4e01 < \u4e02 < \u4e03",
+ String rules[] = {
+ /* Tailor b and c before U+4E00. */
+ "&[before 1]\u4e00 < b < c " +
+ /* Now, before U+4E00 is c; put d and e after that. */
+ "&[before 1]\u4e00 < d < e",
+ "&\u4e00 < a <<< A < b <<< B",
+ "&[before 1]\u4e00 < \u4e01 < \u4e02",
+ "&[before 1]\u4e01 < \u4e02 < \u4e03",
};
String cases[][] = {
- { "d", "e", "b", "c", "\u4e00"},
- { "\u4e00", "a", "A", "b", "B", "\u4e01"},
- { "\u4e01", "\u4e02", "\u4e00"},
- { "\u4e02", "\u4e03", "\u4e01"},
+ { "b", "c", "d", "e", "\u4e00" },
+ { "\u4e00", "a", "A", "b", "B", "\u4e01" },
+ { "\u4e01", "\u4e02", "\u4e00" },
+ { "\u4e02", "\u4e03", "\u4e01" },
};
int i = 0;
for(i = 0; i < rules.length; i++) {
genericRulesStarter(rules[i], cases[i]);
}
-
}
public void TestFCDProblem() {
public void TestJ3087()
{
- String rule[] = {"&h<H&CH=\u0427",
- "&CH=\u0427&h<H",
- "&CH=\u0427"};
+ String rule[] = {
+ "&h<H&CH=\u0427",
+ /*
+ * The ICU 53 builder adheres to the principle that
+ * a rule is affected by previous rules but not following ones.
+ * Therefore, setting CH=\u0427 and then re-tailoring H makes CH != \u0427.
+ "&CH=\u0427&h<H", */
+ "&CH=\u0427"
+ };
RuleBasedCollator rbc = null;
CollationElementIterator iter1;
CollationElementIterator iter2;
rbc = new RuleBasedCollator(rule[i]);
} catch (Exception e) {
warnln(e.getMessage());
- return;
+ continue;
}
iter1 = rbc.getCollationElementIterator("CH");
iter2 = rbc.getCollationElementIterator("\u0427");
int ce1 = CollationElementIterator.IGNORABLE;
int ce2 = CollationElementIterator.IGNORABLE;
+ // The ICU 53 builder code sets the uppercase flag only on the first CE.
+ int mask = ~0;
while (ce1 != CollationElementIterator.NULLORDER
&& ce2 != CollationElementIterator.NULLORDER) {
ce1 = iter1.next();
ce2 = iter2.next();
- if (ce1 != ce2) {
+ if ((ce1 & mask) != (ce2 & mask)) {
errln("Error generating RuleBasedCollator with the rule "
+ rule[i]);
errln("CH != \\u0427");
}
+ mask = ~0xc0; // mask off case/continuation bits
}
}
}
+ "&u<\u01d6<\u01d8<\u01da<\u01dc<\u00fc", data);
}
- public void TestRedundantRules() {
- String[] rules = {
- //"& a <<< b <<< c << d <<< e& [before 1] e <<< x",
- "& b <<< c <<< d << e <<< f& [before 3] f <<< x",
- "& a < b <<< c << d <<< e& [before 1] e <<< x",
- "& a < b < c < d& [before 1] c < m",
- "& a < b <<< c << d <<< e& [before 3] e <<< x",
- "& a < b <<< c << d <<< e& [before 2] e <<< x",
- "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
- "& a <<< b << c < d& a < m",
- "&a<b<<b\u0301 &z<b",
- "&z<m<<<q<<<m",
- "&z<<<m<q<<<m",
- "& a < b < c < d& r < c",
- "& a < b < c < d& r < c",
- "& a < b < c < d& c < m",
- "& a < b < c < d& a < m"
- };
-
- String[] expectedRules = {
- //"&\u2089<<<x",
- "&\u0252<<<x",
- "& a <<< x < b <<< c << d <<< e",
- "& a < b < m < c < d",
- "& a < b <<< c << d <<< x <<< e",
- "& a < b <<< c <<< x << d <<< e",
- "& a < b <<< c << d <<< e <<< f < x < g",
- "& a <<< b << c < m < d",
- "&a<b\u0301 &z<b",
- "&z<q<<<m",
- "&z<q<<<m",
- "& a < b < d& r < c",
- "& a < b < d& r < c",
- "& a < b < c < m < d",
- "& a < m < b < c < d"
- };
-
- String[][] testdata = {
- // {"\u2089", "x"},
- {"\u0252", "x"},
- {"a", "x", "b", "c", "d", "e"},
- {"a", "b", "m", "c", "d"},
- {"a", "b", "c", "d", "x", "e"},
- {"a", "b", "c", "x", "d", "e"},
- {"a", "b", "c", "d", "e", "f", "x", "g"},
- {"a", "b", "c", "m", "d"},
- {"a", "b\u0301", "z", "b"},
- {"z", "q", "m"},
- {"z", "q", "m"},
- {"a", "b", "d"},
- {"r", "c"},
- {"a", "b", "c", "m", "d"},
- {"a", "m", "b", "c", "d"}
- };
-
- String rlz = "";
- for(int i = 0; i<rules.length; i++) {
- logln("testing rule " + rules[i] + ", expected to be" + expectedRules[i]);
- try {
- rlz = rules[i];
- Collator credundant = new RuleBasedCollator(rlz);
- rlz = expectedRules[i];
- Collator cresulting = new RuleBasedCollator(rlz);
- logln(" credundant Rule:" + ((RuleBasedCollator)credundant).getRules());
- logln(" cresulting Rule:" + ((RuleBasedCollator)cresulting).getRules());
- } catch (Exception e) {
- warnln("Cannot create RuleBasedCollator");
- }
- //testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
- // logln("testing using data\n");
- genericRulesStarter(rules[i], testdata[i]);
- }
- }
-
- public void TestExpansionSyntax() {
- String[] rules = {
- "&AE <<< a << b <<< c &d <<< f",
- "&AE <<< a <<< b << c << d < e < f <<< g",
- "&AE <<< B <<< C / D <<< F"
- };
-
- String[] expectedRules = {
- "&A <<< a / E << b / E <<< c /E &d <<< f",
- "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
- "&A <<< B / E <<< C / ED <<< F / E"
- };
-
- String[][] testdata = {
- {"AE", "a", "b", "c"},
- {"AE", "a", "b", "c", "d", "e", "f", "g"},
- {"AE", "B", "C"} // / ED <<< F / E"},
- };
-
- for(int i = 0; i<rules.length; i++) {
- // logln("testing rule " + rules[i] + ", expected to be " + expectedRules[i]);
- try {
- String rlz = rules[i];
- Collator credundant = new RuleBasedCollator(rlz);
- rlz = expectedRules[i];
- Collator cresulting = new RuleBasedCollator(rlz);
- logln(" credundant Rule:" + ((RuleBasedCollator)credundant).getRules());
- logln(" cresulting Rule:" + ((RuleBasedCollator)cresulting).getRules());
- } catch (Exception e) {
- warnln(e.getMessage());
- }
- // testAgainstUCA still doesn't handle expansions correctly, so this is not run
- // as a hard error test, but only in information mode
- //testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
-
- // logln("testing using data");
- genericRulesStarter(rules[i], testdata[i]);
- }
- }
-
public void TestHangulTailoring() {
String[] koreanData = {
"\uac00", "\u4f3d", "\u4f73", "\u5047", "\u50f9", "\u52a0", "\u53ef", "\u5475",
}
// logln("Using start of korean rules\n");
genericOrderingTest(coll, koreanData);
- // logln("Setting jamoSpecial to TRUE and testing once more\n");
-
- // can't set jamo in icu4j
- // ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; // don't try this at home
- // genericOrderingTest(coll, koreanData);
// no such locale in icu4j
// logln("Using ko__LOTUS locale\n");
"\u0063\u0068",
"\u0063\u006C",
};
+ /*
+ * These pairs of rule strings are not guaranteed to yield the very same mappings.
+ * In fact, LDML 24 recommends an improved way of creating mappings
+ * which always yields different mappings for such pairs. See
+ * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
String[] testrules3 = {
"&z < xyz &xyzw << B",
"&z < xyz &xyz << B / w",
"&\ud800\udc00 << B / w",
"&a\ud800\udc00m << B",
"&a << B / \ud800\udc00m",
- };
+ }; */
RuleBasedCollator coll = null;
for (int i = 0; i < testrules.length; i ++) {
errln("Expected " + testdata2[1] + " < " + testdata2[2]);
return;
}
- for (int i = 0; i < testrules3.length; i += 2) {
+ /* see above -- for (int i = 0; i < testrules3.length; i += 2) {
RuleBasedCollator coll1, coll2;
CollationElementIterator iter1, iter2;
char ch = 0x0042;
errln("CEs not exhausted\n");
return;
}
- }
+ } */
}
public void TestExpansion() {
String[] testrules = {
+ /*
+ * This seems to have tested that M was not mapped to an expansion.
+ * I believe the old builder just did that because it computed the extension CEs
+ * at the very end, which was a bug.
+ * Among other problems, it violated the core tailoring principle
+ * by making an earlier rule depend on a later one.
+ * And, of course, if M did not get an expansion, then it was primary different from K,
+ * unlike what the rule &K<<M says.
"&J << K / B & K << M",
+ */
"&J << K / B << M"
};
String[] testdata = {
>= 0) {
errln("Error while comparing prefixed keys @ strength "
+ strength);
- errln(prettify(mergedPrefixKeys[i-1]));
- errln(prettify(mergedPrefixKeys[i]));
+ errln(CollationTest.prettify(mergedPrefixKeys[i-1]));
+ errln(CollationTest.prettify(mergedPrefixKeys[i]));
}
if (mergedSuffixKeys[i-1].compareTo(mergedSuffixKeys[i])
>= 0) {
errln("Error while comparing suffixed keys @ strength "
+ strength);
- errln(prettify(mergedSuffixKeys[i-1]));
- errln(prettify(mergedSuffixKeys[i]));
+ errln(CollationTest.prettify(mergedSuffixKeys[i-1]));
+ errln(CollationTest.prettify(mergedSuffixKeys[i]));
}
}
}
public void TestVariableTop()
{
+ // ICU 53+: The character must be in a supported reordering group,
+ // and the variable top is pinned to the end of that group.
// parseNextToken is not released as public so i create my own rules
- String rules = "& a < b < c < de < fg & hi = j";
+ String rules = "& ' ' < b < c < de < fg & hi = j";
try {
RuleBasedCollator coll = new RuleBasedCollator(rules);
- String tokens[] = {"a", "b", "c", "de", "fg", "hi", "j", "ab"};
+ String tokens[] = {" ", "b", "c", "de", "fg", "hi", "j", "ab"};
coll.setAlternateHandlingShifted(true);
for (int i = 0; i < tokens.length; i ++) {
int varTopOriginal = coll.getVariableTop();
}
}
+ // ported from cmsccoll.c
+ public void TestVariableTopSetting() {
+ int varTopOriginal = 0, varTop1, varTop2;
+ Collator coll = Collator.getInstance(ULocale.ROOT);
+
+ String empty = "";
+ String space = " ";
+ String dot = "."; /* punctuation */
+ String degree = "\u00b0"; /* symbol */
+ String dollar = "$"; /* currency symbol */
+ String zero = "0"; /* digit */
+
+ varTopOriginal = coll.getVariableTop();
+ logln(String.format("coll.getVariableTop(root) -> %08x", varTopOriginal));
+ ((RuleBasedCollator)coll).setAlternateHandlingShifted(true);
+
+ varTop1 = coll.setVariableTop(space);
+ varTop2 = coll.getVariableTop();
+ logln(String.format("coll.setVariableTop(space) -> %08x", varTop1));
+ if(varTop1 != varTop2 ||
+ !coll.equals(empty, space) ||
+ coll.equals(empty, dot) ||
+ coll.equals(empty, degree) ||
+ coll.equals(empty, dollar) ||
+ coll.equals(empty, zero) ||
+ coll.compare(space, dot) >= 0) {
+ errln("coll.setVariableTop(space) did not work");
+ }
+
+ varTop1 = coll.setVariableTop(dot);
+ varTop2 = coll.getVariableTop();
+ logln(String.format("coll.setVariableTop(dot) -> %08x", varTop1));
+ if(varTop1 != varTop2 ||
+ !coll.equals(empty, space) ||
+ !coll.equals(empty, dot) ||
+ coll.equals(empty, degree) ||
+ coll.equals(empty, dollar) ||
+ coll.equals(empty, zero) ||
+ coll.compare(dot, degree) >= 0) {
+ errln("coll.setVariableTop(dot) did not work");
+ }
+
+ varTop1 = coll.setVariableTop(degree);
+ varTop2 = coll.getVariableTop();
+ logln(String.format("coll.setVariableTop(degree) -> %08x", varTop1));
+ if(varTop1 != varTop2 ||
+ !coll.equals(empty, space) ||
+ !coll.equals(empty, dot) ||
+ !coll.equals(empty, degree) ||
+ coll.equals(empty, dollar) ||
+ coll.equals(empty, zero) ||
+ coll.compare(degree, dollar) >= 0) {
+ errln("coll.setVariableTop(degree) did not work");
+ }
+
+ varTop1 = coll.setVariableTop(dollar);
+ varTop2 = coll.getVariableTop();
+ logln(String.format("coll.setVariableTop(dollar) -> %08x", varTop1));
+ if(varTop1 != varTop2 ||
+ !coll.equals(empty, space) ||
+ !coll.equals(empty, dot) ||
+ !coll.equals(empty, degree) ||
+ !coll.equals(empty, dollar) ||
+ coll.equals(empty, zero) ||
+ coll.compare(dollar, zero) >= 0) {
+ errln("coll.setVariableTop(dollar) did not work");
+ }
+
+ logln("Testing setting variable top to contractions");
+ try {
+ coll.setVariableTop("@P");
+ errln("Invalid contraction succeeded in setting variable top!");
+ } catch(Exception expected) {
+ }
+
+ logln("Test restoring variable top");
+ coll.setVariableTop(varTopOriginal);
+ if(varTopOriginal != coll.getVariableTop()) {
+ errln("Couldn't restore old variable top");
+ }
+ }
+
+ // ported from cmsccoll.c
+ public void TestMaxVariable() {
+ int oldMax, max;
+
+ String empty = "";
+ String space = " ";
+ String dot = "."; /* punctuation */
+ String degree = "\u00b0"; /* symbol */
+ String dollar = "$"; /* currency symbol */
+ String zero = "0"; /* digit */
+
+ Collator coll = Collator.getInstance(ULocale.ROOT);
+
+ oldMax = coll.getMaxVariable();
+ logln(String.format("coll.getMaxVariable(root) -> %04x", oldMax));
+ ((RuleBasedCollator)coll).setAlternateHandlingShifted(true);
+
+ coll.setMaxVariable(Collator.ReorderCodes.SPACE);
+ max = coll.getMaxVariable();
+ logln(String.format("coll.setMaxVariable(space) -> %04x", max));
+ if(max != Collator.ReorderCodes.SPACE ||
+ !coll.equals(empty, space) ||
+ coll.equals(empty, dot) ||
+ coll.equals(empty, degree) ||
+ coll.equals(empty, dollar) ||
+ coll.equals(empty, zero) ||
+ coll.compare(space, dot) >= 0) {
+ errln("coll.setMaxVariable(space) did not work");
+ }
+
+ coll.setMaxVariable(Collator.ReorderCodes.PUNCTUATION);
+ max = coll.getMaxVariable();
+ logln(String.format("coll.setMaxVariable(punctuation) -> %04x", max));
+ if(max != Collator.ReorderCodes.PUNCTUATION ||
+ !coll.equals(empty, space) ||
+ !coll.equals(empty, dot) ||
+ coll.equals(empty, degree) ||
+ coll.equals(empty, dollar) ||
+ coll.equals(empty, zero) ||
+ coll.compare(dot, degree) >= 0) {
+ errln("coll.setMaxVariable(punctuation) did not work");
+ }
+
+ coll.setMaxVariable(Collator.ReorderCodes.SYMBOL);
+ max = coll.getMaxVariable();
+ logln(String.format("coll.setMaxVariable(symbol) -> %04x", max));
+ if(max != Collator.ReorderCodes.SYMBOL ||
+ !coll.equals(empty, space) ||
+ !coll.equals(empty, dot) ||
+ !coll.equals(empty, degree) ||
+ coll.equals(empty, dollar) ||
+ coll.equals(empty, zero) ||
+ coll.compare(degree, dollar) >= 0) {
+ errln("coll.setMaxVariable(symbol) did not work");
+ }
+
+ coll.setMaxVariable(Collator.ReorderCodes.CURRENCY);
+ max = coll.getMaxVariable();
+ logln(String.format("coll.setMaxVariable(currency) -> %04x", max));
+ if(max != Collator.ReorderCodes.CURRENCY ||
+ !coll.equals(empty, space) ||
+ !coll.equals(empty, dot) ||
+ !coll.equals(empty, degree) ||
+ !coll.equals(empty, dollar) ||
+ coll.equals(empty, zero) ||
+ coll.compare(dollar, zero) >= 0) {
+ errln("coll.setMaxVariable(currency) did not work");
+ }
+
+ logln("Test restoring maxVariable");
+ coll.setMaxVariable(oldMax);
+ if(oldMax != coll.getMaxVariable()) {
+ errln("Couldn't restore old maxVariable");
+ }
+ }
+
public void TestUCARules()
{
try {
genericLocaleStarter(new Locale("zh", "", "PINYIN"), test);
}
- static final long topByte = 0xFF000000L;
- static final long bottomByte = 0xFFL;
- static final long fourBytes = 0xFFFFFFFFL;
-
- static final int MAX_INPUT = 0x220001; // 2 * Unicode range + 2
-
- private void show(int i, ImplicitCEGenerator imp) {
- if (i >= 0 && i <= MAX_INPUT) {
- logln(Utility.hex(i) + "\t" + Utility.hex(imp.getImplicitFromRaw(i) & fourBytes));
- }
- }
-
- private void throwError(String title, int cp, ImplicitCEGenerator imp) {
- throw new IllegalArgumentException(title + "\t" + Utility.hex(cp, 6) + "\t" + Utility.hex(imp.getImplicitFromRaw(cp) & fourBytes));
- }
-
- private void throwError(String title, long ce) {
- errln(title + "\t" + Utility.hex(ce & fourBytes));
- }
-
- public void TestImplicitGeneration()
- {
- logln("Start");
- try {
- ImplicitCEGenerator foo = new ImplicitCEGenerator(0xE0, 0xE4);
-
- //int x = foo.getRawImplicit(0xF810);
- foo.getRawFromImplicit(0xE20303E7);
-
- //int gap4 = foo.getGap4();
- //logln("Gap4: " + gap4);
- //int gap3 = foo.getGap3();
- //int minTrail = foo.getMinTrail();
- //int maxTrail = foo.getMaxTrail();
- long last = 0;
- long current;
- for (int i = 0; i <= MAX_INPUT; ++i) {
- current = foo.getImplicitFromRaw(i) & fourBytes;
-
- // check that it round-trips AND that all intervening ones are illegal
- int roundtrip = foo.getRawFromImplicit((int)current);
- if (roundtrip != i) {
- throwError("No roundtrip", i, foo);
- }
- if (last != 0) {
- for (long j = last + 1; j < current; ++j) {
- roundtrip = foo.getRawFromImplicit((int)j);
- // raise an error if it *doesn't* find an error
- if (roundtrip != -1) {
- throwError("Fails to recognize illegal", j);
- }
- }
- }
- // now do other consistency checks
- long lastBottom = last & bottomByte;
- long currentBottom = current & bottomByte;
- long lastTop = last & topByte;
- long currentTop = current & topByte;
-
- // do some consistency checks
- /*
- long gap = current - last;
- if (currentBottom != 0) { // if we are a 4-byte
- // gap has to be at least gap4
- // and gap from minTrail, maxTrail has to be at least gap4
- if (gap <= gap4) foo.throwError("Failed gap4 between", i);
- if (currentBottom < minTrail + gap4) foo.throwError("Failed gap4 before", i);
- if (currentBottom > maxTrail - gap4) foo.throwError("Failed gap4 after", i);
- } else { // we are a three-byte
- gap = gap >> 8; // move gap down for comparison.
- long current3Bottom = (current >> 8) & bottomByte;
- if (gap <= gap3) foo.throwError("Failed gap3 between ", i);
- if (current3Bottom < minTrail + gap3) foo.throwError("Failed gap3 before", i);
- if (current3Bottom > maxTrail - gap3) foo.throwError("Failed gap3 after", i);
- }
- */
- // print out some values for spot-checking
- if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
- show(i-3, foo);
- show(i-2, foo);
- show(i-1, foo);
- if (i == 0) {
- // do nothing
- } else if (lastBottom == 0 && currentBottom != 0) {
- logln("+ primary boundary, 4-byte CE's below");
- } else if (lastTop != currentTop) {
- logln("+ primary boundary");
- }
- show(i, foo);
- show(i+1, foo);
- show(i+2, foo);
- logln("...");
- }
- last = current;
- if(foo.getCodePointFromRaw(foo.getRawFromCodePoint(i)) != i) {
- errln("No raw <-> code point roundtrip for "+Utility.hex(i));
- }
- }
- show(MAX_INPUT-2, foo);
- show(MAX_INPUT-1, foo);
- show(MAX_INPUT, foo);
- } catch (Exception e) {
- e.printStackTrace();
- warnln(e.getMessage());
- } finally {
- logln("End");
- }
- }
-
/* supercedes TestJ784 */
public void TestBeforePinyin() {
String rules =
"&a < \u00e2 <<< \u00c2",
"&a < \u1FF3 ", // OMEGA WITH YPOGEGRAMMENI
"&s < \u0161 ", // &s < s with caron
- "&z < a\u00EA", // &z < a+e with circumflex
+ /*
+ * Note: Just tailoring &z<ae^ does not work as expected:
+ * The UCA spec requires for discontiguous contractions that they
+ * extend an *existing match* by one combining mark at a time.
+ * Therefore, ae must be a contraction so that the builder finds
+ * discontiguous contractions for ae^, for example with an intervening underdot.
+ * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
+ */
+ "&x < ae &z < a\u00EA", // &x < ae &z < a+e with circumflex
};
String cases[][] = {
{ "\u1EAC", "A\u0323\u0302", "\u1EA0\u0302", "\u00C2\u0323", },
CollationKey key = coll.getCollationKey(cases[i][j]);
if ( key.compareTo(expectingKey)!=0) {
errln("Error! Test case["+i+"]:"+"source:" + key.getSourceString());
- errln("expecting:"+prettify(expectingKey)+ "got:"+ prettify(key));
+ errln("expecting:"+CollationTest.prettify(expectingKey)+ "got:"+ CollationTest.prettify(key));
}
- logln(" Key:"+ prettify(key));
+ logln(" Key:"+ CollationTest.prettify(key));
}
}
// errln("source:" + key.getSourceString());
// errln("expecting:"+prettify(expectingKey)+ "got:"+ prettify(key));
logln("Error!! in Vietnese sort - source:" + key.getSourceString());
- logln("expecting:"+prettify(expectingKey)+ "got:"+ prettify(key));
+ logln("expecting:"+CollationTest.prettify(expectingKey)+ "got:"+ CollationTest.prettify(key));
}
// logln("source:" + key.getSourceString());
- logln(" Key:"+ prettify(key));
+ logln(" Key:"+ CollationTest.prettify(key));
}
} catch (Exception e) {
warnln("Error creating Vietnese collator");
try {
coll = new RuleBasedCollator(rules[i]);
} catch (Exception e) {
- warnln("Unable to open collator with rules " + rules[i]);
+ warnln("Unable to open collator with rules " + rules[i] + ": " + e);
+ return;
}
logln("Test rule["+i+"]"+rules[i]);
CollationKey keyA = coll.getCollationKey("a");
- logln("Key for \"a\":"+ prettify(keyA));
+ logln("Key for \"a\":"+ CollationTest.prettify(keyA));
if (keyA.compareTo(coll.getCollationKey(lastPrimIgn))<=0) {
CollationKey key = coll.getCollationKey(lastPrimIgn);
- logln("Collation key for 0xD800 0xDDFD: "+prettify(key));
+ logln("Collation key for 0xD800 0xDDFD: "+CollationTest.prettify(key));
errln("Error! String \"a\" must be greater than \uD800\uDDFD -"+
"[Last Primary Ignorable]");
}
if (keyA.compareTo(coll.getCollationKey(firstVariable))>=0) {
CollationKey key = coll.getCollationKey(firstVariable);
- logln("Collation key for 0x0009: "+prettify(key));
+ logln("Collation key for 0x0009: "+CollationTest.prettify(key));
errln("Error! String \"a\" must be less than 0x0009 - [First Variable]");
}
CollationKey keyB = coll.getCollationKey("b");
- logln("Key for \"b\":"+ prettify(keyB));
+ logln("Key for \"b\":"+ CollationTest.prettify(keyB));
if (keyB.compareTo(coll.getCollationKey(firstPrimIgn))<=0) {
CollationKey key = coll.getCollationKey(firstPrimIgn);
- logln("Collation key for 0x0332: "+prettify(key));
+ logln("Collation key for 0x0332: "+CollationTest.prettify(key));
errln("Error! String \"b\" must be greater than 0x0332 -"+
"[First Primary Ignorable]");
}
if (keyB.compareTo(coll.getCollationKey(firstVariable))>=0) {
CollationKey key = coll.getCollationKey(firstVariable);
- logln("Collation key for 0x0009: "+prettify(key));
+ logln("Collation key for 0x0009: "+CollationTest.prettify(key));
errln("Error! String \"b\" must be less than 0x0009 - [First Variable]");
}
}
logln("Test rule["+i+"]"+rules[i]);
CollationKey keyA = coll.getCollationKey("a");
- logln("Key for \"a\":"+ prettify(keyA));
+ logln("Key for \"a\":"+ CollationTest.prettify(keyA));
byte[] keyAInBytes = keyA.toByteArray();
for (int j=0; j<keyAInBytes.length && j<secIgnKey.length; j++) {
if (keyAInBytes[j]!=secIgnKey[j]) {
if (keyA.compareTo(coll.getCollationKey(firstVariable))>=0) {
errln("Error! String \"a\" must be less than 0x0009 - [First Variable]");
CollationKey key = coll.getCollationKey(firstVariable);
- logln("Collation key for 0x0009: "+prettify(key));
+ logln("Collation key for 0x0009: "+CollationTest.prettify(key));
}
CollationKey keyB = coll.getCollationKey("b");
- logln("Key for \"b\":"+ prettify(keyB));
+ logln("Key for \"b\":"+ CollationTest.prettify(keyB));
byte[] keyBInBytes = keyB.toByteArray();
for (int j=0; j<keyBInBytes.length && j<secIgnKey.length; j++) {
if (keyBInBytes[j]!=secIgnKey[j]) {
}
if (keyB.compareTo(coll.getCollationKey(firstVariable))>=0) {
CollationKey key = coll.getCollationKey(firstVariable);
- logln("Collation key for 0x0009: "+prettify(key));
+ logln("Collation key for 0x0009: "+CollationTest.prettify(key));
errln("Error! String \"b\" must be less than 0x0009 - [First Variable]");
}
}
errln("expecting:"+prettify(expectingKey)+ "got:"+ prettify(key));
}
*/
- logln("String:"+cases[j]+" Key:"+ prettify(key));
+ logln("String:"+cases[j]+" Key:"+ CollationTest.prettify(key));
}
} catch (Exception e) {
warnln("Error creating English collator");
" is not >= previous test string.");
}
}
- logln("String:"+cases[j]+" Key:"+ prettify(key));
+ logln("String:"+cases[j]+" Key:"+ CollationTest.prettify(key));
}
} catch (Exception e) {
warnln("Error creating Japanese collator");
}
}
}
- logln("String:"+cases[j]+" Key:"+ prettify(key));
+ logln("String:"+cases[j]+" Key:"+ CollationTest.prettify(key));
}
}
}
try {
myCollation = new RuleBasedCollator(rule);
} catch (Exception e) {
- warnln("ERROR: in creation of rule based collator");
+ warnln("ERROR: in creation of rule based collator: " + e);
return;
}
// are working fine.
private OneTestCase[] m_rangeTestCasesSupplemental_ = {
// Left Right Result
- new OneTestCase( "\ufffe", "\uffff", -1 ),
- new OneTestCase( "\uffff", "\ud800\udc00", -1 ), // U+FFFF < U+10000
+ new OneTestCase( "\u4e00", "\ufffb", -1 ),
+ new OneTestCase( "\ufffb", "\ud800\udc00", -1 ), // U+FFFB < U+10000
new OneTestCase( "\ud800\udc00", "\ud800\udc01", -1 ), // U+10000 < U+10001
- new OneTestCase( "\ufffe", "\ud800\udc01", -1 ), // U+FFFE < U+10001
+ new OneTestCase( "\u4e00", "\ud800\udc01", -1 ), // U+4E00 < U+10001
new OneTestCase( "\ud800\udc01", "\ud800\udc02", -1 ), // U+10001 < U+10002
new OneTestCase( "\ud800\udc00", "\ud840\udc02", -1 ), // U+10000 < U+10002
- new OneTestCase( "\ufffe", "\u0d840\udc02", -1 ), // U+FFFF < U+10002
+ new OneTestCase( "\u4e00", "\ud840\udc02", -1 ), // U+4E00 < U+20002
};
public void TestSameStrengthListWithSupplementalCharacters() {
String[] rules = new String[] {
// ** Rule without compact list syntax **
- // \ufffe < \uffff < \U00010000 < \U00010001 < \U00010002
- "&'\ufffe'<'\uffff'<'\ud800\udc00'<'\ud800\udc01'<'\ud800\udc02' " +
+ // \u4e00 < \ufffb < \U00010000 < \U00010001 < \U00010002
+ "&\u4e00<\ufffb<'\ud800\udc00'<'\ud800\udc01'<'\ud800\udc02' " +
// \U00010000 << \U00020001 << \U00020002 \U00020002
"&'\ud800\udc00'<<'\ud840\udc01'<<'\ud840\udc02'<<'\ud840\udc02' " +
// \U00020001 = \U0003001 = \U0004001 = \U0004002
- "&'\ud840\udc01'='\ud880\udc01'='\ud8c0\udc01'='\ud8c0\udc02'" +
- // \U00040008 < \U00030008 < \U00020008
- "&'\ud8c0\udc08'<'\ud880\udc08'<'\ud840\udc08'",
+ "&'\ud840\udc01'='\ud880\udc01'='\ud8c0\udc01'='\ud8c0\udc02'",
// ** Rule with compact list syntax **
- // \ufffe <* \uffff\U00010000 \U00010001
- "&'\ufffe'<*'\uffff\ud800\udc00\ud800\udc01\ud800\udc02' " +
+ // \u4e00 <* \ufffb \U00010000 \U00010001 \U00010002
+ "&\u4e00<*'\ufffb\ud800\udc00\ud800\udc01\ud800\udc02' " +
// \U00010000 <<* \U00020001 \U00020002
"&'\ud800\udc00'<<*'\ud840\udc01\ud840\udc02\ud840\udc03' " +
// \U00020001 =* \U0003001 \U0003002 \U0003003 \U0004001
- "&'\ud840\udc01'=*'\ud880\udc01\ud880\udc02\ud880\udc03\ud8c0\udc01' " +
- // \U00040008 <* \U00030008 \U00030009 \U0003000a \U00020008
- "&'\ud8c0\udc08'<*'\ud880\udc08\ud880\udc09\ud880\udc0a\ud840\udc08'",
+ "&'\ud840\udc01'=*'\ud880\udc01\ud880\udc02\ud880\udc03\ud8c0\udc01' "
};
doTestCollation(m_rangeTestCasesSupplemental_, rules);
// Tests the compact range syntax with supplemental codepoints.
public void TestSameStrengthListRangesWithSupplementalCharacters() {
String[] rules = new String[] {
- // \ufffe <* \uffff\U00010000 \U00010001
- "&'\ufffe'<*'\uffff'-'\ud800\udc02' " +
+ // \u4e00 <* \ufffb, then the range \U00010000-\U00010002
+ "&\u4e00<*'\ufffb'\ud800\udc00-'\ud800\udc02' " +
// \U00010000 <<* \U00020001 - \U00020003
"&'\ud800\udc00'<<*'\ud840\udc01'-'\ud840\udc03' " +
// \U00020001 =* \U0003001 \U0004001
- "&'\ud840\udc01'=*'\ud880\udc01'-'\ud880\udc03\ud8c0\udc01' " +
- // \U00040008 <* \U00030008 \U00020008
- "&'\ud8c0\udc08'<*'\ud880\udc08'-'\ud880\udc0a\ud840\udc08'",
+ "&'\ud840\udc01'=*'\ud880\udc01'-'\ud880\udc03\ud8c0\udc01' "
};
doTestCollation(m_rangeTestCasesSupplemental_, rules);
}
public void TestInvalidListsAndRanges() {
String[] invalidRules = new String[] {
// Range not in starred expression
- "&'\ufffe'<'\uffff'-'\ud800\udc02'",
+ "&\u4e00<\ufffb-'\ud800\udc02'",
// Range without start
"&a<*-c",
if (sortKey1.compareTo(sortKey2) >= 0) {
errln("TestHungarianTailoring getCollationKey(\"" + str1 +"\") was suppose "+
"less than getCollationKey(\""+ str2 + "\").");
- errln(" getCollationKey(\"ggy\"):" + prettify(sortKey1) +
- " getCollationKey(\"GGY\"):" + prettify(sortKey2));
+ errln(" getCollationKey(\"ggy\"):" + CollationTest.prettify(sortKey1) +
+ " getCollationKey(\"GGY\"):" + CollationTest.prettify(sortKey2));
}
CollationElementIterator iter1 = coll.getCollationElementIterator(str1);
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String msg4 = "; expected ";
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + src + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + src + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + src + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + src + msg2 + target + msg3 + sResult);
} else {
}
}
}
-
- String appendCompareResult(int result, String target) {
- if (result == -1) { //LESS
- target += "LESS";
- } else if (result == 0) { //EQUAL
- target += "EQUAL";
- } else if (result == 1) { //GREATER
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
}
/*
*******************************************************************************
- * Copyright (C) 2002-2013, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
// Collator -> rules -> Collator round-trip broken for expanding characters
//
public void Test4051866() {
- String rules = "< o & oe ,o\u3080& oe ,\u1530 ,O& OE ,O\u3080& OE ,\u1520< p ,P";
+ String rules = "&n < o & oe ,o\u3080& oe ,\u1530 ,O& OE ,O\u3080& OE ,\u1520< p ,P";
// Build a collator containing expanding characters
RuleBasedCollator c1 = null;
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- String appendCompareResult(int result, String target) {
- if (result == -1) { //LESS
- target += "LESS";
- } else if (result == 0) { //EQUAL
- target += "EQUAL";
- } else if (result == 1) { //GREATER
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
-
+
// @bug 4054736
//
// Full Decomposition mode not implemented
//
public void Test4059820(/* char* par */) {
RuleBasedCollator c = null;
- String rules = "< a < b , c/a < d < z";
+ String rules = "&9 < a < b , c/a < d < z";
try {
c = new RuleBasedCollator(rules);
} catch (Exception e) {
// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
//
public void Test4060154(/* char* par */) {
- String rules ="< g, G < h, H < i, I < j, J & H < \u0131, \u0130, i, I";
+ String rules ="&f < g, G < h, H < i, I < j, J & H < \u0131, \u0130, i, I";
RuleBasedCollator c = null;
try {
c = new RuleBasedCollator(rules);
} catch (Exception e) {
//System.out.println(e);
- errln("failure building collator.");
+ errln("failure building collator:" + e);
return;
}
public void Test4078588(/* char *par */) {
RuleBasedCollator rbc = null;
try {
- rbc = new RuleBasedCollator("< a < bb");
+ rbc = new RuleBasedCollator("&9 < a < bb");
} catch (Exception e) {
errln("Failed to create RuleBasedCollator.");
return;
c.setStrength(Collator.SECONDARY);
String tests[] = {
"\u007a", "\u003c", "\u00E6", // z < ae
- "\u0061\u0308", "\u003c", "\u0061\u030A", // a-unlaut < a-ring
+ "\u0061\u0308", "\u003c", "\u0061\u030A", // a-umlaut < a-ring
"\u0059", "\u003c", "\u0075\u0308", // Y < u-umlaut
};
compareArray(c, tests);
//
public void Test4101940(/* char* par */) {
RuleBasedCollator c = null;
- String rules = "< a < b";
+ String rules = "&9 < a < b";
String nothing = "";
try {
c = new RuleBasedCollator(rules);
iter.setOffset(5);
int elt5 = CollationElementIterator.primaryOrder(iter.next());
- if (elt4 != elt0 || elt5 != elt0)
- errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
- + elt4 + "), and 5 (" + elt5 + ") don't match.");
+ // Compares and prints only 16-bit primary weights.
+ if (elt4 != elt0 || elt5 != elt0) {
+ errln(String.format("The collation elements at positions 0 (0x%04x), " +
+ "4 (0x%04x), and 5 (0x%04x) don't match.",
+ elt0, elt4, elt5));
+ }
// test that the "cat" combination works properly
iter.setOffset(14);
iter.setOffset(19);
int elt19 = CollationElementIterator.primaryOrder(iter.next());
+ // Compares and prints only 16-bit primary weights.
if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
- || elt14 != elt18 || elt14 != elt19)
- errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
- + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
- + ", elt18 = " + elt18 + ", elt19 = " + elt19);
+ || elt14 != elt18 || elt14 != elt19) {
+ errln(String.format("\"cat\" elements don't match: elt14 = 0x%04x, " +
+ "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " +
+ "elt18 = 0x%04x, elt19 = 0x%04x",
+ elt14, elt15, elt16, elt17, elt18, elt19));
+ }
// now generate a complete list of the collation elements,
// first using next() and then using setOffset(), and
throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
}
- RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
+ RuleBasedCollator collator = new RuleBasedCollator("&a<\u00e0=a\u0300");
//commented by Kevin 2003/10/21
//for "FULL_DECOMPOSITION is not supported here." in ICU4J DOC
//collator.setDecomposition(Collator.FULL_DECOMPOSITION);
caseFirstCompressionSub(col, "lower first");
}
+ public void TestTrailingComment() throws Exception {
+ // ICU ticket #8070:
+ // Check that the rule parser handles a comment without terminating end-of-line.
+ RuleBasedCollator coll = new RuleBasedCollator("&c<b#comment1\n<a#comment2");
+ assertTrue("c<b", coll.compare("c", "b") < 0);
+ assertTrue("b<a", coll.compare("b", "a") < 0);
+ }
+
+ public void TestBeforeWithTooStrongAfter() {
+ // ICU ticket #9959:
+ // Forbid rules with a before-reset followed by a stronger relation.
+ try {
+ new RuleBasedCollator("&[before 2]x<<q<p");
+ errln("should forbid before-2-reset followed by primary relation");
+ } catch(Exception expected) {
+ }
+ try {
+ new RuleBasedCollator("&[before 3]x<<<q<<s<p");
+ errln("should forbid before-3-reset followed by primary or secondary relation");
+ } catch(Exception expected) {
+ }
+ }
+
/*
* Compare two strings - "aaa...A" and "aaa...a" with
* Collation#compare and CollationKey#compareTo, called from
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target){
- if (result == -1) {
- target += "LESS";
- } else if (result == 0) {
- target += "EQUAL";
- } else if (result == 1) {
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
}
/**
*******************************************************************************
- * Copyright (C) 2001-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2001-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
+ * CollationTest.java, ported from collationtest.cpp
+ * C++ version created on: 2012apr27
+ * created by: Markus W. Scherer
*/
package com.ibm.icu.dev.test.collator;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Locale;
-import java.util.MissingResourceException;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.HashSet;
+import java.util.Set;
-import com.ibm.icu.dev.test.ModuleTest;
-import com.ibm.icu.dev.test.TestDataModule.DataMap;
import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.impl.LocaleUtility;
+import com.ibm.icu.dev.test.TestUtil;
+import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Utility;
-import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.impl.coll.Collation;
+import com.ibm.icu.impl.coll.CollationData;
+import com.ibm.icu.impl.coll.CollationFCD;
+import com.ibm.icu.impl.coll.CollationIterator;
+import com.ibm.icu.impl.coll.CollationRoot;
+import com.ibm.icu.impl.coll.CollationRootElements;
+import com.ibm.icu.impl.coll.CollationRuleParser;
+import com.ibm.icu.impl.coll.CollationWeights;
+import com.ibm.icu.impl.coll.FCDIterCollationIterator;
+import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
+import com.ibm.icu.impl.coll.UTF16CollationIterator;
+import com.ibm.icu.impl.coll.UVector32;
import com.ibm.icu.text.CollationElementIterator;
import com.ibm.icu.text.CollationKey;
import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.Collator.ReorderCodes;
+import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.RuleBasedCollator;
+import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+import com.ibm.icu.util.IllformedLocaleException;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
-public class CollationTest extends ModuleTest{
- // public methods --------------------------------------------------------
-
+public class CollationTest extends TestFmwk {
public static void main(String[] args) throws Exception{
new CollationTest().run(args);
}
public CollationTest() {
- super("com/ibm/icu/dev/data/testdata/", "DataDrivenCollationTest");
- }
-
- public void processModules() {
- for (Iterator iter = t.getSettingsIterator(); iter.hasNext();) {
- DataMap setting = (DataMap) iter.next();
- processSetting(setting);
- }
}
-
+
+ // Fields
+ Normalizer2 fcd, nfd;
+ Collator coll;
+ String fileLine;
+ int fileLineNumber;
+ String fileTestName;
+ Throwable error;
+
// package private methods ----------------------------------------------
static void doTest(TestFmwk test, RuleBasedCollator col, String source,
}
if (o != orders[index]) {
test.errln("Mismatch at index " + index + ": 0x"
- + Integer.toHexString(orders[index]) + " vs 0x" + Integer.toHexString(o));
+ + Utility.hex(orders[index]) + " vs 0x" + Utility.hex(o));
break;
}
}
iter.reset();
test.err("next: ");
while ((o = iter.next()) != CollationElementIterator.NULLORDER) {
- String hexString = "0x" + Integer.toHexString(o) + " ";
+ String hexString = "0x" + Utility.hex(o) + " ";
test.err(hexString);
}
test.errln("");
test.err("prev: ");
while ((o = iter.previous()) != CollationElementIterator.NULLORDER) {
- String hexString = "0x" + Integer.toHexString(o) + " ";
+ String hexString = "0x" + Utility.hex(o) + " ";
test.err(hexString);
}
test.errln("");
}
}
- // private data members --------------------------------------------------
-
- private String m_sequence_;
- private int m_sequenceIndex_;
- private String m_source_;
- private StringBuffer m_target_ = new StringBuffer();
- private int m_nextRelation_;
- private int m_relation_;
-
- // private methods -------------------------------------------------------
-
- private void processSetting(DataMap settings) {
- RuleBasedCollator col = null;
- // ok i have to be careful here since it seems like we can have
- // multiple locales for each test
- String locale = settings.getString("TestLocale");
-
- if (locale != null) {
- // this is a case where we have locale
- try {
- Locale l = LocaleUtility.getLocaleFromName(locale);
- col = (RuleBasedCollator)Collator.getInstance(l);
- }catch (MissingResourceException e){
- warnln("Could not load the locale data for locale " + locale);
- }catch (Exception e) {
- errln("Error creating collator for locale " + locale);
- }
- logln("Testing collator for locale " + locale);
- processSetting2(settings, col);
- }
- String rules = settings.getString("Rules");
- // ok i have to be careful here since it seems like we can have
- // multiple rules for each test
- if (rules != null) {
- // here we deal with rules
- try {
- col = new RuleBasedCollator(rules);
- }catch (MissingResourceException e){
- warnln("Could not load the locale data: " + e.getMessage());
- } catch (Exception e) {
- errln("Error creating collator for rules " + rules);
- }
- processSetting2(settings, col);
+ static final String appendCompareResult(int result, String target){
+ if (result == -1) {
+ target += "LESS";
+ } else if (result == 0) {
+ target += "EQUAL";
+ } else if (result == 1) {
+ target += "GREATER";
+ } else {
+ String huh = "?";
+ target += huh + result;
}
+ return target;
}
- private void processSetting2(DataMap settings,RuleBasedCollator col)
- {
-
- // ok i have to be careful here since it seems like we can have
- // multiple rules for each test
- String arguments = settings.getString("Arguments");
- if (arguments != null) {
- handleArguments(col, arguments);
- }
- processTestCases(col);
+ static final String prettify(CollationKey key) {
+ byte[] bytes = key.toByteArray();
+ return prettify(bytes, bytes.length);
}
- /**
- * Reads the options string and sets appropriate attributes in collator
- */
- private void handleArguments(RuleBasedCollator col, String argument) {
- int i = 0;
- boolean printInfo = false;
- while (i < argument.length()) {
- if (!UCharacter.isWhitespace(argument.charAt(i))) {
- // eat whitespace
- break;
- }
- i ++;
- }
- while (i < argument.length()) {
- // skip opening '['
- if (argument.charAt(i) == '[') {
- i ++;
- }
- else {
- if(!isModularBuild()){
- errln("Error in collation arguments, missing ["); // no opening '['
- }
- // !!! following line has no effect
- printInfo=true;
- return;
- }
-
- int value = argument.indexOf(' ', i);
- String option = argument.substring(i, value);
- i = argument.indexOf(']', value);
- String optionvalue = argument.substring(value + 1, i);
- i ++;
- // some options are not added because they have no public apis yet
- // TODO add the rest of the options
- if (option.equalsIgnoreCase("alternate")) {
- if (optionvalue.equalsIgnoreCase("non-ignorable")) {
- col.setAlternateHandlingShifted(false);
- }
- else {
- col.setAlternateHandlingShifted(true);
- }
- }
- else if (option.equals("strength")) {
- if (optionvalue.equalsIgnoreCase("1")) {
- col.setStrength(Collator.PRIMARY);
- }
- else if (optionvalue.equalsIgnoreCase("2")) {
- col.setStrength(Collator.SECONDARY);
- }
- else if (optionvalue.equalsIgnoreCase("3")) {
- col.setStrength(Collator.TERTIARY);
- }
- else if (optionvalue.equalsIgnoreCase("4")) {
- col.setStrength(Collator.QUATERNARY);
- }
- }
- }
- if (printInfo) {
- warnln("Could not load the locale data. Skipping...");
- }
- // !!! effect is odd, if no modular build, this emits no
- // message at all. How come? Hmmm. printInfo is never
- // true if we get here, so this code is never executed.
- /*
- if(printInfo == true && isModularBuild()){
- infoln("Could not load the locale data. Skipping...");
- }
- */
- }
-
- private void processTestCases(RuleBasedCollator col) {
- for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
- DataMap e1 = (DataMap) iter.next();
- processSequence(col, e1.getString("sequence"));
- }
- }
-
- private void processSequence(RuleBasedCollator col, String sequence) {
- // TODO: have a smarter tester that remembers the sequence and ensures
- // that the complete sequence is in order. That is why I have made a
- // constraint in the sequence format.
- m_sequence_ = sequence;
- m_sequenceIndex_ = 0;
- m_nextRelation_ = -1;
- m_target_.delete(0, m_target_.length());
- List vector = new ArrayList();
- int lastsmallerthanindex = -1;
- getNextInSequence();
- while (getNextInSequence()) {
- String target = m_target_.toString();
- doTest(this, col, m_source_, target, m_relation_);
- int vsize = vector.size();
- for (int i = vsize - 1; i >= 0; i --) {
- String source = (String)vector.get(i);
- if (i > lastsmallerthanindex) {
- doTest(this, col, source, target, m_relation_);
- }
- else {
- doTest(this, col, source, target, -1);
- }
- }
- vector.add(target);
- if (m_relation_ < 0) {
- lastsmallerthanindex = vsize - 1;
- }
- }
+ static final String prettify(RawCollationKey key) {
+ return prettify(key.bytes, key.size);
}
- /**
- * Parses the sequence to be tested
- */
- private boolean getNextInSequence() {
- if (m_sequenceIndex_ >= m_sequence_.length()) {
- return false;
+ static final String prettify(byte[] skBytes, int length) {
+ StringBuilder target = new StringBuilder(length * 3 + 2).append('[');
+
+ for (int i = 0; i < length; i++) {
+ String numStr = Integer.toHexString(skBytes[i] & 0xff);
+ if (numStr.length() < 2) {
+ target.append('0');
+ }
+ target.append(numStr).append(' ');
}
-
- boolean quoted = false;
- boolean quotedsingle = false;
- boolean done = false;
- int i = m_sequenceIndex_;
- int offset = 0;
- m_source_ = m_target_.toString();
- m_relation_ = m_nextRelation_;
- m_target_.delete(0, m_target_.length());
- while (i < m_sequence_.length() && !done) {
- int ch = UTF16.charAt(m_sequence_, i);
- if (UCharacter.isSupplementary(ch)) {
- i += 2;
- }
- else {
- i ++;
- }
- if (!quoted) {
- if (UCharacter.isWhitespace(ch)) {
- continue;
- }
- switch (ch) {
- case 0x003C : // <
- m_nextRelation_ = -1;
- done = true;
- break;
- case 0x003D : // =
- m_nextRelation_ = 0;
- done = true;
- break;
- case 0x003E : // >
- m_nextRelation_ = 1;
- done = true;
- break;
- case 0x0027 : // ' very basic quoting
- quoted = true;
- quotedsingle = false;
- break;
- case 0x005c : // \ single quote
- quoted = true;
- quotedsingle = true;
- break;
- default:
- UTF16.insert(m_target_, offset, ch);
- if (UCharacter.isSupplementary(ch)) {
- offset += 2;
- }
- else {
- offset ++;
- }
- }
- }
- else {
- if (ch == 0x0027) {
- quoted = false;
- }
- else {
- UTF16.insert(m_target_, offset, ch);
- if (UCharacter.isSupplementary(ch)) {
- offset += 2;
- }
- else {
- offset ++;
- }
- }
- if (quotedsingle) {
- quoted = false;
- }
- }
- }
- if (quoted == true) {
- errln("Quote in sequence not closed!");
- return false;
- }
-
-
- m_sequenceIndex_ = i;
- return true;
+ target.append(']');
+ return target.toString();
}
private static void doTestVariant(TestFmwk test,
test.warnln("Could not load locale data skipping.");
}
}
+
+ public void TestMinMax() {
+ setRootCollator();
+ RuleBasedCollator rbc = (RuleBasedCollator)coll;
+
+ final String s = "\uFFFE\uFFFF";
+ long[] ces;
+
+ ces = rbc.internalGetCEs(s);
+ if (ces.length != 2) {
+ errln("expected 2 CEs for <FFFE, FFFF>, got " + ces.length);
+ return;
+ }
+
+ long ce = ces[0];
+ long expected = (Collation.MERGE_SEPARATOR_PRIMARY << 32) |
+ Collation.MERGE_SEPARATOR_LOWER32;
+ if (ce != expected) {
+ errln("CE(U+fffe)=0x" + Utility.hex(ce) + " != 02.02.02");
+ }
+
+ ce = ces[1];
+ expected = Collation.makeCE(Collation.MAX_PRIMARY);
+ if (ce != expected) {
+ errln("CE(U+ffff)=0x" + Utility.hex(ce) + " != max..");
+ }
+ }
+
+ public void TestImplicits() {
+ CollationData cd = CollationRoot.getData();
+
+ // Implicit primary weights should be assigned for the following sets,
+ // and sort in ascending order by set and then code point.
+ // See http://www.unicode.org/reports/tr10/#Implicit_Weights
+ // core Han Unified Ideographs
+ UnicodeSet coreHan = new UnicodeSet("[\\p{unified_ideograph}&"
+ + "[\\p{Block=CJK_Unified_Ideographs}"
+ + "\\p{Block=CJK_Compatibility_Ideographs}]]");
+ // all other Unified Han ideographs
+ UnicodeSet otherHan = new UnicodeSet("[\\p{unified ideograph}-"
+ + "[\\p{Block=CJK_Unified_Ideographs}"
+ + "\\p{Block=CJK_Compatibility_Ideographs}]]");
+
+ UnicodeSet unassigned = new UnicodeSet("[[:Cn:][:Cs:][:Co:]]");
+ unassigned.remove(0xfffe, 0xffff); // These have special CLDR root mappings.
+
+ UnicodeSet[] sets = { coreHan, otherHan, unassigned };
+ int prev = 0;
+ long prevPrimary = 0;
+ UTF16CollationIterator ci = new UTF16CollationIterator(cd, false, "", 0);
+ for (int i = 0; i < sets.length; ++i) {
+ UnicodeSetIterator iter = new UnicodeSetIterator(sets[i]);
+ while (iter.next()) {
+ String s = iter.getString();
+ int c = s.codePointAt(0);
+ ci.setText(false, s, 0);
+ long ce = ci.nextCE();
+ long ce2 = ci.nextCE();
+ if (ce == Collation.NO_CE || ce2 != Collation.NO_CE) {
+ errln("CollationIterator.nextCE(0x" + Utility.hex(c)
+ + ") did not yield exactly one CE");
+ continue;
+
+ }
+ if ((ce & 0xffffffffL) != Collation.COMMON_SEC_AND_TER_CE) {
+ errln("CollationIterator.nextCE(U+" + Utility.hex(c, 4)
+ + ") has non-common sec/ter weights: 0x" + Utility.hex(ce & 0xffffffffL, 8));
+ continue;
+ }
+ long primary = ce >>> 32;
+ if (!(primary > prevPrimary)) {
+ errln("CE(U+" + Utility.hex(c) + ")=0x" + Utility.hex(primary)
+ + ".. not greater than CE(U+" + Utility.hex(prev)
+ + ")=0x" + Utility.hex(prevPrimary) + "..");
+
+ }
+ prev = c;
+ prevPrimary = primary;
+ }
+ }
+ }
+
+ // ICU4C: TestNulTerminated / renamed for ICU4J
+ public void TestSubSequence() {
+ CollationData data = CollationRoot.getData();
+ final String s = "abab"; // { 0x61, 0x62, 0x61, 0x62 }
+
+ UTF16CollationIterator ci1 = new UTF16CollationIterator(data, false, s, 0);
+ UTF16CollationIterator ci2 = new UTF16CollationIterator(data, false, s, 2);
+
+ for (int i = 0; i < 2; ++i) {
+ long ce1 = ci1.nextCE();
+ long ce2 = ci2.nextCE();
+
+ if (ce1 != ce2) {
+ errln("CollationIterator.nextCE(with start position at 0) != "
+ + "nextCE(with start position at 2) at CE " + i);
+ }
+ }
+ }
+
+
+ // ICU4C: TestIllegalUTF8 / not applicable to ICU4J
+
+
+ private static void addLeadSurrogatesForSupplementary(UnicodeSet src, UnicodeSet dest) {
+ for(int c = 0x10000; c < 0x110000;) {
+ int next = c + 0x400;
+ if(src.containsSome(c, next - 1)) {
+ dest.add(UTF16.getLeadSurrogate(c));
+ }
+ c = next;
+ }
+ }
+
+ public void TestShortFCDData() {
+ UnicodeSet expectedLccc = new UnicodeSet("[:^lccc=0:]");
+ expectedLccc.add(0xdc00, 0xdfff); // add all trail surrogates
+ addLeadSurrogatesForSupplementary(expectedLccc, expectedLccc);
+
+ UnicodeSet lccc = new UnicodeSet(); // actual
+ for (int c = 0; c <= 0xffff; ++c) {
+ if (CollationFCD.hasLccc(c)) {
+ lccc.add(c);
+ }
+ }
+
+ UnicodeSet diff = new UnicodeSet(expectedLccc);
+ diff.removeAll(lccc);
+ diff.remove(0x10000, 0x10ffff); // hasLccc() only works for the BMP
+
+ String empty = "[]";
+ String diffString;
+
+ diffString = diff.toPattern(true);
+ assertEquals("CollationFCD::hasLccc() expected-actual", empty, diffString);
+
+ diff = lccc;
+ diff.removeAll(expectedLccc);
+ diffString = diff.toPattern(true);
+ assertEquals("CollationFCD::hasLccc() actual-expected", empty, diffString);
+
+ UnicodeSet expectedTccc = new UnicodeSet("[:^tccc=0:]");
+ addLeadSurrogatesForSupplementary(expectedLccc, expectedTccc);
+ addLeadSurrogatesForSupplementary(expectedTccc, expectedTccc);
+
+ UnicodeSet tccc = new UnicodeSet(); // actual
+ for(int c = 0; c <= 0xffff; ++c) {
+ if (CollationFCD.hasTccc(c)) {
+ tccc.add(c);
+ }
+ }
+
+ diff = new UnicodeSet(expectedTccc);
+ diff.removeAll(tccc);
+ diff.remove(0x10000, 0x10ffff); // hasTccc() only works for the BMP
+ assertEquals("CollationFCD::hasTccc() expected-actual", empty, diffString);
+
+ diff = tccc;
+ diff.removeAll(expectedTccc);
+ diffString = diff.toPattern(true);
+ assertEquals("CollationFCD::hasTccc() actual-expected", empty, diffString);
+ }
+
+ private static class CodePointIterator {
+ int[] cp;
+ int length;
+ int pos;
+
+ CodePointIterator(int[] cp) {
+ this.cp = cp;
+ this.length = cp.length;
+ this.pos = 0;
+ }
+
+ void resetToStart() {
+ pos = 0;
+ }
+
+ int next() {
+ return (pos < length) ? cp[pos++] : Collation.SENTINEL_CP;
+ }
+
+ int previous() {
+ return (pos > 0) ? cp[--pos] : Collation.SENTINEL_CP;
+ }
+
+ int getLength() {
+ return length;
+ }
+
+ int getIndex() {
+ return pos;
+ }
+ }
+
+ private void checkFCD(String name, CollationIterator ci, CodePointIterator cpi) {
+ // Iterate forward to the limit.
+ for (;;) {
+ int c1 = ci.nextCodePoint();
+ int c2 = cpi.next();
+ if (c1 != c2) {
+ errln(name + ".nextCodePoint(to limit, 1st pass) = U+" + Utility.hex(c1)
+ + " != U+" + Utility.hex(c1) + " at " + cpi.getIndex());
+ return;
+ }
+ if (c1 < 0) {
+ break;
+ }
+ }
+
+ // Iterate backward most of the way.
+ for (int n = (cpi.getLength() * 2) / 3; n > 0; --n) {
+ int c1 = ci.previousCodePoint();
+ int c2 = cpi.previous();
+ if (c1 != c2) {
+ errln(name + ".previousCodePoint() = U+" + Utility.hex(c1) +
+ " != U+" + Utility.hex(c2) + " at " + cpi.getIndex());
+ return;
+ }
+ }
+
+ // Forward again.
+ for (;;) {
+ int c1 = ci.nextCodePoint();
+ int c2 = cpi.next();
+ if (c1 != c2) {
+ errln(name + ".nextCodePoint(to limit again) = U+" + Utility.hex(c1)
+ + " != U+" + Utility.hex(c2) + " at " + cpi.getIndex());
+ return;
+ }
+ if (c1 < 0) {
+ break;
+ }
+ }
+
+ // Iterate backward to the start.
+ for (;;) {
+ int c1 = ci.previousCodePoint();
+ int c2 = cpi.previous();
+ if (c1 != c2) {
+ errln(name + ".nextCodePoint(to start) = U+" + Utility.hex(c1)
+ + " != U+" + Utility.hex(c2) + " at " + cpi.getIndex());
+ return;
+ }
+ if (c1 < 0) {
+ break;
+ }
+ }
+ }
+
+ public void TestFCD() {
+ CollationData data = CollationRoot.getData();
+
+ // Input string, not FCD.
+ StringBuilder buf = new StringBuilder();
+ buf.append("\u0308\u00e1\u0062\u0301\u0327\u0430\u0062")
+ .appendCodePoint(0x1D15F) // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216
+ .append("\u0327\u0308") // ccc=202, 230
+ .appendCodePoint(0x1D16D) // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226
+ .appendCodePoint(0x1D15F)
+ .appendCodePoint(0x1D16D)
+ .append("\uac01")
+ .append("\u00e7") // Character with tccc!=0 decomposed together with mis-ordered sequence.
+ .appendCodePoint(0x1D16D).appendCodePoint(0x1D165)
+ .append("\u00e1") // Character with tccc!=0 decomposed together with decomposed sequence.
+ .append("\u0f73\u0f75") // Tibetan composite vowels must be decomposed.
+ .append("\u4e00\u0f81");
+ String s = buf.toString();
+
+ // Expected code points.
+ int[] cp = {
+ 0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62,
+ 0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308,
+ 0x1D15F, 0x1D16D,
+ 0xac01,
+ 0x63, 0x327, 0x1D165, 0x1D16D,
+ 0x61,
+ 0xf71, 0xf71, 0xf72, 0xf74, 0x301,
+ 0x4e00, 0xf71, 0xf80
+ };
+
+ FCDUTF16CollationIterator u16ci = new FCDUTF16CollationIterator(data, false, s, 0);
+ CodePointIterator cpi = new CodePointIterator(cp);
+ checkFCD("FCDUTF16CollationIterator", u16ci, cpi);
+
+ cpi.resetToStart();
+ UCharacterIterator iter = UCharacterIterator.getInstance(s);
+ FCDIterCollationIterator uici = new FCDIterCollationIterator(data, false, iter, 0);
+ checkFCD("FCDIterCollationIterator", uici, cpi);
+ }
+
+ private void checkAllocWeights(CollationWeights cw, long lowerLimit, long upperLimit,
+ int n, int someLength, int minCount) {
+
+ if (!cw.allocWeights(lowerLimit, upperLimit, n)) {
+ errln("CollationWeights::allocWeights(0x"
+ + Utility.hex(lowerLimit) + ",0x"
+ + Utility.hex(upperLimit) + ","
+ + n + ") = false");
+ return;
+ }
+ long previous = lowerLimit;
+ int count = 0; // number of weights that have someLength
+ for (int i = 0; i < n; ++i) {
+ long w = cw.nextWeight();
+ if (w == 0xffffffffL) {
+ errln("CollationWeights::allocWeights(0x"
+ + Utility.hex(lowerLimit) + ",0x"
+ + Utility.hex(upperLimit) + ",0x"
+ + n + ").nextWeight() returns only "
+ + i + " weights");
+ return;
+ }
+ if (!(previous < w && w < upperLimit)) {
+ errln("CollationWeights::allocWeights(0x"
+ + Utility.hex(lowerLimit) + ",0x"
+ + Utility.hex(upperLimit) + ","
+ + n + ").nextWeight() number "
+ + (i + 1) + " -> 0x" + Utility.hex(w)
+ + " not between "
+ + Utility.hex(previous) + " and "
+ + Utility.hex(upperLimit));
+ return;
+ }
+ if (CollationWeights.lengthOfWeight(w) == someLength) {
+ ++count;
+ }
+ }
+ if (count < minCount) {
+ errln("CollationWeights::allocWeights(0x"
+ + Utility.hex(lowerLimit) + ",0x"
+ + Utility.hex(upperLimit) + ","
+ + n + ").nextWeight() returns only "
+ + count + " < " + minCount + " weights of length "
+ + someLength);
+
+ }
+ }
+
+ public void TestCollationWeights() {
+ CollationWeights cw = new CollationWeights();
+
+ // Non-compressible primaries use 254 second bytes 02..FF.
+ logln("CollationWeights.initForPrimary(non-compressible)");
+ cw.initForPrimary(false);
+ // Expect 1 weight 11 and 254 weights 12xx.
+ checkAllocWeights(cw, 0x10000000L, 0x13000000L, 255, 1, 1);
+ checkAllocWeights(cw, 0x10000000L, 0x13000000L, 255, 2, 254);
+ // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202.
+ checkAllocWeights(cw, 0x10fefe40L, 0x12030300L, 260, 2, 255);
+ // Expect 254 two-byte weights from the ranges 10ff and 11xx.
+ checkAllocWeights(cw, 0x10fefe40L, 0x12030300L, 600, 2, 254);
+ // Expect 254^2=64516 three-byte weights.
+ // During computation, there should be 3 three-byte ranges
+ // 10ffff, 11xxxx, 120202.
+ // The middle one should be split 64515:1,
+ // and the newly-split-off range and the last ranged lengthened.
+ checkAllocWeights(cw, 0x10fffe00L, 0x12020300L, 1 + 64516 + 254 + 1, 3, 64516);
+ // Expect weights 1102 & 1103.
+ checkAllocWeights(cw, 0x10ff0000L, 0x11040000L, 2, 2, 2);
+ // Expect weights 102102 & 102103.
+ checkAllocWeights(cw, 0x1020ff00L, 0x10210400L, 2, 3, 2);
+
+ // Compressible primaries use 251 second bytes 04..FE.
+ logln("CollationWeights.initForPrimary(compressible)");
+ cw.initForPrimary(true);
+ // Expect 1 weight 11 and 251 weights 12xx.
+ checkAllocWeights(cw, 0x10000000L, 0x13000000L, 252, 1, 1);
+ checkAllocWeights(cw, 0x10000000L, 0x13000000L, 252, 2, 251);
+ // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204.
+ checkAllocWeights(cw, 0x10fdfe40L, 0x12050300L, 260, 2, 252);
+ // Expect weights 1104 & 1105.
+ checkAllocWeights(cw, 0x10fe0000L, 0x11060000L, 2, 2, 2);
+ // Expect weights 102102 & 102103.
+ checkAllocWeights(cw, 0x1020ff00L, 0x10210400L, 2, 3, 2);
+
+ // Secondary and tertiary weights use only bytes 3 & 4.
+ logln("CollationWeights.initForSecondary()");
+ cw.initForSecondary();
+ // Expect weights fbxx and all four fc..ff.
+ checkAllocWeights(cw, 0xfb20L, 0x10000L, 20, 3, 4);
+
+ logln("CollationWeights.initForTertiary()");
+ cw.initForTertiary();
+ // Expect weights 3dxx and both 3e & 3f.
+ checkAllocWeights(cw, 0x3d02L, 0x4000L, 10, 3, 2);
+ }
+
+ private static boolean isValidCE(CollationRootElements re, CollationData data, long p, long s, long ctq) {
+ long p1 = p >>> 24;
+ long p2 = (p >>> 16) & 0xff;
+ long p3 = (p >>> 8) & 0xff;
+ long p4 = p & 0xff;
+ long s1 = s >>> 8;
+ long s2 = s & 0xff;
+ // ctq = Case, Tertiary, Quaternary
+ long c = (ctq & Collation.CASE_MASK) >>> 14;
+ long t = ctq & Collation.ONLY_TERTIARY_MASK;
+ long t1 = t >>> 8;
+ long t2 = t & 0xff;
+ long q = ctq & Collation.QUATERNARY_MASK;
+ // No leading zero bytes.
+ if ((p != 0 && p1 == 0) || (s != 0 && s1 == 0) || (t != 0 && t1 == 0)) {
+ return false;
+ }
+ // No intermediate zero bytes.
+ if (p1 != 0 && p2 == 0 && (p & 0xffff) != 0) {
+ return false;
+ }
+ if (p2 != 0 && p3 == 0 && p4 != 0) {
+ return false;
+ }
+ // Minimum & maximum lead bytes.
+ if ((p1 != 0 && p1 <= Collation.MERGE_SEPARATOR_BYTE)
+ || (s1 != 0 && s1 <= Collation.MERGE_SEPARATOR_BYTE)
+ || (t1 != 0 && t1 <= Collation.MERGE_SEPARATOR_BYTE)) {
+ return false;
+ }
+ if (t1 != 0 && t1 > 0x3f) {
+ return false;
+ }
+ if (c > 2) {
+ return false;
+ }
+ // The valid byte range for the second primary byte depends on compressibility.
+ if (p2 != 0) {
+ if (data.isCompressibleLeadByte((int)p1)) {
+ if (p2 <= Collation.PRIMARY_COMPRESSION_LOW_BYTE
+ || Collation.PRIMARY_COMPRESSION_HIGH_BYTE <= p2) {
+ return false;
+ }
+ } else {
+ if (p2 <= Collation.LEVEL_SEPARATOR_BYTE) {
+ return false;
+ }
+ }
+ }
+ // Other bytes just need to avoid the level separator.
+ // Trailing zeros are ok.
+ // assert (Collation.LEVEL_SEPARATOR_BYTE == 1);
+ if (p3 == Collation.LEVEL_SEPARATOR_BYTE || p4 == Collation.LEVEL_SEPARATOR_BYTE
+ || s2 == Collation.LEVEL_SEPARATOR_BYTE || t2 == Collation.LEVEL_SEPARATOR_BYTE) {
+ return false;
+ }
+ // Well-formed CEs.
+ if (p == 0) {
+ if (s == 0) {
+ if (t == 0) {
+ // Completely ignorable CE.
+ // Quaternary CEs are not supported.
+ if (c != 0 || q != 0) {
+ return false;
+ }
+ } else {
+ // Tertiary CE.
+ if (t < re.getTertiaryBoundary() || c != 2) {
+ return false;
+ }
+ }
+ } else {
+ // Secondary CE.
+ if (s < re.getSecondaryBoundary() || t == 0 || t >= re.getTertiaryBoundary()) {
+ return false;
+ }
+ }
+ } else {
+ // Primary CE.
+ if (s == 0 || (Collation.COMMON_WEIGHT16 < s && s <= re.getLastCommonSecondary())
+ || s >= re.getSecondaryBoundary()) {
+ return false;
+ }
+ if (t == 0 || t >= re.getTertiaryBoundary()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private static boolean isValidCE(CollationRootElements re, CollationData data, long ce) {
+ long p = ce >>> 32;
+ long secTer = ce & 0xffffffffL;
+ return isValidCE(re, data, p, secTer >>> 16, secTer & 0xffff);
+ }
+
+ private static class RootElementsIterator {
+ CollationData data;
+ long[] elements;
+ int length;
+
+ long pri;
+ long secTer;
+ int index;
+
+ RootElementsIterator(CollationData root) {
+ data = root;
+ elements = root.rootElements;
+ length = elements.length;
+ pri = 0;
+ secTer = 0;
+ index = (int)elements[CollationRootElements.IX_FIRST_TERTIARY_INDEX];
+ }
+
+ boolean next() {
+ if (index >= length) {
+ return false;
+ }
+ long p = elements[index];
+ if (p == CollationRootElements.PRIMARY_SENTINEL) {
+ return false;
+ }
+ if ((p & CollationRootElements.SEC_TER_DELTA_FLAG) != 0) {
+ ++index;
+ secTer = p & ~CollationRootElements.SEC_TER_DELTA_FLAG;
+ return true;
+ }
+ if ((p & CollationRootElements.PRIMARY_STEP_MASK) != 0) {
+ // End of a range, enumerate the primaries in the range.
+ int step = (int)p & CollationRootElements.PRIMARY_STEP_MASK;
+ p &= 0xffffff00;
+ if (pri == p) {
+ // Finished the range, return the next CE after it.
+ ++index;
+ return next();
+ }
+ assert (pri < p);
+ // Return the next primary in this range.
+ boolean isCompressible = data.isCompressiblePrimary(pri);
+ if ((pri & 0xffff) == 0) {
+ pri = Collation.incTwoBytePrimaryByOffset(pri, isCompressible, step);
+ } else {
+ pri = Collation.incThreeBytePrimaryByOffset(pri, isCompressible, step);
+ }
+ return true;
+ }
+ // Simple primary CE.
+ ++index;
+ pri = p;
+ secTer = Collation.COMMON_SEC_AND_TER_CE;
+ return true;
+ }
+
+ long getPrimary() {
+ return pri;
+ }
+
+ long getSecTer() {
+ return secTer;
+ }
+ }
+
+ public void TestRootElements() {
+ CollationData root = CollationRoot.getData();
+
+ CollationRootElements rootElements = new CollationRootElements(root.rootElements);
+ RootElementsIterator iter = new RootElementsIterator(root);
+
+ // We check each root CE for validity,
+ // and we also verify that there is a tailoring gap between each two CEs.
+ CollationWeights cw1c = new CollationWeights(); // compressible primary weights
+ CollationWeights cw1u = new CollationWeights(); // uncompressible primary weights
+ CollationWeights cw2 = new CollationWeights();
+ CollationWeights cw3 = new CollationWeights();
+
+ cw1c.initForPrimary(true);
+ cw1u.initForPrimary(false);
+ cw2.initForSecondary();
+ cw3.initForTertiary();
+
+ // Note: The root elements do not include Han-implicit or unassigned-implicit CEs,
+ // nor the special merge-separator CE for U+FFFE.
+ long prevPri = 0;
+ long prevSec = 0;
+ long prevTer = 0;
+
+ while (iter.next()) {
+ long pri = iter.getPrimary();
+ long secTer = iter.getSecTer();
+ // CollationRootElements CEs must have 0 case and quaternary bits.
+ if ((secTer & Collation.CASE_AND_QUATERNARY_MASK) != 0) {
+ errln("CollationRootElements CE has non-zero case and/or quaternary bits: "
+ + "0x" + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
+ }
+ long sec = secTer >>> 16;
+ long ter = secTer & Collation.ONLY_TERTIARY_MASK;
+ long ctq = ter;
+ if (pri == 0 && sec == 0 && ter != 0) {
+ // Tertiary CEs must have uppercase bits,
+ // but they are not stored in the CollationRootElements.
+ ctq |= 0x8000;
+ }
+ if (!isValidCE(rootElements, root, pri, sec, ctq)) {
+ errln("invalid root CE 0x"
+ + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
+ } else {
+ if (pri != prevPri) {
+ long newWeight = 0;
+ if (prevPri == 0 || prevPri >= Collation.FFFD_PRIMARY) {
+ // There is currently no tailoring gap after primary ignorables,
+ // and we forbid tailoring after U+FFFD and U+FFFF.
+ } else if (root.isCompressiblePrimary(prevPri)) {
+ if (!cw1c.allocWeights(prevPri, pri, 1)) {
+ errln("no primary/compressible tailoring gap between "
+ + "0x" + Utility.hex(prevPri, 8)
+ + " and 0x" + Utility.hex(pri, 8));
+ } else {
+ newWeight = cw1c.nextWeight();
+ }
+ } else {
+ if (!cw1u.allocWeights(prevPri, pri, 1)) {
+ errln("no primary/uncompressible tailoring gap between "
+ + "0x" + Utility.hex(prevPri, 8)
+ + " and 0x" + Utility.hex(pri, 8));
+ } else {
+ newWeight = cw1u.nextWeight();
+ }
+ }
+ if (newWeight != 0 && !(prevPri < newWeight && newWeight < pri)) {
+ errln("mis-allocated primary weight, should get "
+ + "0x" + Utility.hex(prevPri, 8)
+ + " < 0x" + Utility.hex(newWeight, 8)
+ + " < 0x" + Utility.hex(pri, 8));
+ }
+ } else if (sec != prevSec) {
+ long lowerLimit = prevSec == 0 ?
+ rootElements.getSecondaryBoundary() - 0x100 : prevSec;
+ if (!cw2.allocWeights(lowerLimit, sec, 1)) {
+ errln("no secondary tailoring gap between "
+ + "0x" + Utility.hex(lowerLimit)
+ + " and 0x" + Utility.hex(sec));
+ } else {
+ long newWeight = cw2.nextWeight();
+ if (!(prevSec < newWeight && newWeight < sec)) {
+ errln("mis-allocated secondary weight, should get "
+ + "0x" + Utility.hex(lowerLimit)
+ + " < 0x" + Utility.hex(newWeight)
+ + " < 0x" + Utility.hex(sec));
+ }
+ }
+ } else if (ter != prevTer) {
+ long lowerLimit = prevTer == 0 ?
+ rootElements.getTertiaryBoundary() - 0x100 : prevTer;
+ if (!cw3.allocWeights(lowerLimit, ter, 1)) {
+ errln("no tertiary tailoring gap between "
+ + "0x" + Utility.hex(lowerLimit)
+ + " and 0x" + Utility.hex(ter));
+ } else {
+ long newWeight = cw3.nextWeight();
+ if (!(prevTer < newWeight && newWeight < ter)) {
+ errln("mis-allocated tertiary weight, should get "
+ + "0x" + Utility.hex(lowerLimit)
+ + " < 0x" + Utility.hex(newWeight)
+ + " < 0x" + Utility.hex(ter));
+ }
+ }
+ } else {
+ errln("duplicate root CE 0x"
+ + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
+ }
+ }
+ prevPri = pri;
+ prevSec = sec;
+ prevTer = ter;
+ }
+ }
+
+ public void TestTailoredElements() {
+ CollationData root = CollationRoot.getData();
+ CollationRootElements rootElements = new CollationRootElements(root.rootElements);
+
+ Set<String> prevLocales = new HashSet<String>();
+ prevLocales.add("");
+ prevLocales.add("root");
+ prevLocales.add("root@collation=standard");
+
+ long[] ces;
+ ULocale[] locales = Collator.getAvailableULocales();
+ String localeID = "root";
+ int locIdx = 0;
+
+ for (; locIdx < locales.length; localeID = locales[locIdx++].getName()) {
+ ULocale locale = new ULocale(localeID);
+ String[] types = Collator.getKeywordValuesForLocale("collation", locale, false);
+ String type = null; // default type
+ int typeIdx = 0;
+ for (; typeIdx < types.length; type = types[typeIdx++]) {
+ ULocale localeWithType = locale;
+ if (type != null) {
+ localeWithType = localeWithType.setKeywordValue("collation", type);
+ }
+ Collator coll = Collator.getInstance(localeWithType);
+ ULocale actual = coll.getLocale(ULocale.ACTUAL_LOCALE);
+ if (prevLocales.contains(actual.getName())) {
+ continue;
+ }
+ prevLocales.add(actual.getName());
+ logln("TestTailoredElements(): requested " + localeWithType.getName()
+ + " -> actual " + actual.getName());
+ if (!(coll instanceof RuleBasedCollator)) {
+ continue;
+ }
+ RuleBasedCollator rbc = (RuleBasedCollator) coll;
+
+ // Note: It would be better to get tailored strings such that we can
+ // identify the prefix, and only get the CEs for the prefix+string,
+ // not also for the prefix.
+ // There is currently no API for that.
+ // It would help in an unusual case where a contraction starting in the prefix
+ // extends past its end, and we do not see the intended mapping.
+ // For example, for a mapping p|st, if there is also a contraction ps,
+ // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
+ UnicodeSet tailored = coll.getTailoredSet();
+ UnicodeSetIterator iter = new UnicodeSetIterator(tailored);
+ while (iter.next()) {
+ String s = iter.getString();
+ ces = rbc.internalGetCEs(s);
+ for (int i = 0; i < ces.length; ++i) {
+ long ce = ces[i];
+ if (!isValidCE(rootElements, root, ce)) {
+ logln(prettify(s));
+ errln("invalid tailored CE 0x" + Utility.hex(ce, 16)
+ + " at CE index " + i + " from string:");
+ }
+ }
+ }
+ }
+ }
+ }
+
+ private static boolean isSpace(char c) {
+ return (c == 0x09 || c == 0x20 || c == 0x3000);
+ }
+
+ private static boolean isSectionStarter(char c) {
+ return (c == '%' || c == '*' || c == '@');
+ }
+
+ private int skipSpaces(int i) {
+ while (isSpace(fileLine.charAt(i))) {
+ ++i;
+ }
+ return i;
+ }
+
+ private String printSortKey(byte[] p) {
+ StringBuilder s = new StringBuilder();
+ for (int i = 0; i < p.length; ++i) {
+ if (i > 0) {
+ s.append(' ');
+ }
+ byte b = p[i];
+ if (b == 0) {
+ s.append('.');
+ } else if (b == 1) {
+ s.append('|');
+ } else {
+ s.append(String.format("%02x", b & 0xff));
+ }
+ }
+ return s.toString();
+ }
+
+ private String printCollationKey(CollationKey key) {
+ byte[] p = key.toByteArray();
+ return printSortKey(p);
+ }
+
+ private boolean readLine(BufferedReader in) throws IOException {
+ String line = in.readLine();
+ if (line == null) {
+ fileLine = null;
+ return false;
+ }
+ ++fileLineNumber;
+ // Strip trailing comments and spaces
+ int idx = line.indexOf('#');
+ if (idx < 0) {
+ idx = line.length();
+ }
+ for (; idx > 0; idx--) {
+ if (!isSpace(line.charAt(idx -1))) {
+ break;
+ }
+ }
+
+ fileLine = idx < line.length() ? line.substring(0, idx) : line;
+ return true;
+ }
+
+ private int parseString(int start, Output<String> prefix, Output<String> s) {
+ int length = fileLine.length();
+ int i;
+ for (i = start; i < length && !isSpace(fileLine.charAt(i)); ++i) {
+ }
+ int pipeIndex = fileLine.indexOf('|', start);
+ if (pipeIndex >= 0 && pipeIndex < i) {
+ String tmpPrefix = Utility.unescape(fileLine.substring(start, pipeIndex));
+ if (tmpPrefix.length() == 0) {
+ prefix.value = null;
+ logln(fileLine);
+ error = new ParseException("empty prefix on line " + fileLineNumber, fileLineNumber);
+ errln("empty prefix on line " + fileLineNumber);
+ return start;
+ }
+ prefix.value = tmpPrefix;
+ start = pipeIndex + 1;
+ } else {
+ prefix.value = null;
+ }
+
+ String tmp = Utility.unescape(fileLine.substring(start, i));
+ if (tmp.length() == 0) {
+ s.value = null;
+ logln(fileLine);
+ error = new ParseException("empty string on line " + fileLineNumber, fileLineNumber);
+ errln("empty string on line " + fileLineNumber);
+ return start;
+ }
+ s.value = tmp;
+ return i;
+ }
+
+ private int parseRelationAndString(Output<String> s) {
+ int relation = Collation.NO_LEVEL;
+ int start;
+ if (fileLine.charAt(0) == '<') {
+ char second = fileLine.charAt(1);
+ start = 2;
+ switch(second) {
+ case 0x31: // <1
+ relation = Collation.PRIMARY_LEVEL;
+ break;
+ case 0x32: // <2
+ relation = Collation.SECONDARY_LEVEL;
+ break;
+ case 0x33: // <3
+ relation = Collation.TERTIARY_LEVEL;
+ break;
+ case 0x34: // <4
+ relation = Collation.QUATERNARY_LEVEL;
+ break;
+ case 0x63: // <c
+ relation = Collation.CASE_LEVEL;
+ break;
+ case 0x69: // <i
+ relation = Collation.IDENTICAL_LEVEL;
+ break;
+ default: // just <
+ relation = Collation.NO_LEVEL;
+ start = 1;
+ break;
+ }
+ } else if (fileLine.charAt(0) == '=') {
+ relation = Collation.ZERO_LEVEL;
+ start = 1;
+ } else {
+ start = 0;
+ }
+
+ if (start == 0 || !isSpace(fileLine.charAt(start))) {
+ logln(fileLine);
+ error = new ParseException("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line "
+ + fileLineNumber, fileLineNumber);
+ errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line " + fileLineNumber);
+ return Collation.NO_LEVEL;
+ }
+
+ start = skipSpaces(start);
+ Output<String> prefixOut = new Output<String>();
+ start = parseString(start, prefixOut, s);
+ if (error == null && prefixOut.value != null) {
+ logln(fileLine);
+ error = new ParseException("prefix string not allowed for test string: on line "
+ + fileLineNumber, fileLineNumber);
+ errln("prefix string not allowed for test string: on line " + fileLineNumber);
+ return Collation.NO_LEVEL;
+ }
+ if (start < fileLine.length()) {
+ logln(fileLine);
+ error = new ParseException("unexpected line contents after test string on line "
+ + fileLineNumber, fileLineNumber);
+ errln("unexpected line contents after test string on line " + fileLineNumber);
+ return Collation.NO_LEVEL;
+ }
+
+ return relation;
+ }
+
+ private void parseAndSetAttribute() {
+ int start = skipSpaces(1);
+ int equalPos = fileLine.indexOf('=');
+ if (equalPos < 0) {
+ if (fileLine.regionMatches(start, "reorder", 0, 7)) {
+ parseAndSetReorderCodes(start + 7);
+ return;
+ }
+ logln(fileLine);
+ error = new ParseException("missing '=' on line " + fileLineNumber, fileLineNumber);
+ errln("missing '=' on line " + fileLineNumber);
+ return;
+ }
+
+ String attrString = fileLine.substring(start, equalPos);
+ String valueString = fileLine.substring(equalPos + 1);
+ if (attrString.equals("maxVariable")) {
+ int max;
+ if (valueString.equals("space")) {
+ max = ReorderCodes.SPACE;
+ } else if(valueString.equals("punct")) {
+ max = ReorderCodes.PUNCTUATION;
+ } else if(valueString.equals("symbol")) {
+ max = ReorderCodes.SYMBOL;
+ } else if(valueString.equals("currency")) {
+ max = ReorderCodes.CURRENCY;
+ } else {
+ logln(fileLine);
+ error = new ParseException("invalid attribute value name on line "
+ + fileLineNumber, fileLineNumber);
+ errln("invalid attribute value name on line " + fileLineNumber);
+ return;
+ }
+ coll.setMaxVariable(max);
+ fileLine = null;
+ return;
+ }
+
+ boolean parsed = true;
+ RuleBasedCollator rbc = (RuleBasedCollator)coll;
+ if (attrString.equals("backwards")) {
+ if (valueString.equals("on")) {
+ rbc.setFrenchCollation(true);
+ } else if (valueString.equals("off")) {
+ rbc.setFrenchCollation(false);
+ } else if (valueString.equals("default")) {
+ rbc.setFrenchCollationDefault();
+ } else {
+ parsed = false;
+ }
+ } else if (attrString.equals("alternate")) {
+ if (valueString.equals("non-ignorable")) {
+ rbc.setAlternateHandlingShifted(false);
+ } else if (valueString.equals("shifted")) {
+ rbc.setAlternateHandlingShifted(true);
+ } else if (valueString.equals("default")) {
+ rbc.setAlternateHandlingDefault();
+ } else {
+ parsed = false;
+ }
+ } else if (attrString.equals("caseFirst")) {
+ if (valueString.equals("upper")) {
+ rbc.setUpperCaseFirst(true);
+ } else if (valueString.equals("lower")) {
+ rbc.setLowerCaseFirst(true);
+ } else if (valueString.equals("default")) {
+ rbc.setCaseFirstDefault();
+ } else {
+ parsed = false;
+ }
+ } else if (attrString.equals("caseLevel")) {
+ if (valueString.equals("on")) {
+ rbc.setCaseLevel(true);
+ } else if (valueString.equals("off")) {
+ rbc.setCaseLevel(false);
+ } else if (valueString.equals("default")) {
+ rbc.setCaseLevelDefault();
+ } else {
+ parsed = false;
+ }
+ } else if (attrString.equals("strength")) {
+ if (valueString.equals("primary")) {
+ rbc.setStrength(Collator.PRIMARY);
+ } else if (valueString.equals("secondary")) {
+ rbc.setStrength(Collator.SECONDARY);
+ } else if (valueString.equals("tertiary")) {
+ rbc.setStrength(Collator.TERTIARY);
+ } else if (valueString.equals("quaternary")) {
+ rbc.setStrength(Collator.QUATERNARY);
+ } else if (valueString.equals("identical")) {
+ rbc.setStrength(Collator.IDENTICAL);
+ } else if (valueString.equals("default")) {
+ rbc.setStrengthDefault();
+ } else {
+ parsed = false;
+ }
+ } else if (attrString.equals("numeric")) {
+ if (valueString.equals("on")) {
+ rbc.setNumericCollation(true);
+ } else if (valueString.equals("off")) {
+ rbc.setNumericCollation(false);
+ } else if (valueString.equals("default")) {
+ rbc.setNumericCollationDefault();
+ } else {
+ parsed = false;
+ }
+ } else {
+ logln(fileLine);
+ error = new ParseException("invalid attribute value name on line "
+ + fileLineNumber, fileLineNumber);
+ errln("invalid attribute value name on line " + fileLineNumber);
+ return;
+ }
+ if (!parsed) {
+ logln(fileLine);
+ error = new ParseException("invalid attribute=value combination on line "
+ + fileLineNumber, fileLineNumber);
+ errln("invalid attribute=value combination on line " + fileLineNumber);
+ return;
+ }
+
+ fileLine = null;
+ }
+
+ private void parseAndSetReorderCodes(int start) {
+ UVector32 reorderCodes = new UVector32();
+ while (start < fileLine.length()) {
+ start = skipSpaces(start);
+ int limit = start;
+ while (limit < fileLine.length() && !isSpace(fileLine.charAt(limit))) {
+ ++limit;
+ }
+ String name = fileLine.substring(start, limit);
+ int code = CollationRuleParser.getReorderCode(name);
+ if (code < -1) {
+ logln(fileLine);
+ error = new ParseException("invalid reorder code '" + name + "' on line "
+ + fileLineNumber, fileLineNumber);
+ return;
+ }
+ reorderCodes.addElement(code);
+ start = limit;
+ }
+ int[] reorderCodesArray = new int[reorderCodes.size()];
+ System.arraycopy(reorderCodes.getBuffer(), 0,
+ reorderCodesArray, 0, reorderCodes.size());
+ coll.setReorderCodes(reorderCodesArray);
+
+ fileLine = null;
+ }
+
+ private void buildTailoring(BufferedReader in) throws IOException {
+ StringBuilder rules = new StringBuilder();
+ while (readLine(in)) {
+ if (fileLine.length() == 0) {
+ continue;
+ }
+ if (isSectionStarter(fileLine.charAt(0))) {
+ break;
+ }
+ rules.append(Utility.unescape(fileLine));
+ }
+
+ try {
+ coll = new RuleBasedCollator(rules.toString());
+ } catch (Exception e) {
+ logln(rules.toString());
+ errln("RuleBasedCollator(rules) failed - " + e.getMessage());
+ error = e;
+ }
+ }
+
+ private void setRootCollator() {
+ coll = Collator.getInstance(ULocale.ROOT);
+ }
+
+ private void setLocaleCollator() {
+ ULocale locale = null;
+ if (fileLine.length() > 9) {
+ String langTag = fileLine.substring(9); // "@ locale <langTag>"
+
+ ULocale.Builder locBld = new ULocale.Builder();
+ try {
+ locale = locBld.setLanguageTag(langTag).build();
+ } catch (IllformedLocaleException e) {
+ locale = null;
+ }
+ }
+ if (locale == null) {
+ logln(fileLine);
+ errln("invalid language tag on line " + fileLineNumber);
+ error = new ParseException("invalid language tag on line " + fileLineNumber, fileLineNumber);
+ return;
+ }
+
+ logln("creating a collator for locale ID " + locale.getName());
+ coll = Collator.getInstance(locale);
+ }
+
+ private boolean needsNormalization(String s) {
+ if (!fcd.isNormalized(s)) {
+ return true;
+ }
+ // In some sequences with Tibetan composite vowel signs,
+ // even if the string passes the FCD check,
+ // those composites must be decomposed.
+ // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81.
+ int index = 0;
+ while((index = s.indexOf(0xf71, index)) >= 0) {
+ if (++index < s.length()) {
+ char c = s.charAt(index);
+ if (c == 0xf73 || c == 0xf75 || c == 0xf81) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ private boolean getCollationKey(String norm, String line, String s, Output<CollationKey> keyOut) {
+ CollationKey key = coll.getCollationKey(s);
+ keyOut.value = key;
+
+ byte[] keyBytes = key.toByteArray();
+ if (keyBytes.length == 0 || keyBytes[keyBytes.length - 1] != 0) {
+ logln(fileTestName);
+ logln(line);
+ logln(printCollationKey(key));
+ errln("Collator(" + norm + ").getCollationKey() wrote an empty or unterminated key");
+ return false;
+ }
+
+ int numLevels = coll.getStrength();
+ if (numLevels < Collator.IDENTICAL) {
+ ++numLevels;
+ } else {
+ numLevels = 5;
+ }
+ if (((RuleBasedCollator)coll).isCaseLevel()) {
+ ++numLevels;
+ }
+ int numLevelSeparators = 0;
+ for (int i = 0; i < (keyBytes.length - 1); ++i) {
+ byte b = keyBytes[i];
+ if (b == 0) {
+ logln(fileTestName);
+ logln(line);
+ logln(printCollationKey(key));
+ errln("Collator(" + norm + ").getCollationKey() contains a 00 byte");
+ return false;
+ }
+ if (b == 1) {
+ ++numLevelSeparators;
+ }
+ }
+ if (numLevelSeparators != (numLevels - 1)) {
+ logln(fileTestName);
+ logln(line);
+ logln(printCollationKey(key));
+ errln("Collator(" + norm + ").getCollationKey() has "
+ + numLevelSeparators + " level separators for "
+ + numLevels + " levels");
+ return false;
+ }
+
+ // If s contains U+FFFE, check that merged segments make the same key.
+ CollationKey mergedKey = null;
+ int sLength = s.length();
+ int segmentStart = 0;
+ for (int i = 0;;) {
+ if (i == sLength) {
+ if (segmentStart == 0) {
+ // s does not contain any U+FFFE.
+ break;
+ }
+ } else if (s.charAt(i) != '\uFFFE') {
+ ++i;
+ continue;
+ }
+ // Get the sort key for another segment and merge it into mergedKey.
+ CollationKey tmpKey = coll.getCollationKey(s.substring(segmentStart, i));
+ if (mergedKey == null) {
+ mergedKey = tmpKey;
+ } else {
+ mergedKey = mergedKey.merge(tmpKey);
+ }
+ if (i == sLength) {
+ break;
+ }
+ segmentStart = ++i;
+ }
+ if (segmentStart != 0 && key.compareTo(mergedKey) != 0) {
+ logln(fileTestName);
+ logln(line);
+ logln(printCollationKey(key));
+ logln(printCollationKey(mergedKey));
+ errln("Collator(" + norm
+ + ").getCollationKey(with U+FFFE) != CollationKey.merge(segments)");
+ return false;
+ }
+
+ // No nextSortKeyPart support in ICU4J
+
+ return true;
+ }
+
+ private boolean checkCompareTwo(String norm, String prevFileLine, String prevString, String s,
+ int expectedOrder, int expectedLevel) {
+ // Get the sort keys first, for error debug output.
+ Output<CollationKey> prevKeyOut = new Output<CollationKey>();
+ CollationKey prevKey;
+ if (!getCollationKey(norm, fileLine, prevString, prevKeyOut)) {
+ return false;
+ }
+ prevKey = prevKeyOut.value;
+
+ Output<CollationKey> keyOut = new Output<CollationKey>();
+ CollationKey key;
+ if (!getCollationKey(norm, fileLine, s, keyOut)) {
+ return false;
+ }
+ key = keyOut.value;
+
+ int order = coll.compare(prevString, s);
+ if (order != expectedOrder) {
+ logln(fileTestName);
+ logln(prevFileLine);
+ logln(fileLine);
+ logln(printCollationKey(prevKey));
+ logln(printCollationKey(key));
+ errln("line " + fileLineNumber
+ + " Collator(" + norm + ").compare(previous, current) wrong order: "
+ + order + " != " + expectedOrder);
+ return false;
+ }
+ order = coll.compare(s, prevString);
+ if (order != -expectedOrder) {
+ logln(fileTestName);
+ logln(prevFileLine);
+ logln(fileLine);
+ logln(printCollationKey(prevKey));
+ logln(printCollationKey(key));
+ errln("line " + fileLineNumber
+ + " Collator(" + norm + ").compare(current, previous) wrong order: "
+ + order + " != " + -expectedOrder);
+ return false;
+ }
+
+ order = prevKey.compareTo(key);
+ if (order != expectedOrder) {
+ logln(fileTestName);
+ logln(prevFileLine);
+ logln(fileLine);
+ logln(printCollationKey(prevKey));
+ logln(printCollationKey(key));
+ errln("line " + fileLineNumber
+ + " Collator(" + norm + ").getCollationKey(previous, current).compareTo() wrong order: "
+ + order + " != " + expectedOrder);
+ return false;
+ }
+ if (order != Collation.EQUAL && expectedLevel != Collation.NO_LEVEL) {
+ byte[] prevBytes = prevKey.toByteArray();
+ byte[] bytes = key.toByteArray();
+ int level = Collation.PRIMARY_LEVEL;
+ for (int i = 0;; ++i) {
+ byte b = prevBytes[i];
+ if (b != bytes[i]) {
+ break;
+ }
+ if ((int)b == Collation.LEVEL_SEPARATOR_BYTE) {
+ ++level;
+ if (level == Collation.CASE_LEVEL
+ && !((RuleBasedCollator)coll).isCaseLevel()) {
+ ++level;
+ }
+ }
+ }
+ if (level != expectedLevel) {
+ logln(fileTestName);
+ logln(prevFileLine);
+ logln(fileLine);
+ logln(printCollationKey(prevKey));
+ logln(printCollationKey(key));
+ errln("line " + fileLineNumber
+ + " Collator(" + norm + ").getCollationKey(previous, current).compareTo()="
+ + level + " wrong level: " + level + " != " + expectedLevel);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void checkCompareStrings(BufferedReader in) throws IOException {
+ String prevFileLine = "(none)";
+ String prevString = "";
+ String s;
+ Output<String> sOut = new Output<String>();
+ while (readLine(in)) {
+ if (fileLine.length() == 0) {
+ continue;
+ }
+ if (isSectionStarter(fileLine.charAt(0))) {
+ break;
+ }
+ int relation = parseRelationAndString(sOut);
+ s = sOut.value;
+ int expectedOrder = (relation == Collation.ZERO_LEVEL) ? Collation.EQUAL : Collation.LESS;
+ int expectedLevel = relation;
+ boolean isOk = true;
+ if (!needsNormalization(prevString) && !needsNormalization(s)) {
+ coll.setDecomposition(Collator.NO_DECOMPOSITION);
+ isOk = checkCompareTwo("normalization=off", prevFileLine, prevString, s,
+ expectedOrder, expectedLevel);
+ }
+ if (isOk) {
+ coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
+ isOk = checkCompareTwo("normalization=on", prevFileLine, prevString, s,
+ expectedOrder, expectedLevel);
+ }
+ if (isOk && (!nfd.isNormalized(prevString) || !nfd.isNormalized(s))) {
+ String pn = nfd.normalize(prevString);
+ String n = nfd.normalize(s);
+ isOk = checkCompareTwo("NFD input", prevFileLine, pn, n,
+ expectedOrder, expectedLevel);
+ }
+ prevFileLine = fileLine;
+ prevString = s;
+ }
+ }
+
+ public void TestDataDriven() {
+ nfd = Normalizer2.getNFDInstance();
+ fcd = Norm2AllModes.getFCDNormalizer2();
+
+ BufferedReader in = null;
+
+ try {
+ in = TestUtil.getDataReader("collationtest.txt", "UTF-8");
+
+ // read first line and remove BOM if present
+ readLine(in);
+ if (fileLine != null && fileLine.charAt(0) == '\uFEFF') {
+ fileLine = fileLine.substring(1);
+ }
+
+ while (error == null) {
+ if (fileLine == null || fileLine.length() == 0) {
+ if (!readLine(in)) {
+ break;
+ }
+ continue;
+ }
+ if (!isSectionStarter(fileLine.charAt(0))) {
+ logln(fileLine);
+ errln("syntax error on line " + fileLineNumber);
+ return;
+ }
+ if (fileLine.startsWith("** test: ")) {
+ fileTestName = fileLine;
+ logln(fileLine);
+ fileLine = null;
+ } else if (fileLine.equals("@ root")) {
+ setRootCollator();
+ fileLine = null;
+ } else if (fileLine.startsWith("@ locale ")) {
+ setLocaleCollator();
+ fileLine = null;
+ } else if (fileLine.equals("@ rules")) {
+ buildTailoring(in);
+ } else if (fileLine.charAt(0) == '%'
+ && fileLine.length() > 1 && isSpace(fileLine.charAt(1))) {
+ parseAndSetAttribute();
+ } else if (fileLine.equals("* compare")) {
+ checkCompareStrings(in);
+ } else {
+ logln(fileLine);
+ errln("syntax error on line " + fileLineNumber);
+ return;
+ }
+ }
+ } catch (IOException e) {
+ errln(e.getMessage());
+ } finally {
+ try {
+ if (in != null) {
+ in.close();
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
}
/*
*******************************************************************************
- * Copyright (C) 2002-2012, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
errln("Fail: getCollationKey returned ");
return;
}
- msg += "key1: " + prettify(k1) + "\n"
- + "key2: " + prettify(k2);
+ msg += "key1: " + CollationTest.prettify(k1) + "\n"
+ + "key2: " + CollationTest.prettify(k2);
errln(msg);
}
}
}
compareArray(collator, testcontraction);
}
-
-
-
-
-
-
-
-
-
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
-
// private inner class -------------------------------------------------
private static final class StrCmp implements Comparator<String>
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target) {
- if (result == -1) {
- target += "LESS";
- } else if (result == 0) {
- target += "EQUAL";
- } else if (result == 1) {
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
-}
\ No newline at end of file
+}
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target){
- if (result == -1) {
- target += "LESS";
- } else if (result == 0) {
- target += "EQUAL";
- } else if (result == 1) {
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
}
/*
*******************************************************************************
- * Copyright (C) 2002-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
String sExpect = new String("");
String sResult = new String("");
- sResult = appendCompareResult(compareResult, sResult);
- sExpect = appendCompareResult(expectedResult, sExpect);
+ sResult = CollationTest.appendCompareResult(compareResult, sResult);
+ sExpect = CollationTest.appendCompareResult(expectedResult, sExpect);
if (ok1) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
msg2 = "\").compareTo(key(\"";
msg3 = "\")) returned ";
- sResult = appendCompareResult(keyResult, sResult);
+ sResult = CollationTest.appendCompareResult(keyResult, sResult);
if (ok2) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
} else {
errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
msg1 = " ";
msg2 = " vs. ";
- errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
+ errln(msg1 + CollationTest.prettify(sourceKey) + msg2 + CollationTest.prettify(targetKey));
}
msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
msg2 = "\", \"";
msg3 = "\") returned ";
- sResult = appendCompareResult(incResult, sResult);
+ sResult = CollationTest.appendCompareResult(incResult, sResult);
if (ok3) {
logln(msg1 + source + msg2 + target + msg3 + sResult);
}
}
}
-
- private String appendCompareResult(int result, String target){
- if (result == -1) {
- target += "LESS";
- } else if (result == 0) {
- target += "EQUAL";
- } else if (result == 1) {
- target += "GREATER";
- } else {
- String huh = "?";
- target += huh + result;
- }
- return target;
- }
-
- String prettify(CollationKey sourceKey) {
- int i;
- byte[] bytes= sourceKey.toByteArray();
- String target = "[";
-
- for (i = 0; i < bytes.length; i++) {
- target += Integer.toHexString(bytes[i]);
- target += " ";
- }
- target += "]";
- return target;
- }
-}
\ No newline at end of file
+}
/********************************************************************
- * COPYRIGHT:
- * Copyright (c) 2002-2012, International Business Machines Corporation and
+ * Copyright (c) 2002-2014, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
package com.ibm.icu.dev.test.collator;
import java.io.BufferedReader;
-import java.util.Locale;
+import java.io.IOException;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.TestUtil;
import com.ibm.icu.lang.UCharacter;
-import com.ibm.icu.text.CollationKey;
import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UTF16;
+import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo;
public class UCAConformanceTest extends TestFmwk {
public UCAConformanceTest() {
}
- protected void init()throws Exception{
- UCA = (RuleBasedCollator)Collator.getInstance(
- new Locale("root", "", ""));
+ @Override
+ protected void init() throws Exception{
+ UCA = (RuleBasedCollator)Collator.getInstance(ULocale.ROOT);
comparer = new UTF16.StringComparator(true, false, UTF16.StringComparator.FOLD_CASE_DEFAULT);
}
}
public void TestRulesNonIgnorable() {
+ if(logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) { return; }
initRbUCA();
if(rbUCA == null) { return; }
String collationTest = "CollationTest_";
String ext = ".txt";
try {
- if(in != null) {
- in.close();
- }
- } catch (Exception e) {
- errln("Could not close the opened file!");
- return;
- }
- try {
- in = TestUtil.getDataReader(collationTest+type+ext);
+ in = TestUtil.getDataReader(collationTest+type+"_SHORT"+ext);
} catch (Exception e) {
try {
- in = TestUtil.getDataReader(collationTest+type+"_SHORT"+ext);
+ in = TestUtil.getDataReader(collationTest+type+ext);
} catch (Exception e1) {
try {
in = TestUtil.getDataReader(collationTest+type+"_STUB"+ext);
logln( "INFO: Working with the stub file.\n"+
"If you need the full conformance test, please\n"+
"download the appropriate data files from:\n"+
- "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/");
+ "http://unicode.org/cldr/trac/browser/trunk/common/uca");
} catch (Exception e11) {
errln("ERROR: Could not find any of the test files");
}
private static final int FROM_RULES = 2;
private static boolean skipLineBecauseOfBug(String s, int flags) {
- // TODO: Fix ICU ticket #8052
- if(s.length() >= 3 &&
- (s.charAt(0) == 0xfb2 || s.charAt(0) == 0xfb3) &&
- s.charAt(1) == 0x334 &&
- (s.charAt(2) == 0xf73 || s.charAt(2) == 0xf75 || s.charAt(2) == 0xf81)) {
- return true;
- }
- // TODO: Fix ICU ticket #9361
- if((flags & IS_SHIFTED) != 0 && s.length() >= 2 && s.charAt(0) == 0xfffe) {
- return true;
- }
- // TODO: Fix ICU ticket #9494
- int c;
- if(s.length() >= 2 && 0xe0100 <= (c = s.codePointAt(0)) && c <= 0xe01ef) {
- return true;
- }
- // TODO: Fix ICU ticket #8923
- if((flags & FROM_RULES) != 0 && 0xac00 <= (c = s.charAt(0)) && c <= 0xd7a3) {
- return true;
- }
- // TODO: Fix tailoring builder, ICU ticket #9593.
- if((flags & FROM_RULES) != 0 && s.length() >= 2 && ((c = s.charAt(1)) == 0xedc || c == 0xedd)) {
- return true;
- }
+ // Add temporary exceptions here if there are ICU bugs, until we can fix them.
+ // For examples see the ICU 52 version of this file.
return false;
}
skipFlags |= FROM_RULES;
}
+ logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
+ boolean withSortKeys = getProperty("ucaconfnosortkeys") == null;
+
int lineNo = 0;
String line = null, oldLine = null, buffer = null, oldB = null;
- CollationKey oldSk = null, newSk = null;
+ RawCollationKey sk1 = new RawCollationKey(), sk2 = new RawCollationKey();
+ RawCollationKey oldSk = null, newSk = sk1;
try {
while ((line = in.readLine()) != null) {
continue;
}
- newSk = coll.getCollationKey(buffer);
+ if(withSortKeys) {
+ coll.getRawCollationKey(buffer, newSk);
+ }
if(oldSk != null) {
- int skres = oldSk.compareTo(newSk);
+ boolean ok = true;
+ int skres = withSortKeys ? oldSk.compareTo(newSk) : 0;
int cmpres = coll.compare(oldB, buffer);
int cmpres2 = coll.compare(buffer, oldB);
if(cmpres != -cmpres2) {
- errln("Compare result not symmetrical on line "+lineNo);
+ errln(String.format(
+ "Compare result not symmetrical on line %d: " +
+ "previous vs. current (%d) / current vs. previous (%d)",
+ lineNo, cmpres, cmpres2));
+ ok = false;
}
- if(normalizeResult(cmpres) != normalizeResult(skres)) {
+
+ // TODO: Compare with normalization turned off if the input passes the FCD test.
+
+ if(withSortKeys && cmpres != normalizeResult(skres)) {
errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres + ") on line " + lineNo);
- errln(oldLine);
- errln(line);
+ ok = false;
}
int res = cmpres;
}
if(res > 0) {
errln("Line " + lineNo + " is not greater or equal than previous line");
- errln(oldLine);
- errln(line);
+ ok = false;
+ }
+
+ if(!ok) {
+ errln(" Previous data line " + oldLine);
+ errln(" Current data line " + line);
+ if(withSortKeys) {
+ errln(" Previous key: " + CollationTest.prettify(oldSk));
+ errln(" Current key: " + CollationTest.prettify(newSk));
+ }
}
}
oldSk = newSk;
oldB = buffer;
oldLine = line;
+ if(oldSk == sk1) {
+ newSk = sk2;
+ } else {
+ newSk = sk1;
+ }
}
} catch (Exception e) {
errln("Unexpected exception "+e);
+ } finally {
+ try {
+ in.close();
+ } catch (IOException ignored) {
+ }
+ in = null;
}
}
}
/*
*******************************************************************************
- * Copyright (C) 2000-2013, International Business Machines Corporation and *
+ * Copyright (C) 2000-2014, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.test.search;
+import static com.ibm.icu.text.Collator.IDENTICAL;
+import static com.ibm.icu.text.Collator.PRIMARY;
+import static com.ibm.icu.text.Collator.QUATERNARY;
+import static com.ibm.icu.text.Collator.SECONDARY;
+import static com.ibm.icu.text.Collator.TERTIARY;
+import static com.ibm.icu.text.SearchIterator.ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD;
+import static com.ibm.icu.text.SearchIterator.ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD;
+import static com.ibm.icu.text.SearchIterator.ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
+
import java.text.StringCharacterIterator;
import java.util.Locale;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.SearchIterator;
+import com.ibm.icu.text.SearchIterator.ElementComparisonType;
import com.ibm.icu.text.StringSearch;
import com.ibm.icu.util.ULocale;
//inner class
static class SearchData {
- SearchData(String text, String pattern, String coll, int strength, String breaker,
- int[] offset, int[] size) {
+ SearchData(String text, String pattern,
+ String coll, int strength, ElementComparisonType cmpType, String breaker,
+ int[] offset, int[] size) {
this.text = text;
this.pattern = pattern;
this.collator = coll;
this.strength = strength;
+ this.cmpType = cmpType;
this.breaker = breaker;
this.offset = offset;
this.size = size;
String pattern;
String collator;
int strength;
+ ElementComparisonType cmpType;
String breaker;
int[] offset;
int[] size;
BreakIterator m_en_wordbreaker_;
BreakIterator m_en_characterbreaker_;
+ // Just calling SearchData constructor, to make the test data source code
+ // nice and short
+ private static SearchData SD(String text, String pattern, String coll, int strength,
+ ElementComparisonType cmpType, String breaker, int[] offset, int[] size) {
+ return new SearchData(text, pattern, coll, strength, cmpType, breaker, offset, size);
+ }
+
+ // Just returning int[], to make the test data nice and short
+ private static int[] IA(int... elements) {
+ return elements;
+ }
+
static SearchData[] BASIC = {
- new SearchData("xxxxxxxxxxxxxxxxxxxx", "fisher", null, Collator.TERTIARY, null, new int[] {-1}, new int[]{0}),
- new SearchData("silly spring string", "string", null, Collator.TERTIARY, null, new int[]{13, -1}, new int[]{6}),
- new SearchData("silly spring string string", "string", null, Collator.TERTIARY, null, new int[]{13, 20, -1}, new int[]{6, 6}),
- new SearchData("silly string spring string", "string", null, Collator.TERTIARY, null, new int[]{6, 20, -1}, new int[]{6, 6}),
- new SearchData("string spring string", "string", null, Collator.TERTIARY, null, new int[]{0, 14, -1}, new int[]{6, 6}),
- new SearchData("Scott Ganyo", "c", null, Collator.TERTIARY, null, new int[]{1, -1}, new int[]{1}),
- new SearchData("Scott Ganyo", " ", null, Collator.TERTIARY, null, new int[]{5, -1}, new int[]{1}),
- new SearchData("\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int[]{-1}, new int[]{0}),
- new SearchData("a\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int[]{-1}, new int[]{0}),
- new SearchData("a\u0300\u0325", "\u0300\u0325", null, Collator.TERTIARY, null, new int[]{1, -1}, new int[]{2}),
- new SearchData("a\u0300b", "\u0300", null, Collator.TERTIARY, null, new int[]{1, -1}, new int[]{1}),
- new SearchData("\u00c9", "e", null, Collator.PRIMARY, null, new int[]{0, -1}, new int[]{1}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[]{-1}, new int[]{0})
+ SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)),
+ SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)),
+ SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)),
+ SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)),
+ SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
+ SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)),
+ SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u00c9", "e", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
};
SearchData BREAKITERATOREXACT[] = {
- new SearchData("foxy fox", "fox", null, Collator.TERTIARY, "characterbreaker", new int[] {0, 5, -1}, new int[] {3, 3}),
- new SearchData("foxy fox", "fox", null, Collator.TERTIARY, "wordbreaker", new int[] {5, -1}, new int[] {3}),
- new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, "characterbreaker", new int[] {10, 14, -1}, new int[] {3, 2}),
- new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, "wordbreaker", new int[] {10, -1}, new int[] {3}),
- new SearchData("Channel, another channel, more channels, and one last Channel", "Channel", "es", Collator.TERTIARY,
- "wordbreaker", new int[] {0, 54, -1}, new int[] {7, 7}),
+ SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)),
+ SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)),
+ SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)),
+ SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)),
+ SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)),
/* jitterbug 1745 */
- new SearchData("testing that \u00e9 does not match e", "e", null, Collator.TERTIARY,
- "characterbreaker", new int[] {1, 17, 30, -1}, new int[] {1, 1, 1}),
- new SearchData("testing that string ab\u00e9cd does not match e", "e", null, Collator.TERTIARY,
- "characterbreaker", new int[] {1, 28, 41, -1}, new int[] {1, 1, 1}),
- new SearchData("\u00c9", "e", "fr", Collator.PRIMARY, "characterbreaker", new int[]{0, -1}, new int[]{1}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)),
+ SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)),
+ SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)),
};
SearchData BREAKITERATORCANONICAL[] = {
- new SearchData("foxy fox", "fox", null, Collator.TERTIARY, "characterbreaker", new int[] {0, 5, -1}, new int[] {3, 3}),
- new SearchData("foxy fox", "fox", null, Collator.TERTIARY, "wordbreaker", new int[] {5, -1}, new int[] {3}),
- new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, "characterbreaker", new int[] {10, 14, -1}, new int[] {3, 2}),
- new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, "wordbreaker", new int[] {10, -1}, new int[] {3}),
- new SearchData("Channel, another channel, more channels, and one last Channel", "Channel", "es", Collator.TERTIARY, "wordbreaker",
- new int[] {0, 54, -1}, new int[] {7, 7}),
+ SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)),
+ SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)),
+ SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)),
+ SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)),
+ SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)),
/* jitterbug 1745 */
- new SearchData("testing that \u00e9 does not match e", "e", null, Collator.TERTIARY,
- "characterbreaker", new int[] {1, 17, 30, -1}, new int[] {1, 1, 1}),
- new SearchData("testing that string ab\u00e9cd does not match e", "e", null,
- Collator.TERTIARY, "characterbreaker", new int[] {1, 28, 41, -1}, new int[] {1, 1, 1}),
- new SearchData("\u00c9", "e", "fr", Collator.PRIMARY, "characterbreaker", new int[]{0, -1}, new int[]{1}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)),
+ SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)),
+ SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)),
};
SearchData BASICCANONICAL[] = {
- new SearchData("xxxxxxxxxxxxxxxxxxxx", "fisher", null, Collator.TERTIARY, null, new int[] {-1}, new int [] {0}),
- new SearchData("silly spring string", "string", null, Collator.TERTIARY, null, new int[] {13, -1}, new int[] {6}),
- new SearchData("silly spring string string", "string", null, Collator.TERTIARY, null, new int[] {13, 20, -1}, new int[] {6, 6}),
- new SearchData("silly string spring string", "string", null, Collator.TERTIARY, null, new int[] {6, 20, -1}, new int[] {6, 6}),
- new SearchData("string spring string", "string", null, Collator.TERTIARY, null, new int[] {0, 14, -1}, new int[] {6, 6}),
- new SearchData("Scott Ganyo", "c", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {1}),
- new SearchData("Scott Ganyo", " ", null, Collator.TERTIARY, null, new int[] {5, -1}, new int[] {1}),
- new SearchData("\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int [] {0, -1}, new int[] {2}),
- new SearchData("a\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int [] {1, -1}, new int[] {2}),
- new SearchData("a\u0300\u0325", "\u0300\u0325", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[]{2}),
- new SearchData("a\u0300b", "\u0300", null, Collator.TERTIARY, null, new int[]{1, -1}, new int[] {1}),
- new SearchData("a\u0300\u0325b", "\u0300b", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {3}),
- new SearchData("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {5}),
- new SearchData("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {5}),
- new SearchData("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, Collator.TERTIARY, null,
- new int[] {1, 12, -1}, new int[] {5, 3}),
- new SearchData("\u00c4\u0323", "A\u0323\u0308", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData("\u0308\u0323", "\u0323\u0308", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)),
+ SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)),
+ SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)),
+ SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)),
+ SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
+ SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)),
+
+ SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325b", "\u0300b", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u00c4\u0323", "A\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
+ SD("\u0308\u0323", "\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
};
SearchData COLLATOR[] = {
/* english */
- new SearchData("fox fpx", "fox", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {3}),
+ SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
/* tailored */
- new SearchData("fox fpx", "fox", null, Collator.PRIMARY, null, new int[] {0, 4, -1}, new int[] {3, 3}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)),
};
String TESTCOLLATORRULE = "& o,O ; p,P";
String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; \u00fc & UE ; \u00dc";
-
SearchData COLLATORCANONICAL[] = {
/* english */
- new SearchData("fox fpx", "fox", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {3}),
+ SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
/* tailored */
- new SearchData("fox fpx", "fox", null, Collator.PRIMARY, null, new int[] {0, 4, -1}, new int[] {3, 3}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)),
};
SearchData COMPOSITEBOUNDARIES[] = {
- new SearchData("\u00C0", "A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("A\u00C0C", "A", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
- new SearchData("\u00C0A", "A", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
- new SearchData("B\u00C0", "A", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {1}),
- new SearchData("\u00C0B", "A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u00C0", "\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u0300\u00C0", "\u0300", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
- new SearchData("\u00C0\u0300", "\u0300", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
+ SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+ SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
+ SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ /* first one matches only because it's at the start of the text */
+ SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+
+ /* \\u0300 blocked by \\u0300 */
+ SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
/* A + 030A + 0301 */
- new SearchData("\u01FA", "\u01FA", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u01FA", "\u030A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "A\u030A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "\u030AA", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "\u0301", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "A\u0301", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "\u0301A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "\u030A\u0301", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("A\u01FA", "A\u030A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FAA", "\u0301A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u0F73", "\u0F73", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u0F73", "\u0F71", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u0F73", "\u0F72", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u0F73", "\u0F71\u0F72", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("A\u0F73", "A\u0F71", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u0F73A", "\u0F72A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+ SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+
+ SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ /* blocked accent */
+ SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+
+ SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+
+ SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)),
};
SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
- new SearchData("\u00C0", "A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("A\u00C0C", "A", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
- new SearchData("\u00C0A", "A", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
- new SearchData("B\u00C0", "A", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {1}),
- new SearchData("\u00C0B", "A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u00C0", "\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u0300\u00C0", "\u0300", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
+ SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+ SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
+ SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ /* first one matches only because it's at the start of the text */
+ SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+
/* \u0300 blocked by \u0300 */
- new SearchData("\u00C0\u0300", "\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
+ SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
/* A + 030A + 0301 */
- new SearchData("\u01FA", "\u01FA", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u01FA", "\u030A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u01FA", "A\u030A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u01FA", "\u030AA", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "\u0301", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
+ SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+ SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+
+ SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
/* blocked accent */
- new SearchData("\u01FA", "A\u0301", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "\u0301A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("\u01FA", "\u030A\u0301", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("A\u01FA", "A\u030A", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {1}),
- new SearchData("\u01FAA", "\u0301A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData("\u0F73", "\u0F73", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u0F73", "\u0F71", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u0F73", "\u0F72", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("\u0F73", "\u0F71\u0F72", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
- new SearchData("A\u0F73", "A\u0F71", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData("\u0F73A", "\u0F72A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A",
- null, Collator.TERTIARY, null, new int[] {0, 6, 10, 13, -1}, new int[] {1, 3, 2, 1}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+
+ SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
+
+ SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)),
};
SearchData SUPPLEMENTARY[] = {
- /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
- new SearchData("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
- "\uD800\uDC00", null, Collator.TERTIARY, null,
- new int[] {4, 13, 22, 26, 29, -1}, new int[] {2, 2, 2, 2, 2}),
- new SearchData("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null,
- Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {2}),
- new SearchData("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ",
- null, Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {4}),
- new SearchData("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-",
- null, Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {4}),
- new SearchData("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,",
- null, Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {4}),
- new SearchData("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?",
- null, Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {4}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
+ "\uD800\uDC00", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)),
+ SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)),
+ SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
+ SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
+ SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
+ SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
};
String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315";
SearchData CONTRACTION[] = {
/* common discontiguous */
- new SearchData("A\u0300\u0315", "\u0300", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("A\u0300\u0315", "\u0300\u0315", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
+ SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
/* contraction prefix */
- new SearchData("AB\u0315C", "A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("AB\u0315C", "AB", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData("AB\u0315C", "\u0315", null, Collator.TERTIARY, null, new int[] {2, -1}, new int[] {1}),
- /* discontiguous problem here for backwards iteration.
- accents not found because discontiguous stores all information */
- new SearchData("X\u0300\u0319\u0315", "\u0319", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- /* ends not with a contraction character */
- new SearchData("X\u0315\u0300D", "\u0300\u0315", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("X\u0315\u0300D", "X\u0300\u0315", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {3}),
- new SearchData("X\u0300\u031A\u0315D", "X\u0300", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
+ SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ /*
+ * discontiguous problem here for backwards iteration. accents not found because discontiguous stores all
+ * information
+ */
+ SD("X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ /* ends not with a contraction character */
+ SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
+ SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
/* blocked discontiguous */
- new SearchData("X\u0300\u031A\u0315D", "\u031A\u0315D", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("ab", "z", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ /*
+ * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the
+ * match fails because it ends in the middle of an expansion...
+ */
+ SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
};
SearchData CONTRACTIONCANONICAL[] = {
/* common discontiguous */
- new SearchData("A\u0300\u0315", "\u0300", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
- new SearchData("A\u0300\u0315", "\u0300\u0315", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
+ SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
/* contraction prefix */
- new SearchData("AB\u0315C", "A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("AB\u0315C", "AB", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData("AB\u0315C", "\u0315", null, Collator.TERTIARY, null, new int[] {2, -1}, new int[] {1}),
- /* discontiguous problem here for backwards iteration.
- forwards gives 0, 4 but backwards give 1, 3 */
- /* {"X\u0300\u0319\u0315", "\u0319", null, Collator.TERTIARY, null, {0, -1},
- {4}}, */
-
- /* ends not with a contraction character */
- new SearchData("X\u0315\u0300D", "\u0300\u0315", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData("X\u0315\u0300D", "X\u0300\u0315", null, Collator.TERTIARY, null,
- new int[] {0, -1}, new int[] {3}),
- new SearchData("X\u0300\u031A\u0315D", "X\u0300", null, Collator.TERTIARY, null,
- new int[] {0, -1}, new int[] {4}),
+ SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ /*
+ * discontiguous problem here for backwards iteration. forwards gives 0, 4 but backwards give 1, 3
+ */
+ /*
+         * {"X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, {0, -1}, {4}},
+ */
+
+ /* ends not with a contraction character */
+ SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
+
+ SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
/* blocked discontiguous */
- new SearchData("X\u0300\u031A\u0315D", "\u031A\u0315D", null, Collator.TERTIARY, null,
- new int[] {1, -1}, new int[] {4}),
- new SearchData("ab", "z", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+
+ /*
+ * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the
+ * match fails because it ends in the middle of an expansion...
+ */
+ SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(2)),
};
SearchData MATCH[] = {
- new SearchData("a busy bee is a very busy beeee", "bee", null, Collator.TERTIARY, null,
- new int[] {7, 26, -1}, new int[] {3, 3}),
- /* 012345678901234567890123456789012345678901234567890 */
- new SearchData("a busy bee is a very busy beeee with no bee life", "bee", null,
- Collator.TERTIARY, null, new int[] {7, 26, 40, -1}, new int[] {3, 3, 3}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("a busy bee is a very busy beeee", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, -1), IA(3, 3)),
+ /* 012345678901234567890123456789012345678901234567890 */
+ SD("a busy bee is a very busy beeee with no bee life", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, 40, -1), IA(3, 3, 3)),
};
String IGNORABLERULE = "&a = \u0300";
SearchData IGNORABLE[] = {
- new SearchData("\u0300\u0315 \u0300\u0315 ", "\u0300", null, Collator.PRIMARY, null,
- new int[] {0, 3, -1}, new int[] {2, 2}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ /*
+ * This isn't much of a test when matches have to be on grapheme boundiaries. The match at 0 only works because it's
+ * at the start of the text.
+ */
+ SD("\u0300\u0315 \u0300\u0315 ", "\u0300", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
};
-
+
SearchData DIACTRICMATCH[] = {
- new SearchData("\u0061\u0061\u00E1", "\u0061\u00E1", null, Collator.SECONDARY, null,
- new int[] {1, -1}, new int[] {2}),
- new SearchData("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020",
- "\u00C2\u0303", null, Collator.PRIMARY, null, new int[] {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, new int[] {2, 1, 1, 1, 3, 2, 1, 3, 2}),
- new SearchData("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, Collator.PRIMARY, null,
- new int[] {0, 5, -1}, new int[] {4, 3}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("\u0061\u0061\u00E1", "\u0061\u00E1", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(2)),
+ SD("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020", "\u00C2\u0303",
+ null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 4, 5, 6, 7, 10, 12, 13, 16, -1), IA(2, 1, 1, 1, 3, 2, 1, 3, 2)),
+ SD("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 5, -1), IA(4, 3)),
};
SearchData NORMCANONICAL[] = {
- new SearchData("\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData("\u0300\u0325", "\u0325", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
- new SearchData("a\u0300\u0325", "\u0325\u0300", null, Collator.TERTIARY, null, new int[] {1, -1},
- new int[] {2}),
- new SearchData("a\u0300\u0325", "\u0300\u0325", null, Collator.TERTIARY, null, new int[] {1, -1},
- new int[] {2}),
- new SearchData("a\u0300\u0325", "\u0325", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
- new SearchData("a\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
};
SearchData NORMEXACT[] = {
- new SearchData("a\u0300\u0325", "\u0325\u0300", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("a\u0300\u0325", "a\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
};
SearchData NONNORMEXACT[] = {
- new SearchData("a\u0300\u0325", "\u0325\u0300", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
};
SearchData OVERLAP[] = {
- new SearchData("abababab", "abab", null, Collator.TERTIARY, null, new int[] {0, 2, 4, -1}, new int[] {4, 4, 4}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)),
};
SearchData NONOVERLAP[] = {
- new SearchData("abababab", "abab", null, Collator.TERTIARY, null, new int[] {0, 4, -1}, new int[] {4, 4}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)),
};
SearchData OVERLAPCANONICAL[] = {
- new SearchData("abababab", "abab", null, Collator.TERTIARY, null, new int[] {0, 2, 4, -1},
- new int[] {4, 4, 4}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)),
};
SearchData NONOVERLAPCANONICAL[] = {
- new SearchData("abababab", "abab", null, Collator.TERTIARY, null, new int[] {0, 4, -1}, new int[] {4, 4}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)),
};
SearchData PATTERNCANONICAL[] = {
- new SearchData("The quick brown fox jumps over the lazy foxes", "the", null,
- Collator.PRIMARY, null, new int[] {0, 31, -1}, new int[] {3, 3}),
- new SearchData("The quick brown fox jumps over the lazy foxes", "fox", null,
- Collator.PRIMARY, null, new int[] {16, 40, -1}, new int[] {3, 3}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)),
+ SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
};
SearchData PATTERN[] = {
- new SearchData("The quick brown fox jumps over the lazy foxes", "the", null,
- Collator.PRIMARY, null, new int[] {0, 31, -1}, new int[] {3, 3}),
- new SearchData("The quick brown fox jumps over the lazy foxes", "fox", null,
- Collator.PRIMARY, null, new int[] {16, 40, -1}, new int[] {3, 3}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)),
+ SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
};
+ String PECHE_WITH_ACCENTS = "un p\u00E9ch\u00E9, "
+ + "\u00E7a p\u00E8che par, "
+ + "p\u00E9cher, "
+ + "une p\u00EAche, "
+ + "un p\u00EAcher, "
+ + "j\u2019ai p\u00EAch\u00E9, "
+ + "un p\u00E9cheur, "
+ + "\u201Cp\u00E9che\u201D, "
+ + "decomp peche\u0301, "
+ + "base peche";
+ // in the above, the interesting words and their offsets are:
+ // 3 pe<301>che<301>
+ // 13 pe<300>che
+ // 24 pe<301>cher
+ // 36 pe<302>che
+ // 46 pe<302>cher
+ // 59 pe<302>che<301>
+ // 69 pe<301>cheur
+ // 79 pe<301>che
+ // 94 peche<+301>
+ // 107 peche
+
SearchData STRENGTH[] = {
- /*012345678901234567890123456789012345678901234567890123456789*/
- new SearchData("The quick brown fox jumps over the lazy foxes", "fox", "en",
- Collator.PRIMARY, null, new int[] {16, 40, -1}, new int[] {3, 3}),
- new SearchData("The quick brown fox jumps over the lazy foxes", "fox", "en",
- Collator.PRIMARY, "wordbreaker", new int[] {16, -1}, new int[] {3}),
- new SearchData("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
- "peche", "fr", Collator.PRIMARY, null, new int[] {15, 21, 27, 34, -1}, new int[] {5, 5, 5, 5}),
- new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, null,
- new int[] {10, 14, -1}, new int[] {3, 2}),
- new SearchData("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es",
- Collator.PRIMARY, null, new int[] {2, 19, 33, 56, -1}, new int[] {7, 7, 7, 7}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ /* 012345678901234567890123456789012345678901234567890123456789 */
+ SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
+ SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)),
+ SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
+ "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)),
+ SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)),
+ SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)),
+ SD("\u00c0 should match but not A", "A\u0300", "en", IDENTICAL, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1, 0)),
+
+ /* some tests for modified element comparison, ticket #7093 */
+ SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)),
+ SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
+ SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
+ SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
+ SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
+ SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
+
+ /* more tests for modified element comparison (with fr), ticket #7093 */
+ SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)),
+ SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
+ SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
+ SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
+ SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
+ SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
+ SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
+
};
SearchData STRENGTHCANONICAL[] = {
- /*012345678901234567890123456789012345678901234567890123456789 */
- new SearchData("The quick brown fox jumps over the lazy foxes", "fox", "en",
- Collator.PRIMARY, null, new int[] {16, 40, -1}, new int[] {3, 3}),
- new SearchData("The quick brown fox jumps over the lazy foxes", "fox", "en",
- Collator.PRIMARY, "wordbreaker", new int[] {16, -1}, new int[] {3}),
- new SearchData("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
- "peche", "fr", Collator.PRIMARY, null, new int[] {15, 21, 27, 34, -1}, new int[] {5, 5, 5, 5}),
- new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, null,
- new int[] {10, 14, -1}, new int[] {3, 2}),
- new SearchData("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es",
- Collator.PRIMARY, null, new int[]{2, 19, 33, 56, -1}, new int[] {7, 7, 7, 7}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[]{0})
+ /* 012345678901234567890123456789012345678901234567890123456789 */
+ SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
+ SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)),
+ SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
+ "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)),
+ SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)),
+ SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)),
};
SearchData SUPPLEMENTARYCANONICAL[] = {
- /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
- new SearchData("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
- "\uD800\uDC00", null, Collator.TERTIARY, null, new int[] {4, 13, 22, 26, 29, -1},
- new int[] {2, 2, 2, 2, 2}),
- new SearchData("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null,
- Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {2}),
- new SearchData("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ",
- null, Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {4}),
- new SearchData("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-",
- null, Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {4}),
- new SearchData("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,",
- null, Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {4}),
- new SearchData("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?",
- null, Collator.TERTIARY, null, new int[] {3, -1},
- new int[] {4}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
+ SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", "\uD800\uDC00",
+ null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)),
+ SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)),
+ SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
+ SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
+ SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
+ SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
};
static SearchData VARIABLE[] = {
- /*012345678901234567890123456789012345678901234567890123456789*/
- new SearchData("blackbirds black blackbirds blackbird black-bird", "blackbird", null, Collator.TERTIARY, null,
- new int[] {0, 17, 28, 38, -1}, new int[] {9, 9, 9, 10}),
-
- /* to see that it doesn't go into an infinite loop if the start of text
- is a ignorable character */
- new SearchData(" on", "go", null, Collator.TERTIARY, null,
- new int[] {-1}, new int[]{0}),
- new SearchData("abcdefghijklmnopqrstuvwxyz", " ", null, Collator.PRIMARY, null,
- new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1},
- new int[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+ /* 012345678901234567890123456789012345678901234567890123456789 */
+ SD("blackbirds black blackbirds blackbird black-bird", "blackbird", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 17, 28, 38, -1), IA(9, 9, 9, 10)),
+
+ /*
+ * to see that it doesn't go into an infinite loop if the start of text is a ignorable character
+ */
+ SD(" on", "go", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
+ SD("abcdefghijklmnopqrstuvwxyz", " ",
+ null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null,
+ IA(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1),
+ IA(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
/* testing tightest match */
- new SearchData(" abc a bc ab c a bc ab c", "abc", null, Collator.QUATERNARY, null,
- new int[]{1, -1}, new int[] {3}),
- /*012345678901234567890123456789012345678901234567890123456789 */
- new SearchData(" abc a bc ab c a bc ab c", "abc", null, Collator.SECONDARY, null,
- new int[] {1, 6, 13, 21, 31, -1}, new int[] {3, 4, 4, 5, 5}),
+ SD(" abc a bc ab c a bc ab c", "abc", null, QUATERNARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(3)),
+ /* 012345678901234567890123456789012345678901234567890123456789 */
+ SD(" abc a bc ab c a bc ab c", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 6, 13, 21, 31, -1), IA(3, 4, 4, 5, 5)),
/* totally ignorable text */
- new SearchData(" ---------------", "abc", null, Collator.SECONDARY, null,
- new int[] {-1}, new int[] {0}),
- new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
+ SD(" ---------------", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
};
static SearchData TEXTCANONICAL[] = {
- new SearchData("the foxy brown fox", "fox", null, Collator.TERTIARY, null,
- new int[] {4, 15, -1}, new int[] {3, 3}),
- new SearchData("the quick brown fox", "fox", null, Collator.TERTIARY, null,
- new int[] {16, -1}, new int[]{3}),
- new SearchData(null, null, null, Collator.TERTIARY,null, new int[] {-1}, new int[] {0})
+ SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)),
+ SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)),
};
/**
breaker.setText(text);
}
collator.setStrength(search.strength);
+ collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
try {
strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
+ strsrch.setElementComparisonType(search.cmpType);
strsrch.setCanonical(true);
} catch (Exception e) {
errln("Error opening string search" + e.getMessage());
}
if (!assertEqualWithStringSearch(strsrch, search)) {
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
+ collator.setDecomposition(Collator.NO_DECOMPOSITION);
return false;
}
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
+ collator.setDecomposition(Collator.NO_DECOMPOSITION);
return true;
}
collator.setStrength(search.strength);
try {
strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
+ strsrch.setElementComparisonType(search.cmpType);
} catch (Exception e) {
errln("Error opening string search " + e.getMessage());
return false;
}
if (!assertEqualWithStringSearch(strsrch, search)) {
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
return false;
}
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
return true;
}
strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
strsrch.setCanonical(canonical);
strsrch.setOverlapping(overlap);
+ strsrch.setElementComparisonType(search.cmpType);
} catch (Exception e) {
errln("Error opening string search " + e.getMessage());
return false;
}
if (!assertEqualWithStringSearch(strsrch, search)) {
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
return false;
}
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
return true;
}
}
public void TestBasic() {
- int count = 0;
- while (BASIC[count].text != null) {
+ for (int count = 0; count < BASIC.length; count++) {
if (!assertEqual(BASIC[count])) {
errln("Error at test number " + count);
}
- count ++;
}
}
errln("Error setting break iterator");
}
if (!assertEqualWithStringSearch(strsrch, search)) {
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
}
search = BREAKITERATOREXACT[count + 1];
breaker = getBreakIterator(search.breaker);
}
count += 2;
}
- count = 0;
- while (BREAKITERATOREXACT[count].text != null) {
+ for (count = 0; count < BREAKITERATOREXACT.length; count++) {
if (!assertEqual(BREAKITERATOREXACT[count])) {
errln("Error at test number " + count);
}
- count++;
}
}
return;
}
if (!assertEqualWithStringSearch(strsrch, search)) {
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
return;
}
search = BREAKITERATOREXACT[count + 1];
}
count += 2;
}
- count = 0;
- while (BREAKITERATORCANONICAL[count].text != null) {
+
+ for (count = 0; count < BREAKITERATORCANONICAL.length; count++) {
if (!assertEqual(BREAKITERATORCANONICAL[count])) {
errln("Error at test number " + count);
return;
}
- count++;
}
}
public void TestCanonical() {
- int count = 0;
- while (BASICCANONICAL[count].text != null) {
+ for (int count = 0; count < BASICCANONICAL.length; count++) {
if (!assertCanonicalEqual(BASICCANONICAL[count])) {
errln("Error at test number " + count);
}
- count ++;
}
}
}
public void TestCompositeBoundaries() {
- int count = 0;
- while (COMPOSITEBOUNDARIES[count].text != null) {
+ for (int count = 0; count < COMPOSITEBOUNDARIES.length; count++) {
// logln("composite " + count);
if (!assertEqual(COMPOSITEBOUNDARIES[count])) {
errln("Error at test number " + count);
}
- count++;
}
}
public void TestCompositeBoundariesCanonical() {
- int count = 0;
- while (COMPOSITEBOUNDARIESCANONICAL[count].text != null) {
+ for (int count = 0; count < COMPOSITEBOUNDARIESCANONICAL.length; count++) {
// logln("composite " + count);
if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) {
errln("Error at test number " + count);
}
- count++;
}
}
RuleBasedCollator collator = null;
try {
collator = new RuleBasedCollator(rules);
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
} catch (Exception e) {
errln("Error opening collator ");
errln("Error opening string search ");
}
- int count = 0;
- while (CONTRACTION[count].text != null) {
+ for (int count = 0; count< CONTRACTION.length; count++) {
text = CONTRACTION[count].text;
pattern = CONTRACTION[count].pattern;
strsrch.setTarget(new StringCharacterIterator(text));
if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) {
errln("Error at test number " + count);
}
- count++;
}
}
RuleBasedCollator collator = null;
try {
collator = new RuleBasedCollator(rules);
- collator.setStrength(Collator.TERTIARY);
+ collator.setStrength(TERTIARY);
collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
} catch (Exception e) {
errln("Error opening collator ");
errln("Error opening string search");
}
- int count = 0;
- while (CONTRACTIONCANONICAL[count].text != null) {
+ for (int count = 0; count < CONTRACTIONCANONICAL.length; count++) {
text = CONTRACTIONCANONICAL[count].text;
pattern = CONTRACTIONCANONICAL[count].pattern;
strsrch.setTarget(new StringCharacterIterator(text));
if (!assertEqualWithStringSearch(strsrch, CONTRACTIONCANONICAL[count])) {
errln("Error at test number " + count);
}
- count++;
}
}
errln("Error setting canonical match false");
}
+ if (strsrch.getElementComparisonType() != STANDARD_ELEMENT_COMPARISON) {
+ errln("Error default element comparison type should be STANDARD_ELEMENT_COMPARISON");
+ }
+ strsrch.setElementComparisonType(ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD);
+ if (strsrch.getElementComparisonType() != ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD) {
+ errln("Error setting element comparison type PATTERN_BASE_WEIGHT_IS_WILDCARD");
+ }
}
public void TestGetSetOffset() {
logln("PASS: strsrch.setIndex(128) failed as expected");
}
- int index = 0;
- while (BASIC[index].text != null) {
- SearchData search = BASIC[index ++];
+ for (int index = 0; index < BASIC.length; index++) {
+ SearchData search = BASIC[index];
text =search.text;
pattern = search.pattern;
return;
}
}
- strsrch.getCollator().setStrength(Collator.TERTIARY);
+ strsrch.getCollator().setStrength(TERTIARY);
}
public void TestGetSetOffsetCanonical() {
return;
}
strsrch.setCanonical(true);
+ //TODO: setCanonical is not sufficient for canonical match. See #10725
+ strsrch.getCollator().setDecomposition(Collator.CANONICAL_DECOMPOSITION);
/* testing out of bounds error */
try {
strsrch.setIndex(-1);
logln("PASS: strsrch.setIndex(128) failed as expected");
}
- int index = 0;
- while (BASICCANONICAL[index].text != null) {
- SearchData search = BASICCANONICAL[index ++];
- if (BASICCANONICAL[index].text == null) {
- // skip the last one
- break;
- }
-
+ for (int index = 0; index < BASICCANONICAL.length; index++) {
+ SearchData search = BASICCANONICAL[index];
text = search.text;
pattern = search.pattern;
strsrch.setTarget(new StringCharacterIterator(text));
return;
}
}
- strsrch.getCollator().setStrength(Collator.TERTIARY);
+ strsrch.getCollator().setStrength(TERTIARY);
+ strsrch.getCollator().setDecomposition(Collator.NO_DECOMPOSITION);
}
public void TestIgnorable() {
return;
}
- while (IGNORABLE[count].text != null) {
+ for (; count < IGNORABLE.length; count++) {
text = IGNORABLE[count].text;
pattern = IGNORABLE[count].pattern;
strsrch.setTarget(new StringCharacterIterator(text));
if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) {
errln("Error at test number " + count);
}
- count++;
}
}
public void TestNormCanonical() {
m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
- int count = 0;
- while (NORMCANONICAL[count].text != null) {
+ for (int count = 0; count < NORMCANONICAL.length; count++) {
if (!assertCanonicalEqual(NORMCANONICAL[count])) {
errln("Error at test number " + count);
}
- count++;
}
m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
}
public void TestNormExact() {
- int count = 0;
+ int count;
+
m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
- while (BASIC[count].text != null) {
+ for (count = 0; count < BASIC.length; count++) {
if (!assertEqual(BASIC[count])) {
errln("Error at test number " + count);
}
- count++;
}
- count = 0;
- while (NORMEXACT[count].text != null) {
+ for (count = 0; count < NORMEXACT.length; count++) {
if (!assertEqual(NORMEXACT[count])) {
errln("Error at test number " + count);
}
- count++;
}
m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
- count = 0;
- while (NONNORMEXACT[count].text != null) {
+ for (count = 0; count < NONNORMEXACT.length; count++) {
if (!assertEqual(NONNORMEXACT[count])) {
errln("Error at test number " + count);
}
- count++;
}
}
}
public void TestOverlap() {
- int count = 0;
- while (OVERLAP[count].text != null) {
+ int count;
+
+ for (count = 0; count < OVERLAP.length; count++) {
if (!assertEqualWithAttribute(OVERLAP[count], false, true)) {
errln("Error at overlap test number " + count);
}
- count++;
}
- count = 0;
- while (NONOVERLAP[count].text != null) {
+
+ for (count = 0; count < NONOVERLAP.length; count++) {
if (!assertEqual(NONOVERLAP[count])) {
errln("Error at non overlap test number " + count);
}
- count++;
}
- count = 0;
- while (count < 1) {
+ for (count = 0; count < OVERLAP.length && count < NONOVERLAP.length; count++) {
SearchData search = (OVERLAP[count]);
String text = search.text;
String pattern = search.pattern;
if (!assertEqualWithStringSearch(strsrch, search)) {
errln("Error at test number " + count);
}
- count ++;
}
}
public void TestOverlapCanonical() {
- int count = 0;
- while (OVERLAPCANONICAL[count].text != null) {
- if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true,
- true)) {
+ int count;
+
+ for (count = 0; count < OVERLAPCANONICAL.length; count++) {
+ if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true, true)) {
errln("Error at overlap test number %d" + count);
}
- count ++;
}
- count = 0;
- while (NONOVERLAP[count].text != null) {
+
+ for (count = 0; count < NONOVERLAP.length; count++) {
if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) {
errln("Error at non overlap test number %d" + count);
}
- count ++;
}
- count = 0;
- while (count < 1) {
- /* UChar temp[128];
- const SearchData *search = &(OVERLAPCANONICAL[count]);
- UErrorCode status = U_ZERO_ERROR;*/
+ for (count = 0; count < OVERLAPCANONICAL.length && count < NONOVERLAPCANONICAL.length; count++) {
SearchData search = OVERLAPCANONICAL[count];
-
- /*u_unescape(search.text, temp, 128);
- UnicodeString text;
- text.setTo(temp, u_strlen(temp));
- u_unescape(search.pattern, temp, 128);
- UnicodeString pattern;
- pattern.setTo(temp, u_strlen(temp));*/
RuleBasedCollator collator = getCollator(search.collator);
StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), collator, null);
strsrch.setCanonical(true);
strsrch = null;
errln("Error at test number %d" + count);
}
-
- count ++;
- strsrch = null;
}
}
m_en_us_.setStrength(PATTERN[0].strength);
StringSearch strsrch = new StringSearch(PATTERN[0].pattern, new StringCharacterIterator(PATTERN[0].text), m_en_us_, null);
- /*if (U_FAILURE(status)) {
- errln("Error opening string search %s", u_errorName(status));
- m_en_us_.setStrength(getECollationStrength(UCOL_TERTIARY));
- if (strsrch != NULL) {
- delete strsrch;
- }
- return;
- }*/
-
if (strsrch.getPattern() != PATTERN[0].pattern) {
errln("Error setting pattern");
}
if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
if (strsrch != null) {
strsrch = null;
}
strsrch.setPattern(PATTERN[1].pattern);
if (PATTERN[1].pattern != strsrch.getPattern()) {
errln("Error setting pattern");
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
if (strsrch != null) {
strsrch = null;
}
strsrch.reset();
if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) {
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
if (strsrch != null) {
strsrch = null;
}
strsrch.setPattern(PATTERN[0].pattern);
if (PATTERN[0].pattern != strsrch.getPattern()) {
errln("Error setting pattern");
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
if (strsrch != null) {
strsrch = null;
}
strsrch.reset();
if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
if (strsrch != null) {
strsrch = null;
}
errln("Error setting pattern with size 512");
}
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
if (strsrch != null) {
strsrch = null;
}
errln("Error setting pattern");
}
if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
strsrch = null;
return;
}
strsrch.setPattern(PATTERNCANONICAL[1].pattern);
if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) {
errln("Error setting pattern");
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
strsrch = null;
return;
}
strsrch.setCanonical(true);
if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) {
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
strsrch = null;
return;
}
strsrch.setPattern(PATTERNCANONICAL[0].pattern);
if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
errln("Error setting pattern");
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
strsrch = null;
return;
}
strsrch.reset();
strsrch.setCanonical(true);
if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
- m_en_us_.setStrength(Collator.TERTIARY);
+ m_en_us_.setStrength(TERTIARY);
strsrch = null;
return;
}
}
public void TestSetMatch() {
- int count = 0;
- while (MATCH[count].text != null) {
+ for (int count = 0; count < MATCH.length; count++) {
SearchData search = MATCH[count];
StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text),
m_en_us_, null);
if (strsrch.preceding(0) != SearchIterator.DONE) {
errln("Error expecting out of bounds match");
}
- count ++;
- strsrch = null;
}
}
public void TestStrength() {
- int count = 0;
- while (STRENGTH[count].text != null) {
- if (count == 3) count ++;
+ for (int count = 0; count < STRENGTH.length; count++) {
if (!assertEqual(STRENGTH[count])) {
errln("Error at test number " + count);
}
- count ++;
}
}
public void TestStrengthCanonical() {
- int count = 0;
- while (STRENGTHCANONICAL[count].text != null) {
- if (count == 3) count ++;
+ for (int count = 0; count < STRENGTHCANONICAL.length; count++) {
if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) {
errln("Error at test number" + count);
}
- count ++;
}
}
public void TestSupplementary() {
- if (logKnownIssue("8080", null)) {
- return;
- }
- int count = 0;
- while (SUPPLEMENTARY[count].text != null) {
+ for (int count = 0; count < SUPPLEMENTARY.length; count++) {
if (!assertEqual(SUPPLEMENTARY[count])) {
errln("Error at test number " + count);
}
- count ++;
}
}
public void TestSupplementaryCanonical() {
- if (logKnownIssue("8080", null)) {
- return;
- }
- int count = 0;
- while (SUPPLEMENTARYCANONICAL[count].text != null) {
+ for (int count = 0; count < SUPPLEMENTARYCANONICAL.length; count++) {
if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) {
errln("Error at test number" + count);
}
- count ++;
}
}
/**
 * Builds a StringSearch over inline test data ("fox" in two sample texts
 * with expected match offsets and lengths).
 */
public void TestText() {
    // Java-style array declaration instead of the C-style "SearchData TEXT[]".
    SearchData[] TEXT = {
        SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)),
        SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3))
    };
    StringCharacterIterator t = new StringCharacterIterator(TEXT[0].text);
    // NOTE(review): the search object is constructed but nothing is asserted
    // in this visible body -- this only smoke-tests construction; confirm
    // whether assertions against TEXT's expected offsets were intended here.
    StringSearch strsrch = new StringSearch(TEXT[0].pattern, t, m_en_us_, null);
}
public void TestVariable() {
- int count = 0;
m_en_us_.setAlternateHandlingShifted(true);
- while (VARIABLE[count].text != null) {
+ for (int count = 0; count < VARIABLE.length; count++) {
// logln("variable" + count);
if (!assertEqual(VARIABLE[count])) {
errln("Error at test number " + count);
}
- count ++;
}
m_en_us_.setAlternateHandlingShifted(false);
}
public void TestVariableCanonical() {
- int count = 0;
m_en_us_.setAlternateHandlingShifted(true);
- while (VARIABLE[count].text != null) {
+ for (int count = 0; count < VARIABLE.length; count++) {
// logln("variable " + count);
if (!assertCanonicalEqual(VARIABLE[count])) {
errln("Error at test number " + count);
}
- count ++;
}
m_en_us_.setAlternateHandlingShifted(false);
}
String pattern = "pattern";
String text = "text";
StringSearch strsrch = null;
- int count = 0;
try {
strsrch = new StringSearch(pattern, text);
} catch (Exception e) {
return;
}
- while (DIACTRICMATCH[count].text != null) {
+ for (int count = 0; count < DIACTRICMATCH.length; count++) {
strsrch.setCollator(getCollator(DIACTRICMATCH[count].collator));
strsrch.getCollator().setStrength(DIACTRICMATCH[count].strength);
strsrch.setBreakIterator(getBreakIterator(DIACTRICMATCH[count].breaker));
if (!assertEqualWithStringSearch(strsrch, DIACTRICMATCH[count])) {
errln("Error at test number " + count);
}
- count++;
}
}