From 8d59026fd4617d38e5ed4f98d1274f45eb67234d Mon Sep 17 00:00:00 2001
From: Shane Carr <shane@unicode.org>
Date: Tue, 13 Sep 2016 22:16:02 +0000
Subject: [PATCH] ICU-12549 Updating SpoofChecker to latest Unicode
 specification (Java version).

X-SVN-Rev: 39219
---
 .../src/com/ibm/icu/text/SpoofChecker.java    | 2309 +++++++----------
 icu4j/main/shared/data/icudata.jar            |    4 +-
 icu4j/main/shared/data/icutzdata.jar          |    2 +-
 icu4j/main/shared/data/testdata.jar           |    2 +-
 .../icu/dev/test/text/SpoofCheckerTest.java   |  378 ++-
 5 files changed, 1080 insertions(+), 1615 deletions(-)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
index 8b97a1df733..a7ad518105a 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
@@ -11,7 +11,6 @@
 
 package com.ibm.icu.text;
 
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.LineNumberReader;
 import java.io.Reader;
@@ -19,12 +18,14 @@ import java.nio.ByteBuffer;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.BitSet;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.LinkedHashSet;
 import java.util.Locale;
+import java.util.MissingResourceException;
 import java.util.Set;
 import java.util.Vector;
 import java.util.regex.Matcher;
@@ -32,8 +33,6 @@ import java.util.regex.Pattern;
 
 import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.ICUBinary.Authenticate;
-import com.ibm.icu.impl.Trie2;
-import com.ibm.icu.impl.Trie2Writable;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UCharacterCategory;
 import com.ibm.icu.lang.UProperty;
@@ -41,147 +40,225 @@ import com.ibm.icu.lang.UScript;
 import com.ibm.icu.util.ULocale;
 
 /**
+ * <p>
+ * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
+ * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
  *
- * <b>Unicode Security and Spoofing Detection.</b>
+ * <ol>
+ * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "desordenado" and
+ * "ԁеѕогԁепаԁо".</li>
+ * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
+ * detection</em>), such as "pÐ°ypÐ°l" spelled with Cyrillic 'Ð°' characters.</li>
+ * </ol>
  *
- * <p>This class is intended to check strings, typically
- * identifiers of some type, such as URLs, for the presence of
- * characters that are likely to be visually confusing -
- * for cases where the displayed form of an identifier may
- * not be what it appears to be.
+ * <p>
+ * Although originally designed as a method for flagging suspicious identifier strings such as URLs,
+ * <code>SpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
+ * content filters.
  *
- * <p>Unicode Technical Report #36,
- * <a href="http://unicode.org/reports/tr36">http://unicode.org/reports/tr36</a> and
- * Unicode Technical Standard #39,
- * <a href="http://unicode.org/reports/tr39">http://unicode.org/reports/tr39</a>
- * "Unicode security considerations", give more background on
- * security and spoofing issues with Unicode identifiers.
- * The tests and checks provided by this module implement the recommendations
- * from these Unicode documents.
+ * <h2>Confusables</h2>
  *
- * <p>The tests available on identifiers fall into two general categories:
- *   <ul>
- *     <li>  Single identifier tests.  Check whether an identifier is
- *       potentially confusable with any other string, or is suspicious
- *       for other reasons. </li>
- *     <li> Two identifier tests.  Check whether two specific identifiers are confusable.
- *       This does not consider whether either of strings is potentially
- *       confusable with any string other than the exact one specified. </li>
- *   </ul>
+ * <p>
+ * The following example shows how to use <code>SpoofChecker</code> to check for confusability between two strings:
  *
- * <p>The steps to perform confusability testing are
- *   <ul>
- *     <li>  Create a <code>SpoofChecker.Builder</code> </li>
- *     <li>  Configure the Builder for the desired set of tests.  The tests that will
- *           be performed are specified by a set of SpoofCheck flags. </li>
- *     <li>  Build a <code>SpoofChecker</code> from the Builder. </li>
- *     <li>  Perform the checks using the pre-configured <code>SpoofChecker</code>.  The results indicate
- *           which (if any) of the selected tests have identified possible problems with the identifier.
- *           Results are reported as a set of SpoofCheck flags;  this mirrors the form in which
- *           the set of tests to perform was originally specified to the SpoofChecker. </li>
- *    </ul>
+ * <pre>
+ * <code>
+ * SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CONFUSABLE).build();
+ * int result = sc.areConfusable("desordenado", "ԁеѕогԁепаԁо");
+ * System.out.println(result != 0);  // true
+ * </code>
+ * </pre>
  *
- * <p>A <code>SpoofChecker</code> instance may be used repeatedly to perform checks on any number
- *    of identifiers.
+ * <p>
+ * <code>SpoofChecker</code> uses a builder paradigm: options are specified within the context of a lightweight
+ * {@link SpoofChecker.Builder} object, and upon calling {@link SpoofChecker.Builder#build}, expensive data loading
+ * operations are performed, and an immutable <code>SpoofChecker</code> is returned.
  *
- * <p>Thread Safety: The methods on SpoofChecker objects are thread safe.
- * The test functions for checking a single identifier, or for testing
- * whether two identifiers are potentially confusable,  may called concurrently
- * from multiple threads using the same SpoofChecker instance.
+ * <p>
+ * The first line of the example creates a <code>SpoofChecker</code> object with confusable-checking enabled; the second
+ * line performs the confusability test. For best performance, the instance should be created once (e.g., upon
+ * application startup), and the more efficient {@link SpoofChecker#areConfusable} method can be used at runtime.
  *
+ * <p>
+ * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
+ * be thought of as a "hash code". {@link SpoofChecker#getSkeleton} computes the skeleton for a particular string, so
+ * the following snippet is equivalent to the example above:
  *
- * <p>Descriptions of the available checks.
+ * <pre>
+ * <code>
+ * SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CONFUSABLE).build();
+ * boolean result = sc.getSkeleton("desordenado").equals(sc.getSkeleton("ԁеѕогԁепаԁо"));
+ * System.out.println(result);  // true
+ * </code>
+ * </pre>
  *
- * <p>When testing whether pairs of identifiers are confusable, with <code>areConfusable()</code>
- * the relevant tests are
+ * <p>
+ * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
+ * {@link SpoofChecker#areConfusable} many times in a loop, {@link SpoofChecker#getSkeleton} can be used instead, as
+ * shown below:
  *
- *  <ul>
- *   <li> <code>SINGLE_SCRIPT_CONFUSABLE</code>:  All of the characters from the two identifiers are
- *      from a single script, and the two identifiers are visually confusable.</li>
- *   <li> <code>MIXED_SCRIPT_CONFUSABLE</code>:  At least one of the identifiers contains characters
- *      from more than one script, and the two identifiers are visually confusable.</li>
- *   <li> <code>WHOLE_SCRIPT_CONFUSABLE</code>: Each of the two identifiers is of a single script, but
- *      the the two identifiers are from different scripts, and they are visually confusable.</li>
- *  </ul>
+ * <pre>
+ * <code>
+ * // Setup:
+ * String[] DICTIONARY = new String[]{ "lorem", "ipsum" }; // example
+ * SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CONFUSABLE).build();
+ * HashSet&lt;String&gt; skeletons = new HashSet&lt;String&gt;();
+ * for (String word : DICTIONARY) {
+ *   skeletons.add(sc.getSkeleton(word));
+ * }
  *
- * <p>The safest approach is to enable all three of these checks as a group.
+ * // Live Check:
+ * boolean result = skeletons.contains(sc.getSkeleton("1orern"));
+ * System.out.println(result);  // true
+ * </code>
+ * </pre>
  *
- * <p><code>ANY_CASE</code> is a modifier for the above tests.  If the identifiers being checked can
- * be of mixed case and are used in a case-sensitive manner, this option should be specified.
+ * <p>
+ * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
+ * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
+ * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
  *
- * <p>If the identifiers being checked are used in a case-insensitive manner, and if they are
- * displayed to users in lower-case form only, the <code>ANY_CASE</code> option should not be
- * specified.  Confusabality issues involving upper case letters will not be reported.
+ * <h2>Spoof Detection</h2>
  *
- * <p>When performing tests on a single identifier, with the check() family of functions,
- * the relevant tests are:
+ * <p>
+ * The following snippet shows a minimal example of using <code>SpoofChecker</code> to perform spoof detection on a
+ * string:
  *
- *  <ul>
- *    <li><code>MIXED_SCRIPT_CONFUSABLE</code>: the identifier contains characters from multiple
- *       scripts, and there exists an identifier of a single script that is visually confusable.</li>
- *    <li><code>WHOLE_SCRIPT_CONFUSABLE</code>: the identifier consists of characters from a single
- *       script, and there exists a visually confusable identifier.
- *       The visually confusable identifier also consists of characters from a single script.
- *       but not the same script as the identifier being checked.</li>
- *    <li><code>ANY_CASE</code>: modifies the mixed script and whole script confusables tests.  If
- *       specified, the checks will find confusable characters of any case.
- *       If this flag is not set, the test is performed assuming case folded identifiers.</li>
- *    <li><code>SINGLE_SCRIPT</code>: check that the identifier contains only characters from a
- *       single script.  (Characters from the <em>common</em> and <em>inherited</em> scripts are ignored.)
- *       This is not a test for confusable identifiers</li>
- *    <li><code>INVISIBLE</code>: check an identifier for the presence of invisible characters,
- *       such as zero-width spaces, or character sequences that are
- *       likely not to display, such as multiple occurrences of the same
- *       non-spacing mark.  This check does not test the input string as a whole
- *       for conformance to any particular syntax for identifiers.</li>
- *    <li><code>CHAR_LIMIT</code>: check that an identifier contains only characters from a specified set
- *       of acceptable characters.  See <code>Builder.setAllowedChars()</code> and
- *       <code>Builder.setAllowedLocales()</code>.</li>
- *  </ul>
+ * <code>
+ * <pre>
+ * SpoofChecker sc = new SpoofChecker.Builder()
+ *     .setAllowedChars(SpoofChecker.RECOMMENDED.cloneAsThawed().addAll(SpoofChecker.INCLUSION))
+ *     .setRestrictionLevel(SpoofChecker.RestrictionLevel.MODERATELY_RESTRICTIVE)
+ *     .setChecks(SpoofChecker.ALL_CHECKS &~ SpoofChecker.CONFUSABLE)
+ *     .build();
+ * boolean result = sc.failsChecks("pÐ°ypÐ°l");  // with Cyrillic 'Ð°' characters
+ * System.out.println(result);  // true
+ * </pre>
+ * </code>
  *
- *  <p>Note on Scripts:
- *     <blockquote>Characters from the Unicode Scripts "Common" and "Inherited" are ignored when considering
- *     the script of an identifier. Common characters include digits and symbols that
- *     are normally used with text from many different scripts. </blockquote>
+ * <p>
+ * As in the case for confusability checking, it is good practice to create one <code>SpoofChecker</code> instance at
+ * startup, and call the cheaper {@link SpoofChecker#failsChecks} online. In the second line, we specify the set of
+ * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39. In the
+ * fourth line, the CONFUSABLE checks are disabled. It is good practice to disable them if you won't be using the
+ * instance to perform confusability checking.
+ *
+ * <p>
+ * To get more details on why a string failed the checks, use a {@link SpoofChecker.CheckResult}:
+ *
+ * <pre>
+ * <code>
+ * SpoofChecker sc = new SpoofChecker.Builder()
+ *     .setAllowedChars(SpoofChecker.RECOMMENDED.cloneAsThawed().addAll(SpoofChecker.INCLUSION))
+ *     .setRestrictionLevel(SpoofChecker.RestrictionLevel.MODERATELY_RESTRICTIVE)
+ *     .setChecks(SpoofChecker.ALL_CHECKS &amp;~ SpoofChecker.CONFUSABLE)
+ *     .build();
+ * SpoofChecker.CheckResult checkResult = new SpoofChecker.CheckResult();
+ * boolean result = sc.failsChecks("pÐ°ypÐ°l", checkResult);
+ * System.out.println(checkResult.checks);  // 16
+ * </code>
+ * </pre>
+ *
+ * <p>
+ * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
+ * {@link SpoofChecker#RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
+ *
+ * <ul>
+ * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
+ * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
+ * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
+ * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
+ * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
+ * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
+ * characters. See {@link SpoofChecker.Builder#setAllowedChars} and {@link SpoofChecker.Builder#setAllowedLocales}.</li>
+ * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
+ * </ul>
+ *
+ * <p>
+ * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
+ * INVISIBLE and MIXED_NUMBERS conditions, you could do:
+ *
+ * <pre>
+ * <code>
+ * SpoofChecker sc = new SpoofChecker.Builder()
+ *     .setChecks(SpoofChecker.INVISIBLE | SpoofChecker.MIXED_NUMBERS)
+ *     .build();
+ * boolean result = sc.failsChecks("৪8");
+ * System.out.println(result);  // true
+ * </code>
+ * </pre>
+ *
+ * <p>
+ * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
+ * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
+ * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
+ * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
+ * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
+ * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
+ * the levels, see UTS 39 or {@link SpoofChecker.RestrictionLevel}. The Restriction Level test is aware of the set of
+ * allowed characters set in {@link SpoofChecker.Builder#setAllowedChars}. Note that characters which have script code
+ * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
+ * scripts.
+ *
+ * <h2>Additional Information</h2>
+ *
+ * <p>
+ * A <code>SpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
+ *
+ * <p>
+ * <b>Thread Safety:</b> The methods on <code>SpoofChecker</code> objects are thread safe. The test functions for
+ * checking a single identifier, or for testing whether two identifiers are potentially confusable, may be called
+ * concurrently from multiple threads using the same <code>SpoofChecker</code> instance.
  *
  * @stable ICU 4.6
  */
 public class SpoofChecker {
 
     /**
-     * Constants from UAX 31 for use in setRestrictionLevel.
+     * Constants from UTS 39 for use in setRestrictionLevel.
+     *
      * @stable ICU 53
      */
     public enum RestrictionLevel {
         /**
-         * Only ASCII characters: U+0000..U+007F
+         * All characters in the string are in the identifier profile and all characters in the string are in the ASCII
+         * range.
          *
          * @stable ICU 53
          */
         ASCII,
         /**
-         * All characters in each identifier must be from a single script.
+         * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and the
+         * string is single-script, according to the definition in UTS 39 section 5.1.
          *
          * @stable ICU 53
          */
         SINGLE_SCRIPT_RESTRICTIVE,
-         /**
-         * All characters in each identifier must be from a single script, or from the combinations: Latin + Han +
-         * Hiragana + Katakana; Latin + Han + Bopomofo; or Latin + Han + Hangul. Note that this level will satisfy the
-         * vast majority of Latin-script users; also that TR36 has ASCII instead of Latin.
+        /**
+         * The string classifies as Single Script, or all characters in the string are in the identifier profile and the
+         * string is covered by any of the following sets of scripts, according to the definition in UTS 39 section 5.1:
+         * <ul>
+         * <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
+         * <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
+         * <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
+         * </ul>
          *
          * @stable ICU 53
          */
         HIGHLY_RESTRICTIVE,
         /**
-         * Allow Latin with other scripts except Cyrillic, Greek, Cherokee Otherwise, the same as Highly Restrictive
+         * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
+         * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
+         * Greek, and Cherokee.
+         *
+         * This is the default restriction level as of ICU 58.
          *
          * @stable ICU 53
          */
         MODERATELY_RESTRICTIVE,
         /**
-         * Allow arbitrary mixtures of scripts, such as Ωmega, Teχ, HλLF-LIFE, Toys-Я-Us. Otherwise, the same as
-         * Moderately Restrictive
+         * All characters in the string are in the identifier profile. Allow arbitrary mixtures of scripts, such as
+         * Ωmega, Teχ, HλLF-LIFE, Toys-Я-Us.
          *
          * @stable ICU 53
          */
@@ -191,91 +268,89 @@ public class SpoofChecker {
          *
          * @stable ICU 53
          */
-        UNRESTRICTIVE
+        UNRESTRICTIVE,
     }
 
-
     /**
-     * Security Profile constant from UAX 31 for use in setAllowedChars.
-     * Will probably be replaced by UnicodeSet property.
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * Security Profile constant from UTS 39 for use in {@link SpoofChecker.Builder#setAllowedChars}.
+     *
+     * @draft ICU 58
+     * @provisional This API might change or be removed in a future release.
      */
-    @Deprecated
-    public static final UnicodeSet INCLUSION = new UnicodeSet("[" +
-            "\\u0027\\u002D-\\u002E\\u003A\\u00B7\\u0375\\u058A\\u05F3-\\u05F4"+
-            "\\u06FD-\\u06FE\\u0F0B\\u200C-\\u200D\\u2010\\u2019\\u2027\\u30A0\\u30FB]").freeze();
-        // Note: data from http://unicode.org/Public/security/latest/xidmodifications.txt version 6.3.0
+    public static final UnicodeSet INCLUSION = new UnicodeSet(
+            "['\\-.\\:\\u00B7\\u0375\\u058A\\u05F3\\u05F4\\u06FD\\u06FE\\u0F0B\\u200C\\u200D\\u2010\\u"
+                    + "2019\\u2027\\u30A0\\u30FB]").freeze();
+    // Note: data from http://unicode.org/Public/security/9.0.0/IdentifierStatus.txt
+    // There is tooling to generate this constant in the unicodetools project:
+    //      org.unicode.text.tools.RecommendedSetGenerator
+    // It will print the Java and C++ code to the console for easy copy-paste into this file.
 
     /**
-     * Security Profile constant from UAX 31 for use in setAllowedChars.
-     * Will probably be replaced by UnicodeSet property.
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * Security Profile constant from UTS 39 for use in {@link SpoofChecker.Builder#setAllowedChars}.
+     *
+     * @draft ICU 58
+     * @provisional This API might change or be removed in a future release.
      */
-    @Deprecated
     public static final UnicodeSet RECOMMENDED = new UnicodeSet(
-            "[\\u0030-\\u0039\\u0041-\\u005A\\u005F\\u0061-\\u007A\\u00C0-\\u00D6\\u00D8-\\u00F6" +
-            "\\u00F8-\\u0131\\u0134-\\u013E\\u0141-\\u0148\\u014A-\\u017E\\u018F\\u01A0-\\u01A1" +
-            "\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4-\\u01F5\\u01F8-\\u021B" +
-            "\\u021E-\\u021F\\u0226-\\u0233\\u0259\\u02BB-\\u02BC\\u02EC\\u0300-\\u0304\\u0306-\\u030C" +
-            "\\u030F-\\u0311\\u0313-\\u0314\\u031B\\u0323-\\u0328\\u032D-\\u032E\\u0330-\\u0331" +
-            "\\u0335\\u0338-\\u0339\\u0342\\u0345\\u037B-\\u037D\\u0386\\u0388-\\u038A\\u038C" +
-            "\\u038E-\\u03A1\\u03A3-\\u03CE\\u03FC-\\u045F\\u048A-\\u0529\\u052E-\\u052F\\u0531-\\u0556" +
-            "\\u0559\\u0561-\\u0586\\u05B4\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0620-\\u063F\\u0641-\\u0655" +
-            "\\u0660-\\u0669\\u0670-\\u0672\\u0674\\u0679-\\u068D\\u068F-\\u06D3\\u06D5\\u06E5-\\u06E6" +
-            "\\u06EE-\\u06FC\\u06FF\\u0750-\\u07B1\\u08A0-\\u08AC\\u08B2\\u0901-\\u094D\\u094F-\\u0950" +
-            "\\u0956-\\u0957\\u0960-\\u0963\\u0966-\\u096F\\u0971-\\u0977\\u0979-\\u097F\\u0981-\\u0983" +
-            "\\u0985-\\u098C\\u098F-\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9" +
-            "\\u09BC-\\u09C4\\u09C7-\\u09C8\\u09CB-\\u09CE\\u09D7\\u09E0-\\u09E3\\u09E6-\\u09F1" +
-            "\\u0A01-\\u0A03\\u0A05-\\u0A0A\\u0A0F-\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32" +
-            "\\u0A35\\u0A38-\\u0A39\\u0A3C\\u0A3E-\\u0A42\\u0A47-\\u0A48\\u0A4B-\\u0A4D\\u0A5C" +
-            "\\u0A66-\\u0A74\\u0A81-\\u0A83\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0" +
-            "\\u0AB2-\\u0AB3\\u0AB5-\\u0AB9\\u0ABC-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0AD0" +
-            "\\u0AE0-\\u0AE3\\u0AE6-\\u0AEF\\u0B01-\\u0B03\\u0B05-\\u0B0C\\u0B0F-\\u0B10\\u0B13-\\u0B28" +
-            "\\u0B2A-\\u0B30\\u0B32-\\u0B33\\u0B35-\\u0B39\\u0B3C-\\u0B43\\u0B47-\\u0B48\\u0B4B-\\u0B4D" +
-            "\\u0B56-\\u0B57\\u0B5F-\\u0B61\\u0B66-\\u0B6F\\u0B71\\u0B82-\\u0B83\\u0B85-\\u0B8A" +
-            "\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99-\\u0B9A\\u0B9C\\u0B9E-\\u0B9F\\u0BA3-\\u0BA4" +
-            "\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD0" +
-            "\\u0BD7\\u0BE6-\\u0BEF\\u0C01-\\u0C03\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28" +
-            "\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55-\\u0C56" +
-            "\\u0C60-\\u0C61\\u0C66-\\u0C6F\\u0C82-\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8" +
-            "\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5-\\u0CD6" +
-            "\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1-\\u0CF2\\u0D02-\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10" +
-            "\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4E\\u0D57\\u0D60-\\u0D61" +
-            "\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D82-\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5" +
-            "\\u0DA7-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6" +
-            "\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59" +
-            "\\u0E81-\\u0E82\\u0E84\\u0E87-\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F" +
-            "\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA-\\u0EAB\\u0EAD-\\u0EB2\\u0EB4-\\u0EB9\\u0EBB-\\u0EBD" +
-            "\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE-\\u0EDF\\u0F00\\u0F20-\\u0F29" +
-            "\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56" +
-            "\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-\\u0F6C\\u0F71-\\u0F72\\u0F74\\u0F7A-\\u0F80" +
-            "\\u0F82-\\u0F84\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6" +
-            "\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D" +
-            "\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-\\u124D" +
-            "\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0" +
-            "\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310" +
-            "\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7" +
-            "\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1E00-\\u1E99" +
-            "\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D" +
-            "\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76\\u1F78" +
-            "\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4\\u1FC6-\\u1FC8" +
-            "\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2\\u1FE4-\\u1FEA\\u1FEC" +
-            "\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA\\u1FFC\\u2D27\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6" +
-            "\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6" +
-            "\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099-\\u309A\\u309D-\\u309E\\u30A1-\\u30FA" +
-            "\\u30FC-\\u30FE\\u3105-\\u312D\\u31A0-\\u31BA\\u3400-\\u4DB5\\u4E00-\\u9FD5\\uA660-\\uA661" +
-            "\\uA674-\\uA67B\\uA67F\\uA69F\\uA717-\\uA71F\\uA788\\uA78D-\\uA78E\\uA790-\\uA793" +
-            "\\uA7A0-\\uA7AA\\uA7FA\\uA9E7-\\uA9FE\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06" +
-            "\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAC00-\\uD7A3\\uFA0E-\\uFA0F" +
-            "\\uFA11\\uFA13-\\uFA14\\uFA1F\\uFA21\\uFA23-\\uFA24\\uFA27-\\uFA29\\U00020000-\\U0002A6D6" +
-            "\\U0002A700-\\U0002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1]"
-            ).freeze();
-            // Note: data from http://unicode.org/Public/security/latest/xidmodifications.txt version 8.0.0
-            //       There is no tooling to generate this from the .txt file,
-            //       copy the set contents from ICU4C source/i18n/uspoof.cpp recommendedPat.
-            //       (Add '+' for string concatenation.)
+            "[0-9A-Z_a-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u0131\\u0134-\\u013E\\u0141-\\u014"
+                    + "8\\u014A-\\u017E\\u018F\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E"
+                    + "6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B\\u021E\\u021F\\u0226-\\u0233\\u0259\\u02BB\\u02B"
+                    + "C\\u02EC\\u0300-\\u0304\\u0306-\\u030C\\u030F-\\u0311\\u0313\\u0314\\u031B\\u0323-\\u03"
+                    + "28\\u032D\\u032E\\u0330\\u0331\\u0335\\u0338\\u0339\\u0342\\u0345\\u037B-\\u037D\\u0386"
+                    + "\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03CE\\u03FC-\\u045F\\u048A-\\u0529\\u05"
+                    + "2E\\u052F\\u0531-\\u0556\\u0559\\u0561-\\u0586\\u05B4\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0"
+                    + "620-\\u063F\\u0641-\\u0655\\u0660-\\u0669\\u0670-\\u0672\\u0674\\u0679-\\u068D\\u068F-"
+                    + "\\u06D3\\u06D5\\u06E5\\u06E6\\u06EE-\\u06FC\\u06FF\\u0750-\\u07B1\\u08A0-\\u08AC\\u08B2"
+                    + "\\u08B6-\\u08BD\\u0901-\\u094D\\u094F\\u0950\\u0956\\u0957\\u0960-\\u0963\\u0966-\\u096"
+                    + "F\\u0971-\\u0977\\u0979-\\u097F\\u0981-\\u0983\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u0"
+                    + "9A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BC-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CE\\u"
+                    + "09D7\\u09E0-\\u09E3\\u09E6-\\u09F1\\u0A01-\\u0A03\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-"
+                    + "\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A35\\u0A38\\u0A39\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\"
+                    + "u0A4B-\\u0A4D\\u0A5C\\u0A66-\\u0A74\\u0A81-\\u0A83\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A9"
+                    + "3-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABC-\\u0AC5\\u0AC7-\\u0AC9\\u0"
+                    + "ACB-\\u0ACD\\u0AD0\\u0AE0-\\u0AE3\\u0AE6-\\u0AEF\\u0B01-\\u0B03\\u0B05-\\u0B0C\\u0B0F\\"
+                    + "u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B3C-\\u0B43\\u0B47"
+                    + "\\u0B48\\u0B4B-\\u0B4D\\u0B56\\u0B57\\u0B5F-\\u0B61\\u0B66-\\u0B6F\\u0B71\\u0B82\\u0B83"
+                    + "\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3"
+                    + "\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0B"
+                    + "D0\\u0BD7\\u0BE6-\\u0BEF\\u0C01-\\u0C03\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u"
+                    + "0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56"
+                    + "\\u0C60\\u0C61\\u0C66-\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92"
+                    + "-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0"
+                    + "CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D02\\u0D03\\u0D05-\\u0D0C\\u0"
+                    + "D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57"
+                    + "\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D9"
+                    + "6\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0"
+                    + "DD4\\u0DD6\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\"
+                    + "u0E59\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u"
+                    + "0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD-\\u0EB2\\u0EB4-\\u0EB9\\u0EBB-\\u0EBD\\"
+                    + "u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29"
+                    + "\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F"
+                    + "56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0"
+                    + "F82-\\u0F84\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6"
+                    + "\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10"
+                    + "C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-\\u124D\\u"
+                    + "1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2"
+                    + "-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1"
+                    + "315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-"
+                    + "\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C80-\\u1C88\\u1E00-\\u1E9"
+                    + "9\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1"
+                    + "F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F"
+                    + "7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4\\u1FC6-\\u1FC8\\u1FCA\\u1FCC\\u1"
+                    + "FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2\\u1FE4-\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-"
+                    + "\\u1FF8\\u1FFA\\u1FFC\\u2D27\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0"
+                    + "-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u3"
+                    + "005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E\\u30A1-\\u30FA\\u30FC-\\u30FE\\u"
+                    + "3105-\\u312D\\u31A0-\\u31BA\\u3400-\\u4DB5\\u4E00-\\u9FD5\\uA660\\uA661\\uA674-\\uA67B"
+                    + "\\uA67F\\uA69F\\uA717-\\uA71F\\uA788\\uA78D\\uA78E\\uA790-\\uA793\\uA7A0-\\uA7AA\\uA7AE"
+                    + "\\uA7FA\\uA9E7-\\uA9FE\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB"
+                    + "11-\\uAB16\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13\\uF"
+                    + "A14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00020000-\\U0002A6D6\\U0002A700-\\U0"
+                    + "002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1]").freeze();
+    // Note: data from http://unicode.org/Public/security/9.0.0/IdentifierStatus.txt
+    // There is tooling to generate this constant in the unicodetools project:
+    //      org.unicode.text.tools.RecommendedSetGenerator
+    // It will print the Java and C++ code to the console for easy copy-paste into this file.
 
     /**
      * Constants for the kinds of checks that USpoofChecker can perform. These values are used both to select the set of
@@ -284,63 +359,66 @@ public class SpoofChecker {
      */
 
     /**
-     * Single script confusable test. When testing whether two identifiers are confusable, report that they are if both
-     * are from the same script and they are visually confusable. Note: this test is not applicable to a check of a
-     * single identifier.
+     * When performing the two-string {@link SpoofChecker#areConfusable} test, this flag in the return value indicates
+     * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
+     * 4.
      *
      * @stable ICU 4.6
      */
     public static final int SINGLE_SCRIPT_CONFUSABLE = 1;
 
     /**
-     * Mixed script confusable test.
-     *
-     * <p>When checking a single identifier, report a problem if the identifier contains multiple scripts, and is also
-     * confusable with some other identifier in a single script.
-     *
-     * <p>When testing whether two identifiers are confusable, report that they are if the two IDs are visually confusable,
-     * and and at least one contains characters from more than one script.
+     * When performing the two-string {@link SpoofChecker#areConfusable} test, this flag in the return value indicates
+     * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
+     * 39 section 4.
      *
      * @stable ICU 4.6
      */
     public static final int MIXED_SCRIPT_CONFUSABLE = 2;
 
     /**
-     * Whole script confusable test.
-     *
-     * <p>When checking a single identifier, report a problem if The identifier is of a single script, and there exists a
-     * confusable identifier in another script.
-     *
-     * <p>When testing whether two Identifiers are confusable, report that they are if each is of a single script, the
-     * scripts of the two identifiers are different, and the identifiers are visually confusable.
+     * When performing the two-string {@link SpoofChecker#areConfusable} test, this flag in the return value indicates
+     * that the two strings are visually confusable and that they are not from the same script but both of them are
+     * single-script strings, according to UTS 39 section 4.
      *
      * @stable ICU 4.6
      */
     public static final int WHOLE_SCRIPT_CONFUSABLE = 4;
 
     /**
-     * Any Case Modifier for confusable identifier tests.
+     * Enable this flag in {@link SpoofChecker.Builder#setChecks} to turn on all types of confusables. You may set the
+     * checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to make
+     * {@link SpoofChecker#areConfusable} return only those types of confusables.
      *
-     * <p>When specified, consider all characters, of any case, when looking for confusables. If ANY_CASE is not specified,
-     * identifiers being checked are assumed to have been case folded, and upper case conusable characters will not be
-     * checked.
+     * <p>
+     * Note: if you wish to use {@link SpoofChecker#getSkeleton}, it is required that you enable at least one of the
+     * CONFUSABLE flags.
      *
-     * @stable ICU 4.6
+     * @draft ICU 58
+     * @provisional This API might change or be removed in a future release.
      */
-    public static final int ANY_CASE = 8;
+    public static final int CONFUSABLE = SINGLE_SCRIPT_CONFUSABLE | MIXED_SCRIPT_CONFUSABLE | WHOLE_SCRIPT_CONFUSABLE;
 
     /**
-     * Check that an identifier is no looser than the specified RestrictionLevel.
-     * The default if this is not called is HIGHLY_RESTRICTIVE.
+     * This flag is deprecated and no longer affects the behavior of SpoofChecker.
      *
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * @deprecated ICU 58 This API was deprecated in UTS 39 revision 11 and is no longer used.
      */
     @Deprecated
+    public static final int ANY_CASE = 8;
+
+    /**
+     * Check that an identifier satisfies the requirements for the restriction level specified in
+     * {@link SpoofChecker.Builder#setRestrictionLevel}. The default restriction level is
+     * {@link RestrictionLevel#HIGHLY_RESTRICTIVE}.
+     *
+     * @draft ICU 58
+     * @provisional This API might change or be removed in a future release.
+     */
     public static final int RESTRICTION_LEVEL = 16;
 
     /**
-     * Check that an identifer contains only characters from a single script (plus chars from the common and inherited
+     * Check that an identifier contains only characters from a single script (plus chars from the common and inherited
      * scripts.) Applies to checks of a single identifier check only.
      *
      * @deprecated ICU 51 Use RESTRICTION_LEVEL
@@ -359,19 +437,20 @@ public class SpoofChecker {
 
     /**
      * Check that an identifier contains only characters from a specified set of acceptable characters. See
-     * Builder.setAllowedChars() and Builder.setAllowedLocales().
+     * {@link Builder#setAllowedChars} and {@link Builder#setAllowedLocales}. Note that a string that fails this check
+     * will also fail the {@link #RESTRICTION_LEVEL} check.
      *
      * @stable ICU 4.6
      */
     public static final int CHAR_LIMIT = 64;
 
     /**
-     * Check that an identifier does not mix numbers.
+     * Check that an identifier does not mix numbers from different numbering systems. For more information, see UTS 39
+     * section 5.3.
      *
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * @draft ICU 58
+     * @provisional This API might change or be removed in a future release.
      */
-    @Deprecated
     public static final int MIXED_NUMBERS = 128;
 
     // Update CheckResult.toString() when a new check is added.
@@ -383,9 +462,8 @@ public class SpoofChecker {
      */
     public static final int ALL_CHECKS = 0xFFFFFFFF;
 
-
-    // Magic number for sanity checking spoof binary resource data.
-    static final int MAGIC = 0x3845fdef;
+    // Used for checking for ASCII-Only restriction level
+    static final UnicodeSet ASCII = new UnicodeSet(0, 0x7F).freeze();
 
     /**
      * private constructor: a SpoofChecker has to be built by the builder
@@ -429,10 +507,10 @@ public class SpoofChecker {
          */
         public Builder(SpoofChecker src) {
             fChecks = src.fChecks;
-            fSpoofData = src.fSpoofData;      // For the data, we will either use the source data
-                                              //   as-is, or drop the builder's reference to it
-                                              //   and generate new data, depending on what our
-                                              //   caller does with the builder.
+            fSpoofData = src.fSpoofData; // For the data, we will either use the source data
+                                         // as-is, or drop the builder's reference to it
+                                         // and generate new data, depending on what our
+                                         // caller does with the builder.
             fAllowedCharsSet.set(src.fAllowedCharsSet);
             fAllowedLocales.addAll(src.fAllowedLocales);
             fRestrictionLevel = src.fRestrictionLevel;
@@ -445,19 +523,20 @@ public class SpoofChecker {
          * @stable ICU 4.6
          */
         public SpoofChecker build() {
-            if (fSpoofData == null) { // read binary file
+            // TODO: Make this data loading be lazy (see #12696).
+            if (fSpoofData == null) {
+                // read binary file
                 fSpoofData = SpoofData.getDefault();
             }
 
             // Copy all state from the builder to the new SpoofChecker.
-            //  Make sure that everything is either cloned or copied, so
-            //  that subsequent re-use of the builder won't modify the built
-            //  SpoofChecker.
+            // Make sure that everything is either cloned or copied, so
+            // that subsequent re-use of the builder won't modify the built
+            // SpoofChecker.
             //
-            //  One exception to this: the SpoofData is just assigned.
-            //  If the builder subsequently needs to modify fSpoofData
-            //  it will create a new SpoofData object first.
-
+            // One exception to this: the SpoofData is just assigned.
+            // If the builder subsequently needs to modify fSpoofData
+            // it will create a new SpoofData object first.
 
             SpoofChecker result = new SpoofChecker();
             result.fChecks = this.fChecks;
@@ -470,35 +549,77 @@ public class SpoofChecker {
         }
 
         /**
-         * Specify the source form of the spoof data Spoof Checker. The inputs correspond to the Unicode data
-         * files confusables.txt and confusablesWholeScript.txt as described in Unicode UAX 39. The syntax of the source
-         * data is as described in UAX 39 for these files, and the content of these files is acceptable input.
+         * Specify the source form of the spoof data for the Spoof Checker. The input corresponds to the Unicode data
+         * file confusables.txt as described in Unicode UTS 39. The syntax of the source data is as described in UTS 39
+         * for this file, and the content of this file is acceptable input.
          *
          * @param confusables
          *            the Reader of confusable characters definitions, as found in file confusables.txt from
          *            unicode.org.
-         * @param confusablesWholeScript
-         *            the Reader of whole script confusables definitions, as found in the file
-         *            xonfusablesWholeScript.txt from unicode.org.
          * @throws ParseException
          *             To report syntax errors in the input.
-         * @stable ICU 4.6
+         *
+         * @draft ICU 58
+         * @provisional This API might change or be removed in a future release.
          */
-        public Builder setData(Reader confusables, Reader confusablesWholeScript) throws ParseException,
-        java.io.IOException {
+        public Builder setData(Reader confusables) throws ParseException, IOException {
 
             // Compile the binary data from the source (text) format.
-            //   Drop the builder's reference to any pre-existing data, which may
-            //   be in use in an already-built checker.
+            // Drop the builder's reference to any pre-existing data, which may
+            // be in use in an already-built checker.
 
             fSpoofData = new SpoofData();
             ConfusabledataBuilder.buildConfusableData(confusables, fSpoofData);
-            WSConfusableDataBuilder.buildWSConfusableData(confusablesWholeScript, fSpoofData);
             return this;
         }
 
         /**
-         * Specify the set of checks that will be performed by the check functions of this Spoof Checker.
+         * Deprecated as of ICU 58; use {@link SpoofChecker.Builder#setData(Reader confusables)} instead.
+         *
+         * @param confusables
+         *            the Reader of confusable characters definitions, as found in file confusables.txt from
+         *            unicode.org.
+         * @param confusablesWholeScript
+         *            No longer supported.
+         * @throws ParseException
+         *             To report syntax errors in the input.
+         *
+         * @deprecated ICU 58
+         */
+        @Deprecated
+        public Builder setData(Reader confusables, Reader confusablesWholeScript) throws ParseException, IOException {
+            setData(confusables);
+            return this;
+        }
+
+        /**
+         * Specify the bitmask of checks that will be performed by {@link SpoofChecker#failsChecks}. Calling this method
+         * overwrites any checks that may have already been enabled. By default, all checks are enabled.
+         *
+         * To enable specific checks and disable all others, the "whitelisted" checks should be ORed together. For
+         * example, to fail strings containing characters outside of the set specified by {@link #setAllowedChars} and
+         * also strings that contain digits from mixed numbering systems:
+         *
+         * <pre>
+         * {@code
+         * builder.setChecks(SpoofChecker.CHAR_LIMIT | SpoofChecker.MIXED_NUMBERS);
+         * }
+         * </pre>
+         *
+         * To disable specific checks and enable all others, the "blacklisted" checks should be ANDed away from
+         * ALL_CHECKS. For example, if you are not planning to use the {@link SpoofChecker#areConfusable} functionality,
+         * it is good practice to disable the CONFUSABLE check:
+         *
+         * <pre>
+         * {@code
+         * builder.setChecks(SpoofChecker.ALL_CHECKS & ~SpoofChecker.CONFUSABLE);
+         * }
+         * </pre>
+         *
+         * Note that methods such as {@link #setAllowedChars}, {@link #setAllowedLocales}, and
+         * {@link #setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
+         * enable onto the existing bitmask specified by this method. For more details, see the documentation of those
+         * methods.
          *
          * @param checks
          *            The set of checks that this spoof checker will perform. The value is an 'or' of the desired
@@ -527,15 +648,15 @@ public class SpoofChecker {
          *
          * Supplying an empty string removes all restrictions; characters from any script will be allowed.
          *
-         * The CHAR_LIMIT test is automatically enabled for this SpoofChecker when calling this function with a
+         * The {@link #CHAR_LIMIT} test is automatically enabled for this SpoofChecker when calling this function with a
          * non-empty list of locales.
          *
-         * The Unicode Set of characters that will be allowed is accessible via the getAllowedChars() function.
+         * The Unicode Set of characters that will be allowed is accessible via the {@link #getAllowedChars} function.
          * setAllowedLocales() will <i>replace</i> any previously applied set of allowed characters.
          *
          * Adjustments, such as additions or deletions of certain classes of characters, can be made to the result of
-         * setAllowedLocales() by fetching the resulting set with getAllowedChars(), manipulating it with the Unicode
-         * Set API, then resetting the spoof detectors limits with setAllowedChars()
+         * {@link #setAllowedLocales} by fetching the resulting set with {@link #getAllowedChars}, manipulating it with
+         * the Unicode Set API, then resetting the spoof detector's limits with {@link #setAllowedChars}.
          *
          * @param locales
          *            A Set of ULocales, from which the language and associated script are extracted. If the locales Set
@@ -581,6 +702,7 @@ public class SpoofChecker {
          * Limit characters that are acceptable in identifiers being checked to those normally used with the languages
          * associated with the specified locales. Any previously specified list of locales is replaced by the new
          * settings.
+         *
          * @param locales
          *            A Set of Locales, from which the language and associated script are extracted. If the locales Set
          *            is null, no restrictions will be placed on the allowed characters.
@@ -616,9 +738,9 @@ public class SpoofChecker {
         /**
          * Limit the acceptable characters to those specified by a Unicode Set. Any previously specified character limit
          * is is replaced by the new settings. This includes limits on characters that were set with the
-         * setAllowedLocales() function. Note that the RESTRICTED set is useful;
+         * setAllowedLocales() function. Note that the RESTRICTED set is useful.
          *
-         * The CHAR_LIMIT test is automatically enabled for this SpoofChecker by this function.
+         * The {@link #CHAR_LIMIT} test is automatically enabled for this SpoofChecker by this function.
          *
          * @param chars
          *            A Unicode Set containing the list of characters that are permitted. The incoming set is cloned by
@@ -634,330 +756,27 @@ public class SpoofChecker {
             return this;
         }
 
-
         /**
-         * Set the loosest restriction level allowed. The default if this is not called is HIGHLY_RESTRICTIVE.
-         * This method also sets RESTRICTION_LEVEL.
-         * @param restrictionLevel The loosest restriction level allowed.
+         * Set the loosest restriction level allowed for strings. The default if this is not called is
+         * {@link RestrictionLevel#HIGHLY_RESTRICTIVE}. Calling this method enables the {@link #RESTRICTION_LEVEL} and
+         * {@link #MIXED_NUMBERS} checks, corresponding to Sections 5.2 and 5.3 of UTS 39. To customize which checks are
+         * to be performed by {@link SpoofChecker#failsChecks}, see {@link #setChecks}.
+         *
+         * @param restrictionLevel
+         *            The loosest restriction level allowed.
          * @return self
-         * @internal
-         * @deprecated This API is ICU internal only.
+         * @provisional This API might change or be removed in a future release.
+         * @draft ICU 58
          */
-        @Deprecated
         public Builder setRestrictionLevel(RestrictionLevel restrictionLevel) {
             fRestrictionLevel = restrictionLevel;
-            fChecks |= RESTRICTION_LEVEL;
+            fChecks |= RESTRICTION_LEVEL | MIXED_NUMBERS;
             return this;
         }
 
-        // Structure for the Whole Script Confusable Data
-        // See Unicode UAX-39, Unicode Security Mechanisms, for a description of the
-        // Whole Script confusable data
-        //
-        // The data provides mappings from code points to a set of scripts
-        // that contain characters that might be confused with the code point.
-        // There are two mappings, one for lower case only, and one for characters
-        // of any case.
-        //
-        // The actual data consists of a utrie2 to map from a code point to an offset,
-        // and an array of UScriptSets (essentially bit maps) that is indexed
-        // by the offsets obtained from the Trie.
-        //
-        //
-
-        /*
-         * Internal functions for compiling Whole Script confusable source data into its binary (runtime) form. The
-         * binary data format is described in uspoof_impl.h
-         */
-        private static class WSConfusableDataBuilder {
-
-            // Regular expression for parsing a line from the Unicode file confusablesWholeScript.txt
-            // Example Lines:
-            //   006F           ; Latn; Deva; A #      (o) LATIN SMALL LETTER O
-            //   0048..0049     ; Latn; Grek; A #  [2] (H..I) LATIN CAPITAL LETTER H..LATIN CAPITAL LETTER I
-            //     |               |     |    |
-            //     |               |     |    |---- Which table, Any Case or Lower Case (A or L)
-            //     |               |     |----------Target script. We need this.
-            //     |               |----------------Src script. Should match the script of the source
-            //     |                                code points. Beyond checking that, we don't keep it.
-            //     |--------------------------------Source code points or range.
-            //
-            // The expression will match _all_ lines, including erroneous lines.
-            // The result of the parse is returned via the contents of the (match) groups.
-            static String parseExp =
-                "(?m)" +                        // Multi-line mode
-                "^([ \\t]*(?:#.*?)?)$" +        // A blank or comment line. Matches Group 1.
-                "|^(?:" +                       // OR
-                "\\s*([0-9A-F]{4,})(?:..([0-9A-F]{4,}))?\\s*;" + // Code point range. Groups 2 and 3.
-                "\\s*([A-Za-z]+)\\s*;" +        // The source script. Group 4.
-                "\\s*([A-Za-z]+)\\s*;" +        // The target script. Group 5.
-                "\\s*(?:(A)|(L))" +             // The table A or L. Group 6 or 7
-                "[ \\t]*(?:#.*?)?" +            // Trailing commment
-                ")$|" +                         // OR
-                "^(.*?)$";                      // An error line. Group 8.
-                                                // Any line not matching the preceding
-                                                // parts of the expression will match
-                                                // this, and thus be flagged as an error
-
-
-            static void readWholeFileToString(Reader reader, StringBuffer buffer) throws java.io.IOException {
-                // Convert the user input data from UTF-8 to char (UTF-16)
-                LineNumberReader lnr = new LineNumberReader(reader);
-                do {
-                    String line = lnr.readLine();
-                    if (line == null) {
-                        break;
-                    }
-                    buffer.append(line);
-                    buffer.append('\n');
-                } while (true);
-            }
-
-            // Build the Whole Script Confusable data
-            //
-            static void buildWSConfusableData(Reader confusablesWS, SpoofData dest)
-                    throws ParseException, java.io.IOException {
-                Pattern parseRegexp = null;
-                StringBuffer input = new StringBuffer();
-                int lineNum = 0;
-
-                ArrayList<BuilderScriptSet> scriptSets = null;
-                int rtScriptSetsCount = 2;
-
-                Trie2Writable anyCaseTrie = new Trie2Writable(0, 0);
-                Trie2Writable lowerCaseTrie = new Trie2Writable(0, 0);
-
-                // The scriptSets vector provides a mapping from TRIE values to the set
-                // of scripts.
-                //
-                // Reserved TRIE values:
-                //   0: Code point has no whole script confusables.
-                //   1: Code point is of script Common or Inherited.
-                //
-                // These code points do not participate in whole script confusable detection.
-                // (This is logically equivalent to saying that they contain confusables
-                // in all scripts)
-                //
-                // Because Trie values are indexes into the ScriptSets vector, pre-fill
-                // vector positions 0 and 1 to avoid conflicts with the reserved values.
-
-                scriptSets = new ArrayList<BuilderScriptSet>();
-                scriptSets.add(null);
-                scriptSets.add(null);
-
-                readWholeFileToString(confusablesWS, input);
-
-                parseRegexp = Pattern.compile(parseExp);
-
-                // Zap any Byte Order Mark at the start of input. Changing it to a space
-                // is benign
-                // given the syntax of the input.
-                if (input.charAt(0) == 0xfeff) {
-                    input.setCharAt(0, (char) 0x20);
-                }
-
-                // Parse the input, one line per iteration of this loop.
-                Matcher matcher = parseRegexp.matcher(input);
-                while (matcher.find()) {
-                    lineNum++;
-                    if (matcher.start(1) >= 0) {
-                        // this was a blank or comment line.
-                        continue;
-                    }
-                    if (matcher.start(8) >= 0) {
-                        // input file syntax error.
-                        throw new ParseException("ConfusablesWholeScript, line " + lineNum + ": Unrecognized input: "
-                                + matcher.group(), matcher.start());
-                    }
-
-                    // Pick up the start and optional range end code points from the
-                    // parsed line.
-                    int startCodePoint = Integer.parseInt(matcher.group(2), 16);
-                    if (startCodePoint > 0x10ffff) {
-                        throw new ParseException("ConfusablesWholeScript, line " + lineNum
-                                + ": out of range code point: " + matcher.group(2), matcher.start(2));
-                    }
-                    int endCodePoint = startCodePoint;
-                    if (matcher.start(3) >= 0) {
-                        endCodePoint = Integer.parseInt(matcher.group(3), 16);
-                    }
-                    if (endCodePoint > 0x10ffff) {
-                        throw new ParseException("ConfusablesWholeScript, line " + lineNum
-                                + ": out of range code point: " + matcher.group(3), matcher.start(3));
-                    }
-
-                    // Extract the two script names from the source line.
-                    String srcScriptName = matcher.group(4);
-                    String targScriptName = matcher.group(5);
-                    int srcScript = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, srcScriptName);
-                    int targScript = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, targScriptName);
-                    if (srcScript == UScript.INVALID_CODE) {
-                        throw new ParseException("ConfusablesWholeScript, line " + lineNum
-                                + ": Invalid script code t: " + matcher.group(4), matcher.start(4));
-                    }
-                    if (targScript == UScript.INVALID_CODE) {
-                        throw new ParseException("ConfusablesWholeScript, line " + lineNum
-                                + ": Invalid script code t: " + matcher.group(5), matcher.start(5));
-                    }
-
-                    // select the table - (A) any case or (L) lower case only
-                    Trie2Writable table = anyCaseTrie;
-                    if (matcher.start(7) >= 0) {
-                        table = lowerCaseTrie;
-                    }
-
-                    // Build the set of scripts containing confusable characters for
-                    // the code point(s) specified in this input line.
-                    // Sanity check that the script of the source code point is the same
-                    // as the source script indicated in the input file. Failure of this
-                    // check is an error in the input file.
-                    //
-                    // Include the source script in the set (needed for Mixed Script
-                    // Confusable detection).
-                    //
-                    int cp;
-                    for (cp = startCodePoint; cp <= endCodePoint; cp++) {
-                        int setIndex = table.get(cp);
-                        BuilderScriptSet bsset = null;
-                        if (setIndex > 0) {
-                            assert (setIndex < scriptSets.size());
-                            bsset = scriptSets.get(setIndex);
-                        } else {
-                            bsset = new BuilderScriptSet();
-                            bsset.codePoint = cp;
-                            bsset.trie = table;
-                            bsset.sset = new ScriptSet();
-                            setIndex = scriptSets.size();
-                            bsset.index = setIndex;
-                            bsset.rindex = 0;
-                            scriptSets.add(bsset);
-                            table.set(cp, setIndex);
-                        }
-                        bsset.sset.Union(targScript);
-                        bsset.sset.Union(srcScript);
-
-                        int cpScript = UScript.getScript(cp);
-                        if (cpScript != srcScript) {
-                            // status = U_INVALID_FORMAT_ERROR;
-                            throw new ParseException("ConfusablesWholeScript, line " + lineNum
-                                    + ": Mismatch between source script and code point " + Integer.toString(cp, 16),
-                                    matcher.start(5));
-                        }
-                    }
-                }
-
-                // Eliminate duplicate script sets. At this point we have a separate
-                // script set for every code point that had data in the input file.
-                //
-                // We eliminate underlying ScriptSet objects, not the BuildScriptSets
-                // that wrap them
-                //
-                // printf("Number of scriptSets: %d\n", scriptSets.size());
-                //int duplicateCount = 0;
-                rtScriptSetsCount = 2;
-                for (int outeri = 2; outeri < scriptSets.size(); outeri++) {
-                    BuilderScriptSet outerSet = scriptSets.get(outeri);
-                    if (outerSet.index != outeri) {
-                        // This set was already identified as a duplicate.
-                        // It will not be allocated a position in the runtime array
-                        // of ScriptSets.
-                        continue;
-                    }
-                    outerSet.rindex = rtScriptSetsCount++;
-                    for (int inneri = outeri + 1; inneri < scriptSets.size(); inneri++) {
-                        BuilderScriptSet innerSet = scriptSets.get(inneri);
-                        if (outerSet.sset.equals(innerSet.sset) && outerSet.sset != innerSet.sset) {
-                            innerSet.sset = outerSet.sset;
-                            innerSet.index = outeri;
-                            innerSet.rindex = outerSet.rindex;
-                            //duplicateCount++;
-                        }
-                        // But this doesn't get all. We need to fix the TRIE.
-                    }
-                }
-                // printf("Number of distinct script sets: %d\n",
-                // rtScriptSetsCount);
-
-                // Update the Trie values to be reflect the run time script indexes (after duplicate merging).
-                // (Trie Values 0 and 1 are reserved, and the corresponding slots in scriptSets
-                // are unused, which is why the loop index starts at 2.)
-                for (int i = 2; i < scriptSets.size(); i++) {
-                    BuilderScriptSet bSet = scriptSets.get(i);
-                    if (bSet.rindex != i) {
-                        bSet.trie.set(bSet.codePoint, bSet.rindex);
-                    }
-                }
-
-                // For code points with script==Common or script==Inherited,
-                // Set the reserved value of 1 into both Tries. These characters do not participate
-                // in Whole Script Confusable detection; this reserved value is the means
-                // by which they are detected.
-                UnicodeSet ignoreSet = new UnicodeSet();
-                ignoreSet.applyIntPropertyValue(UProperty.SCRIPT, UScript.COMMON);
-                UnicodeSet inheritedSet = new UnicodeSet();
-                inheritedSet.applyIntPropertyValue(UProperty.SCRIPT, UScript.INHERITED);
-                ignoreSet.addAll(inheritedSet);
-                for (int rn = 0; rn < ignoreSet.getRangeCount(); rn++) {
-                    int rangeStart = ignoreSet.getRangeStart(rn);
-                    int rangeEnd = ignoreSet.getRangeEnd(rn);
-                    anyCaseTrie.setRange(rangeStart, rangeEnd, 1, true);
-                    lowerCaseTrie.setRange(rangeStart, rangeEnd, 1, true);
-                }
-
-                // Put the compiled data to the destination SpoofData
-                dest.fAnyCaseTrie   = anyCaseTrie.toTrie2_16();
-                dest.fLowerCaseTrie = lowerCaseTrie.toTrie2_16();
-                dest.fScriptSets = new ScriptSet[rtScriptSetsCount];
-                dest.fScriptSets[0] = new ScriptSet();
-                dest.fScriptSets[1] = new ScriptSet();
-
-                int rindex = 2;
-                for (int i = 2; i < scriptSets.size(); i++) {
-                    BuilderScriptSet bSet = scriptSets.get(i);
-                    if (bSet.rindex < rindex) {
-                        // We have already put this script set to the output data.
-                        continue;
-                    }
-                    assert (rindex == bSet.rindex);
-                    dest.fScriptSets[rindex] = bSet.sset;
-                    rindex++;
-                }
-            }
-
-            // class BuilderScriptSet. Represents the set of scripts (Script Codes)
-            // containing characters that are confusable with one specific
-            // code point.
-            static class BuilderScriptSet {
-                int codePoint;           // The source code point.
-                Trie2Writable trie;      // Any-case or Lower-case Trie.
-                                         // These Trie tables are the final result of the
-                                         // build. This flag indicates which of the two
-                                         // this set of data is for.
-
-                ScriptSet sset;          // The set of scripts itself.
-
-                int index;               // Index of this set in the Build Time vector
-                                         // of script sets.
-
-                int rindex;              // Index of this set in the final (runtime)
-                                         // array of sets.
-
-                // its underlying sset.
-
-                BuilderScriptSet() {
-                    codePoint = -1;
-                    trie = null;
-                    sset = null;
-                    index = 0;
-                    rindex = 0;
-                }
-            }
-
-        }
-
         /*
-         * *****************************************************************************
-         * Internal classes for compililing confusable data into its binary (runtime) form.
+         * *****************************************************************************
+         * Internal classes for compiling confusable data into its binary (runtime) form.
          * *****************************************************************************
          */
         // ---------------------------------------------------------------------
@@ -968,31 +787,27 @@ public class SpoofChecker {
         //
         // The binary structures are described in uspoof_impl.h
         //
-        // 1. parse the data, building 4 hash tables, one each for the SL, SA, ML and MA
-        // tables. Each maps from a int to a String.
+        // 1. parse the data, making a hash table mapping from a codepoint to a String.
         //
         // 2. Sort all of the strings encountered by length, since they will need to
         // be stored in that order in the final string table.
         //
-        // 3. Build a list of keys (UChar32s) from the four mapping tables. Sort the
+        // 3. Build a list of keys (UChar32s) from the mapping table. Sort the
         // list because that will be the ordering of our runtime table.
         //
         // 4. Generate the run time string table. This is generated before the key & value
-        // tables because we need the string indexes when building those tables.
+        // tables because we need the string indexes when building those tables.
         //
-        // 5. Build the run-time key and value tables. These are parallel tables, and
+        // 5. Build the run-time key and value tables. These are parallel tables, and
         // are built at the same time
 
         // class ConfusabledataBuilder
-        //     An instance of this class exists while the confusable data is being built from source.
-        //     It encapsulates the intermediate data structures that are used for building.
-        //     It exports one static function, to do a confusable data build.
+        // An instance of this class exists while the confusable data is being built from source.
+        // It encapsulates the intermediate data structures that are used for building.
+        // It exports one static function, to do a confusable data build.
         private static class ConfusabledataBuilder {
 
-            private Hashtable<Integer, SPUString> fSLTable;
-            private Hashtable<Integer, SPUString> fSATable;
-            private Hashtable<Integer, SPUString> fMLTable;
-            private Hashtable<Integer, SPUString> fMATable;
+            private Hashtable<Integer, SPUString> fTable;
             private UnicodeSet fKeySet; // A set of all keys (UChar32s) that go into the
                                         // four mapping tables.
 
@@ -1001,43 +816,49 @@ public class SpoofChecker {
             private StringBuffer fStringTable;
             private ArrayList<Integer> fKeyVec;
             private ArrayList<Integer> fValueVec;
-            private ArrayList<Integer> fStringLengthsTable;
             private SPUStringPool stringPool;
             private Pattern fParseLine;
             private Pattern fParseHexNum;
             private int fLineNum;
 
             ConfusabledataBuilder() {
-                fSLTable  = new Hashtable<Integer, SPUString>();
-                fSATable  = new Hashtable<Integer, SPUString>();
-                fMLTable  = new Hashtable<Integer, SPUString>();
-                fMATable  = new Hashtable<Integer, SPUString>();
-                fKeySet   = new UnicodeSet();
-                fKeyVec   = new ArrayList<Integer>();
+                fTable = new Hashtable<Integer, SPUString>();
+                fKeySet = new UnicodeSet();
+                fKeyVec = new ArrayList<Integer>();
                 fValueVec = new ArrayList<Integer>();
                 stringPool = new SPUStringPool();
             }
 
             void build(Reader confusables, SpoofData dest) throws ParseException, java.io.IOException {
                 StringBuffer fInput = new StringBuffer();
-                WSConfusableDataBuilder.readWholeFileToString(confusables, fInput);
+
+                // Read the whole input into fInput line by line, ending every line with '\n'.
+                LineNumberReader lnr = new LineNumberReader(confusables);
+                do {
+                    String line = lnr.readLine();
+                    if (line == null) {
+                        break;
+                    }
+                    fInput.append(line);
+                    fInput.append('\n');
+                } while (true);
 
                 // Regular Expression to parse a line from Confusables.txt. The expression will match
                 // any line. What was matched is determined by examining which capture groups have a match.
-                //   Capture Group 1: the source char
-                //   Capture Group 2: the replacement chars
-                //   Capture Group 3-6 the table type, SL, SA, ML, or MA
-                //   Capture Group 7: A blank or comment only line.
-                //   Capture Group 8: A syntactically invalid line. Anything that didn't match before.
+                // Capture Group 1: the source char
+                // Capture Group 2: the replacement chars
+                // Capture Group 3-6 the table type, SL, SA, ML, or MA (deprecated)
+                // Capture Group 7: A blank or comment only line.
+                // Capture Group 8: A syntactically invalid line. Anything that didn't match before.
                 // Example Line from the confusables.txt source file:
-                //   "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... "
+                // "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... "
                 fParseLine = Pattern.compile("(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" + // Match the source char
-                        "[ \\t]*([0-9A-Fa-f]+" +                     // Match the replacement char(s)
-                        "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" +        //     (continued)
-                        "\\s*(?:(SL)|(SA)|(ML)|(MA))" +              // Match the table type
-                        "[ \\t]*(?:#.*?)?$" +                        // Match any trailing #comment
-                        "|^([ \\t]*(?:#.*?)?)$" +                    // OR match empty lines or lines with only a #comment
-                        "|^(.*?)$");                                 // OR match any line, which catches illegal lines.
+                        "[ \\t]*([0-9A-Fa-f]+" + // Match the replacement char(s)
+                        "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" + // (continued)
+                        "\\s*(?:(SL)|(SA)|(ML)|(MA))" + // Match the table type
+                        "[ \\t]*(?:#.*?)?$" + // Match any trailing #comment
+                        "|^([ \\t]*(?:#.*?)?)$" + // OR match empty lines or lines with only a #comment
+                        "|^(.*?)$"); // OR match any line, which catches illegal lines.
 
                 // Regular expression for parsing a hex number out of a space-separated list of them.
                 // Capture group 1 gets the number, with spaces removed.
@@ -1060,8 +881,9 @@ public class SpoofChecker {
                     if (matcher.start(8) >= 0) {
                         // input file syntax error.
                         // status = U_PARSE_ERROR;
-                        throw new ParseException("Confusables, line " + fLineNum + ": Unrecognized Line: "
-                                + matcher.group(8), matcher.start(8));
+                        throw new ParseException(
+                                "Confusables, line " + fLineNum + ": Unrecognized Line: " + matcher.group(8),
+                                matcher.start(8));
                     }
 
                     // We have a good input line. Extract the key character and mapping
@@ -1069,8 +891,9 @@ public class SpoofChecker {
                     // put them into the appropriate mapping table.
                     int keyChar = Integer.parseInt(matcher.group(1), 16);
                     if (keyChar > 0x10ffff) {
-                        throw new ParseException("Confusables, line " + fLineNum + ": Bad code point: "
-                                + matcher.group(1), matcher.start(1));
+                        throw new ParseException(
+                                "Confusables, line " + fLineNum + ": Bad code point: " + matcher.group(1),
+                                matcher.start(1));
                     }
                     Matcher m = fParseHexNum.matcher(matcher.group(2));
 
@@ -1078,8 +901,9 @@ public class SpoofChecker {
                     while (m.find()) {
                         int c = Integer.parseInt(m.group(1), 16);
                         if (c > 0x10ffff) {
-                            throw new ParseException("Confusables, line " + fLineNum + ": Bad code point: "
-                                    + Integer.toString(c, 16), matcher.start(2));
+                            throw new ParseException(
+                                    "Confusables, line " + fLineNum + ": Bad code point: " + Integer.toString(c, 16),
+                                    matcher.start(2));
                         }
                         mapString.appendCodePoint(c);
                     }
@@ -1090,33 +914,10 @@ public class SpoofChecker {
                     // eliminated.
                     SPUString smapString = stringPool.addString(mapString.toString());
 
-                    // Add the char . string mapping to the appropriate table.
-                    Hashtable<Integer, SPUString> table =
-                            matcher.start(3) >= 0 ? fSLTable :
-                            matcher.start(4) >= 0 ? fSATable :
-                            matcher.start(5) >= 0 ? fMLTable :
-                            matcher.start(6) >= 0 ? fMATable :
-                            null;
-                    assert (table != null);
-
+                    // Add the char -> string mapping to the table.
                     // For Unicode 8, the SL, SA and ML tables have been discontinued.
-                    //                All input data from confusables.txt is tagged MA.
-                    //                ICU spoof check functions should ignore the specified table and always
-                    //                use this MA Data.
-                    //                For now, implement by populating the MA data into all four tables, and
-                    //                keep the multiple table implementation in place, in case it comes back
-                    //                at some time in the future.
-                    //                There is no run time size penalty to keeping the four table implementation -
-                    //                the data is shared when it's the same betweeen tables.
-
-                    if (table != fMATable) {
-                        throw new ParseException("Confusables, line " + fLineNum + ": Table must be 'MA'.", 0);
-                    }
-                    // table.put(keyChar, smapString);
-                    fSLTable.put(keyChar, smapString);
-                    fSATable.put(keyChar, smapString);
-                    fMLTable.put(keyChar, smapString);
-                    fMATable.put(keyChar, smapString);
+                    // All input data from confusables.txt is tagged MA.
+                    fTable.put(keyChar, smapString);
 
                     fKeySet.add(keyChar);
                 }
@@ -1131,83 +932,62 @@ public class SpoofChecker {
                 // Build up the string array, and record the index of each string therein
                 // in the (build time only) string pool.
                 // Strings of length one are not entered into the strings array.
-                // At the same time, build up the string lengths table, which records the
-                // position in the string table of the first string of each length >= 4.
                 // (Strings in the table are sorted by length)
 
                 stringPool.sort();
                 fStringTable = new StringBuffer();
-                fStringLengthsTable = new ArrayList<Integer>();
-                int previousStringLength = 0;
-                int previousStringIndex = 0;
                 int poolSize = stringPool.size();
                 int i;
                 for (i = 0; i < poolSize; i++) {
                     SPUString s = stringPool.getByIndex(i);
                     int strLen = s.fStr.length();
                     int strIndex = fStringTable.length();
-                    assert (strLen >= previousStringLength);
                     if (strLen == 1) {
                         // strings of length one do not get an entry in the string table.
                         // Keep the single string character itself here, which is the same
                         // convention that is used in the final run-time string table index.
-                        s.fStrTableIndex = s.fStr.charAt(0);
+                        s.fCharOrStrTableIndex = s.fStr.charAt(0);
                     } else {
-                        if ((strLen > previousStringLength) && (previousStringLength >= 4)) {
-                            fStringLengthsTable.add(previousStringIndex);
-                            fStringLengthsTable.add(previousStringLength);
-                        }
-                        s.fStrTableIndex = strIndex;
+                        s.fCharOrStrTableIndex = strIndex;
                         fStringTable.append(s.fStr);
                     }
-                    previousStringLength = strLen;
-                    previousStringIndex = strIndex;
-                }
-                // Make the final entry to the string lengths table.
-                // (it holds an entry for the _last_ string of each length, so adding
-                // the
-                // final one doesn't happen in the main loop because no longer string
-                // was encountered.)
-                if (previousStringLength >= 4) {
-                    fStringLengthsTable.add(previousStringIndex);
-                    fStringLengthsTable.add(previousStringLength);
                 }
 
-                // Construct the compile-time Key and Value tables
+                // Construct the compile-time Key and Value table.
                 //
-                // For each key code point, check which mapping tables it applies to,
-                // and create the final data for the key & value structures.
+                // The keys in the Key table follow the format described in uspoof.h for the
+                // Cfu confusables data structure.
                 //
-                // The four logical mapping tables are conflated into one combined
-                // table.
-                // If multiple logical tables have the same mapping for some key, they
-                // share a single entry in the combined table.
-                // If more than one mapping exists for the same key code point, multiple
-                // entries will be created in the table
-
-                for (String keyCharStr: fKeySet) {
+                // Starting in ICU 58, each code point has exactly one entry in the data
+                // structure.
+
+                for (String keyCharStr : fKeySet) {
                     int keyChar = keyCharStr.codePointAt(0);
-                    addKeyEntry(keyChar, fSLTable, SpoofChecker.SL_TABLE_FLAG);
-                    addKeyEntry(keyChar, fSATable, SpoofChecker.SA_TABLE_FLAG);
-                    addKeyEntry(keyChar, fMLTable, SpoofChecker.ML_TABLE_FLAG);
-                    addKeyEntry(keyChar, fMATable, SpoofChecker.MA_TABLE_FLAG);
+                    SPUString targetMapping = fTable.get(keyChar);
+                    assert targetMapping != null;
+
+                    int key = ConfusableDataUtils.codePointAndLengthToKey(keyChar, targetMapping.fStr.length());
+                    int value = targetMapping.fCharOrStrTableIndex;
+
+                    fKeyVec.add(key);
+                    fValueVec.add(value);
                 }
 
                 // Put the assembled data into the destination SpoofData object.
 
                 // The Key Table
-                //     While copying the keys to the output array,
-                //     also sanity check that the keys are sorted.
-
+                // While copying the keys to the output array,
+                // also sanity check that the keys are sorted.
                 int numKeys = fKeyVec.size();
                 dest.fCFUKeys = new int[numKeys];
-                int previousKey = 0;
-                for (i=0; i<numKeys; i++) {
+                int previousCodePoint = 0;
+                for (i = 0; i < numKeys; i++) {
                     int key = fKeyVec.get(i);
-                    assert ((key & 0x00ffffff) >= (previousKey & 0x00ffffff));
-                    assert ((key & 0xff000000) != 0);
+                    int codePoint = ConfusableDataUtils.keyToCodePoint(key);
+                    // strictly greater because there can be only one entry per code point
+                    assert codePoint > previousCodePoint;
                     dest.fCFUKeys[i] = key;
-                    previousKey = key;
+                    previousCodePoint = codePoint;
                 }
 
                 // The Value Table, parallels the key table
@@ -1215,167 +995,24 @@ public class SpoofChecker {
                 assert (numKeys == numValues);
                 dest.fCFUValues = new short[numValues];
                 i = 0;
-                for (int value:fValueVec) {
+                for (int value : fValueVec) {
                     assert (value < 0xffff);
-                    dest.fCFUValues[i++] = (short)value;
+                    dest.fCFUValues[i++] = (short) value;
                 }
 
                 // The Strings Table.
-
                 dest.fCFUStrings = fStringTable.toString();
-
-
-                // The String Lengths Table.
-
-                // While copying into the runtime array do some sanity checks on the values
-                // Each complete entry contains two fields, an index and an offset.
-                // Lengths should increase with each entry.
-                // Offsets should be less than the size of the string table.
-
-                int lengthTableLength = fStringLengthsTable.size();
-                int previousLength = 0;
-
-                // Note: StringLengthsSize in the raw data is the number of complete entries,
-                //       each consisting of a pair of 16 bit values, hence the divide by 2.
-
-                int stringLengthsSize = lengthTableLength / 2;
-                dest.fCFUStringLengths = new SpoofData.SpoofStringLengthsElement[stringLengthsSize];
-                for (i = 0; i < stringLengthsSize; i += 1) {
-                    int offset = fStringLengthsTable.get(i*2);
-                    int length = fStringLengthsTable.get(i*2 + 1);
-                    assert (offset < dest.fCFUStrings.length());
-                    assert (length < 40);
-                    assert (length > previousLength);
-                    dest.fCFUStringLengths[i] = new SpoofData.SpoofStringLengthsElement();
-                    dest.fCFUStringLengths[i].fLastString = offset;
-                    dest.fCFUStringLengths[i].fStrLength  = length;
-                    previousLength = length;
-                }
-             }
-
-            // Add an entry to the key and value tables being built
-            // input: data from SLTable, MATable, etc.
-            // outut: entry added to fKeyVec and fValueVec
-            // addKeyEntry Construction of the confusable Key and Mapping Values tables.
-            // This is an intermediate point in the building process.
-            // We already have the mappings in the hash tables fSLTable, etc.
-            // This function builds corresponding run-time style table entries into
-            // fKeyVec and fValueVec
-            void addKeyEntry(int keyChar, // The key character
-                    Hashtable<Integer, SPUString> table, // The table, one of SATable,
-                    // MATable, etc.
-                    int tableFlag) { // One of SA_TABLE_FLAG, etc.
-                SPUString targetMapping = table.get(keyChar);
-                if (targetMapping == null) {
-                    // No mapping for this key character.
-                    // (This function is called for all four tables for each key char
-                    // that
-                    // is seen anywhere, so this no entry cases are very much expected.)
-                    return;
-                }
-
-                // Check whether there is already an entry with the correct mapping.
-                // If so, simply set the flag in the keyTable saying that the existing
-                // entry
-                // applies to the table that we're doing now.
-                boolean keyHasMultipleValues = false;
-                int i;
-                for (i = fKeyVec.size() - 1; i >= 0; i--) {
-                    int key = fKeyVec.get(i);
-                    if ((key & 0x0ffffff) != keyChar) {
-                        // We have now checked all existing key entries for this key
-                        // char (if any)
-                        // without finding one with the same mapping.
-                        break;
-                    }
-                    String mapping = getMapping(i);
-                    if (mapping.equals(targetMapping.fStr)) {
-                        // The run time entry we are currently testing has the correct
-                        // mapping.
-                        // Set the flag in it indicating that it applies to the new
-                        // table also.
-                        key |= tableFlag;
-                        fKeyVec.set(i, key);
-                        return;
-                    }
-                    keyHasMultipleValues = true;
-                }
-
-                // Need to add a new entry to the binary data being built for this
-                // mapping.
-                // Includes adding entries to both the key table and the parallel values
-                // table.
-                int newKey = keyChar | tableFlag;
-                if (keyHasMultipleValues) {
-                    newKey |= SpoofChecker.KEY_MULTIPLE_VALUES;
-                }
-                int adjustedMappingLength = targetMapping.fStr.length() - 1;
-                if (adjustedMappingLength > 3) {
-                    adjustedMappingLength = 3;
-                }
-                newKey |= adjustedMappingLength << SpoofChecker.KEY_LENGTH_SHIFT;
-
-                int newData = targetMapping.fStrTableIndex;
-
-                fKeyVec.add(newKey);
-                fValueVec.add(newData);
-
-                // If the preceding key entry is for the same key character (but with a
-                // different mapping)
-                // set the multiple-values flag on it.
-                if (keyHasMultipleValues) {
-                    int previousKeyIndex = fKeyVec.size() - 2;
-                    int previousKey = fKeyVec.get(previousKeyIndex);
-                    previousKey |= SpoofChecker.KEY_MULTIPLE_VALUES;
-                    fKeyVec.set(previousKeyIndex, previousKey);
-                }
             }
 
-            // From an index into fKeyVec & fValueVec
-            // get a String with the corresponding mapping.
-            String getMapping(int index) {
-                int key = fKeyVec.get(index);
-                int value = fValueVec.get(index);
-                int length = SpoofChecker.getKeyLength(key);
-                int lastIndexWithLen;
-                switch (length) {
-                case 0:
-                    char[] cs = { (char) value };
-                    return new String(cs);
-                case 1:
-                case 2:
-                    return fStringTable.substring(value, value + length + 1); // Note: +1 as optimization
-                case 3:
-                    length = 0;
-                    int i;
-                    for (i = 0; i < fStringLengthsTable.size(); i += 2) {
-                        lastIndexWithLen = fStringLengthsTable.get(i);
-                        if (value <= lastIndexWithLen) {
-                            length = fStringLengthsTable.get(i + 1);
-                            break;
-                        }
-                    }
-                    assert (length >= 3);
-                    return fStringTable.substring(value, value + length);
-                default:
-                    assert (false);
-                }
-                return "";
-            }
-
-
-
-
-
-            public static void buildConfusableData(Reader confusables, SpoofData dest) throws java.io.IOException,
-            ParseException {
+            public static void buildConfusableData(Reader confusables, SpoofData dest)
+                    throws java.io.IOException, ParseException {
                 ConfusabledataBuilder builder = new ConfusabledataBuilder();
                 builder.build(confusables, dest);
             }
 
             /*
-             * *****************************************************************************
-             * Internal classes for compiling confusable data into its binary (runtime) form.
+             * *****************************************************************************
+             * Internal classes for compiling confusable data into its binary (runtime) form.
              * *****************************************************************************
              */
             // SPUString
@@ -1385,13 +1022,13 @@ public class SpoofChecker {
 
             private static class SPUString {
                 String fStr; // The actual string.
-                int fStrTableIndex; // Index into the final runtime data for this string.
-                                    // (or, for length 1, the single string char itself,
-                                    // there being no string table entry for it.)
+                int fCharOrStrTableIndex; // Index into the final runtime data for this string.
+                // (or, for length 1, the single string char itself,
+                // there being no string table entry for it.)
 
                 SPUString(String s) {
                     fStr = s;
-                    fStrTableIndex = 0;
+                    fCharOrStrTableIndex = 0;
                 }
             }
 
@@ -1412,6 +1049,8 @@ public class SpoofChecker {
                         return sL.fStr.compareTo(sR.fStr);
                     }
                 }
+
+                final static SPUStringComparator INSTANCE = new SPUStringComparator();
             }
 
             // String Pool A utility class for holding the strings that are the result of
@@ -1451,7 +1090,7 @@ public class SpoofChecker {
 
                 // Sort the contents; affects the ordering of getByIndex().
                 public void sort() {
-                    Collections.sort(fVec, new SPUStringComparator());
+                    Collections.sort(fVec, SPUStringComparator.INSTANCE);
                 }
 
                 private Vector<SPUString> fVec; // Elements are SPUString *
@@ -1484,8 +1123,8 @@ public class SpoofChecker {
     }
 
     /**
-     * Get a read-only set of locales for the scripts that are acceptable in strings to be checked. If no limitations on scripts
-     * have been specified, an empty set will be returned.
+     * Get a read-only set of locales for the scripts that are acceptable in strings to be checked. If no limitations on
+     * scripts have been specified, an empty set will be returned.
      *
      * setAllowedChars() will reset the list of allowed locales to be empty.
      *
@@ -1501,8 +1140,8 @@ public class SpoofChecker {
     }
 
     /**
-     * Get a set of {@link java.util.Locale} instances for the scripts that are acceptable in strings to be checked. If no
-     * limitations on scripts have been specified, an empty set will be returned.
+     * Get a set of {@link java.util.Locale} instances for the scripts that are acceptable in strings to be checked. If
+     * no limitations on scripts have been specified, an empty set will be returned.
      *
      * @return A set of locales corresponding to the acceptable scripts.
      * @stable ICU 54
@@ -1530,20 +1169,20 @@ public class SpoofChecker {
     }
 
     /**
-     * A struct-like class to hold the results of a Spoof Check operation.
-     * Tells which check(s) have failed.
+     * A struct-like class to hold the results of a Spoof Check operation. Tells which check(s) have failed.
      *
      * @stable ICU 4.6
      */
     public static class CheckResult {
         /**
-         * Indicate which of the spoof check(s) has failed.  The value is a bitwise OR
-         * of the constants for the tests in question, SINGLE_SCRIPT_CONFUSABLE,
-         * MIXED_SCRIPT_CONFUSABLE, WHOLE_SCRIPT_CONFUSABLE, and so on.
+         * Indicates which of the spoof check(s) have failed. The value is a bitwise OR of the constants for the tests
+         * in question: RESTRICTION_LEVEL, CHAR_LIMIT, and so on.
          *
          * @stable ICU 4.6
+         * @see Builder#setChecks
          */
         public int checks;
+
         /**
          * The index of the first string position that failed a check.
          *
@@ -1551,26 +1190,27 @@ public class SpoofChecker {
          */
         @Deprecated
         public int position;
+
         /**
          * The numerics found in the string, if MIXED_NUMBERS was set; otherwise null;
          *
-         * @internal
-         * @deprecated This API is ICU internal only.
+         * @draft ICU 58
+         * @provisional This API might change or be removed in a future release.
          */
-        @Deprecated
         public UnicodeSet numerics;
+
         /**
          * The restriction level that the text meets, if RESTRICTION_LEVEL is set; otherwise null.
          *
-         * @internal
-         * @deprecated This API is ICU internal only.
+         * @draft ICU 58
+         * @provisional This API might change or be removed in a future release.
          */
-        @Deprecated
         public RestrictionLevel restrictionLevel;
 
         /**
-         *  Default constructor
-         *  @stable ICU 4.6
+         * Default constructor
+         *
+         * @stable ICU 4.6
          */
         public CheckResult() {
             checks = 0;
@@ -1579,6 +1219,7 @@ public class SpoofChecker {
 
         /**
          * {@inheritDoc}
+         *
          * @stable ICU 4.6
          */
         @Override
@@ -1629,8 +1270,7 @@ public class SpoofChecker {
      * @param text
      *            A String to be checked for possible security issues.
      * @param checkResult
-     *            Output parameter, indicates which specific tests failed.
-     *            May be null if the information is not wanted.
+     *            Output parameter, indicates which specific tests failed. May be null if the information is not wanted.
      * @return True there any issue is found with the input string.
      * @stable ICU 4.8
      */
@@ -1644,15 +1284,8 @@ public class SpoofChecker {
             checkResult.restrictionLevel = null;
         }
 
-        // Allocate an identifier info if needed.
-
-        IdentifierInfo identifierInfo = null;
-        if (0 != ((this.fChecks) & (RESTRICTION_LEVEL | MIXED_NUMBERS))) {
-            identifierInfo = getIdentifierInfo().setIdentifier(text).setIdentifierProfile(fAllowedCharsSet);
-        }
-
-        if (0 != ((this.fChecks) & RESTRICTION_LEVEL)) {
-            RestrictionLevel textRestrictionLevel = identifierInfo.getRestrictionLevel();
+        if (0 != (this.fChecks & RESTRICTION_LEVEL)) {
+            RestrictionLevel textRestrictionLevel = getRestrictionLevel(text);
             if (textRestrictionLevel.compareTo(fRestrictionLevel) > 0) {
                 result |= RESTRICTION_LEVEL;
             }
@@ -1661,8 +1294,9 @@ public class SpoofChecker {
             }
         }
 
-        if (0 != ((this.fChecks) & MIXED_NUMBERS)) {
-            UnicodeSet numerics = identifierInfo.getNumerics();
+        if (0 != (this.fChecks & MIXED_NUMBERS)) {
+            UnicodeSet numerics = new UnicodeSet();
+            getNumerics(text, numerics);
             if (numerics.size() > 1) {
                 result |= MIXED_NUMBERS;
             }
@@ -1685,86 +1319,49 @@ public class SpoofChecker {
             }
         }
 
-        if (0 != (this.fChecks & (WHOLE_SCRIPT_CONFUSABLE | MIXED_SCRIPT_CONFUSABLE | INVISIBLE))) {
-            // These are the checks that need to be done on NFD input
+        if (0 != (this.fChecks & INVISIBLE)) {
+            // This check needs to be done on NFD input
             String nfdText = nfdNormalizer.normalize(text);
 
-            if (0 != (this.fChecks & INVISIBLE)) {
-
-                // scan for more than one occurence of the same non-spacing mark
-                // in a sequence of non-spacing marks.
-                int i;
-                int c;
-                int firstNonspacingMark = 0;
-                boolean haveMultipleMarks = false;
-                UnicodeSet marksSeenSoFar = new UnicodeSet(); // Set of combining marks in a
-                                                              // single combining sequence.
-                for (i = 0; i < length;) {
-                    c = Character.codePointAt(nfdText, i);
-                    i = Character.offsetByCodePoints(nfdText, i, 1);
-                    if (Character.getType(c) != UCharacterCategory.NON_SPACING_MARK) {
-                        firstNonspacingMark = 0;
-                        if (haveMultipleMarks) {
-                            marksSeenSoFar.clear();
-                            haveMultipleMarks = false;
-                        }
-                        continue;
-                    }
-                    if (firstNonspacingMark == 0) {
-                        firstNonspacingMark = c;
-                        continue;
-                    }
-                    if (!haveMultipleMarks) {
-                        marksSeenSoFar.add(firstNonspacingMark);
-                        haveMultipleMarks = true;
-                    }
-                    if (marksSeenSoFar.contains(c)) {
-                        // report the error, and stop scanning.
-                        // No need to find more than the first failure.
-                        result |= INVISIBLE;
-                        break;
+            // scan for more than one occurrence of the same non-spacing mark
+            // in a sequence of non-spacing marks.
+            int i;
+            int c;
+            int firstNonspacingMark = 0;
+            boolean haveMultipleMarks = false;
+            UnicodeSet marksSeenSoFar = new UnicodeSet(); // Set of combining marks in a
+                                                          // single combining sequence.
+            for (i = 0; i < length;) {
+                c = Character.codePointAt(nfdText, i);
+                i = Character.offsetByCodePoints(nfdText, i, 1);
+                if (Character.getType(c) != UCharacterCategory.NON_SPACING_MARK) {
+                    firstNonspacingMark = 0;
+                    if (haveMultipleMarks) {
+                        marksSeenSoFar.clear();
+                        haveMultipleMarks = false;
                     }
-                    marksSeenSoFar.add(c);
+                    continue;
                 }
-            }
-
-            if (0 != (this.fChecks & (WHOLE_SCRIPT_CONFUSABLE | MIXED_SCRIPT_CONFUSABLE))) {
-                // The basic test is the same for both whole and mixed script confusables.
-                // Compute the set of scripts that every input character has a confusable in.
-                // For this computation an input character is always considered to be
-                // confusable with itself in its own script.
-                //
-                // If the number of such scripts is two or more, and the input consisted of
-                // characters all from a single script, we have a whole script confusable.
-                // (The two scripts will be the original script and the one that is confusable).
-
-                // If the number of such scripts >= one, and the original input contained characters from
-                // more than one script, we have a mixed script confusable. (We can transform
-                // some of the characters, and end up with a visually similar string all in one script.)
-
-                if (identifierInfo == null) {
-                    identifierInfo = getIdentifierInfo();
-                    identifierInfo.setIdentifier(text);
+                if (firstNonspacingMark == 0) {
+                    firstNonspacingMark = c;
+                    continue;
                 }
-                int scriptCount = identifierInfo.getScriptCount();
-
-                ScriptSet scripts = new ScriptSet();
-                this.wholeScriptCheck(nfdText, scripts);
-                int confusableScriptCount = scripts.countMembers();
-
-                if ((0 != (this.fChecks & WHOLE_SCRIPT_CONFUSABLE)) && confusableScriptCount >= 2 && scriptCount == 1) {
-                    result |= WHOLE_SCRIPT_CONFUSABLE;
+                if (!haveMultipleMarks) {
+                    marksSeenSoFar.add(firstNonspacingMark);
+                    haveMultipleMarks = true;
                 }
-
-                if ((0 != (this.fChecks & MIXED_SCRIPT_CONFUSABLE)) && confusableScriptCount >= 1 && scriptCount > 1) {
-                    result |= MIXED_SCRIPT_CONFUSABLE;
+                if (marksSeenSoFar.contains(c)) {
+                    // report the error, and stop scanning.
+                    // No need to find more than the first failure.
+                    result |= INVISIBLE;
+                    break;
                 }
+                marksSeenSoFar.add(c);
             }
         }
         if (checkResult != null) {
             checkResult.checks = result;
         }
-        releaseIdentifierInfo(identifierInfo);
         return (0 != result);
     }
 
@@ -1806,63 +1403,42 @@ public class SpoofChecker {
         // and for definitions of the types (single, whole, mixed-script) of confusables.
 
         // We only care about a few of the check flags. Ignore the others.
-        // If no tests relavant to this function have been specified, signal an error.
+        // If no tests relevant to this function have been specified, signal an error.
         // TODO: is this really the right thing to do? It's probably an error on
         // the caller's part, but logically we would just return 0 (no error).
-        if ((this.fChecks & (SINGLE_SCRIPT_CONFUSABLE | MIXED_SCRIPT_CONFUSABLE | WHOLE_SCRIPT_CONFUSABLE)) == 0) {
+        if ((this.fChecks & CONFUSABLE) == 0) {
             throw new IllegalArgumentException("No confusable checks are enabled.");
         }
-        int flagsForSkeleton = this.fChecks & ANY_CASE;
 
-        int result = 0;
-        IdentifierInfo identifierInfo = getIdentifierInfo();
-        identifierInfo.setIdentifier(s1);
-        int s1ScriptCount = identifierInfo.getScriptCount();
-        int s1FirstScript = identifierInfo.getScripts().nextSetBit(0);
-        identifierInfo.setIdentifier(s2);
-        int s2ScriptCount = identifierInfo.getScriptCount();
-        int s2FirstScript = identifierInfo.getScripts().nextSetBit(0);
-        releaseIdentifierInfo(identifierInfo);
-
-        if (0 != (this.fChecks & SINGLE_SCRIPT_CONFUSABLE)) {
-            // Do the Single Script compare.
-            if (s1ScriptCount <= 1 && s2ScriptCount <= 1 && s1FirstScript == s2FirstScript) {
-                flagsForSkeleton |= SINGLE_SCRIPT_CONFUSABLE;
-                String s1Skeleton = getSkeleton(flagsForSkeleton, s1);
-                String s2Skeleton = getSkeleton(flagsForSkeleton, s2);
-                if (s1Skeleton.equals(s2Skeleton)) {
-                    result |= SINGLE_SCRIPT_CONFUSABLE;
-                }
-            }
+        // Compute the skeletons and check for confusability.
+        String s1Skeleton = getSkeleton(s1);
+        String s2Skeleton = getSkeleton(s2);
+        if (!s1Skeleton.equals(s2Skeleton)) {
+            return 0;
         }
 
-        if (0 != (result & SINGLE_SCRIPT_CONFUSABLE)) {
-            // If the two inputs are single script confusable they cannot also be
-            // mixed or whole script confusable, according to the UAX39 definitions.
-            // So we can skip those tests.
-            return result;
-        }
+        // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate classes
+        // of confusables according to UTS 39 section 4.
+        // Start by computing the resolved script sets of s1 and s2.
+        ScriptSet s1RSS = new ScriptSet();
+        getResolvedScriptSet(s1, s1RSS);
+        ScriptSet s2RSS = new ScriptSet();
+        getResolvedScriptSet(s2, s2RSS);
 
-        // Two identifiers are whole script confusable if each is of a single script
-        // and they are mixed script confusable.
-        boolean possiblyWholeScriptConfusables = s1ScriptCount <= 1 && s2ScriptCount <= 1
-                && (0 != (this.fChecks & WHOLE_SCRIPT_CONFUSABLE));
-
-        // Mixed Script Check
-        if ((0 != (this.fChecks & MIXED_SCRIPT_CONFUSABLE)) || possiblyWholeScriptConfusables) {
-            // For getSkeleton(), resetting the SINGLE_SCRIPT_CONFUSABLE flag will get us
-            // the mixed script table skeleton, which is what we want.
-            // The Any Case / Lower Case bit in the skelton flags was set at the top of the function.
-            flagsForSkeleton &= ~SINGLE_SCRIPT_CONFUSABLE;
-            String s1Skeleton = getSkeleton(flagsForSkeleton, s1);
-            String s2Skeleton = getSkeleton(flagsForSkeleton, s2);
-            if (s1Skeleton.equals(s2Skeleton)) {
-                result |= MIXED_SCRIPT_CONFUSABLE;
-                if (possiblyWholeScriptConfusables) {
-                    result |= WHOLE_SCRIPT_CONFUSABLE;
-                }
+        // Turn on all applicable flags
+        int result = 0;
+        if (s1RSS.intersects(s2RSS)) {
+            result |= SINGLE_SCRIPT_CONFUSABLE;
+        } else {
+            result |= MIXED_SCRIPT_CONFUSABLE;
+            if (!s1RSS.isEmpty() && !s2RSS.isEmpty()) {
+                result |= WHOLE_SCRIPT_CONFUSABLE;
             }
         }
+
+        // Turn off flags that the user doesn't want
+        result &= fChecks;
+
         return result;
     }
 
@@ -1873,307 +1449,243 @@ public class SpoofChecker {
      * Using skeletons directly makes it possible to quickly check whether an identifier is confusable with any of some
      * large set of existing identifiers, by creating an efficiently searchable collection of the skeletons.
      *
-     * Skeletons are computed using the algorithm and data describe in Unicode UAX 39.
-     * The latest proposed update, UAX 39 Version 8 draft 1, says "the tables SL, SA, and ML
-     * were still problematic, and discouraged from use in [Uniocde] 7.0.
-     * They were thus removed from version 8.0"
+     * Skeletons are computed using the algorithm and data described in Unicode Technical Standard #39 (UTS 39).
      *
-     * In light of this, the default mapping data included with ICU 55 uses the
-     * Unicode 7 MA (Multi script Any case) table data for the other type options
-     * (Single Script, Any Case), (Single Script, Lower Case) and (Multi Script, Lower Case).
-     *
-     * @param type
-     *            The type of skeleton, corresponding to which of the Unicode confusable data tables to use. The default
-     *            is Mixed-Script, Lowercase. Allowed options are SINGLE_SCRIPT_CONFUSABLE and ANY_CASE_CONFUSABLE. The
-     *            two flags may be ORed.
-     * @param id
-     *            The input identifier whose skeleton will be genereated.
+     * @param str
+     *            The input string whose skeleton will be generated.
      * @return The output skeleton string.
      *
-     * @stable ICU 4.6
+     * @draft ICU 58
+     * @provisional This API might change or be removed in a future release.
      */
-    public String getSkeleton(int type, String id) {
-        int tableMask = 0;
-        switch (type) {
-        case 0:
-            tableMask = ML_TABLE_FLAG;
-            break;
-        case SINGLE_SCRIPT_CONFUSABLE:
-            tableMask = SL_TABLE_FLAG;
-            break;
-        case ANY_CASE:
-            tableMask = MA_TABLE_FLAG;
-            break;
-        case SINGLE_SCRIPT_CONFUSABLE | ANY_CASE:
-            tableMask = SA_TABLE_FLAG;
-            break;
-        default:
-            // *status = U_ILLEGAL_ARGUMENT_ERROR;
-            throw new IllegalArgumentException("SpoofChecker.getSkeleton(), bad type value.");
-        }
-
+    public String getSkeleton(CharSequence str) {
         // Apply the skeleton mapping to the NFD normalized input string
         // Accumulate the skeleton, possibly unnormalized, in a String.
-
-        String nfdId = nfdNormalizer.normalize(id);
+        String nfdId = nfdNormalizer.normalize(str);
         int normalizedLen = nfdId.length();
         StringBuilder skelSB = new StringBuilder();
         for (int inputIndex = 0; inputIndex < normalizedLen;) {
             int c = Character.codePointAt(nfdId, inputIndex);
             inputIndex += Character.charCount(c);
-            this.confusableLookup(c, tableMask, skelSB);
+            this.fSpoofData.confusableLookup(c, skelSB);
         }
         String skelStr = skelSB.toString();
         skelStr = nfdNormalizer.normalize(skelStr);
         return skelStr;
     }
 
-
     /**
-     *   Equality function. Return true if the two SpoofChecker objects
-     *   incorporate the same confusable data and have enabled the same
-     *   set of checks.
+     * Calls {@link SpoofChecker#getSkeleton(CharSequence id)}. Starting with ICU 55, the "type" parameter has been
+     * ignored, and starting with ICU 58, this function has been deprecated.
+     *
+     * @param type
+     *            No longer supported. Prior to ICU 55, was used to specify the mapping table SL, SA, ML, or MA.
+     * @param id
+     *            The input identifier whose skeleton will be generated.
+     * @return The output skeleton string.
      *
-     *   @param other the SpoofChecker being compared with.
-     *   @return true if the two SpoofCheckers are equal.
-     *   @internal
-     *   @deprecated This API is ICU internal only.
+     * @deprecated ICU 58
      */
     @Deprecated
+    public String getSkeleton(int type, CharSequence id) {
+        return getSkeleton(id);
+    }
+
+    /**
+     * Equality function. Return true if the two SpoofChecker objects incorporate the same confusable data and have
+     * enabled the same set of checks.
+     *
+     * @param other
+     *            the SpoofChecker being compared with.
+     * @return true if the two SpoofCheckers are equal.
+     * @draft ICU 58
+     * @provisional This API might change or be removed in a future release.
+     */
     @Override
     public boolean equals(Object other) {
-        if (!(other instanceof SpoofChecker)) {return false; }
-        SpoofChecker otherSC = (SpoofChecker)other;
-        if (fSpoofData != otherSC.fSpoofData &&
-                fSpoofData != null &&
-                !fSpoofData.equals(otherSC.fSpoofData)) {
+        if (!(other instanceof SpoofChecker)) {
+            return false;
+        }
+        SpoofChecker otherSC = (SpoofChecker) other;
+        if (fSpoofData != otherSC.fSpoofData && fSpoofData != null && !fSpoofData.equals(otherSC.fSpoofData)) {
             return false;
         }
-        if (fChecks != otherSC.fChecks) {return false; }
-        if (fAllowedLocales != otherSC.fAllowedLocales &&
-                fAllowedLocales != null &&
-                !fAllowedLocales.equals(otherSC.fAllowedLocales)) {
+        if (fChecks != otherSC.fChecks) {
             return false;
         }
-        if (fAllowedCharsSet != otherSC.fAllowedCharsSet &&
-                fAllowedCharsSet != null &&
-                !fAllowedCharsSet.equals(otherSC.fAllowedCharsSet)) {
+        if (fAllowedLocales != otherSC.fAllowedLocales && fAllowedLocales != null
+                && !fAllowedLocales.equals(otherSC.fAllowedLocales)) {
+            return false;
+        }
+        if (fAllowedCharsSet != otherSC.fAllowedCharsSet && fAllowedCharsSet != null
+                && !fAllowedCharsSet.equals(otherSC.fAllowedCharsSet)) {
             return false;
         }
         if (fRestrictionLevel != otherSC.fRestrictionLevel) {
             return false;
         }
         return true;
-     }
+    }
 
     /**
-     * This is a stub implementation and not designed for generic use.
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * @draft ICU 58
+     * @provisional This API might change or be removed in a future release.
      */
-    @Deprecated
     @Override
     public int hashCode() {
-        assert false;   // To make sure ICU implementation does not depend on this.
-        return 1234;    // Any arbitrary value - for now, using 1234.
+        return fChecks
+                ^ fSpoofData.hashCode()
+                ^ fAllowedLocales.hashCode()
+                ^ fAllowedCharsSet.hashCode()
+                ^ fRestrictionLevel.ordinal();
     }
 
-    /*
-     * Append the confusable skeleton transform for a single code point to a StringBuilder.
-     * The string to be appended will between 1 and 18 characters.
-     *
-     * This is the heart of the confusable skeleton generation implementation.
-     *
-     * @param tableMask bit flag specifying which confusable table to use. One of SL_TABLE_FLAG, MA_TABLE_FLAG, etc.
+    /**
+     * Computes the augmented script set for a code point, according to UTS 39 section 5.1.
      */
-    private void confusableLookup(int inChar, int tableMask, StringBuilder dest) {
-        // Binary search the spoof data key table for the inChar
-        int low = 0;
-        int mid = 0;
-        int limit = fSpoofData.fCFUKeys.length;
-        int midc;
-        boolean foundChar = false;
-        // [low, limit), i.e low is inclusive, limit is exclusive
-        do {
-            int delta = (limit - low) / 2;
-            mid = low + delta;
-            midc = fSpoofData.fCFUKeys[mid] & 0x1fffff;
-            if (inChar == midc) {
-                foundChar = true;
-                break;
-            } else if (inChar < midc) {
-                limit = mid; // limit is exclusive
-            } else {
-                // we have checked mid is not the char we looking for, the next char
-                // we want to check is (mid + 1)
-                low = mid + 1; // low is inclusive
-            }
-        } while (low < limit);
-        if (!foundChar) { // Char not found. It maps to itself.
-            dest.appendCodePoint(inChar);
-            return;
+    private static void getAugmentedScriptSet(int codePoint, ScriptSet result) {
+        result.clear();
+        UScript.getScriptExtensions(codePoint, result);
+
+        // Section 5.1 step 1
+        if (result.get(UScript.HAN)) {
+            result.set(UScript.HAN_WITH_BOPOMOFO);
+            result.set(UScript.JAPANESE);
+            result.set(UScript.KOREAN);
+        }
+        if (result.get(UScript.HIRAGANA)) {
+            result.set(UScript.JAPANESE);
+        }
+        if (result.get(UScript.KATAKANA)) {
+            result.set(UScript.JAPANESE);
+        }
+        if (result.get(UScript.HANGUL)) {
+            result.set(UScript.KOREAN);
+        }
+        if (result.get(UScript.BOPOMOFO)) {
+            result.set(UScript.HAN_WITH_BOPOMOFO);
         }
 
-        boolean foundKey = false;
-        int keyFlags = fSpoofData.fCFUKeys[mid] & 0xff000000;
-        if ((keyFlags & tableMask) == 0) {
-            // We found the right key char, but the entry doesn't pertain to the
-            // table we need. See if there is an adjacent key that does
-            if (0 != (keyFlags & SpoofChecker.KEY_MULTIPLE_VALUES)) {
-                int altMid;
-                for (altMid = mid - 1; (fSpoofData.fCFUKeys[altMid] & 0x00ffffff) == inChar; altMid--) {
-                    keyFlags = fSpoofData.fCFUKeys[altMid] & 0xff000000;
-                    if (0 != (keyFlags & tableMask)) {
-                        mid = altMid;
-                        foundKey = true;
-                        break;
-                    }
-                }
-                if (!foundKey) {
-                    for (altMid = mid + 1; (fSpoofData.fCFUKeys[altMid] & 0x00ffffff) == inChar; altMid++) {
-                        keyFlags = fSpoofData.fCFUKeys[altMid] & 0xff000000;
-                        if (0 != (keyFlags & tableMask)) {
-                            mid = altMid;
-                            foundKey = true;
-                            break;
-                        }
-                    }
-                }
-            }
-            if (!foundKey) {
-                // No key entry for this char & table.
-                // The input char maps to itself.
-                dest.appendCodePoint(inChar);
-                return;
-            }
+        // Section 5.1 step 2
+        if (result.get(UScript.COMMON) || result.get(UScript.INHERITED)) {
+            result.setAll();
         }
+    }
 
-        int stringLen = getKeyLength(keyFlags) + 1;
-        int keyTableIndex = mid;
+    /**
+     * Computes the resolved script set for a string, according to UTS 39 section 5.1.
+     */
+    private void getResolvedScriptSet(CharSequence input, ScriptSet result) {
+        getResolvedScriptSetWithout(input, UScript.CODE_LIMIT, result);
+    }
 
-        // Value is either a char (for strings of length 1) or
-        // an index into the string table (for longer strings)
-        short value = fSpoofData.fCFUValues[keyTableIndex];
-        if (stringLen == 1) {
-            dest.append((char) value);
-            return;
-        }
+    /**
+     * Computes the resolved script set for a string, omitting characters having the specified script. If
+     * UScript.CODE_LIMIT is passed as the second argument, all characters are included.
+     */
+    private void getResolvedScriptSetWithout(CharSequence input, int script, ScriptSet result) {
+        result.setAll();
 
-        // String length of 4 from the above lookup is used for all strings of
-        // length >= 4.
-        // For these, get the real length from the string lengths table,
-        // which maps string table indexes to lengths.
-        // All strings of the same length are stored contiguously in the string table.
-        // 'value' from the lookup above is the starting index for the desired string.
+        ScriptSet temp = new ScriptSet();
+        for (int utf16Offset = 0; utf16Offset < input.length();) {
+            int codePoint = Character.codePointAt(input, utf16Offset);
+            utf16Offset += Character.charCount(codePoint);
 
-        if (stringLen == 4) {
-            boolean dataOK = false;
-            for (SpoofData.SpoofStringLengthsElement el: fSpoofData.fCFUStringLengths) {
-                if (el.fLastString >= value) {
-                    stringLen = el.fStrLength;
-                    dataOK = true;
-                    break;
-                }
+            // Compute the augmented script set for the character
+            getAugmentedScriptSet(codePoint, temp);
+
+            // Intersect the augmented script set with the resolved script set, but only if the character doesn't
+            // have the script specified in the function call
+            if (script == UScript.CODE_LIMIT || !temp.get(script)) {
+                result.and(temp);
             }
-            assert(dataOK);
         }
-
-        dest.append(fSpoofData.fCFUStrings, value, value + stringLen);
-        return;
     }
 
-    // Implementation for Whole Script tests.
-    // Input text is already normalized to NFD
-    // Return the set of scripts, each of which can represent something that is
-    // confusable with the input text. The script of the input text
-    // is included; input consisting of characters from a single script will
-    // always produce a result consisting of a set containing that script.
-    private void wholeScriptCheck(CharSequence text, ScriptSet result) {
-        int inputIdx = 0;
-        int c;
-
-        Trie2 table = (0 != (fChecks & ANY_CASE)) ? fSpoofData.fAnyCaseTrie : fSpoofData.fLowerCaseTrie;
-        result.setAll();
-        while (inputIdx < text.length()) {
-            c = Character.codePointAt(text, inputIdx);
-            inputIdx = Character.offsetByCodePoints(text, inputIdx, 1);
-            int index = table.get(c);
-            if (index == 0) {
-                // No confusables in another script for this char.
-                // TODO: we should change the data to have sets with just the single script
-                // bit for the script of this char. Gets rid of this special case.
-                // Until then, grab the script from the char and intersect it with the set.
-                int cpScript = UScript.getScript(c);
-                assert (cpScript > UScript.INHERITED);
-                result.intersect(cpScript);
-            } else if (index == 1) {
-                // Script == Common or Inherited. Nothing to do.
-            } else {
-                result.intersect(fSpoofData.fScriptSets[index]);
+    /**
+     * Computes the set of numerics for a string, according to UTS 39 section 5.3.
+     */
+    private void getNumerics(String input, UnicodeSet result) {
+        result.clear();
+
+        for (int utf16Offset = 0; utf16Offset < input.length();) {
+            int codePoint = Character.codePointAt(input, utf16Offset);
+            utf16Offset += Character.charCount(codePoint);
+
+            // Store a representative character for each kind of decimal digit
+            if (UCharacter.getType(codePoint) == UCharacterCategory.DECIMAL_DIGIT_NUMBER) {
+                // Store the zero character as a representative for comparison.
+                // Unicode guarantees it is codePoint - value
+                result.add(codePoint - UCharacter.getNumericValue(codePoint));
             }
         }
     }
 
-    // IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create.
-    //  Maintain a one-element cache, which is sufficient to avoid repeatedly
-    //  creating new ones unless we get multi-thread concurrency collisions in spoof
-    //  check operations, which should be statistically uncommon.
-
-    private IdentifierInfo fCachedIdentifierInfo = null;  // Do not use this directly.
+    /**
+     * Computes the restriction level of a string, according to UTS 39 section 5.2.
+     */
+    private RestrictionLevel getRestrictionLevel(String input) {
+        // Section 5.2 step 1:
+        if (!fAllowedCharsSet.containsAll(input)) {
+            return RestrictionLevel.UNRESTRICTIVE;
+        }
 
-    private IdentifierInfo getIdentifierInfo() {
-        IdentifierInfo returnIdInfo = null;
-        synchronized (this) {
-            returnIdInfo = fCachedIdentifierInfo;
-            fCachedIdentifierInfo = null;
+        // Section 5.2 step 2:
+        if (ASCII.containsAll(input)) {
+            return RestrictionLevel.ASCII;
         }
-        if (returnIdInfo == null) {
-            returnIdInfo = new IdentifierInfo();
+
+        // Section 5.2 step 3:
+        ScriptSet resolvedScriptSet = new ScriptSet();
+        getResolvedScriptSet(input, resolvedScriptSet);
+
+        // Section 5.2 step 4:
+        if (!resolvedScriptSet.isEmpty()) {
+            return RestrictionLevel.SINGLE_SCRIPT_RESTRICTIVE;
         }
-        return returnIdInfo;
-    }
 
+        // Section 5.2 step 5:
+        ScriptSet resolvedNoLatn = new ScriptSet();
+        getResolvedScriptSetWithout(input, UScript.LATIN, resolvedNoLatn);
 
-    private void releaseIdentifierInfo(IdentifierInfo idInfo) {
-        if (idInfo != null) {
-            synchronized (this) {
-                if (fCachedIdentifierInfo == null) {
-                    fCachedIdentifierInfo = idInfo;
-                }
-            }
+        // Section 5.2 step 6:
+        if (resolvedNoLatn.get(UScript.HAN_WITH_BOPOMOFO) || resolvedNoLatn.get(UScript.JAPANESE)
+                || resolvedNoLatn.get(UScript.KOREAN)) {
+            return RestrictionLevel.HIGHLY_RESTRICTIVE;
+        }
+
+        // Section 5.2 step 7:
+        if (!resolvedNoLatn.isEmpty() && !resolvedNoLatn.get(UScript.CYRILLIC) && !resolvedNoLatn.get(UScript.GREEK)
+                && !resolvedNoLatn.get(UScript.CHEROKEE)) {
+            return RestrictionLevel.MODERATELY_RESTRICTIVE;
         }
-    };
+
+        // Section 5.2 step 8:
+        return RestrictionLevel.MINIMALLY_RESTRICTIVE;
+    }
 
     // Data Members
-    private int fChecks;                         // Bit vector of checks to perform.
+    private int fChecks; // Bit vector of checks to perform.
     private SpoofData fSpoofData;
-    private Set<ULocale> fAllowedLocales;        // The Set of allowed locales.
-    private UnicodeSet fAllowedCharsSet;         // The UnicodeSet of allowed characters.
+    private Set<ULocale> fAllowedLocales; // The Set of allowed locales.
+    private UnicodeSet fAllowedCharsSet; // The UnicodeSet of allowed characters.
     private RestrictionLevel fRestrictionLevel;
 
     private static Normalizer2 nfdNormalizer = Normalizer2.getNFDInstance();
 
-
     // Confusable Mappings Data Structures
     //
     // For the confusable data, we are essentially implementing a map,
-    //    key: a code point
-    //    value: a string. Most commonly one char in length, but can be more.
+    // key: a code point
+    // value: a string. Most commonly one char in length, but can be more.
     //
     // The keys are stored as a sorted array of 32 bit ints.
-    //          bits 0-23    a code point value
-    //          bits 24-31   flags
-    //             24:    1 if entry applies to SL table
-    //             25:    1 if entry applies to SA table
-    //             26:    1 if entry applies to ML table
-    //             27:    1 if entry applies to MA table
-    //             28:    1 if there are multiple entries for this code point.
-    //             29-30: length of value string, in UChars.
-    //                    values are (1, 2, 3, other)
-    //     The key table is sorted in ascending code point order. (not on the
-    //     32 bit int value, the flag bits do not participate in the sorting.)
+    // bits 0-23 a code point value
+    // bits 24-31 length of value string, in UChars (between 1 and 256 UChars).
+    // The key table is sorted in ascending code point order. (not on the
+    // 32 bit int value, the length bits do not participate in the sorting.)
     //
-    //     Lookup is done by means of a binary search in the key table.
+    // Lookup is done by means of a binary search in the key table.
     //
     // The corresponding values are kept in a parallel array of 16 bit ints.
     // If the value string is of length 1, it is literally in the value array.
@@ -2181,98 +1693,78 @@ public class SpoofChecker {
     // table.
     //
     // String Table:
-    //     The strings table contains all of the value strings (those of length two or greater)
-    //     concatentated together into one long char (UTF-16) array.
+    // The strings table contains all of the value strings (those of length two or greater)
+    // concatenated together into one long char (UTF-16) array.
     //
-    //     The array is arranged by length of the strings - all strings of the same length
-    //     are stored together. The sections are ordered by length of the strings -
-    //     all two char strings first, followed by all of the three Char strings, etc.
+    // The array is arranged by length of the strings - all strings of the same length
+    // are stored together. The sections are ordered by length of the strings -
+    // all two char strings first, followed by all of the three Char strings, etc.
     //
-    //     There is no nul character or other mark between adjacent strings.
-    //
-    // String Lengths table
-    //     The length of strings from 1 to 3 is flagged in the key table.
-    //     For strings of length 4 or longer, the string length table provides a
-    //     mapping between an index into the string table and the corresponding length.
-    //     Strings of these lengths are rare, so lookup time is not an issue.
-    //     Each entry consists of
-    //        unsigned short      index of the _last_ string with this length
-    //        unsigned short      the length
-
-    // Flag bits in the Key entries
-    static final int SL_TABLE_FLAG = (1 << 24);
-    static final int SA_TABLE_FLAG = (1 << 25);
-    static final int ML_TABLE_FLAG = (1 << 26);
-    static final int MA_TABLE_FLAG = (1 << 27);
-    static final int KEY_MULTIPLE_VALUES = (1 << 28);
-    static final int KEY_LENGTH_SHIFT = 29;
-
-    static final int getKeyLength(int x) {
-        return (((x) >> 29) & 3);
-    }
+    // There is no nul character or other mark between adjacent strings.
+    private static final class ConfusableDataUtils {
+        public static final int FORMAT_VERSION = 2; // version for ICU 58
+
+        public static final int keyToCodePoint(int key) {
+            return key & 0x00ffffff; // low 24 bits hold the code point
+        }
 
+        public static final int keyToLength(int key) {
+            return (key >>> 24) + 1; // unsigned shift: lengths above 128 set bit 31 of the key
+        }
+
+        public static final int codePointAndLengthToKey(int codePoint, int length) {
+            assert (codePoint & 0x00ffffff) == codePoint;
+            assert length >= 1 && length <= 256; // stored as (length - 1) in the top 8 bits
+            return codePoint | ((length - 1) << 24);
+        }
+    }
 
     // -------------------------------------------------------------------------------------
     //
     // SpoofData
     //
-    //   This class corresonds to the ICU SpoofCheck data.
+    // This class corresponds to the ICU SpoofCheck data.
     //
-    //   The data can originate with the Binary ICU data that is generated in ICU4C,
-    //   or it can originate from source rules that are compiled in ICU4J.
+    // The data can originate with the Binary ICU data that is generated in ICU4C,
+    // or it can originate from source rules that are compiled in ICU4J.
     //
-    //   This class does not include the set of checks to be performed, but only
-    //     data that is serialized into the ICU binary data.
+    // This class does not include the set of checks to be performed, but only
+    // data that is serialized into the ICU binary data.
     //
-    //   Because Java cannot easily wrap binaray data like ICU4C, the binary data is
-    //     copied into Java structures that are convenient for use by the run time code.
+    // Because Java cannot easily wrap binary data like ICU4C, the binary data is
+    // copied into Java structures that are convenient for use by the run time code.
     //
     // ---------------------------------------------------------------------------------------
     private static class SpoofData {
 
         // The Confusable data, Java data structures for.
-        int[]                       fCFUKeys;
-        short[]                     fCFUValues;
-        SpoofStringLengthsElement[] fCFUStringLengths;
-        String                      fCFUStrings;
-
-        // Whole Script Confusable Data
-        Trie2                       fAnyCaseTrie;
-        Trie2                       fLowerCaseTrie;
-        ScriptSet[]                 fScriptSets;
-
-        static class SpoofStringLengthsElement {
-            int fLastString;  // index in string table of last string with this length
-            int fStrLength;   // Length of strings
-            @Override
-            public boolean equals(Object other) {
-                if (!(other instanceof SpoofStringLengthsElement)) {
-                    return false;
-                }
-                SpoofStringLengthsElement otherEl = (SpoofStringLengthsElement)other;
-                return fLastString == otherEl.fLastString &&
-                       fStrLength  == otherEl.fStrLength;
-            }
-        }
+        int[] fCFUKeys;
+        short[] fCFUValues;
+        String fCFUStrings;
 
-        private static final int DATA_FORMAT = 0x43667520;  // "Cfu "
+        private static final int DATA_FORMAT = 0x43667520; // "Cfu "
 
         private static final class IsAcceptable implements Authenticate {
-            // @Override when we switch to Java 6
             @Override
             public boolean isDataVersionAcceptable(byte version[]) {
-                return version[0] == 1;
+                return version[0] == ConfusableDataUtils.FORMAT_VERSION && version[1] == 0 && version[2] == 0
+                        && version[3] == 0; // accept only FORMAT_VERSION.0.0.0
             }
         }
+
         private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
 
         private static final class DefaultData {
             private static SpoofData INSTANCE = null;
+            private static IOException EXCEPTION = null;
 
             static {
+                // Note: Although this is static, the Java runtime can delay execution of this block until
+                // the data is actually requested via SpoofData.getDefault().
                 try {
                     INSTANCE = new SpoofData(ICUBinary.getRequiredData("confusables.cfu"));
-                } catch (IOException ignored) {
+                } catch (IOException e) {
+                    EXCEPTION = e;
                 }
             }
         }
@@ -2280,18 +1772,23 @@ public class SpoofChecker {
         /**
          * @return instance for Unicode standard data
          */
-        static SpoofData getDefault() {
+        public static SpoofData getDefault() {
+            if (DefaultData.EXCEPTION != null) {
+                throw new MissingResourceException(
+                        "Could not load default confusables data: " + DefaultData.EXCEPTION.getMessage(),
+                        "SpoofChecker", "");
+            }
             return DefaultData.INSTANCE;
         }
 
         // SpoofChecker Data constructor for use from data builder.
         // Initializes a new, empty data area that will be populated later.
-        SpoofData() {
+        private SpoofData() {
         }
 
         // Constructor for use when creating from prebuilt default data.
         // A ByteBuffer is what the ICU internal data loading functions provide.
-        SpoofData(ByteBuffer bytes) throws java.io.IOException {
+        private SpoofData(ByteBuffer bytes) throws java.io.IOException {
             ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
             bytes.mark();
             readData(bytes);
@@ -2302,62 +1799,45 @@ public class SpoofChecker {
             if (!(other instanceof SpoofData)) {
                 return false;
             }
-            SpoofData otherData = (SpoofData)other;
-            if (!Arrays.equals(fCFUKeys, otherData.fCFUKeys)) return false;
-            if (!Arrays.equals(fCFUValues, otherData.fCFUValues)) return false;
-            if (!Arrays.deepEquals(fCFUStringLengths, otherData.fCFUStringLengths)) return false;
-            if (fCFUStrings != otherData.fCFUStrings &&
-                    fCFUStrings != null &&
-                    !fCFUStrings.equals(otherData.fCFUStrings)) return false;
-            if (fAnyCaseTrie != otherData.fAnyCaseTrie &&
-                    fAnyCaseTrie != null &&
-                    !fAnyCaseTrie.equals(otherData.fAnyCaseTrie)) return false;
-            if (fLowerCaseTrie != otherData.fLowerCaseTrie &&
-                    fLowerCaseTrie != null &&
-                    !fLowerCaseTrie.equals(otherData.fLowerCaseTrie)) return false;
-            if (!Arrays.deepEquals(fScriptSets, otherData.fScriptSets)) return false;
+            SpoofData otherData = (SpoofData) other;
+            if (!Arrays.equals(fCFUKeys, otherData.fCFUKeys))
+                return false;
+            if (!Arrays.equals(fCFUValues, otherData.fCFUValues))
+                return false;
+            if (fCFUStrings != otherData.fCFUStrings && fCFUStrings != null
+                    && !fCFUStrings.equals(otherData.fCFUStrings))
+                return false;
             return true;
         }
 
+        @Override
+        public int hashCode() {
+            return Arrays.hashCode(fCFUKeys) // Arrays.hashCode returns 0 for a null array
+                    ^ Arrays.hashCode(fCFUValues)
+                    ^ (fCFUStrings == null ? 0 : fCFUStrings.hashCode()); // null until the data is populated
+        }
+
         // Set the SpoofChecker data from pre-built binary data in a byte buffer.
         // The binary data format is as described for ICU4C spoof data.
         //
-        void readData(ByteBuffer bytes) throws java.io.IOException {
+        private void readData(ByteBuffer bytes) throws java.io.IOException {
             int magic = bytes.getInt();
             if (magic != 0x3845fdef) {
                 throw new IllegalArgumentException("Bad Spoof Check Data.");
             }
             @SuppressWarnings("unused")
-            int dataFormatVersion      = bytes.getInt();
+            int dataFormatVersion = bytes.getInt();
             @SuppressWarnings("unused")
-            int dataLength             = bytes.getInt();
-
-            int CFUKeysOffset          = bytes.getInt();
-            int CFUKeysSize            = bytes.getInt();
-
-            int CFUValuesOffset        = bytes.getInt();
-            int CFUValuesSize          = bytes.getInt();
-
-            int CFUStringTableOffset   = bytes.getInt();
-            int CFUStringTableSize     = bytes.getInt();
+            int dataLength = bytes.getInt();
 
-            int CFUStringLengthsOffset = bytes.getInt();
-            int CFUStringLengthsSize   = bytes.getInt();
+            int CFUKeysOffset = bytes.getInt();
+            int CFUKeysSize = bytes.getInt();
 
-            int anyCaseTrieOffset      = bytes.getInt();
-            /*int anyCaseTrieSize      =*/ bytes.getInt();
+            int CFUValuesOffset = bytes.getInt();
+            int CFUValuesSize = bytes.getInt();
 
-            int lowerCaseTrieOffset    = bytes.getInt();
-            /*int lowerCaseTrieLength  =*/ bytes.getInt();
-
-            int scriptSetsOffset       = bytes.getInt();
-            int scriptSetslength       = bytes.getInt();
-
-            int i;
-            fCFUKeys = null;
-            fCFUValues = null;
-            fCFUStringLengths = null;
-            fCFUStrings = null;
+            int CFUStringTableOffset = bytes.getInt();
+            int CFUStringTableSize = bytes.getInt();
 
             // We have now read the file header, and obtained the position for each
             // of the data items. Now read each in turn, first seeking the
@@ -2374,131 +1854,170 @@ public class SpoofChecker {
             bytes.reset();
             ICUBinary.skipBytes(bytes, CFUStringTableOffset);
             fCFUStrings = ICUBinary.getString(bytes, CFUStringTableSize, 0);
+        }
 
-            bytes.reset();
-            ICUBinary.skipBytes(bytes, CFUStringLengthsOffset);
-            fCFUStringLengths = new SpoofStringLengthsElement[CFUStringLengthsSize];
-            for (i = 0; i < CFUStringLengthsSize; i++) {
-                fCFUStringLengths[i] = new SpoofStringLengthsElement();
-                fCFUStringLengths[i].fLastString = bytes.getShort();
-                fCFUStringLengths[i].fStrLength = bytes.getShort();
+        /**
+         * Append the confusable skeleton transform for a single code point to a StringBuilder. The string to be
+         * appended will be between 1 and 18 characters as of Unicode 9. A code point with no entry maps to itself.
+         *
+         * This is the heart of the confusable skeleton generation implementation.
+         */
+        public void confusableLookup(int inChar, StringBuilder dest) {
+            // Perform a binary search.
+            // [lo, hi), i.e lo is inclusive, hi is exclusive.
+            // The result after the loop will be in lo.
+            int lo = 0;
+            int hi = length();
+            while (hi - lo > 1) { // pre-tested so that an empty table is never indexed
+                int mid = (lo + hi) >>> 1;
+                if (codePointAt(mid) > inChar) {
+                    hi = mid;
+                } else if (codePointAt(mid) < inChar) {
+                    lo = mid;
+                } else {
+                    // Found result. Break early.
+                    lo = mid;
+                    break;
+                }
+            }
+
+            // Did we find an entry? If not (or if the table is empty), the char maps to itself.
+            if (length() == 0 || codePointAt(lo) != inChar) {
+                dest.appendCodePoint(inChar);
+                return;
             }
 
-            bytes.reset();
-            ICUBinary.skipBytes(bytes, anyCaseTrieOffset);
-            fAnyCaseTrie = Trie2.createFromSerialized(bytes);
+            // Add the element to the string builder and return.
+            appendValueTo(lo, dest);
+            return;
+        }
 
-            bytes.reset();
-            ICUBinary.skipBytes(bytes, lowerCaseTrieOffset);
-            fLowerCaseTrie = Trie2.createFromSerialized(bytes);
+        /**
+         * Return the number of confusable entries in this SpoofData.
+         *
+         * @return The number of entries.
+         */
+        public int length() {
+            return fCFUKeys.length;
+        }
 
-            bytes.reset();
-            ICUBinary.skipBytes(bytes, scriptSetsOffset);
-            fScriptSets = new ScriptSet[scriptSetslength];
-            for (i = 0; i < scriptSetslength; i++) {
-                fScriptSets[i] = new ScriptSet(bytes);
-            }
+        /**
+         * Return the code point (key) at the specified index.
+         *
+         * @param index
+         *            The index within the SpoofData.
+         * @return The code point.
+         */
+        public int codePointAt(int index) {
+            return ConfusableDataUtils.keyToCodePoint(fCFUKeys[index]);
         }
 
+        /**
+         * Append the confusable skeleton at the specified index to the StringBuilder dest.
+         *
+         * @param index
+         *            The index within the SpoofData.
+         * @param dest
+         *            The StringBuilder to which to append the skeleton.
+         */
+        public void appendValueTo(int index, StringBuilder dest) {
+            int stringLength = ConfusableDataUtils.keyToLength(fCFUKeys[index]);
+
+            // Value is either a char (for strings of length 1) or
+            // an index into the string table (for longer strings)
+            int value = fCFUValues[index] & 0xffff; // unsigned 16 bits: offsets >= 0x8000 must not go negative
+            if (stringLength == 1) {
+                dest.append((char) value);
+            } else {
+                dest.append(fCFUStrings, value, value + stringLength);
+            }
+        }
     }
 
     // -------------------------------------------------------------------------------
     //
     // ScriptSet - Script code bit sets. Used with the whole script confusable data.
     // Used both at data build and at run time.
-    // Could almost be a Java BitSet, except that the input and output would
-    // be awkward.
+    // Extends Java BitSet with input/output support and a few helper methods.
+    // Note: The I/O is not currently being used, so it has been commented out. If
+    // it is needed again, the code can be restored.
     //
     // -------------------------------------------------------------------------------
-    static class ScriptSet {
-        public ScriptSet() {
-        }
-
-        public ScriptSet(ByteBuffer bytes) throws java.io.IOException {
-            for (int j = 0; j < bits.length; j++) {
-                bits[j] = bytes.getInt();
-            }
-        }
-
-        public void output(DataOutputStream os) throws java.io.IOException {
-            for (int i = 0; i < bits.length; i++) {
-                os.writeInt(bits[i]);
-            }
-        }
-
-        @Override
-        public boolean equals(Object other) {
-            if (!(other instanceof ScriptSet)) {
-                return false;
-            }
-            ScriptSet otherSet = (ScriptSet)other;
-            return Arrays.equals(bits, otherSet.bits);
-        }
+    static class ScriptSet extends BitSet {
 
-        public void Union(int script) {
-            int index = script / 32;
-            int bit = 1 << (script & 31);
-            assert (index < bits.length * 4 * 4);
-            bits[index] |= bit;
-        }
+        // Eclipse default value to quell warnings:
+        private static final long serialVersionUID = 1L;
 
-        @SuppressWarnings("unused")
-        public void Union(ScriptSet other) {
-            for (int i = 0; i < bits.length; i++) {
-                bits[i] |= other.bits[i];
-            }
-        }
-
-        public void intersect(ScriptSet other) {
-            for (int i = 0; i < bits.length; i++) {
-                bits[i] &= other.bits[i];
-            }
-        }
-
-        public void intersect(int script) {
-            int index = script / 32;
-            int bit = 1 << (script & 31);
-            assert (index < bits.length * 4 * 4);
-            int i;
-            for (i = 0; i < index; i++) {
-                bits[i] = 0;
-            }
-            bits[index] &= bit;
-            for (i = index + 1; i < bits.length; i++) {
-                bits[i] = 0;
-            }
+        // // The serialized version of this class can hold INT_CAPACITY * 32 scripts.
+        // private static final int INT_CAPACITY = 6;
+        // private static final long serialVersionUID = INT_CAPACITY;
+        // static {
+        // assert ScriptSet.INT_CAPACITY * Integer.SIZE <= UScript.CODE_LIMIT;
+        // }
+        //
+        // public ScriptSet() {
+        // }
+        //
+        // public ScriptSet(ByteBuffer bytes) throws java.io.IOException {
+        // for (int i = 0; i < INT_CAPACITY; i++) {
+        // int bits = bytes.getInt();
+        // for (int j = 0; j < Integer.SIZE; j++) {
+        // if ((bits & (1 << j)) != 0) {
+        // set(i * Integer.SIZE + j);
+        // }
+        // }
+        // }
+        // }
+        //
+        // public void output(DataOutputStream os) throws java.io.IOException {
+        // for (int i = 0; i < INT_CAPACITY; i++) {
+        // int bits = 0;
+        // for (int j = 0; j < Integer.SIZE; j++) {
+        // if (get(i * Integer.SIZE + j)) {
+        // bits |= (1 << j);
+        // }
+        // }
+        // os.writeInt(bits);
+        // }
+        // }
+
+        public void and(int script) {
+            this.clear(0, script);
+            this.clear(script + 1, UScript.CODE_LIMIT);
         }
 
         public void setAll() {
-            for (int i = 0; i < bits.length; i++) {
-                bits[i] = 0xffffffff;
-            }
+            this.set(0, UScript.CODE_LIMIT);
         }
 
-        @SuppressWarnings("unused")
-        public void resetAll() {
-            for (int i = 0; i < bits.length; i++) {
-                bits[i] = 0;
-            }
+        public boolean isFull() {
+            return cardinality() == UScript.CODE_LIMIT;
         }
 
-        public int countMembers() {
-            // This bit counter is good for sparse numbers of '1's, which is
-            // very much the case that we will usually have.
-            int count = 0;
-            for (int i = 0; i < bits.length; i++) {
-                int x = bits[i];
-                while (x != 0) {
-                    count++;
-                    x &= (x - 1); // AND off the least significant one bit.
-                                  // Note - Java integer over/underflow behavior is well defined.
-                                  //        0x80000000 - 1 = 0x7fffffff
+        public void appendStringTo(StringBuilder sb) {
+            sb.append("{ ");
+            if (isEmpty()) {
+                sb.append("- ");
+            } else if (isFull()) {
+                sb.append("* ");
+            } else {
+                for (int script = 0; script < UScript.CODE_LIMIT; script++) {
+                    if (get(script)) {
+                        sb.append(UScript.getShortName(script));
+                        sb.append(" ");
+                    }
                 }
             }
-            return count;
+            sb.append("}");
         }
 
-        private int[] bits = new int[6];
+        @Override
+        public String toString() {
+            StringBuilder sb = new StringBuilder();
+            sb.append("<ScriptSet ");
+            appendStringTo(sb);
+            sb.append(">");
+            return sb.toString();
+        }
     }
 }
-
diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar
index aff50ded99a..bfc42727953 100755
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21fc240ab98201884ee8e49c44bab3ba7b71d7eba95c9c442d82db15cd4c68d4
-size 11788999
+oid sha256:c3615865e8068508cca380d3aa8f8079f051dfabd556f6cd5bafe0ae3f9de5d0
+size 11786200
diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar
index 4709edab781..5ec5a40a24c 100755
--- a/icu4j/main/shared/data/icutzdata.jar
+++ b/icu4j/main/shared/data/icutzdata.jar
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52eef4e7e50fdffa89d1246d8ddeb17e51146f7a586e451196080acdd76730e4
+oid sha256:aade3b2d8f0a6f46d0ee33eed27d0e682c1abc8b72f7a85676ffacfb5815e27a
 size 91127
diff --git a/icu4j/main/shared/data/testdata.jar b/icu4j/main/shared/data/testdata.jar
index 9586bffc609..91fbba0b437 100755
--- a/icu4j/main/shared/data/testdata.jar
+++ b/icu4j/main/shared/data/testdata.jar
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb2c11d6d6d76e7fd31f99773a3ab7b6c13e0ddc748f00d94fcc048544f3043d
+oid sha256:690b23f3bd2ea163e801126ba8e2c65709eae387011ad3ec863d8b7bad4cd571
 size 811715
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/text/SpoofCheckerTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/text/SpoofCheckerTest.java
index d486e0989b4..f42c8c5469c 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/text/SpoofCheckerTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/text/SpoofCheckerTest.java
@@ -13,13 +13,9 @@ import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 import java.text.ParseException;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.Comparator;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.Locale;
-import java.util.Random;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -30,8 +26,6 @@ import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.dev.test.TestUtil;
 import com.ibm.icu.dev.test.TestUtil.JavaVendor;
 import com.ibm.icu.impl.Utility;
-import com.ibm.icu.lang.UScript;
-import com.ibm.icu.text.IdentifierInfo;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.SpoofChecker;
 import com.ibm.icu.text.SpoofChecker.CheckResult;
@@ -88,7 +82,6 @@ public class SpoofCheckerTest extends TestFmwk {
         }
         String fileName;
         Reader confusables;
-        Reader confusablesWholeScript;
 
         try {
             SpoofChecker rsc = null;
@@ -96,13 +89,7 @@ public class SpoofCheckerTest extends TestFmwk {
             fileName = "unicode/confusables.txt";
             confusables = TestUtil.getDataReader(fileName, "UTF-8");
             try {
-                fileName = "unicode/confusablesWholeScript.txt";
-                confusablesWholeScript = TestUtil.getDataReader(fileName, "UTF-8");
-                try {
-                    rsc = new SpoofChecker.Builder().setData(confusables, confusablesWholeScript).build();
-                } finally {
-                    confusablesWholeScript.close();
-                }
+                rsc = new SpoofChecker.Builder().setData(confusables).build();
             } finally {
                 confusables.close();
             }
@@ -120,17 +107,13 @@ public class SpoofCheckerTest extends TestFmwk {
             // The checker we just built from source rules should be equivalent to the
             //  default checker created from prebuilt rules baked into the ICU data.
             SpoofChecker defaultChecker = new SpoofChecker.Builder().build();
-            assertTrue("Checker built from rules equals default", defaultChecker.equals(rsc));
+            assertEquals("Checker built from rules equals default", defaultChecker, rsc);
+            assertEquals("Checker built from rules has same hash code as default", defaultChecker.hashCode(), rsc.hashCode());
 
             SpoofChecker optionChecker = new SpoofChecker.Builder().
                                     setRestrictionLevel(RestrictionLevel.UNRESTRICTIVE).build();
             assertFalse("", optionChecker.equals(rsc));
 
-            // Stub source data to build into a test SpoofChecker
-            String stubWSConfusables =
-                "# Stub Whole Script Confusable data\n" +
-                "0561          ; Armn; Cyrl; L #      (Õ¡)  ARMENIAN SMALL LETTER AYB\n";
-
             String stubConfusables =
                 "# Stub confusables data\n" +
                 "05AD ; 0596 ;  MA  # ( Ö­ â Ö ) HEBREW ACCENT DEHI â HEBREW ACCENT TIPEHA   #\n";
@@ -143,7 +126,7 @@ public class SpoofCheckerTest extends TestFmwk {
             SpoofChecker testChecker1 = builder.build();
             assertTrue("", testChecker1.equals(defaultChecker));
 
-            builder.setData(new StringReader(stubConfusables), new StringReader(stubWSConfusables));
+            builder.setData(new StringReader(stubConfusables));
             builder.setRestrictionLevel(RestrictionLevel.UNRESTRICTIVE);
             builder.setChecks(SpoofChecker.SINGLE_SCRIPT_CONFUSABLE);
             Set<ULocale>allowedLocales = new HashSet<ULocale>();
@@ -190,14 +173,14 @@ public class SpoofCheckerTest extends TestFmwk {
      */
     @Test
     public void TestGetSetAllowedChars() {
-        SpoofChecker sc = new SpoofChecker.Builder().build();
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).build();
         UnicodeSet us;
         UnicodeSet uset;
 
         uset = sc.getAllowedChars();
         assertTrue("", uset.isFrozen());
-        us = new UnicodeSet((int) 0x41, (int) 0x5A); /* [A-Z] */
-        sc = new SpoofChecker.Builder().setAllowedChars(us).build();
+        us = new UnicodeSet(0x41, 0x5A); /* [A-Z] */
+        sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).setAllowedChars(us).build();
         assertEquals("", us, sc.getAllowedChars());
     }
 
@@ -232,7 +215,7 @@ public class SpoofCheckerTest extends TestFmwk {
      */
     @Test
     public void TestAllowedLocales() {
-        SpoofChecker sc = new SpoofChecker.Builder().build();
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).build();
         Set<ULocale> allowedLocales = null;
         Set<Locale> allowedJavaLocales = null;
         boolean checkResults;
@@ -250,7 +233,7 @@ public class SpoofCheckerTest extends TestFmwk {
         allowedLocales = new HashSet<ULocale>();
         allowedLocales.add(enloc);
         allowedLocales.add(ruloc);
-        sc = new SpoofChecker.Builder().setAllowedLocales(allowedLocales).build();
+        sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).setAllowedLocales(allowedLocales).build();
         allowedLocales = sc.getAllowedLocales();
         assertTrue("en in allowed locales", allowedLocales.contains(enloc));
         assertTrue("ru_RU in allowed locales", allowedLocales.contains(ruloc));
@@ -258,14 +241,10 @@ public class SpoofCheckerTest extends TestFmwk {
         Locale frlocJ = new Locale("fr");
         allowedJavaLocales = new HashSet<Locale>();
         allowedJavaLocales.add(frlocJ);
-        sc = new SpoofChecker.Builder().setAllowedJavaLocales(allowedJavaLocales).build();
+        sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).setAllowedJavaLocales(allowedJavaLocales).build();
         assertFalse("no en in allowed Java locales", allowedJavaLocales.contains(new Locale("en")));
         assertTrue("fr in allowed Java locales", allowedJavaLocales.contains(frlocJ));
 
-        /*
-         * Limit checks to SpoofChecker.CHAR_LIMIT. Some of the test data has whole script confusables also, which we
-         * don't want to see in this test.
-         */
         sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).setAllowedLocales(allowedLocales).build();
 
         SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
@@ -291,7 +270,7 @@ public class SpoofCheckerTest extends TestFmwk {
      */
     @Test
     public void TestAllowedChars() {
-        SpoofChecker sc = new SpoofChecker.Builder().build();
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).build();
         UnicodeSet set;
         UnicodeSet tmpSet;
         boolean checkResults;
@@ -301,48 +280,40 @@ public class SpoofCheckerTest extends TestFmwk {
         tmpSet = new UnicodeSet(0, 0x10ffff);
         assertEquals("", tmpSet, set);
 
-        /* Setting the allowed chars should enable the check. */
-        sc = new SpoofChecker.Builder().setChecks(SpoofChecker.ALL_CHECKS & ~SpoofChecker.CHAR_LIMIT).build();
-
         /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
         tmpSet.remove(goodLatin.charAt(1));
-        sc = new SpoofChecker.Builder().setAllowedChars(tmpSet).build();
+        sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).setAllowedChars(tmpSet).build();
 
         /* Latin Identifier should now fail; other non-latin test cases should still be OK */
         SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
         checkResults = sc.failsChecks(goodLatin, result);
         assertTrue("", checkResults);
-        assertEquals("", SpoofChecker.CHAR_LIMIT | SpoofChecker.RESTRICTION_LEVEL, result.checks);
-
-        checkResults = sc.failsChecks(goodGreek, result);
-        assertTrue("", checkResults);
-        assertEquals("", SpoofChecker.WHOLE_SCRIPT_CONFUSABLE, result.checks);
+        assertEquals("", SpoofChecker.CHAR_LIMIT, result.checks);
     }
 
     @Test
     public void TestCheck() {
-        SpoofChecker sc = new SpoofChecker.Builder().build();
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.ALL_CHECKS).build();
         SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
         boolean checkResults;
 
         result.position = 666;
         checkResults = sc.failsChecks(goodLatin, result);
         assertFalse("", checkResults);
-        assertEquals("", 0, result.position);
+        assertEquals("", 0, result.checks);
 
         checkResults = sc.failsChecks(goodCyrl, result);
         assertFalse("", checkResults);
+        assertEquals("", 0, result.checks);
 
         result.position = 666;
         checkResults = sc.failsChecks(scMixed, result);
         assertTrue("", checkResults);
-        assertEquals("", SpoofChecker.MIXED_SCRIPT_CONFUSABLE | SpoofChecker.SINGLE_SCRIPT, result.checks);
-        assertEquals("", 0, result.position);
+        assertEquals("", SpoofChecker.RESTRICTION_LEVEL, result.checks);
 
         result.position = 666;
         checkResults = sc.failsChecks(han_Hiragana, result);
         assertFalse("", checkResults);
-        assertEquals("", 0, result.position);
         assertEquals("", 0, result.checks);
     }
 
@@ -351,18 +322,18 @@ public class SpoofCheckerTest extends TestFmwk {
         SpoofChecker sc = new SpoofChecker.Builder().build();
         int checkResults;
         checkResults = sc.areConfusable(scLatin, scMixed);
-        assertEquals("", SpoofChecker.MIXED_SCRIPT_CONFUSABLE, checkResults);
+        assertEquals("Latin/Mixed is not MIXED_SCRIPT_CONFUSABLE", SpoofChecker.MIXED_SCRIPT_CONFUSABLE, checkResults);
 
         checkResults = sc.areConfusable(goodGreek, scLatin);
-        assertEquals("", 0, checkResults);
+        assertEquals("Greek/Latin is not unconfusable", 0, checkResults);
 
         checkResults = sc.areConfusable(lll_Latin_a, lll_Latin_b);
-        assertEquals("", SpoofChecker.SINGLE_SCRIPT_CONFUSABLE, checkResults);
+        assertEquals("Latin/Latin is not SINGLE_SCRIPT_CONFUSABLE", SpoofChecker.SINGLE_SCRIPT_CONFUSABLE, checkResults);
     }
 
     @Test
     public void TestGetSkeleton() {
-        SpoofChecker sc = new SpoofChecker.Builder().build();
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CONFUSABLE).build();
         String dest;
         dest = sc.getSkeleton(SpoofChecker.ANY_CASE, lll_Latin_a);
         assertEquals("", lll_Skel, dest);
@@ -379,9 +350,8 @@ public class SpoofCheckerTest extends TestFmwk {
      */
     @Test
     public void TestSpoofAPI() {
-        SpoofChecker sc = new SpoofChecker.Builder().build();
-        String s = "xyz";  // Many latin ranges are whole-script confusable with other scripts.
-        // If this test starts failing, consult confusablesWholeScript.txt
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.ALL_CHECKS).build();
+        String s = "xyz";
         SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
         result.position = 666;
         boolean checkResults = sc.failsChecks(s, result);
@@ -475,13 +445,13 @@ public class SpoofCheckerTest extends TestFmwk {
         actual = sc.getSkeleton(type, uInput);
         Throwable t = new Throwable();
         int lineNumberOfTest = t.getStackTrace()[1].getLineNumber();
-        
+
         assertEquals(testName + " test at line " + lineNumberOfTest + " :  Expected (escaped): " + expected, uExpected, actual);
     }
 
     @Test
     public void TestAreConfusable() {
-        SpoofChecker sc = new SpoofChecker.Builder().build();
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CONFUSABLE).build();
         String s1 = "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. "
                 + "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. ";
         String s2 = "A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. "
@@ -489,9 +459,68 @@ public class SpoofCheckerTest extends TestFmwk {
         assertEquals("", SpoofChecker.SINGLE_SCRIPT_CONFUSABLE, sc.areConfusable(s1, s2));
     }
 
+    @Test
+    public void TestConfusableFlagVariants() {
+        // The spoof checker should only return those tests that the user requested.  This test makes sure that
+        // the checker doesn't return anything the user doesn't want.  This test started passing in ICU 58.
+
+        String latn = "desordenado";
+        String cyrl = "ÔÐµÑÐ¾Ð³ÔÐµÐ¿Ð°ÔÐ¾";
+        String mixed = "dÐµÑÐ¾Ð³denÐ°do";
+
+        Object[][] tests = {
+                // string 1, string 2, checks for spoof checker, expected output
+                { latn, cyrl,
+                    SpoofChecker.CONFUSABLE,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE | SpoofChecker.WHOLE_SCRIPT_CONFUSABLE },
+                { latn, cyrl,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE | SpoofChecker.WHOLE_SCRIPT_CONFUSABLE,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE | SpoofChecker.WHOLE_SCRIPT_CONFUSABLE },
+                { latn, cyrl,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE },
+                { latn, cyrl,
+                    SpoofChecker.WHOLE_SCRIPT_CONFUSABLE,
+                    SpoofChecker.WHOLE_SCRIPT_CONFUSABLE },
+                { latn, cyrl,
+                    SpoofChecker.SINGLE_SCRIPT_CONFUSABLE,
+                    0 },
+                { latn, mixed,
+                    SpoofChecker.CONFUSABLE,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE },
+                { latn, mixed,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE },
+                { latn, mixed,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE | SpoofChecker.WHOLE_SCRIPT_CONFUSABLE,
+                    SpoofChecker.MIXED_SCRIPT_CONFUSABLE },
+                { latn, mixed,
+                    SpoofChecker.WHOLE_SCRIPT_CONFUSABLE,
+                    0 },
+                { latn, latn,
+                    SpoofChecker.CONFUSABLE,
+                    SpoofChecker.SINGLE_SCRIPT_CONFUSABLE },
+        };
+
+        for (Object[] test : tests) {
+            String s1 = (String) test[0];
+            String s2 = (String) test[1];
+            int checks = (Integer) test[2];
+            int expectedResult = (Integer) test[3];
+
+            // Sanity check: expectedResult should be a subset of checks
+            assertEquals("Invalid test case", expectedResult & checks, expectedResult);
+
+            SpoofChecker sc = new SpoofChecker.Builder().setChecks(checks).build();
+            int actualResult = sc.areConfusable(s1, s2);
+            assertEquals("Comparing '" + s1 + "' and '" + s2 + "' with checks '" + checks + "'",
+                    expectedResult, actualResult);
+        }
+    }
+
     @Test
     public void TestInvisible() {
-        SpoofChecker sc = new SpoofChecker.Builder().build();
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.INVISIBLE).build();
         String s = Utility.unescape("abcd\\u0301ef");
         SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
         result.position = -42;
@@ -522,30 +551,40 @@ public class SpoofCheckerTest extends TestFmwk {
                 {"aアー", RestrictionLevel.HIGHLY_RESTRICTIVE},
                 {"aà¤", RestrictionLevel.MODERATELY_RESTRICTIVE},
                 {"aÎ³", RestrictionLevel.MINIMALLY_RESTRICTIVE},
+                {"aâ¥", RestrictionLevel.UNRESTRICTIVE},
+                {"a\u303c", RestrictionLevel.HIGHLY_RESTRICTIVE},
+                {"aã¼\u303c", RestrictionLevel.HIGHLY_RESTRICTIVE},
+                {"aã¼\u303cã¢", RestrictionLevel.HIGHLY_RESTRICTIVE},
+                { "ã¢aã¼\u303c", RestrictionLevel.HIGHLY_RESTRICTIVE},
+                {"a1Ù¡", RestrictionLevel.MODERATELY_RESTRICTIVE},
+                {"a1Ù¡Û±", RestrictionLevel.MODERATELY_RESTRICTIVE},
+                {"Ù¡ã¼\u303caã¢1à¥§Û±", RestrictionLevel.MINIMALLY_RESTRICTIVE},
+                {"aã¢ã¼\u303c1à¥§Ù¡Û±", RestrictionLevel.MINIMALLY_RESTRICTIVE},
         };
-        IdentifierInfo idInfo = new IdentifierInfo().setIdentifierProfile(SpoofChecker.RECOMMENDED);
+
+        UnicodeSet allowedChars = new UnicodeSet();
+        // Allowed Identifier Characters. In addition to the Recommended Set,
+        //    allow u303c, which has an interesting script extension of Hani Hira Kana.
+        allowedChars.addAll(SpoofChecker.RECOMMENDED).add(0x303c);
+
         CheckResult checkResult = new CheckResult();
         for (Object[] test : tests) {
             String testString = (String) test[0];
             RestrictionLevel expectedLevel = (RestrictionLevel) test[1];
-            idInfo.setIdentifier(testString);
-            assertEquals("Testing restriction level for '" + testString + "'", expectedLevel, idInfo.getRestrictionLevel());
             for (RestrictionLevel levelSetInSpoofChecker : RestrictionLevel.values()) {
                 SpoofChecker sc = new SpoofChecker.Builder()
-                .setChecks(SpoofChecker.RESTRICTION_LEVEL) // only check this
-                .setAllowedChars(SpoofChecker.RECOMMENDED)
-                .setRestrictionLevel(levelSetInSpoofChecker)
-                .build();
+                        .setAllowedChars(allowedChars)
+                        .setRestrictionLevel(levelSetInSpoofChecker)
+                        .setChecks(SpoofChecker.RESTRICTION_LEVEL) // only check this
+                        .build();
                 boolean actualValue = sc.failsChecks(testString, checkResult);
+                assertEquals("Testing restriction level for '" + testString + "'",
+                        expectedLevel, checkResult.restrictionLevel);
 
                 // we want to fail if the text is (say) MODERATE and the testLevel is ASCII
-                boolean expectedFailure = expectedLevel.compareTo(levelSetInSpoofChecker) > 0 || !SpoofChecker.RECOMMENDED.containsAll(testString);
-                boolean t = assertEquals("Testing spoof restriction level for '" + testString + "', " + levelSetInSpoofChecker, expectedFailure, actualValue);
-                if (!t) { // debugging
-                    actualValue = sc.failsChecks(testString, checkResult);
-                    // we want to fail if the text is (say) MODERATE and the testLevel is ASCII
-                    expectedFailure = expectedLevel.compareTo(levelSetInSpoofChecker) > 0 || !SpoofChecker.RECOMMENDED.containsAll(testString);
-                }
+                boolean expectedFailure = expectedLevel.compareTo(levelSetInSpoofChecker) > 0;
+                assertEquals("Testing spoof restriction level for '" + testString + "', " + levelSetInSpoofChecker,
+                        expectedFailure, actualValue);
             }
         }
     }
@@ -557,157 +596,41 @@ public class SpoofCheckerTest extends TestFmwk {
                 {"à¥§", "[à¥¦]"},
                 {"1à¥§", "[0à¥¦]"},
                 {"Ù¡Û±", "[Ù Û°]"},
+                {"aâ¥", "[]"},
+                {"a\u303c", "[]"},
+                {"aã¼\u303c", "[]"},
+                {"aã¼\u303cã¢", "[]"},
+                { "ã¢aã¼\u303c", "[]"},
+                {"a1Ù¡", "[0Ù ]"},
+                {"a1Ù¡Û±", "[0Ù Û°]"},
+                {"Ù¡ã¼\u303caã¢1à¥§Û±", "[0Ù Û°à¥¦]"},
+                {"aã¢ã¼\u303c1à¥§Ù¡Û±", "[0Ù Û°à¥¦]"},
         };
-        IdentifierInfo idInfo = new IdentifierInfo();
         CheckResult checkResult = new CheckResult();
         for (Object[] test : tests) {
             String testString = (String) test[0];
             UnicodeSet expected = new UnicodeSet((String)test[1]);
-            idInfo.setIdentifier(testString);
-            assertEquals("", expected, idInfo.getNumerics());
 
             SpoofChecker sc = new SpoofChecker.Builder()
             .setChecks(SpoofChecker.MIXED_NUMBERS) // only check this
             .build();
             boolean actualValue = sc.failsChecks(testString, checkResult);
+            assertEquals("", expected, checkResult.numerics);
             assertEquals("Testing spoof mixed numbers for '" + testString + "', ", expected.size() > 1, actualValue);
         }
     }
 
-    @Test
-    public void TestIdentifierInfo() {
-//        contains(BitSet, BitSet)
-        BitSet bitset12 = IdentifierInfo.set(new BitSet(), UScript.LATIN, UScript.HANGUL);
-        BitSet bitset2 = IdentifierInfo.set(new BitSet(), UScript.HANGUL);
-        assertTrue("", IdentifierInfo.contains(bitset12, bitset2));
-        assertTrue("", IdentifierInfo.contains(bitset12, bitset12));
-        assertTrue("", !IdentifierInfo.contains(bitset2, bitset12));
-
-        assertTrue("", IdentifierInfo.BITSET_COMPARATOR.compare(
-                IdentifierInfo.set(new BitSet(), UScript.ARABIC),
-                IdentifierInfo.set(new BitSet(), UScript.LATIN)) < 0);
-//      displayAlternates(Collection<BitSet>)
-//      displayScripts(BitSet)
-        String scriptString = IdentifierInfo.displayScripts(bitset12);
-        assertEquals("", "Hang Latn", scriptString);
-        Set<BitSet> alternates = new HashSet(Arrays.asList(bitset12, bitset2));
-        String alternatesString = IdentifierInfo.displayAlternates(alternates);
-        assertEquals("", "Hang; Hang Latn", alternatesString);
-
-//        parseAlternates(String)
-//        parseScripts(String)
-        assertEquals("", bitset12, IdentifierInfo.parseScripts(scriptString));
-        assertEquals("", alternates, IdentifierInfo.parseAlternates(alternatesString));
-
-        String[][] tests = {
-                // String, restriction-level, numerics, scripts, alternates, common-alternates
-                {"aâ¥",  "UNRESTRICTIVE", "[]", "Latn", "", ""},
-                {"a\u303c",  "HIGHLY_RESTRICTIVE", "[]", "Latn", "Hani Hira Kana", "Hani Hira Kana"},
-                {"aã¼\u303c",  "HIGHLY_RESTRICTIVE", "[]", "Latn", "Hira Kana", "Hira Kana"},
-                {"aã¼\u303cã¢",  "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
-                { "ã¢aã¼\u303c",  "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
-                {"a1Ù¡",  "UNRESTRICTIVE", "[0Ù ]", "Latn", "Arab Thaa", "Arab Thaa"},
-                {"a1Ù¡Û±",  "UNRESTRICTIVE", "[0Ù Û°]", "Latn Arab", "", ""},
-                {"Ù¡ã¼\u303caã¢1à¥§Û±",  "UNRESTRICTIVE", "[0Ù Û°à¥¦]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
-                {"aã¢ã¼\u303c1à¥§Ù¡Û±",  "UNRESTRICTIVE", "[0Ù Û°à¥¦]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
-        };
-        for (String[] test : tests) {
-            String testString = test[0];
-            IdentifierInfo idInfo = new IdentifierInfo();
-            UnicodeSet allowedChars = new UnicodeSet();
-            // Allowed Identifier Characters. In addition to the Recommended Set,
-            //    allow u303c, which has an interesting script extension of Hani Hira Kana. 
-            allowedChars.addAll(SpoofChecker.RECOMMENDED).add(0x303c);
-            idInfo.setIdentifierProfile(allowedChars);
-            idInfo.setIdentifier(testString);
-            assertEquals("Identifier " + testString, testString, idInfo.getIdentifier());
-
-            RestrictionLevel restrictionLevel = RestrictionLevel.valueOf(test[1]);
-            assertEquals("RestrictionLevel " + testString, restrictionLevel, idInfo.getRestrictionLevel());
-
-            UnicodeSet numerics = new UnicodeSet(test[2]);
-            assertEquals("Numerics " + testString, numerics, idInfo.getNumerics());
-
-            BitSet scripts = IdentifierInfo.parseScripts(test[3]);
-            assertEquals("Scripts " + testString, scripts, idInfo.getScripts());
-
-            Set<BitSet> alternates2 = IdentifierInfo.parseAlternates(test[4]);
-            assertEquals("Alternates " + testString, alternates2, idInfo.getAlternates());
-
-            BitSet commonAlternates = IdentifierInfo.parseScripts(test[5]);
-            assertEquals("Common Alternates " + testString, commonAlternates, idInfo.getCommonAmongAlternates());
-        }
-
-// TODO
-//        getIdentifierProfile()
-//        setIdentifierProfile(UnicodeSet)
-    }
-    
     @Test
     public void TestBug11635() {
         // The bug was an error in iterating through supplementary characters in IdentifierInfo.
         //  The three supplemental chars in the string are "123" from the mathematical bold digit range.
         //  Common script, Nd general category, and no other restrictions on allowed characters
-        //  leaves "ABC123" as SINGLE_SCRIPT_RESTRICTIVE.  
+        //  leaves "ABC123" as SINGLE_SCRIPT_RESTRICTIVE.
         String identifier = Utility.unescape("ABC\\U0001D7CF\\U0001D7D0\\U0001D7D1");
-        IdentifierInfo idInfo = new IdentifierInfo();
-        idInfo.setIdentifier(identifier);
-        assertEquals("", RestrictionLevel.SINGLE_SCRIPT_RESTRICTIVE, idInfo.getRestrictionLevel());
-    }
-
-    @Test
-    public void TestComparator() {
-        Random random = new Random(0);
-        for (int i = 0; i < 100; ++i) {
-            BitSet[] items = new BitSet[random.nextInt(5)+3];
-            for (int j = 0; j < items.length; ++j) {
-                items[j] = new BitSet();
-                int countInBitset = random.nextInt(5);
-                for (int k = 0; k < countInBitset; ++k) {
-                    items[j].set(random.nextInt(10));
-                }
-            }
-            checkComparator(IdentifierInfo.BITSET_COMPARATOR, items);
-        }
-    }
-
-    // Dumb implementation for now
-    private <T> void checkComparator(Comparator<T> comparator, T... items) {
-        logln("Checking " + Arrays.asList(items));
-        /*
-         * The relation is transitive: a < b and b < c implies a < c. We test here.
-         * The relation is trichotomous: exactly one of a <  b, b < a and a = b is true. Guaranteed by comparator.
-         */
-        for (int i = 0; i < items.length-2; ++i) {
-            T a = items[i];
-            for (int j = i+1; j < items.length-1; ++j) {
-                T b = items[j];
-                for (int k = j+1; k < items.length; ++k) {
-                    T c = items[k];
-                    checkTransitivity(comparator, a, b, c);
-                    checkTransitivity(comparator, a, c, b);
-                    checkTransitivity(comparator, b, a, b);
-                    checkTransitivity(comparator, b, c, a);
-                    checkTransitivity(comparator, c, a, b);
-                    checkTransitivity(comparator, c, b, a);
-                }
-            }
-        }
-    }
-
-    private <T> void checkTransitivity(Comparator<T> comparator, T a, T b, T c) {
-        int ab = comparator.compare(a,b);
-        int bc = comparator.compare(b,c);
-        int ca = comparator.compare(c,a);
-        if (!assertFalse("Transitive: " + a + ", " + b + ", " + c,
-                ab < 0 && bc < 0 && ca <= 0)) {
-            // for debugging
-            comparator.compare(a,b);
-            comparator.compare(b,c);
-            comparator.compare(c,a);
-            assertFalse("Transitive: " + a + ", " + b + ", " + c,
-                    ab < 0 && bc < 0 && ca <= 0);
-        }
+        CheckResult checkResult = new CheckResult();
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.RESTRICTION_LEVEL).build();
+        sc.failsChecks(identifier, checkResult);
+        assertEquals("", RestrictionLevel.SINGLE_SCRIPT_RESTRICTIVE, checkResult.restrictionLevel);
     }
 
     private String parseHex(String in) {
@@ -760,7 +683,7 @@ public class SpoofCheckerTest extends TestFmwk {
             // This regular expression matches lines and splits the fields into capture groups.
             // Capture group 1: map from chars
             // 2: map to chars
-            // 3: table type, SL, ML, SA or MA
+            // 3: table type, SL, ML, SA or MA (deprecated)
             // 4: Comment Lines Only
             // 5: Error Lines Only
             Matcher parseLine = Pattern.compile(
@@ -793,20 +716,8 @@ public class SpoofCheckerTest extends TestFmwk {
                 String rawExpected = parseHex(parseLine.group(2));
                 String expected = normalizer.normalize(rawExpected);
 
-                int skeletonType = 0;
-                String tableType = parseLine.group(3);
-                if (tableType.equals("SL")) {
-                    skeletonType = SpoofChecker.SINGLE_SCRIPT_CONFUSABLE;
-                } else if (tableType.indexOf("SA") >= 0) {
-                    skeletonType = SpoofChecker.SINGLE_SCRIPT_CONFUSABLE | SpoofChecker.ANY_CASE;
-                } else if (tableType.indexOf("ML") >= 0) {
-                    skeletonType = 0;
-                } else if (tableType.indexOf("MA") >= 0) {
-                    skeletonType = SpoofChecker.ANY_CASE;
-                }
-
                 String actual;
-                actual = sc.getSkeleton(skeletonType, from);
+                actual = sc.getSkeleton(from);
 
                 if (!actual.equals(expected)) {
                     errln("confusables.txt: " + lineNum + ": " + parseLine.group(0));
@@ -823,10 +734,45 @@ public class SpoofCheckerTest extends TestFmwk {
     public void TestCheckResultToString11447() {
         CheckResult checkResult = new CheckResult();
         SpoofChecker sc = new SpoofChecker.Builder()
-                .setChecks(-1)
+                .setChecks(SpoofChecker.MIXED_NUMBERS)
                 .build();
         sc.failsChecks("1१", checkResult);
         assertTrue("CheckResult: ", checkResult.toString().contains("MIXED_NUMBERS"));
     }
 
+    @Test
+    public void TestDeprecated() {
+        // getSkeleton
+        SpoofChecker sc = new SpoofChecker.Builder().build();
+        assertEquals("Deprecated version of getSkeleton method does not work",
+                sc.getSkeleton(SpoofChecker.ANY_CASE, scMixed),
+                sc.getSkeleton(scMixed));
+
+        // setData
+        try {
+            String fileName1 = "unicode/confusables.txt";
+            String fileName2 = "unicode/confusablesWholeScript.txt";
+            Reader reader1 = TestUtil.getDataReader(fileName1, "UTF-8");
+            Reader reader2 = TestUtil.getDataReader(fileName2, "UTF-8");
+            Reader reader3 = TestUtil.getDataReader(fileName1, "UTF-8");
+            try {
+                SpoofChecker sc2 = new SpoofChecker.Builder()
+                        .setData(reader1, reader2)
+                        .build();
+                SpoofChecker sc1 = new SpoofChecker.Builder()
+                        .setData(reader3)
+                        .build();
+                assertEquals("Deprecated version of setData method does not work", sc1, sc2);
+            } finally {
+                reader1.close();
+                reader2.close();
+                reader3.close();
+            }
+        } catch(IOException e) {
+            fail("Could not load confusables data");
+        } catch (ParseException e) {
+            fail("Could not parse confusables data");
+        }
+    }
+
 }
-- 
2.40.0