From e984474ee5be87817b028ace6fe23ba81f53e034 Mon Sep 17 00:00:00 2001 From: David Beaumont Date: Fri, 30 Aug 2019 14:21:32 +0000 Subject: [PATCH] ICU-20693 Removing unused code, fixing IDE warnings and preparing for unit tests See #773 --- .../unicode/icu/tool/cldrtoicu/IcuData.java | 18 ++++++++++- .../icu/tool/cldrtoicu/IcuFunctions.java | 6 ++-- .../icu/tool/cldrtoicu/IcuTextWriter.java | 18 +++++------ .../icu/tool/cldrtoicu/PathMatcher.java | 3 +- .../unicode/icu/tool/cldrtoicu/RbPath.java | 32 +------------------ .../unicode/icu/tool/cldrtoicu/RbValue.java | 12 ++++--- .../tool/cldrtoicu/mapper/Bcp47Mapper.java | 23 +++++++++---- .../cldrtoicu/mapper/BreakIteratorMapper.java | 21 ++++++++---- .../cldrtoicu/mapper/CollationMapper.java | 6 ++-- .../cldrtoicu/mapper/DayPeriodsMapper.java | 9 ++++-- .../tool/cldrtoicu/mapper/LocaleMapper.java | 16 +++------- .../cldrtoicu/mapper/PluralRangesMapper.java | 15 +++++++-- .../tool/cldrtoicu/mapper/PluralsMapper.java | 14 +++++--- .../icu/tool/cldrtoicu/mapper/RbnfMapper.java | 13 +++++--- .../cldrtoicu/mapper/SupplementalMapper.java | 9 +++--- .../cldrtoicu/mapper/TransformsMapper.java | 3 +- .../cldrtoicu/regex/RegexTransformer.java | 4 +-- .../icu/tool/cldrtoicu/regex/ResultSpec.java | 13 ++++---- .../icu/tool/cldrtoicu/regex/Rule.java | 4 +-- 19 files changed, 132 insertions(+), 107 deletions(-) diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuData.java index 63959d790d2..6f736f8b14c 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuData.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuData.java @@ -4,6 +4,8 @@ package org.unicode.icu.tool.cldrtoicu; import static com.google.common.base.Preconditions.checkArgument; +import java.io.PrintWriter; +import java.io.StringWriter; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -22,7 +24,7 @@ import com.google.common.collect.ListMultimap; */ public final class IcuData { private static final RbPath RB_VERSION = RbPath.of("Version"); - private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)\\])?$"); + private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$"); private final String name; private final boolean hasFallback; @@ -162,4 +164,18 @@ public final class IcuData { public boolean isEmpty() { return paths.isEmpty(); } + + @Override public String toString() { + StringWriter out = new StringWriter(); + PrintWriter w = new PrintWriter(out); + w.format("IcuData{ name=%s, fallback=%s\n", name, hasFallback); + commentLines.forEach(c -> w.format(" # %s\n", c)); + paths.forEach(p -> { + w.format(" %s:\n", p); + rbPathToValues.get(p).forEach(v -> w.format(" %s\n", v)); + }); + w.format("}\n"); + w.flush(); + return out.toString(); + } } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuFunctions.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuFunctions.java index a2f83139e05..1736ad4398e 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuFunctions.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuFunctions.java @@ -3,7 +3,6 @@ package org.unicode.icu.tool.cldrtoicu; import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; import static java.lang.Integer.parseInt; import java.time.LocalDate; @@ -13,10 +12,11 @@ import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction; + import com.google.common.base.Ascii; import com.google.common.base.CharMatcher; import com.google.common.collect.ImmutableMap; -import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction; /** * The named functions used by the {@code RegexTransformer} for {@code ldml2icu_supplemental.txt}. @@ -46,7 +46,7 @@ final class IcuFunctions { return hiBits + " " + loBits; }); - // TODO(dbeaumont): Improve this documentation (e.g. why is this being done, give examples?). + // TODO: Improve this documentation (e.g. why is this being done, give examples?). /** * Inserts '%' into numberingSystems descriptions. * diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuTextWriter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuTextWriter.java index c5f2fe89178..66e77ec9313 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuTextWriter.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuTextWriter.java @@ -27,7 +27,7 @@ final class IcuTextWriter { // List of characters to escape in UnicodeSets // ('\' followed by any of '\', '[', ']', '{', '}', '-', '&', ':', '^', '='). private static final Pattern UNICODESET_ESCAPE = - Pattern.compile("\\\\[\\\\\\[\\]\\{\\}\\-&:^=]"); + Pattern.compile("\\\\[\\\\\\[\\]{}\\-&:^=]"); // Only escape \ and " from other strings. private static final Pattern STRING_ESCAPE = Pattern.compile("(?!')\\\\\\\\(?!')"); private static final Pattern QUOTE_ESCAPE = Pattern.compile("\\\\?\""); @@ -54,7 +54,7 @@ final class IcuTextWriter { } // TODO: Write a UTF-8 header (see https://unicode-org.atlassian.net/browse/ICU-10197). - private void writeTo(PrintWriter out, List header) throws IOException { + private void writeTo(PrintWriter out, List header) { out.write('\uFEFF'); writeHeaderAndComments(out, header, icuData.getFileComment()); @@ -83,20 +83,20 @@ final class IcuTextWriter { // account for the implicit root segment. int commonDepth = RbPath.getCommonPrefixLength(lastPath, path) + 1; // Before closing, the "cursor" is at the end of the last value written. - closeLastPath(lastPath, commonDepth, out); + closeLastPath(commonDepth, out); // After opening the value will be ready for the next value to be written. openNextPath(path, out); valueWasInline = appendValues(icuData.getName(), path, icuData.get(path), out); lastPath = path; } - closeLastPath(lastPath, 0, out); + closeLastPath(0, out); out.println(); out.close(); } // Before: Cursor is at the end of the previous line. // After: Cursor is positioned immediately after the last closed '}' - private void closeLastPath(RbPath lastPath, int minDepth, PrintWriter out) { + private void closeLastPath(int minDepth, PrintWriter out) { if (valueWasInline) { depth--; out.print('}'); @@ -164,7 +164,7 @@ final class IcuTextWriter { boolean isSequence = rbPath.endsWith(RB_SEQUENCE); if (values.size() == 1 && !mustBeArray(true, name, rbPath)) { onlyValue = values.get(0); - if (onlyValue.size() == 1 && !mustBeArray(false, name, rbPath)) { + if (onlyValue.isSingleton() && !mustBeArray(false, name, rbPath)) { // Value has a single element and is not being forced to be an array. String onlyElement = onlyValue.getElement(0); if (quote) { @@ -195,7 +195,7 @@ final class IcuTextWriter { } } else { for (RbValue value : values) { - if (value.size() == 1) { + if (value.isSingleton()) { // Single-value array: print normally. printArray(value, quote, isSequence, out); } else { @@ -238,9 +238,9 @@ final class IcuTextWriter { } private void printArray(RbValue rbValue, boolean quote, boolean isSequence, PrintWriter out) { - for (int n = 0; n < rbValue.size(); n++) { + for (String v : rbValue.getElements()) { newLineAndIndent(out); - printValue(out, quoteInside(rbValue.getElement(n)), quote); + printValue(out, quoteInside(v), quote); if (!isSequence) { out.print(","); } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PathMatcher.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PathMatcher.java index e6e8e4dba5c..bf21c029bf5 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PathMatcher.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PathMatcher.java @@ -148,12 +148,13 @@ public abstract class PathMatcher { // Make a new, non-interned, unique instance here which we can test by reference to // determine if the argument is to be captured (needed as ImmutableMap prohibits null). // DO NOT change this code to assign "*" as the value directly, it MUST be a new instance. + @SuppressWarnings("StringOperationCanBeSimplified") private static final String WILDCARD = new String("*"); private static final Pattern ELEMENT_START_REGEX = Pattern.compile("(\\*|[-:\\w]+)(?:/|\\[|$)"); private static final Pattern ATTRIBUTE_REGEX = - Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")\\]"); + Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")]"); // element := foo, foo[@bar="baz"], foo[@bar=*] // pathspec := element{/element}* diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java index 3af37b5646c..0adeeb5fd4b 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java @@ -54,10 +54,6 @@ public final class RbPath implements Comparable { private static final CharMatcher UNQUOTED_SEGMENT_CHARS = QUOTED_SEGMENT_CHARS.and(whitespace().negate()); - // Characters allowed in path segments which separate the "base name" from any suffix (e.g. - // the base name of "Foo:intvector" is "Foo"). - private static final CharMatcher SEGMENT_SEPARATORS = CharMatcher.anyOf("%:"); - private static final RbPath EMPTY = new RbPath(ImmutableList.of()); public static RbPath empty() { @@ -97,12 +93,10 @@ public final class RbPath implements Comparable { this.segments = ImmutableList.copyOf(segments); this.hashCode = Objects.hash(this.segments); for (String segment : this.segments) { - checkArgument(!segment.isEmpty(), - "empty path segments not permitted: %s", this.segments); + checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments); // Either the label is quoted (e.g. "foo") or it is bar (e.g. foo) but it can only // contain double quotes at either end, or not at all. If the string is quoted, only // validate the content, and not the quotes themselves. - String toValidate; switch (segment.charAt(0)) { case '<': // Allow anything in hidden labels, since they will be removed later and never @@ -157,30 +151,6 @@ public final class RbPath implements Comparable { return new RbPath(segments.stream().map(fn).collect(toImmutableList())); } - /** - * Returns whether the first element of this path is prefix by the given "base name". - * - *

Resource bundle paths relating to semantically similar data are typically grouped by the - * same first path element. This is not as simple as just comparing the first element, as in - * {@code path.startsWith(prefix)} however, since path elements can have suffixes, such as - * {@code "Foo:alias"} or {@code "Foo%subtype"}. - * - * @param baseName the base name to test for. - * @return true is the "base name" of the first path element is the given prefix. - */ - public boolean hasPrefix(String baseName) { - checkArgument(!baseName.isEmpty() && SEGMENT_SEPARATORS.matchesNoneOf(baseName)); - if (length() == 0) { - return false; - } - String firstElement = getSegment(0); - // Slightly subtle (but safe) access to the separator character, since: - // (!a.equals(b) && a.startsWith(b)) ==> a.length() > b.length(). - return firstElement.equals(baseName) - || (firstElement.startsWith(baseName) - && SEGMENT_SEPARATORS.matches(firstElement.charAt(baseName.length()))); - } - public boolean startsWith(RbPath prefix) { return prefix.length() <= length() && matchesSublist(prefix, 0); } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbValue.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbValue.java index 84751d43e5f..af1020f93f7 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbValue.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbValue.java @@ -6,7 +6,6 @@ import static com.google.common.base.Preconditions.checkArgument; import java.util.Arrays; import java.util.Objects; -import java.util.function.Function; import com.google.common.collect.ImmutableList; @@ -34,9 +33,14 @@ public final class RbValue { checkArgument(!this.elements.isEmpty(), "Resource bundle values cannot be empty"); } - /** Returns the (non zero) number of elements in this value. */ - public int size() { - return elements.size(); + /** Returns the non-empty list of value elements. */ + public ImmutableList getElements() { + return elements; + } + + /** Returns whether this is a single element value. */ + public boolean isSingleton() { + return elements.size() == 1; } /** Returns the Nth element of this value. */ diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47Mapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47Mapper.java index 4d80a69f400..c8654bd32b7 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47Mapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47Mapper.java @@ -20,20 +20,22 @@ import java.util.Set; import javax.annotation.Nullable; import org.unicode.cldr.api.AttributeKey; +import org.unicode.cldr.api.CldrData; import org.unicode.cldr.api.CldrData.PrefixVisitor; import org.unicode.cldr.api.CldrData.ValueVisitor; import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.cldr.api.CldrPath; import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.PathMatcher; +import org.unicode.icu.tool.cldrtoicu.RbPath; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Ascii; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; -import org.unicode.icu.tool.cldrtoicu.IcuData; -import org.unicode.icu.tool.cldrtoicu.PathMatcher; -import org.unicode.icu.tool.cldrtoicu.RbPath; /** * A mapper to collect BCP-47 data from {@link CldrDataType#BCP47 BCP47} data under paths @@ -80,8 +82,13 @@ public final class Bcp47Mapper { * @return A list of IcuData instances containing BCP-47 data to be written to files. */ public static ImmutableList process(CldrDataSupplier src) { + return process(src.getDataForType(BCP47)); + } + + @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. + static ImmutableList process(CldrData cldrData) { Bcp47Visitor visitor = new Bcp47Visitor(); - src.getDataForType(BCP47).accept(ARBITRARY, visitor); + cldrData.accept(ARBITRARY, visitor); visitor.addKeyMapValues(); return ImmutableList.of(visitor.keyTypeData.icuData, visitor.tzData.icuData); } @@ -168,13 +175,15 @@ public final class Bcp47Mapper { // replacement and these will be processed below (in particular we need to emit // the fact that they are deprecated). - // According to the old mapper code, it's an error not to have an alias, but - // it's emitted via debug logging and not actually enforced. - // TODO: Consider making this an error if possible. + // Not all key elements have an alias. E.g. in calendar.xml: + // + // But we still add it as a alias to itself (which is later turned into a path with + // an empty value). String keyAlias = toLowerCase(KEY_ALIAS.valueFrom(value, keyName)); keyMap.put(keyName, keyAlias); RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias); + List typeAliases = TYPE_ALIASES.listOfValuesFrom(value); if (typeAliases.isEmpty()) { // Generate type map entry using empty value (an empty value indicates same diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java index 15a4f982488..a016cd238de 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java @@ -15,12 +15,13 @@ import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.cldr.api.CldrPath; import org.unicode.cldr.api.CldrValue; - -import com.google.common.escape.UnicodeEscaper; import org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.escape.UnicodeEscaper; + /** * A mapper to collect break-iterator data from {@link CldrDataType#LDML LDML} data under * paths matching: @@ -31,9 +32,11 @@ import org.unicode.icu.tool.cldrtoicu.RbPath; */ // TODO: This class can almost certainly be replace with a small RegexTransformer config. public final class BreakIteratorMapper { - // The "type" attribute is not required here, so cannot appear in the matcher. - private static final PathMatcher SUPPRESSION = - PathMatcher.of("ldml/segmentations/segmentation/suppressions/suppression"); + // The "type" attribute in /suppressions/ is not required so cannot be in the matcher. And + // its default (and only) value is "standard". + // TODO: Understand and document why this is the case. + private static final PathMatcher SUPPRESSION = PathMatcher.of( + "ldml/segmentations/segmentation[@type=*]/suppressions/suppression"); private static final AttributeKey SEGMENTATION_TYPE = keyOf("segmentation", "type"); // Note: This could be done with an intermediate matcher for @@ -58,9 +61,15 @@ public final class BreakIteratorMapper { public static IcuData process( String localeId, CldrDataSupplier src, Optional icuSpecialData) { + CldrData cldrData = src.getDataForLocale(localeId, UNRESOLVED); + return process(localeId, cldrData, icuSpecialData); + } + + @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. + static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) { BreakIteratorMapper mapper = new BreakIteratorMapper(localeId); icuSpecialData.ifPresent(s -> s.accept(ARBITRARY, mapper::addSpecials)); - src.getDataForLocale(localeId, UNRESOLVED).accept(DTD, mapper::addSuppression); + cldrData.accept(DTD, mapper::addSuppression); return mapper.icuData; } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java index bf9f7400d8b..c64c5ae9d61 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java @@ -16,14 +16,14 @@ import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.cldr.api.CldrPath; import org.unicode.cldr.api.CldrValue; - -import com.google.common.base.CharMatcher; -import com.google.common.base.Splitter; import org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; import org.unicode.icu.tool.cldrtoicu.RbValue; +import com.google.common.base.CharMatcher; +import com.google.common.base.Splitter; + /** * A mapper to collect collation data from {@link CldrDataType#LDML LDML} data via the paths: *

{@code
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/DayPeriodsMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/DayPeriodsMapper.java
index 8235c9c1623..e09fe5f141b 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/DayPeriodsMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/DayPeriodsMapper.java
@@ -15,11 +15,12 @@ import org.unicode.cldr.api.CldrDataSupplier;
 import org.unicode.cldr.api.CldrDataType;
 import org.unicode.cldr.api.CldrPath;
 import org.unicode.cldr.api.CldrValue;
-
 import org.unicode.icu.tool.cldrtoicu.IcuData;
 import org.unicode.icu.tool.cldrtoicu.PathMatcher;
 import org.unicode.icu.tool.cldrtoicu.RbPath;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * A mapper to collect day-period data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL}
  * data via the paths:
@@ -47,8 +48,12 @@ public final class DayPeriodsMapper {
      * @return the IcuData instance to be written to a file.
      */
     public static IcuData process(CldrDataSupplier src) {
+        return process(src.getDataForType(SUPPLEMENTAL));
+    }
+
+    @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
+    static IcuData process(CldrData data) {
         RuleSetVisitor mapper = new RuleSetVisitor();
-        CldrData data = src.getDataForType(SUPPLEMENTAL);
         data.accept(ARBITRARY, mapper);
         return mapper.icuData;
     }
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java
index 2395d6fe349..88273a3a5ff 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java
@@ -6,7 +6,6 @@ import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
 import static com.google.common.collect.Ordering.natural;
-import static org.unicode.cldr.api.AttributeKey.keyOf;
 import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
@@ -16,18 +15,12 @@ import java.util.List;
 import java.util.Optional;
 import java.util.Set;
 
-import org.unicode.cldr.api.AttributeKey;
 import org.unicode.cldr.api.CldrData;
 import org.unicode.cldr.api.CldrData.ValueVisitor;
 import org.unicode.cldr.api.CldrDataSupplier;
 import org.unicode.cldr.api.CldrDataType;
 import org.unicode.cldr.api.CldrValue;
-
-import com.google.common.collect.ImmutableListMultimap;
-import com.google.common.collect.LinkedHashMultimap;
-import com.google.common.collect.SetMultimap;
 import org.unicode.icu.tool.cldrtoicu.IcuData;
-import org.unicode.icu.tool.cldrtoicu.PathMatcher;
 import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
 import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
 import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
@@ -35,6 +28,10 @@ import org.unicode.icu.tool.cldrtoicu.RbPath;
 import org.unicode.icu.tool.cldrtoicu.RbValue;
 import org.unicode.icu.tool.cldrtoicu.SupplementalData;
 
+import com.google.common.collect.ImmutableListMultimap;
+import com.google.common.collect.LinkedHashMultimap;
+import com.google.common.collect.SetMultimap;
+
 /**
  * Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a
  * {@link PathValueTransformer}.
@@ -43,11 +40,6 @@ import org.unicode.icu.tool.cldrtoicu.SupplementalData;
  * {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation.
  */
 public final class LocaleMapper {
-    // Match territory paths so we can skip processing deprecated territories.
-    private static final PathMatcher TERRITORY = PathMatcher.of(
-        "ldml/localeDisplayNames/territories/territory[@type=*]");
-    private static final AttributeKey TERRITORY_TYPE = keyOf("territory", "type");
-
     // The default calendar (only set is different from inherited parent value).
     private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default");
 
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralRangesMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralRangesMapper.java
index 74542e2cc45..2bed54a01c6 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralRangesMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralRangesMapper.java
@@ -14,12 +14,13 @@ import org.unicode.cldr.api.CldrDataSupplier;
 import org.unicode.cldr.api.CldrDataType;
 import org.unicode.cldr.api.CldrPath;
 import org.unicode.cldr.api.CldrValue;
-
 import org.unicode.icu.tool.cldrtoicu.IcuData;
 import org.unicode.icu.tool.cldrtoicu.PathMatcher;
 import org.unicode.icu.tool.cldrtoicu.RbPath;
 import org.unicode.icu.tool.cldrtoicu.RbValue;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via
  * the paths:
@@ -28,6 +29,10 @@ import org.unicode.icu.tool.cldrtoicu.RbValue;
  * }
*/ public final class PluralRangesMapper { + // Note that this mapper only matches when there's no "type" specified on the "plurals" element. + // This is a bit weird, since the PluralsMapper expects a type (e.g. cardinal or ordinal) to + // be present. Really this just illustrates that the plural ranges just should not be under the + // same parent element as plurals. private static final PathMatcher RANGES = PathMatcher.of("supplementalData/plurals/pluralRanges[@locales=*]"); private static final AttributeKey RANGES_LOCALES = keyOf("pluralRanges", "locales"); @@ -47,8 +52,13 @@ public final class PluralRangesMapper { * @return the IcuData instance to be written to a file. */ public static IcuData process(CldrDataSupplier src) { - PluralRangesVisitor visitor = new PluralRangesVisitor(); CldrData data = src.getDataForType(SUPPLEMENTAL); + return process(data); + } + + @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. + static IcuData process(CldrData data) { + PluralRangesVisitor visitor = new PluralRangesVisitor(); data.accept(ARBITRARY, visitor); return visitor.icuData; } @@ -61,7 +71,6 @@ public final class PluralRangesMapper { @Override public void visitPrefixStart(CldrPath prefix, Context ctx) { - // Captured type is either "cardinal" or "ordinal" (and will cause exception otherwise). if (RANGES.matches(prefix)) { ruleLabel = String.format("set%02d", setIndex++); RANGES_LOCALES.listOfValuesFrom(prefix) diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapper.java index d20d31dcfef..4e125c4e387 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapper.java @@ -19,13 +19,14 @@ import org.unicode.cldr.api.CldrData.PrefixVisitor; import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.cldr.api.CldrPath; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterables; import org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; + /** * A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via * the paths: @@ -53,8 +54,13 @@ public final class PluralsMapper { * @return the IcuData instance to be written to a file. */ public static IcuData process(CldrDataSupplier src) { - PluralsVisitor visitor = new PluralsVisitor(); CldrData data = src.getDataForType(SUPPLEMENTAL); + return process(data); + } + + @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. + static IcuData process(CldrData data) { + PluralsVisitor visitor = new PluralsVisitor(); // Note: We explicitly reset the type to mimic the order of the existing code, since this // affects the set indices we generate during processing. Ideally this would all be immune // to ordering (or just enforce DTD ordering) but right now it's very dependent on diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java index f7f4f73c0da..45cbf3863d2 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java @@ -15,12 +15,13 @@ import org.unicode.cldr.api.CldrData.PrefixVisitor; import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.cldr.api.CldrPath; - -import com.google.common.escape.UnicodeEscaper; import org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.escape.UnicodeEscaper; + /** * A mapper to collect plural data from {@link CldrDataType#LDML LDML} data via the paths: *
{@code
@@ -39,7 +40,6 @@ public final class RbnfMapper {
     private static final AttributeKey RBNF_RADIX = keyOf("rbnfrule", "radix");
     private static final AttributeKey RULESET_ACCESS = keyOf("ruleset", "access");
 
-    private static final RbPath RB_PARENT = RbPath.of("%%Parent");
     // This is the ICU path prefix, below which everything generated by this visitor will go.
     private static final RbPath RB_ROOT = RbPath.of("RBNFRules");
 
@@ -54,12 +54,17 @@ public final class RbnfMapper {
     public static IcuData process(
         String localeId, CldrDataSupplier src, Optional icuSpecialData) {
 
+        return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
+    }
+
+    @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
+    static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) {
         // Using DTD order is essential here because the RBNF paths contain ordered elements,
         // so we must ensure that they appear in sorted order (otherwise we'd have to do more
         // work at this end to re-sort the results).
         RulesetVisitor visitor = new RulesetVisitor(localeId);
         icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
-        src.getDataForLocale(localeId, UNRESOLVED).accept(DTD, visitor);
+        cldrData.accept(DTD, visitor);
         return visitor.icuData;
     }
 
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java
index 4817e1de628..20847541ee5 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java
@@ -12,17 +12,16 @@ import org.unicode.cldr.api.CldrData;
 import org.unicode.cldr.api.CldrDataSupplier;
 import org.unicode.cldr.api.CldrDataType;
 import org.unicode.cldr.api.CldrValue;
-
-import com.google.common.collect.ImmutableListMultimap;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.LinkedHashMultimap;
-import com.google.common.collect.SetMultimap;
 import org.unicode.icu.tool.cldrtoicu.IcuData;
 import org.unicode.icu.tool.cldrtoicu.PathMatcher;
 import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
 import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
 import org.unicode.icu.tool.cldrtoicu.RbPath;
 
+import com.google.common.collect.ImmutableListMultimap;
+import com.google.common.collect.LinkedHashMultimap;
+import com.google.common.collect.SetMultimap;
+
 /**
  * Generate supplemental {@link IcuData} by transforming {@link CldrDataType#SUPPLEMENTAL
  * SUPPLEMENTAL} data using a {@link PathValueTransformer}.
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapper.java
index 5a4741edbc9..18e189d6d7c 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapper.java
@@ -23,11 +23,11 @@ import org.unicode.cldr.api.CldrData.ValueVisitor;
 import org.unicode.cldr.api.CldrDataSupplier;
 import org.unicode.cldr.api.CldrDataType;
 import org.unicode.cldr.api.CldrValue;
-
 import org.unicode.icu.tool.cldrtoicu.IcuData;
 import org.unicode.icu.tool.cldrtoicu.PathMatcher;
 import org.unicode.icu.tool.cldrtoicu.RbPath;
 import org.unicode.icu.tool.cldrtoicu.RbValue;
+
 import com.ibm.icu.text.Transliterator;
 
 /**
@@ -99,6 +99,7 @@ public final class TransformsMapper {
 
             // I have _no_ idea what any of this is about, I'm just trying to mimic the original
             // (complex and undocumented) code in "ConvertTransforms.java".
+            // TODO: Understand and document each of the cases below.
             icuData.add(RbPath.of("TransliteratorNamePattern"), "{0,choice,0#|1#{1}|2#{1}-{2}}");
             // Note that this quoting of path segments is almost certainly unnecessary. It matches
             // the old "ConvertTransforms" behaviour, but '%' is used elsewhere without quoting, so
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/RegexTransformer.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/RegexTransformer.java
index 10d4c368d68..6bd8ae42b56 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/RegexTransformer.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/RegexTransformer.java
@@ -23,12 +23,12 @@ import java.util.regex.Pattern;
 import org.unicode.cldr.api.CldrDataType;
 import org.unicode.cldr.api.CldrPath;
 import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableListMultimap;
 import com.google.common.collect.ImmutableSetMultimap;
-import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
-import org.unicode.icu.tool.cldrtoicu.RbPath;
 
 /**
  * Path/value transformer configured by {@code ldml2icu_xxx.txt} mapping and configuration files.
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/ResultSpec.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/ResultSpec.java
index 7c1467e0479..cc50c580eaa 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/ResultSpec.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/ResultSpec.java
@@ -26,14 +26,14 @@ import java.util.stream.Stream;
 
 import org.unicode.cldr.api.CldrPath;
 import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
 
 import com.google.common.base.Splitter;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
-import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
-import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
-import org.unicode.icu.tool.cldrtoicu.RbPath;
 
 /**
  * A specification for building a result from the arguments in a matched xpath. Results always
@@ -74,7 +74,7 @@ final class ResultSpec {
     private static final Splitter VALUE_SPLITTER = Splitter.on(whitespace()).omitEmptyStrings();
 
     // Matcher for "&foo_bar(a,b,c)" which captures function name and complete argument list.
-    private static final Pattern FUNCTION = Pattern.compile("\\&(\\w++)\\(([^\\)]++)\\)");
+    private static final Pattern FUNCTION = Pattern.compile("&(\\w++)\\(([^)]++)\\)");
 
     // Resource bundle path specification with placeholders (e.g. "/foo/$1/bar") exactly as it
     // appears in the configuration file.
@@ -168,7 +168,7 @@ final class ResultSpec {
         CldrValue value, List args, DynamicVars varLookupFn) {
         return new MatchedResult(
             getRbPath(args),
-            getValues(value.getValue(), args, varLookupFn),
+            getValues(value.getValue(), args),
             getResultPath(value.getPath(), args, varLookupFn));
     }
 
@@ -197,8 +197,7 @@ final class ResultSpec {
     // Create an array of output values according to the CLDR value (if present) and the
     // "values" instruction in the result specification (if present). Any functions present in
     // the "values" instruction are invoked here.
-    private ImmutableList getValues(
-        String value, List args, DynamicVars varLookupFn) {
+    private ImmutableList getValues(String value, List args) {
         VarString valuesSpec = instructions.get(Instruction.VALUES);
         if (valuesSpec == null) {
             // No "values" instruction, so just use the _unsplit_ CLDR value. To split a CLDR
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/Rule.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/Rule.java
index 7f51188a5f3..5189a2dd09b 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/Rule.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/regex/Rule.java
@@ -15,12 +15,12 @@ import java.util.stream.Stream;
 import org.unicode.cldr.api.CldrDataType;
 import org.unicode.cldr.api.CldrPath;
 import org.unicode.cldr.api.CldrValue;
-
-import com.google.common.collect.ImmutableList;
 import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
 import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
 import org.unicode.icu.tool.cldrtoicu.RbPath;
 
+import com.google.common.collect.ImmutableList;
+
 /*
  * Each rule corresponds to a single target xpath specification in the configuration file
  * (lines starting //) but may have more than one result specification. For example:
-- 
2.40.0