ICU-20693 Remaining tests for new ICU tooling and some refactoring

author David Beaumont <dbeaumont@google.com>

Thu, 19 Sep 2019 13:30:04 +0000 (15:30 +0200)

committer David Beaumont <david.beaumont+github@gmail.com>

Wed, 25 Sep 2019 21:37:05 +0000 (23:37 +0200)
author David Beaumont <dbeaumont@google.com>
Thu, 19 Sep 2019 13:30:04 +0000 (15:30 +0200)
committer David Beaumont <david.beaumont+github@gmail.com>
Wed, 25 Sep 2019 21:37:05 +0000 (23:37 +0200)
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java

index 563e1efb41d1300c2d01b581036c575ff3b12827..4e37788c1c8c6086072c2df4ea197ec90ce58061 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java
@@ -23,7 +23,6 @@ import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.nio.file.Files;
  import java.nio.file.Path;
-import java.nio.file.Paths;
  import java.util.Arrays;
  import java.util.Collection;
  import java.util.HashSet;
@@ -147,14 +146,6 @@ public final class LdmlConverter {
      // TODO: Confirm that this has no meaningful effect and unify "empty" file contents.
      private static RbPath RB_EMPTY_ALIAS = RbPath.of("___");
  
-    /** Provisional entry point until better config support exists. */
-    public static void main(String... args) {
-        convert(IcuConverterConfig.builder()
-            .setOutputDir(Paths.get(args[0]))
-            .setEmitReport(true)
-            .build());
-    }
-
      /**
       * Output types defining specific subsets of the ICU data which can be converted separately.
       * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java

index b6843134b57dc9ba8720a54cdb7734bbe1be7a9a..071b04fb376cbc6507368c3a0b05f093d1030572 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java
@@ -190,12 +190,16 @@ public final class RbPath implements Comparable<RbPath> {
          return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
      }
  
-    // TODO: Remove this in favour of having properly typed paths.
+    // TODO: Remove this and isAlias() in favour of having properly typed paths.
      boolean isIntPath() {
          String lastElement = segments.get(segments.size() - 1);
          return lastElement.endsWith(":int") || lastElement.endsWith(":intvector");
      }
  
+    public boolean isAlias() {
+        return getSegment(length() - 1).endsWith(":alias");
+    }
+
      @Override public int compareTo(RbPath other) {
          return ORDERING.compare(this, other);
      }
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java

index 954ebe0c2877ccaca52cb4ce3439de8bfe4d9874..b05d67359c27d8265799654a5e2cc895bfe4a60d 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java
@@ -93,7 +93,7 @@ public final class SupplementalData {
       * @param supplementalData the raw CLDR supplemental data instance.
       * @return the supplemental data API.
       */
-    static SupplementalData create(CldrData supplementalData) {
+    public static SupplementalData create(CldrData supplementalData) {
          Table<Alias, String, String> aliasTable = HashBasedTable.create();
          Map<String, String> parentLocaleMap = new HashMap<>();
          Map<String, String> defaultCalendarMap = new HashMap<>();
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java

index 66781a5c5f66b95f9e626b77aceaf29fd05f6adc..b896d2126a7c25d80d0779ef7ee31423c4c40e94 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java
@@ -3,98 +3,237 @@
  package org.unicode.icu.tool.cldrtoicu.mapper;
  
  import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
  import static com.google.common.base.Preconditions.checkState;
+import static com.google.common.collect.Ordering.natural;
  
  import java.util.List;
+import java.util.Set;
  import java.util.regex.Matcher;
  import java.util.regex.Pattern;
+import java.util.stream.Stream;
  
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.cldr.api.CldrValue;
  import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
  import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
  import org.unicode.icu.tool.cldrtoicu.RbPath;
  import org.unicode.icu.tool.cldrtoicu.RbValue;
  
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableListMultimap;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.LinkedHashMultimap;
  import com.google.common.collect.ListMultimap;
+import com.google.common.collect.SetMultimap;
  
  /**
   * An abstract parent class for any mappers based on {@code PathValueTransformer}. This ensures
   * that transformation results are correctly processed when being added to IcuData instances.
   */
-public abstract class AbstractPathValueMapper {
+abstract class AbstractPathValueMapper {
+    // Matches "/foo/bar" or "/foo/bar[N]" as a resource bundle path, capturing the path and
+    // optional index separately. Note that this is very sloppy matching and the path string will
+    // also be parsed via RbPath.parse().
      private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");
  
-    private final IcuData icuData;
+    private final CldrData cldrData;
+    private final PathValueTransformer transformer;
  
-    AbstractPathValueMapper(String name, boolean hasFallback) {
-        this.icuData = new IcuData(name, hasFallback);
-    }
+    // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
+    // each key. The reason is that result comparison is not "consistent with equals", and
+    // TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
+    // method), and it does this even if using the add() method of the sorted set (this is in
+    // fact in violation of the stated behaviour of Set#add).
+    private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
  
-    /** Implemented by sub-classes to return all results to be added to the IcuData instance. */
-    abstract ListMultimap<RbPath, Result> getResults();
+    AbstractPathValueMapper(CldrData cldrData, PathValueTransformer transformer) {
+        this.cldrData = checkNotNull(cldrData);
+        this.transformer = checkNotNull(transformer);
+    }
  
      /**
-     * Adds results to the IcuData instance according to expected {@code PathValueTransformer}
-     * semantics. This method must only be called once per mapper.
+     * Returns a new {@code IcuData} instance produced by post-processing a set of results
+     * generated by calling sub-class method {@link #addResults()}. This is the only method which
+     * need be directly invoked by the sub-class implementation (other methods are optionally used
+     * from within the {@link #addResults()} callback).
       */
-    final IcuData transform() {
-        checkState(icuData.getPaths().isEmpty(),
-            "transform() method cannot be called multiple times: %s", icuData);
-
+    final IcuData generateIcuData(String icuName, boolean hasFallback) {
          // This subclass mostly exists to control the fact that results need to be added in one go
          // to the IcuData because of how referenced paths are handled. If results could be added in
          // multiple passes, you could have confusing situations in which values has path references
          // in them but the referenced paths have not been transformed yet. Forcing the subclass to
          // implement a single method to generate all results at once ensures that we control the
          // lifecycle of the data and how results are processed as they are added to the IcuData.
-        addResults(getResults());
+        checkState(resultsByRbPath.isEmpty(),
+            "results must not be added outside the call to addResults(): %s", resultsByRbPath);
+        addResults();
+        IcuData icuData = addResultsToIcuData(finalizeResults(), new IcuData(icuName, hasFallback));
+        resultsByRbPath.clear();
          return icuData;
      }
  
      /**
-     * Adds transformation results on the specified multi-map to this data instance. Results are
-     * handled differently according to whether they are grouped, or represent an alias value. If
-     * the value of an ungrouped result is itself a resource bundle path (including possibly having
-     * an array index) then the referenced value is assumed to be an existing path whose value is
-     * then substituted.
+     * Implemented by sub-classes to supply all results to be added to the IcuData instance. The
+     * primary job of this callback is to generate transformed results (typically by calling
+     * {@link #transformValue(CldrValue)}) and then, after optional post-processing, add the
+     * results to this mapper using {@link #addResult(RbPath, Result)}.
+     *
+     * <p>This method is called once for each call to {@link #generateIcuData(String, boolean)} and
+     * is responsible for adding all necessary results for the returned {@link IcuData}.
+     */
+    abstract void addResults();
+
+    /**
+     * Returns the CLDR data used for this transformation. Note that a subclass mapper might have
+     * other data for different purposes, but this data instance is the one from which variables
+     * are resolved. A sub-class mapper might access this for additional processing.
       */
-    // TODO: Fix this to NOT implicitly rely of ordering of referenced values.
-    private void addResults(ListMultimap<RbPath, Result> resultsByRbPath) {
+    final CldrData getCldrData() {
+        return cldrData;
+    }
+
+    /**
+     * Transforms a single value into a sequence of results using this mapper's {@link
+     * PathValueTransformer}, which can be added to the mapper (possibly after optional
+     * post-processing).
+     */
+    final Stream<Result> transformValue(CldrValue value) {
+        return transformer.transform(value, this::getVarsFn).stream();
+    }
+
+    /**
+     * Adds a transformed result to the mapper. This should be called by the sub-class mapper in
+     * its implementation of the {@link #addResults()} method.
+     *
+     * <p>Note that the given path will often (but not always) be just the path of the result.
+     */
+    final void addResult(RbPath path, Result result) {
+        resultsByRbPath.put(path, result);
+    }
+
+    // Callback function used by the transform() method to resolve variables from CLDR data.
+    private String getVarsFn(CldrPath p) {
+        CldrValue cldrValue = cldrData.get(p);
+        return cldrValue != null ? cldrValue.getValue() : null;
+    }
+
+    // Fills in any fallback results and orders the results by the resource bundle path.
+    private ImmutableListMultimap<RbPath, Result> finalizeResults() {
+        ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
+        out.orderValuesBy(natural());
          for (RbPath rbPath : resultsByRbPath.keySet()) {
-            for (Result r : resultsByRbPath.get(rbPath)) {
+            Set<Result> existingResults = resultsByRbPath.get(rbPath);
+            out.putAll(rbPath, existingResults);
+            for (Result fallback : transformer.getFallbackResultsFor(rbPath, this::getVarsFn)) {
+                if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
+                    out.put(rbPath, fallback);
+                }
+            }
+        }
+        return out.build();
+    }
+
+    /**
+     * Adds transformation results on the specified multi-map to this data instance. Results are
+     * processed in list order and handled differently according to whether they are grouped, or
+     * represent an alias value.
+     *
+     * If the value of an ungrouped result is itself a resource bundle path (including possibly
+     * having an array index) then the referenced value is assumed to be an existing path whose
+     * value is then substituted.
+     */
+    private static IcuData addResultsToIcuData(
+        ImmutableListMultimap<RbPath, Result> results, IcuData icuData) {
+
+        // Ordering of paths should not matter here (IcuData will re-sort them) and ordering of
+        // values for a given key is preserved by list multimaps.
+        ListMultimap<RbPath, ValueOrAlias> map = ArrayListMultimap.create();
+
+        // IMPORTANT: This code MUST use the keys of the results map (rather than extracting the
+        // paths from the results). This is because paths can be post-processed after the result
+        // is obtained, which can affect output ordering as well as the path mappings.
+        for (RbPath rbPath : results.keySet()) {
+            for (Result r : results.get(rbPath)) {
                  if (r.isGrouped()) {
-                    // Grouped results have all the values in a single value entry.
-                    icuData.add(rbPath, RbValue.of(r.getValues()));
+                    // Grouped results have all values in a single entry and cannot be aliases.
+                    map.put(rbPath, ValueOrAlias.value(RbValue.of(r.getValues())));
+                } else if (rbPath.isAlias()) {
+                    // Aliases (which should be single values) are not expanded to their referenced
+                    // values (whereas non-aliases might be). This is really just a hack to work
+                    // around the fact that RbPath/RbValue is not properly typed and we have to use
+                    // heuristics to determine whether to replace a resource bundle path with its
+                    // referenced value.
+                    checkArgument(r.getValues().size() == 1,
+                        "explicit aliases must be singleton values: %s", r);
+                    map.put(rbPath, ValueOrAlias.value(Iterables.getOnlyElement(r.getValues())));
                  } else {
-                    if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) {
-                        r.getValues().forEach(v -> icuData.add(rbPath, RbValue.of(v)));
-                    } else {
-                        // Ungrouped results are one value per entry, but might be expanded into
-                        // grouped results if they are a path referencing a grouped entry.
-                        r.getValues().forEach(v -> icuData.add(rbPath, replacePathValues(v)));
-                    }
+                    // Ungrouped results are one value per entry, but might later be expanded into
+                    // grouped results if they are a path referencing a grouped entry.
+                    r.getValues().forEach(v -> map.put(rbPath, ValueOrAlias.parse(v)));
                  }
              }
          }
+        // This works because insertion order is maintained for values of each path.
+        map.forEach((p, v) -> icuData.add(p, v.resolve(map)));
+        return icuData;
      }
  
-    /**
-     * Replaces an ungrouped CLDR value for the form "/foo/bar" or "/foo/bar[N]" which is assumed
-     * to be a reference to an existing value in a resource bundle. Note that the referenced bundle
-     * might be grouped (i.e. an array with more than one element).
+    /*
+     * An unfortunately messy little interface to handle the way that aliases are defined in the
+     * path value mappers. A mapper Result is permitted to contain values which are actually
+     * aliases to other resource bundle elements. This is typically used in fallback values, where
+     * the fallback is a functional value. For example:
+     *    fallback=/weekData/001:intvector[0]
+     *
+     * This is messy because when we process the Results from the mapper to put them into the
+     * IcuData instance, we cannot be sure we can resolve these "aliases" at the time that they
+     * are encountered (the target value might not be present yet). So we need to wait until
+     * all the values are in place and then do a 2nd pass to resolve things.
+     *
+     * So far path replacement is strictly limited to fallback results, so perhaps it could be
+     * handled more directly in the Result class, though it is possible for a single result to
+     * contain multiple path references:
+     *     fallback=/weekData/001:intvector[2] /weekData/001:intvector[3]
       */
-    private RbValue replacePathValues(String value) {
-        Matcher m = ARRAY_INDEX.matcher(value);
-        if (!m.matches()) {
-            return RbValue.of(value);
+    private interface ValueOrAlias {
+        // A simple value doesn't need resolving, and doesn't care if the given map is null (*).
+        static ValueOrAlias value(RbValue v) {
+            return src -> v;
+        }
+
+        // Helper for (common) singleton values.
+        static ValueOrAlias value(String v) {
+            return value(RbValue.of(v));
          }
-        // The only constraint is that the "path" value starts with a leading '/', but parsing into
-        // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the
-        // captured value contains '/' characters to represent path delimiters.
-        RbPath replacePath = RbPath.parse(m.group(1));
-        List<RbValue> replaceValues = icuData.get(replacePath);
-        checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath);
-        // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]").
-        int replaceIndex = m.groupCount() > 1 ? Integer.parseInt(m.group(2)) : 0;
-        return replaceValues.get(replaceIndex);
+
+        static ValueOrAlias parse(String valueOrAlias) {
+            Matcher m = ARRAY_INDEX.matcher(valueOrAlias);
+            if (!m.matches()) {
+                return value(valueOrAlias);
+            }
+            // The only constraint is that the "path" value starts with a leading '/', but parsing into
+            // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the
+            // captured value contains '/' characters to represent path delimiters.
+            RbPath path = RbPath.parse(m.group(1));
+            // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]").
+            int index = m.group(2) != null ? Integer.parseUnsignedInt(m.group(2)) : 0;
+            return src -> {
+                checkState(src != null, "recursive alias resolution is not supported");
+                List<ValueOrAlias> values = src.get(path);
+                checkArgument(!values.isEmpty(), "no such alias value: /%s", path);
+                checkArgument(index < values.size(),
+                    "index for alias /%s[%s] is out of bounds", path, index);
+                // By passing 'null' to the recursive call to resolve, we prevent the resolution
+                // from being recursive (*). This could be changed to pass 'src' and achieve
+                // arbitrary recursive resolving if needed, but that's currently unnecessary (and
+                // should probably be guarded against unbounded recursion if it is ever enabled).
+                return values.get(index).resolve(null);
+            };
+        }
+
+        RbValue resolve(ListMultimap<RbPath, ValueOrAlias> src);
      }
  }
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java

index 3cb20a4cf057f0fdbb9cee763c0b005f2e0b6ee7..ce196662e6f73a7c3f3d4eb16527864e95f1eadc 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java
@@ -2,10 +2,8 @@
  // License & terms of use: http://www.unicode.org/copyright.html
  package org.unicode.icu.tool.cldrtoicu.mapper;
  
-import static com.google.common.base.Preconditions.checkArgument;
  import static com.google.common.base.Preconditions.checkNotNull;
  import static com.google.common.base.Preconditions.checkState;
-import static com.google.common.collect.Ordering.natural;
  import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
  import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
  import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
@@ -16,22 +14,16 @@ import java.util.Optional;
  import java.util.Set;
  
  import org.unicode.cldr.api.CldrData;
-import org.unicode.cldr.api.CldrData.ValueVisitor;
  import org.unicode.cldr.api.CldrDataSupplier;
  import org.unicode.cldr.api.CldrDataType;
-import org.unicode.cldr.api.CldrValue;
  import org.unicode.icu.tool.cldrtoicu.IcuData;
  import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
-import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
  import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
  import org.unicode.icu.tool.cldrtoicu.RbPath;
  import org.unicode.icu.tool.cldrtoicu.RbValue;
  import org.unicode.icu.tool.cldrtoicu.SupplementalData;
  
-import com.google.common.collect.ImmutableListMultimap;
-import com.google.common.collect.LinkedHashMultimap;
-import com.google.common.collect.ListMultimap;
-import com.google.common.collect.SetMultimap;
+import com.google.common.annotations.VisibleForTesting;
  
  /**
   * Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a
@@ -45,8 +37,7 @@ public final class LocaleMapper extends AbstractPathValueMapper {
      private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default");
  
      /**
-     * Processes data from the given supplier to generate general locale data for the given locale
-     * ID.
+     * Processes data from the given supplier to generate general locale data for the given locale.
       *
       * @param localeId the locale ID to generate data for.
       * @param src the CLDR data supplier to process.
@@ -63,50 +54,33 @@ public final class LocaleMapper extends AbstractPathValueMapper {
          PathValueTransformer transformer,
          SupplementalData supplementalData) {
  
-        IcuData icuData = new LocaleMapper(localeId, src, icuSpecialData, transformer).transform();
-        doDateTimeHack(icuData);
-        supplementalData.getDefaultCalendar(icuData.getName())
-            .ifPresent(c -> icuData.add(RB_CALENDAR, c));
-        return icuData;
+        return process(
+            localeId,
+            src,
+            icuSpecialData,
+            transformer,
+            supplementalData.getDefaultCalendar(localeId));
      }
  
-    // This is an awful hack for post-processing the date-time format patterns to inject a 13th
-    // pattern at index 8, which is just a duplicate of the "medium" date-time pattern. The reasons
-    // for this are lost in the midst of time, but essentially there's ICU library code that just
-    // expects the value at index 8 to be this "default" value, and reads the date-time values
-    // starting at index 9.
-    //
-    // Before the hack would be at index 10, since there are 3 groups:
-    //   "time" -> "date" -> "date-time"
-    // with 4 patterns each:
-    //   "full" -> "long" -> "medium" -> "short"
-    private static void doDateTimeHack(IcuData icuData) {
-        for (RbPath rbPath : icuData.getPaths()) {
-            if (rbPath.length() == 3
-                && rbPath.getSegment(0).equals("calendar")
-                && rbPath.getSegment(2).equals("DateTimePatterns")) {
-                // This cannot be null and should not be empty, since the path is in this data.
-                List<RbValue> valuesToHack = icuData.get(rbPath);
-                checkArgument(valuesToHack.size() == 12,
-                    "unexpected number of date/time patterns for '%s': %s", rbPath, valuesToHack);
-                valuesToHack.add(8, valuesToHack.get(10));
-            }
-        }
+    @VisibleForTesting  // Avoids needing to pass a complete SupplementalData instance in tests.
+    public static IcuData process(
+        String localeId,
+        CldrDataSupplier src,
+        Optional<CldrData> icuSpecialData,
+        PathValueTransformer transformer,
+        Optional<String> defaultCalendar) {
+
+        IcuData icuData =
+            new LocaleMapper(localeId, src, icuSpecialData, transformer)
+                .generateIcuData(localeId, true);
+        doDateTimeHack(icuData);
+        defaultCalendar.ifPresent(c -> icuData.add(RB_CALENDAR, c));
+        return icuData;
      }
  
      private final String localeId;
      private final CldrDataSupplier src;
      private final Optional<CldrData> icuSpecialData;
-    private final PathValueTransformer transformer;
-
-    private final Set<RbPath> validRbPaths = new HashSet<>();
-
-    // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
-    // each key. The reason is that result comparison is not "consistent with equals", and
-    // TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
-    // method), and it does this even if using the add() method of the sorted set (this is in
-    // fact in violation of the stated behaviour of Set#add).
-    private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
  
      private LocaleMapper(
          String localeId,
@@ -114,67 +88,68 @@ public final class LocaleMapper extends AbstractPathValueMapper {
          Optional<CldrData> icuSpecialData,
          PathValueTransformer transformer) {
  
-        super(localeId, true);
+        super(src.getDataForLocale(localeId, RESOLVED), transformer);
          this.localeId = localeId;
          this.src = checkNotNull(src);
          this.icuSpecialData = checkNotNull(icuSpecialData);
-        this.transformer = checkNotNull(transformer);
      }
  
      @Override
-    ListMultimap<RbPath, Result> getResults() {
-        CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED);
-        CldrData resolved = src.getDataForLocale(localeId, RESOLVED);
-        DynamicVars varFn = p -> {
-            CldrValue cldrValue = resolved.get(p);
-            return cldrValue != null ? cldrValue.getValue() : null;
-        };
-
-        collectPaths(unresolved, varFn);
-        collectResults(resolved, varFn);
-        icuSpecialData.ifPresent(s -> collectSpecials(s, varFn));
-
-        ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
-        out.orderValuesBy(natural());
-        for (RbPath rbPath : resultsByRbPath.keySet()) {
-            Set<Result> existingResults = resultsByRbPath.get(rbPath);
-            out.putAll(rbPath, existingResults);
-            for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
-                if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
-                    out.put(rbPath, fallback);
-                }
-            }
-        }
-        return out.build();
+    void addResults() {
+        collectResults(collectPaths());
+        icuSpecialData.ifPresent(this::collectSpecials);
      }
  
-    private void collectPaths(CldrData unresolved, DynamicVars varFn) {
-        ValueVisitor collectPaths =
-            v -> transformer.transform(v, varFn).forEach(this::collectResultPath);
-        unresolved.accept(DTD, collectPaths);
+    private Set<RbPath> collectPaths() {
+        Set<RbPath> validRbPaths = new HashSet<>();
+        src.getDataForLocale(localeId, UNRESOLVED)
+            .accept(DTD, v -> transformValue(v).forEach(r -> collectResultPath(r, validRbPaths)));
+        return validRbPaths;
      }
  
-    private void collectResultPath(Result result) {
+    private static void collectResultPath(Result result, Set<RbPath> validRbPaths) {
          RbPath rbPath = result.getKey();
          validRbPaths.add(rbPath);
          if (rbPath.isAnonymous()) {
              RbPath parent = rbPath.getParent();
-            checkState(!parent.isAnonymous(),
-                "anonymous paths should not be nested: %s", rbPath);
+            checkState(!parent.isAnonymous(), "anonymous paths must not be nested: %s", rbPath);
              validRbPaths.add(parent);
          }
      }
  
-    private void collectResults(CldrData resolved, DynamicVars varFn) {
-        ValueVisitor collectResults =
-            v -> transformer.transform(v, varFn).stream()
+    private void collectResults(Set<RbPath> validRbPaths) {
+        getCldrData().accept(DTD,
+            v -> transformValue(v)
                  .filter(r -> validRbPaths.contains(r.getKey()))
-                .forEach(r -> resultsByRbPath.put(r.getKey(), r));
-        resolved.accept(DTD, collectResults);
+                .forEach(result -> addResult(result.getKey(), result)));
+    }
+
+    private void collectSpecials(CldrData specials) {
+        specials.accept(DTD,
+            v -> transformValue(v).forEach(result -> addResult(result.getKey(), result)));
      }
  
-    private void collectSpecials(CldrData cldrData, DynamicVars varFn) {
-        cldrData.accept(DTD, v ->
-            transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r)));
+    // This is an awful hack for post-processing the date-time format patterns to inject a 13th
+    // pattern at index 8, which is just a duplicate of the "medium" date-time pattern. The reasons
+    // for this are lost in the mists of time, but essentially there's ICU library code that just
+    // expects the value at index 8 to be this "default" value, and reads the date-time values
+    // starting at index 9.
+    //
+    // Before the hack this pattern would be at index 10, since there are 3 groups:
+    //   "time" -> "date" -> "date-time"
+    // with 4 patterns each:
+    //   "full" -> "long" -> "medium" -> "short"
+    private static void doDateTimeHack(IcuData icuData) {
+        for (RbPath rbPath : icuData.getPaths()) {
+            if (rbPath.length() == 3
+                && rbPath.getSegment(0).equals("calendar")
+                && rbPath.getSegment(2).equals("DateTimePatterns")) {
+                // This cannot be null and should not be empty, since the path is in this data.
+                List<RbValue> valuesToHack = icuData.get(rbPath);
+                checkState(valuesToHack.size() == 12,
+                    "unexpected number of date/time patterns for '/%s': %s", rbPath, valuesToHack);
+                valuesToHack.add(8, valuesToHack.get(10));
+            }
+        }
      }
  }
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java

index 0f6885b9637aef1a1adbcad7e08e5b5ec2b41496..c8b81007702c18befe6e33260635e815506c7653 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java
@@ -3,12 +3,8 @@
  package org.unicode.icu.tool.cldrtoicu.mapper;
  
  import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.collect.Ordering.natural;
  import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
  
-import java.util.Set;
-
-import org.unicode.cldr.api.CldrData;
  import org.unicode.cldr.api.CldrDataSupplier;
  import org.unicode.cldr.api.CldrDataType;
  import org.unicode.cldr.api.CldrValue;
@@ -18,10 +14,6 @@ import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
  import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
  import org.unicode.icu.tool.cldrtoicu.RbPath;
  
-import com.google.common.collect.ImmutableListMultimap;
-import com.google.common.collect.LinkedHashMultimap;
-import com.google.common.collect.SetMultimap;
-
  /**
   * Generate supplemental {@link IcuData} by transforming {@link CldrDataType#SUPPLEMENTAL
   * SUPPLEMENTAL} data using a {@link PathValueTransformer}.
@@ -46,70 +38,48 @@ public final class SupplementalMapper extends AbstractPathValueMapper {
      public static IcuData process(
          CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
  
-        return new SupplementalMapper(src, transformer, icuName, paths).transform();
+        return new SupplementalMapper(src, transformer, paths).generateIcuData(icuName, false);
      }
  
-    private final CldrDataSupplier src;
      private final PathMatcher paths;
-    private final PathValueTransformer transformer;
-
-    // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
-    // each key. The reason is that result comparison is not "consistent with equals", and
-    // TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
-    // method), and it does this even if using the add() method of the sorted set (this is in
-    // fact in violation of the stated behaviour of Set#add).
-    private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
      private int fifoCounter = 0;
  
      private SupplementalMapper(
-        CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
+        CldrDataSupplier src, PathValueTransformer transformer, PathMatcher pathFilter) {
  
-        super(icuName, false);
-        this.src = checkNotNull(src);
-        this.paths = checkNotNull(paths);
-        this.transformer = checkNotNull(transformer);
+        super(src.getDataForType(CldrDataType.SUPPLEMENTAL), transformer);
+        this.paths = checkNotNull(pathFilter);
      }
  
      @Override
-    ImmutableListMultimap<RbPath, Result> getResults() {
+    void addResults() {
          // DTD and NESTED_GROUPING order differ because of how the magic <FIFO> label works (it
          // basically enforces "encounter order" onto things in unlabeled sequences, which matches
          // the old behaviour). If it wouldn't break anything, it might be worth moving to DTD order
          // to remove any lingering implicit dependencies on the CLDR data behaviour.
-        CldrData supplementalData = src.getDataForType(CldrDataType.SUPPLEMENTAL);
-        PathValueTransformer.DynamicVars varFn = p -> {
-            CldrValue cldrValue = supplementalData.get(p);
-            return cldrValue != null ? cldrValue.getValue() : null;
-        };
-
-        supplementalData.accept(NESTED_GROUPING, this::visit);
-
-        ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
-        out.orderValuesBy(natural());
-        for (RbPath rbPath : resultsByRbPath.keySet()) {
-            Set<Result> existingResults = resultsByRbPath.get(rbPath);
-            out.putAll(rbPath, existingResults);
-            for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
-                if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
-                    out.put(rbPath, fallback);
-                }
-            }
-        }
-        return out.build();
+        getCldrData().accept(NESTED_GROUPING, this::visit);
      }
  
      private void visit(CldrValue value) {
          if (paths.matchesPrefixOf(value.getPath())) {
-            for (Result r : transformer.transform(value)) {
-                RbPath rbPath = r.getKey();
-                if (rbPath.contains(RB_FIFO)) {
-                    // The fifo counter needs to be formatted with leading zeros for sorting.
-                    rbPath = rbPath.mapSegments(
-                        s -> s.equals("<FIFO>") ? String.format("<%04d>", fifoCounter) : s);
-                }
-                resultsByRbPath.put(rbPath, r);
-            }
+            transformValue(value).forEach(this::collectResult);
              fifoCounter++;
          }
      }
+
+    // <FIFO> hidden labels could be supported in the abstract mapper, but would need a "bulk" add
+    // method for results (since the counter is updated once per batch, which corresponds to once
+    // per rule). Having the same FIFO counter value for the same group of values is essential
+    // since it serves to group them.
+    //
+    // TODO: Improve this and push this up into the abstract class (so it works with LocaleMapper).
+    private void collectResult(Result r) {
+        RbPath rbPath = r.getKey();
+        if (rbPath.contains(RB_FIFO)) {
+            // The fifo counter needs to be formatted with leading zeros for sorting.
+            rbPath = rbPath.mapSegments(
+                s -> s.equals("<FIFO>") ? String.format("<%04d>", fifoCounter) : s);
+        }
+        addResult(rbPath, r);
+    }
  }
diff --git a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt

index db421033e637dd14302d78c694205dfa44c1a9e0..03ae48374fabd462a21fcc1d0c6bf361b92c1655 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt
+++ b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt
@@ -198,7 +198,7 @@
  
  # Leap year names go after other month names.
  # "yeartype" is an #IMPLIED attribute in the DTD and it should implicitly default to "standard".
-# In practice "standard" is never explicitly given, but it could be (so must match it here).
+# In practice "standard" is never explicitly given, but it could be (so it must be matched here).
  //ldml/dates/calendars/calendar[@type="(%A)"]/(day|month)s/%W[@type="(%A)"]/%W[@type="(%A)"]/%W[@type="(%A)"](?:[@yeartype="standard"])? ; /calendar/$1/$2Names/$3/$4
  //ldml/dates/calendars/calendar[@type="(%A)"]/(day|month)s/%W[@type="(%A)"]/%W[@type="(%A)"]/%W[@type="(%A)"][@yeartype="leap"] ; /calendar/$1/$2Names/$3/$4
  
@@ -227,7 +227,6 @@
  # Locale Display Names
  
  //ldml/localeDisplayNames/codePatterns/codePattern[@type="(%A)"] ; /codePatterns/$1
-//ldml/localeDisplayNames/annotationPatterns/annotationPattern[@type="(%A)"] ; /codePatterns/$1
  
  //ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1
  
@@ -264,23 +263,23 @@
  # Ordering of rules is critical here since they write into the same resource bundle path and the
  # last 3 values are grouped together as a single value (via the special <FIFO> hidden label).
  #
-# Note that the <FIFO> label is needed here (not the "group" instruction) because the grouped
+# Note that the <FORMAT> label is needed here (not the "group" instruction) because the grouped
  # values must be seen as having a resource bundle path that is a child of the "/Currencies/$1"
  # path. This is so that the grouped values only appear when one of them is present rather than
-# whenever any of the other values in the main resource bundle path exist.
+# whenever any of the other values in the main resource bundle path exists.
  #
  # Due to the optional nature of the final sub-array in the bundle, it would be very hard to ever
-# add more elements after it.
+# add any more elements after it.
  //ldml/numbers/currencies/currency[@type="(%W)"]/symbol
       ; /Currencies/$1 ; fallback=$1
  //ldml/numbers/currencies/currency[@type="(%W)"]/displayName
       ; /Currencies/$1 ; fallback=$1
  //ldml/numbers/currencies/currency[@type="(%W)"]/pattern[@type="standard"]
-     ; /Currencies/$1/<FIFO> ; fallback=//ldml/numbers/currencyFormats[@numberSystem="%D"]/currencyFormatLength/currencyFormat[@type="standard"]/pattern[@type="standard"]
+     ; /Currencies/$1/<FORMAT> ; fallback=//ldml/numbers/currencyFormats[@numberSystem="%D"]/currencyFormatLength/currencyFormat[@type="standard"]/pattern[@type="standard"]
  //ldml/numbers/currencies/currency[@type="(%W)"]/decimal
-     ; /Currencies/$1/<FIFO> ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/decimal
+     ; /Currencies/$1/<FORMAT> ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/decimal
  //ldml/numbers/currencies/currency[@type="(%W)"]/group
-     ; /Currencies/$1/<FIFO> ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/group
+     ; /Currencies/$1/<FORMAT> ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/group
  # ----
  
  //ldml/numbers/currencyFormats[@numberSystem="%D"]/currencySpacing/(%W)/(%W)  ; /currencySpacing/$1/$2
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java

new file mode 100644 (file)

index 0000000..d3ee813
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java
@@ -0,0 +1,304 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer;
+
+import com.google.common.collect.ImmutableList;
+
+@RunWith(JUnit4.class)
+public class AbstractPathValueMapperTest {
+    @Test
+    public void testNameAndIcuFallback() {
+        IcuData foo = new FakeMapper().generateIcuData("foo", false);
+        IcuData bar = new FakeMapper().generateIcuData("bar", true);
+
+        assertThat(foo).getPaths().isEmpty();
+        assertThat(foo).hasName("foo");
+        assertThat(foo).hasFallback(false);
+
+        assertThat(bar).getPaths().isEmpty();
+        assertThat(bar).hasName("bar");
+        assertThat(bar).hasFallback(true);
+    }
+
+    @Test
+    public void testUngroupedConcatenation() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("foo/bar", "one", "two");
+        mapper.addUngroupedResult("foo/baz", "other", "path");
+        mapper.addUngroupedResult("foo/bar", "three", "four");
+        IcuData icuData = mapper.generateIcuData("foo", false);
+
+        assertThat(icuData).getPaths().hasSize(2);
+        assertThat(icuData).hasValuesFor("foo/bar", singletonValues("one", "two", "three", "four"));
+        assertThat(icuData).hasValuesFor("foo/baz", singletonValues("other", "path"));
+    }
+
+    @Test
+    public void testGrouping() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addGroupedResult("foo/bar", "one", "two");
+        mapper.addGroupedResult("foo/baz", "other", "path");
+        mapper.addGroupedResult("foo/bar", "three", "four");
+        IcuData icuData = mapper.generateIcuData("foo", false);
+
+        assertThat(icuData).getPaths().hasSize(2);
+        assertThat(icuData)
+            .hasValuesFor("foo/bar", RbValue.of("one", "two"), RbValue.of("three", "four"));
+        assertThat(icuData)
+            .hasValuesFor("foo/baz", RbValue.of("other", "path"));
+    }
+
+    @Test
+    public void testFallbackResults() {
+        // The indices are important in matching up the results and their respective fallbacks.
+        Result explicit1 = FakeResult.of("foo/bar", 1, false, "one");
+        Result explicit2 = FakeResult.of("foo/bar", 2, false, "two");
+        Result explicit3 = FakeResult.of("foo/bar", 3, false, "three");
+
+        Result fallback1 = FakeResult.fallback("foo/bar", 1, "<ONE>");
+        Result fallback2 = FakeResult.fallback("foo/bar", 2, "<TWO>");
+        Result fallback3 = FakeResult.fallback("foo/bar", 3, "<THREE>");
+
+        FakeTransformer transformer = new FakeTransformer();
+        transformer.addFallbacks("foo/bar", fallback1, fallback2, fallback3);
+
+        // When all results are explicitly present, no fallbacks are used.
+        IcuData noFallback = new FakeMapper(transformer)
+            .addResult(explicit1)
+            .addResult(explicit2)
+            .addResult(explicit3)
+            .generateIcuData("foo", false);
+        assertThat(noFallback).hasValuesFor("foo/bar", singletonValues("one", "two", "three"));
+
+        // Missing explicit results trigger fallbacks.
+        IcuData firstFallback = new FakeMapper(transformer)
+            .addResult(explicit2)
+            .addResult(explicit3)
+            .generateIcuData("foo", false);
+        assertThat(firstFallback).hasValuesFor("foo/bar", singletonValues("<ONE>", "two", "three"));
+
+        // Fallbacks can appear in any part of the result sequence.
+        IcuData lastFallbacks = new FakeMapper(transformer)
+            .addResult(explicit1)
+            .generateIcuData("foo", false);
+        assertThat(lastFallbacks)
+            .hasValuesFor("foo/bar", singletonValues("one", "<TWO>", "<THREE>"));
+
+        // Without a single result to "seed" the fallback group, nothing is emitted.
+        IcuData allFallbacks = new FakeMapper(transformer).generateIcuData("foo", false);
+        assertThat(allFallbacks).getPaths().isEmpty();
+    }
+
+    @Test
+    public void testAliases_ungrouped() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("foo/default", "start", "/alias/target", "end");
+        mapper.addUngroupedResult("foo/alias-0", "start", "/alias/target[0]", "end");
+        mapper.addUngroupedResult("foo/alias-1", "start", "/alias/target[1]", "end");
+        mapper.addUngroupedResult("foo/alias-2", "start", "/alias/target[2]", "end");
+        mapper.addUngroupedResult("alias/target", "first", "second", "third");
+        IcuData icuData = mapper.generateIcuData("foo", false);
+
+        assertThat(icuData).getPaths().hasSize(5);
+        assertThat(icuData)
+            .hasValuesFor("foo/default", singletonValues("start", "first", "end"));
+        assertThat(icuData)
+            .hasValuesFor("foo/alias-0", singletonValues("start", "first", "end"));
+        assertThat(icuData)
+            .hasValuesFor("foo/alias-1", singletonValues("start", "second", "end"));
+        assertThat(icuData)
+            .hasValuesFor("foo/alias-2", singletonValues("start", "third", "end"));
+        assertThat(icuData)
+            .hasValuesFor("alias/target", singletonValues("first", "second", "third"));
+    }
+
+    // Grouping ignores aliases.
+    @Test
+    public void testAliases_grouped() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addGroupedResult("foo/bar", "grouped", "/alias/target");
+        mapper.addGroupedResult("foo/bar", "/alias/target[1]");
+        mapper.addUngroupedResult("alias/target", "first", "second");
+
+        IcuData icuData = mapper.generateIcuData("foo", false);
+        assertThat(icuData).getPaths().hasSize(2);
+        assertThat(icuData)
+            .hasValuesFor("foo/bar",
+                RbValue.of("grouped", "/alias/target"),
+                RbValue.of("/alias/target[1]"));
+        assertThat(icuData).hasValuesFor("alias/target", singletonValues("first", "second"));
+    }
+
+    @Test
+    public void testAliases_explicit() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("foo/bar:alias", "/alias/target");
+        mapper.addUngroupedResult("foo/bar", "/alias/target");
+        mapper.addUngroupedResult("alias/target", "alias-value");
+        IcuData icuData = mapper.generateIcuData("foo", false);
+
+        assertThat(icuData).getPaths().hasSize(3);
+        assertThat(icuData).hasValuesFor("foo/bar:alias", singletonValues("/alias/target"));
+        assertThat(icuData).hasValuesFor("foo/bar", singletonValues("alias-value"));
+        assertThat(icuData).hasValuesFor("alias/target", singletonValues("alias-value"));
+    }
+
+    @Test
+    public void testAliases_ordering() {
+        // It doesn't matter where an alias is in the order of results.
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("first/alias", "hello");
+        mapper.addUngroupedResult("foo/bar", "/first/alias", "/last/alias");
+        mapper.addUngroupedResult("last/alias", "world");
+        IcuData icuData = mapper.generateIcuData("foo", false);
+
+        assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world"));
+    }
+
+    @Test
+    public void testAliases_concatenation() {
+        // Values added separately to the same path are concatenated and can be indexed by aliases.
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("alias/target", "hello");
+        mapper.addUngroupedResult("foo/bar", "/alias/target[0]", "/alias/target[1]");
+        mapper.addUngroupedResult("alias/target", "world");
+        IcuData icuData = mapper.generateIcuData("foo", false);
+
+        assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world"));
+    }
+
+    @Test
+    public void testAliases_missing() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("alias/target", "value");
+        mapper.addUngroupedResult("foo/bar", "/no-such-alias/target");
+        IllegalArgumentException e =
+            assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
+        assertThat(e).hasMessageThat().contains("no such alias value");
+        assertThat(e).hasMessageThat().contains("/no-such-alias/target");
+    }
+
+    @Test
+    public void testAliases_badIndex() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("alias/target", "value");
+        mapper.addUngroupedResult("foo/bar", "/alias/target[1]");
+        IllegalArgumentException e =
+            assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
+        assertThat(e).hasMessageThat().contains("out of bounds");
+        assertThat(e).hasMessageThat().contains("/alias/target[1]");
+    }
+
+    @Test
+    public void testAliases_noRecursion() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("alias/target", "/other/alias");
+        mapper.addUngroupedResult("other/alias", "/other/alias");
+        mapper.addUngroupedResult("foo/bar", "/alias/target");
+        IllegalStateException e =
+            assertThrows(IllegalStateException.class, () -> mapper.generateIcuData("foo", false));
+        assertThat(e).hasMessageThat().contains("recursive alias resolution is not supported");
+    }
+
+    @Test
+    public void testAliases_explicitAliasesAreSingletonOnly() {
+        FakeMapper mapper = new FakeMapper();
+        mapper.addUngroupedResult("foo/bar:alias", "first", "second");
+        IllegalArgumentException e =
+            assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
+        assertThat(e).hasMessageThat().contains("explicit aliases must be singleton values");
+        assertThat(e).hasMessageThat().contains("foo/bar:alias");
+    }
+
+    private static final class FakeMapper extends AbstractPathValueMapper {
+        private final static CldrData EXPLODING_DATA =
+            new CldrData() {
+                @Override public void accept(PathOrder pathOrder, ValueVisitor valueVisitor) {
+                    throw new UnsupportedOperationException("should not be called by test");
+                }
+
+                @Override public void accept(PathOrder pathOrder, PrefixVisitor prefixVisitor) {
+                    throw new UnsupportedOperationException("should not be called by test");
+                }
+
+                @Override public CldrValue get(CldrPath cldrPath) {
+                    throw new UnsupportedOperationException("should not be called by test");
+                }
+            };
+
+        // We could also just use Mockito for this (it's not yet a project dependency however).
+        private final PathValueTransformer transformer =
+            new PathValueTransformer() {
+                @Override public ImmutableList<Result> transform(CldrValue cldrValue) {
+                    throw new UnsupportedOperationException("should not be called by test");
+                }
+
+                @Override
+                public ImmutableList<Result> transform(CldrValue cldrValue, DynamicVars varFn) {
+                    throw new UnsupportedOperationException("should not be called by test");
+                }
+
+                @Override
+                public ImmutableList<Result> getFallbackResultsFor(RbPath key, DynamicVars varFn) {
+                    // TODO: Test fallbacks.
+                    return ImmutableList.of();
+                }
+            };
+
+        // This preserves insertion order in a well defined way (good for testing alias order).
+        private final List<Result> fakeResults = new ArrayList<>();
+
+        FakeMapper() {
+            this(new FakeTransformer());
+        }
+
+        FakeMapper(FakeTransformer transformer) {
+            super(EXPLODING_DATA, transformer);
+        }
+
+        FakeMapper addUngroupedResult(String path, String... values) {
+            int index = fakeResults.size() + 1;
+            return addResult(FakeResult.of(path, index, false, values));
+        }
+
+        FakeMapper addGroupedResult(String path, String... values) {
+            int index = fakeResults.size() + 1;
+            return addResult(FakeResult.of(path, index, true, values));
+        }
+
+        FakeMapper addResult(Result r) {
+            fakeResults.add(r);
+            return this;
+        }
+
+        @Override void addResults() {
+            fakeResults.forEach(result -> addResult(result.getKey(), result));
+        }
+    }
+
+    private static RbValue[] singletonValues(String... values) {
+        return Arrays.stream(values).map(RbValue::of).toArray(RbValue[]::new);
+    }
+}
+\ No newline at end of file
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java

new file mode 100644 (file)

index 0000000..3d3a8e2
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java
@@ -0,0 +1,404 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static com.google.common.truth.Truth.assertThat;
+import static java.util.Optional.empty;
+import static org.unicode.cldr.api.CldrValue.parseValue;
+import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.Arrays;
+import java.util.Optional;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer;
+
+// Almost all the unit-testing for LocaleMapper is done via AbstractPathValueMapper or
+// RegexTransformer (and friends). Very little is left that's special to locale data.
+@RunWith(JUnit4.class)
+public class LocaleMapperTest {
+    private final FakeTransformer transformer = new FakeTransformer();
+    private final FakeDataSupplier src = new FakeDataSupplier();
+
+    @Test
+    public void testSimple() {
+        //ldml/units/durationUnit[@type="(%A)"]/durationUnitPattern ; /durationUnits/$1
+        addMapping("xx",
+            ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"),
+            simpleResult("/durationUnits/foo", "Bar"));
+
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, empty());
+
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/durationUnits/foo", "Bar");
+    }
+
+    @Test
+    public void testCorrectLocaleIsUsed() {
+        src.addLocaleData(
+            "xx", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "XX"));
+        addMapping(
+            "yy", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "YY"),
+            simpleResult("/durationUnits/foo", "YY"));
+        src.addLocaleData(
+            "zz", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "ZZ"));
+
+        IcuData icuData =
+            LocaleMapper.process("yy", src, empty(), transformer, empty());
+
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/durationUnits/foo", "YY");
+    }
+
+    @Test
+    public void testInheritedValuesNotIncludedByDefault() {
+        //ldml/units/durationUnit[@type="(%A)"]/durationUnitPattern ; /durationUnits/$1
+        addMapping("xx",
+            ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"),
+            simpleResult("/durationUnits/foo", "Bar"));
+        //ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1
+        addInheritedMapping("xx",
+            ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"),
+            simpleResult("/Keys/sometype", "Value"));
+
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, empty());
+
+        // The 2nd mapping is not used because it does not appear in the unresolved CldrData.
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/durationUnits/foo", "Bar");
+    }
+
+    @Test
+    public void testInheritedValuesIncludedWhenSameResourceBundle() {
+        //ldml/numbers/currencies/currency[@type="(%W)"]/symbol ; /Currencies/$1 ; fallback=$1
+        //ldml/numbers/currencies/currency[@type="(%W)"]/displayName ; /Currencies/$1 ; fallback=$1
+        addMapping("xx",
+            ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "US$"),
+            simpleResult("/Currencies/USD", 1, "US$"));
+        // This is included because the resource bundle path is the same as above. Note that we
+        // have to use the index to distinguish results here (this corresponds to the line number
+        // of the rule when the real regex based config is used and determines result ordering).
+        addInheritedMapping("xx",
+            ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"),
+            simpleResult("/Currencies/USD", 2, "US Dollar"));
+
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, empty());
+
+        // Now the inherited mapping is used because the path appeared for the unresolved CldrData.
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/Currencies/USD", singletonValues("US$", "US Dollar"));
+    }
+
+    @Test
+    public void testChildPathsNotIncludedByDefault() {
+        // Tests that in the case that one path is the child of another path (rare) the existence
+        // of the parent path will not trigger the child path to be included.
+        //
+        //ldml/.../dateTimeFormats/availableFormats/dateFormatItem[@id="(%A)"]
+        //   ; /calendar/$1/availableFormats/$2
+        //ldml/.../dateTimeFormats/availableFormats/dateFormatItem[@id="(%A)"][@count="(%A)"]
+        //   ; /calendar/$1/availableFormats/$2/$3
+        addMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"),
+            simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
+        addInheritedMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
+            simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
+
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, empty());
+
+        // The inherited child path is not included just because its parent path was present.
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Foo");
+    }
+
+    @Test
+    public void testParentPathsNotIncludedByDefault() {
+        // Same as above but swapping inherited vs explicit mappings.
+        addInheritedMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"),
+            simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
+        addMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
+            simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
+
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, empty());
+
+        // The inherited parent path is not included just because its child path was present.
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar/one", "Bar");
+    }
+
+    // This is done so that when these paths are written into the ICU data file (and the hidden
+    // labels are removed) you get the "two layer" array:
+    //
+    // {
+    //   "Parent",
+    //   { "Child-1", "Child-2" }
+    // }
+    //
+    // This needs to happen even when only one of the child elements is given explicitly.
+    @Test
+    public void testHiddenLabelsIncludeParentPaths() {
+        // Testing that the existence of a child element using a hidden label *does* trigger the
+        // parent element to be included.
+        addInheritedMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"),
+            simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
+        addInheritedMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"),
+            simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
+
+        // This is the only explicit mapping and it triggers the sibling _and_ the parent.
+        addMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
+            simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));
+
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, empty());
+
+        assertThat(icuData).getPaths().hasSize(2);
+        assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent");
+        assertThat(icuData)
+            .hasValuesFor("/calendar/foo/availableFormats/bar/<HIDDEN>",
+                singletonValues("Child-1", "Child-2"));
+    }
+
+    // This is strange behaviour given the test above, since it means that it's impossible to
+    // use hidden labels to create a situation where the output ICU data looks like:
+    //
+    // {
+    //   "Parent",
+    //   { "Child-1", "Child-2" },
+    //   "Other Parent"
+    // }
+    //
+    // if the child elements can be inherited; since if they are not present, you just get:
+    //
+    // {
+    //   "Parent",
+    //   "Other Parent"
+    // }
+    //
+    // Which moves the index of the following elements up by one and makes it impossible to
+    // define a stable length or index mapping for the array.
+    //
+    // However this is relied upon in the /Currencies/XXX case where a child array exists, but
+    // is optional if none of its values are explicitly present. For example in en_150.txt:
+    //
+    // Currencies{
+    //     EUR{
+    //         "€",
+    //         "Euro",
+    //         {
+    //             "¤#,##0.00",
+    //             ".",
+    //             ",",
+    //         }
+    //     }
+    // }
+    //
+    // In most cases the formatting/grouping information is omitted if it can all be inherited.
+    //
+    // This only really works because the child array is the last element in the parent array, so
+    // not having it present doesn't affect any later elements.
+    //
+    // The "group" instruction in the transformation configuration files is a different way to
+    // allow grouping of sub-arrays which does not have this behaviour.
+    @Test
+    public void testHiddenLabelsAreNotIncludedAutomatically() {
+        // As above, but now only the parent path is included explicitly.
+        addMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"),
+            simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
+        addInheritedMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"),
+            simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
+
+        // Both children are inherited, so neither hidden-label path is triggered here.
+        addInheritedMapping("xx",
+            ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+                + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
+            simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));
+
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, empty());
+
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent");
+    }
+
+    @Test
+    public void testDefaultCalendar() {
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, Optional.of("pastafarian"));
+
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/calendar/default", "pastafarian");
+    }
+
+    @Test
+    public void testDateTimeHack() {
+        //calendar/$1/DateTimePatterns
+        addMapping("xx",
+            format("time", "full", "one"),
+            simpleResult("/calendar/foo/DateTimePatterns", 1, "one"));
+        addMapping("xx",
+            format("time", "long", "two"),
+            simpleResult("/calendar/foo/DateTimePatterns", 2, "two"));
+        addMapping("xx",
+            format("time", "medium", "three"),
+            simpleResult("/calendar/foo/DateTimePatterns", 3, "three"));
+        addMapping("xx",
+            format("time", "short", "four"),
+            simpleResult("/calendar/foo/DateTimePatterns", 4, "four"));
+        addMapping("xx",
+            format("date", "full", "five"),
+            simpleResult("/calendar/foo/DateTimePatterns", 5, "five"));
+        addMapping("xx",
+            format("date", "long", "six"),
+            simpleResult("/calendar/foo/DateTimePatterns", 6, "six"));
+        addMapping("xx",
+            format("date", "medium", "seven"),
+            simpleResult("/calendar/foo/DateTimePatterns", 7, "seven"));
+        addMapping("xx",
+            format("date", "short", "eight"),
+            simpleResult("/calendar/foo/DateTimePatterns", 8, "eight"));
+        addMapping("xx",
+            format("dateTime", "full", "nine"),
+            simpleResult("/calendar/foo/DateTimePatterns", 9, "nine"));
+        addMapping("xx",
+            format("dateTime", "long", "ten"),
+            simpleResult("/calendar/foo/DateTimePatterns", 10, "ten"));
+        addMapping("xx",
+            format("dateTime", "medium", "eleven"),
+            simpleResult("/calendar/foo/DateTimePatterns", 11, "eleven"));
+        addMapping("xx",
+            format("dateTime", "short", "twelve"),
+            simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve"));
+
+        IcuData icuData =
+            LocaleMapper.process("xx", src, empty(), transformer, empty());
+
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/calendar/foo/DateTimePatterns",
+            singletonValues(
+                "one", "two", "three", "four",
+                "five", "six", "seven", "eight",
+                "eleven",  // <-- legacy reasons, don't ask!
+                "nine", "ten", "eleven", "twelve"));
+    }
+
+    @Test
+    public void testDateTimeHack_wrongNumberofElements() {
+        // One missing pattern from the start.
+        addMapping("xx",
+            format("time", "long", "two"),
+            simpleResult("/calendar/foo/DateTimePatterns", 2, "two"));
+        addMapping("xx",
+            format("time", "medium", "three"),
+            simpleResult("/calendar/foo/DateTimePatterns", 3, "three"));
+        addMapping("xx",
+            format("time", "short", "four"),
+            simpleResult("/calendar/foo/DateTimePatterns", 4, "four"));
+        addMapping("xx",
+            format("date", "full", "five"),
+            simpleResult("/calendar/foo/DateTimePatterns", 5, "five"));
+        addMapping("xx",
+            format("date", "long", "six"),
+            simpleResult("/calendar/foo/DateTimePatterns", 6, "six"));
+        addMapping("xx",
+            format("date", "medium", "seven"),
+            simpleResult("/calendar/foo/DateTimePatterns", 7, "seven"));
+        addMapping("xx",
+            format("date", "short", "eight"),
+            simpleResult("/calendar/foo/DateTimePatterns", 8, "eight"));
+        addMapping("xx",
+            format("dateTime", "full", "nine"),
+            simpleResult("/calendar/foo/DateTimePatterns", 9, "nine"));
+        addMapping("xx",
+            format("dateTime", "long", "ten"),
+            simpleResult("/calendar/foo/DateTimePatterns", 10, "ten"));
+        addMapping("xx",
+            format("dateTime", "medium", "eleven"),
+            simpleResult("/calendar/foo/DateTimePatterns", 11, "eleven"));
+        addMapping("xx",
+            format("dateTime", "short", "twelve"),
+            simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve"));
+
+        IllegalStateException e = assertThrows(
+            IllegalStateException.class,
+            () -> LocaleMapper.process("xx", src, empty(), transformer, empty()));
+
+        assertThat(e).hasMessageThat().contains("unexpected");
+        assertThat(e).hasMessageThat().contains("/calendar/foo/DateTimePatterns");
+    }
+
+    private static CldrValue format(String type,String length, String pattern) {
+        return ldml(String.format(
+            "dates/calendars/calendar[@type=\"foo\"]"
+                + "/%1$sFormats"
+                + "/%1$sFormatLength[@type=\"%2$s\"]"
+                + "/%1$sFormat[@type=\"standard\"]/pattern[@type=\"%3$s\"]",
+            type, length, pattern));
+    }
+
+    private void addMapping(String locale, CldrValue value, Result... results) {
+        src.addLocaleData(locale, value);
+        transformer.addResults(value, results);
+    }
+
+    private void addInheritedMapping(String locale, CldrValue value, Result... results) {
+        src.addInheritedData(locale, value);
+        transformer.addResults(value, results);
+    }
+
+    private static Result simpleResult(String path, String value) {
+        return FakeResult.of(path, 1, false, value);
+    }
+
+    private static Result simpleResult(String path, int index, String value) {
+        return FakeResult.of(path, index, false, value);
+    }
+
+    private static CldrData cldrData(CldrValue... values) {
+        return CldrDataSupplier.forValues(Arrays.asList(values));
+    }
+
+    private static CldrValue ldml(String path) {
+        return ldml(path, "");
+    }
+
+    // Creates a CldrValue for the given path (relative to "//ldml/") carrying the given value.
+    private static CldrValue ldml(String path, String value) {
+        return parseValue("//ldml/" + path, value);
+    }
+
+    private static RbValue[] singletonValues(String... values) {
+        return Arrays.stream(values).map(RbValue::of).toArray(RbValue[]::new);
+    }
+}
+\ No newline at end of file
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java

index a62e3fe86a2bbadfb2b978b1913684f02c8c952c..4d52a716dcbef7def0d0a4addf7585b48369029e 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java
@@ -180,7 +180,8 @@ public class PluralsMapperTest {
          StringBuilder cldrPath = new StringBuilder("//supplementalData");
          appendAttribute(cldrPath.append("/plurals"), "type", type);
          appendAttribute(cldrPath.append("/pluralRules"), "locales", Joiner.on(' ').join(locales));
-        appendAttribute(cldrPath.append("/pluralRule"), "count", count);
+        // We aren't testing sort index (#N) here, but still need to set it to something.
+        appendAttribute(cldrPath.append("/pluralRule#0"), "count", count);
          return CldrValue.parseValue(cldrPath.toString(), value);
      }
  
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java

index 38ba97185f042c879836801fbd624a378fb0a2ba..841a2e12823f21025532381bfd0af6f3146b6229 100644 (file)
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java
@@ -49,11 +49,11 @@ public class RbnfMapperTest {
  
      @Test
      public void testSingleRuleset() {
-        int idx = 1;
+        int idx = 0;
          CldrData cldrData = cldrData(
-            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", idx++),
-            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", idx++),
-            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", idx++));
+            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", ++idx));
  
          IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
  
@@ -71,16 +71,16 @@ public class RbnfMapperTest {
          // Note that input order of these paths shouldn't matter since they are ordered (and thus
          // grouped) by DTD order (relative order matters for values in the same set, but values
          // do not have to grouped together).
-        int idx = 1;
+        int idx = 0;
          CldrData cldrData = cldrData(
-            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", idx++),
-            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", idx++),
-            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", idx++),
-            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", idx++),
-            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", idx++),
-            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", idx++),
-            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", idx++),
-            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", idx++));
+            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", ++idx),
+            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", ++idx));
  
          IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
  
@@ -101,19 +101,19 @@ public class RbnfMapperTest {
  
      @Test
      public void testSpecials() {
-        int idx = 1;
+        int idx = 0;
          CldrData specials = cldrData(
-            rbnfRule(DURATION_RULES, "min", PRIVATE, "0", "0 minutes; 1 minute; =0= minutes;", idx++),
-            rbnfRule(DURATION_RULES, "hr", PRIVATE, "0", "0 hours; 1 hour; =0= hours;", idx++),
-            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", idx++),
-            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", idx++),
-            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600", "=%%hr-min-sec=;", idx++));
+            rbnfRule(DURATION_RULES, "min", PRIVATE, "0", "0 minutes; 1 minute; =0= minutes;", ++idx),
+            rbnfRule(DURATION_RULES, "hr", PRIVATE, "0", "0 hours; 1 hour; =0= hours;", ++idx),
+            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", ++idx),
+            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", ++idx),
+            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600", "=%%hr-min-sec=;", ++idx));
  
-        idx = 1;
+        idx = 0;
          CldrData cldrData = cldrData(
-            rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", idx++),
+            rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", ++idx),
              rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0",
-                "=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", idx++));
+                "=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", ++idx));
  
          IcuData icuData = RbnfMapper.process("en", cldrData, Optional.of(specials));
  
@@ -139,12 +139,12 @@ public class RbnfMapperTest {
      // the same, it's not entirely obviously why some of the special cases really exist.
      @Test
      public void testEscaping() {
-        int idx = 1;
+        int idx = 0;
          CldrData cldrData = cldrData(
-            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", idx++),
-            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", idx++),
-            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", idx++),
-            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", idx++));
+            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", ++idx),
+            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", ++idx),
+            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", ++idx),
+            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", ++idx));
  
          IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
  
@@ -173,7 +173,8 @@ public class RbnfMapperTest {
  
          StringBuilder cldrPath = new StringBuilder("//ldml/rbnf");
          appendAttribute(cldrPath.append("/rulesetGrouping"), "type", group);
-        cldrPath.append("/ruleset");
+        // We aren't testing sort index (#N) here, but still need to set it to something.
+        cldrPath.append("/ruleset#0");
          appendAttribute(cldrPath, "type", setType);
          appendAttribute(cldrPath, "access", access);
          cldrPath.append("/rbnfrule#").append(ruleIndex);
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapperTest.java

new file mode 100644 (file)

index 0000000..8085616
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapperTest.java
@@ -0,0 +1,95 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static org.unicode.cldr.api.CldrValue.parseValue;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.PathMatcher;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer;
+
+// Almost all the unit-testing for SupplementalMapper is done via AbstractPathValueMapper or
+// RegexTransformer (and friends). Very little is left that's special to supplemental data.
+@RunWith(JUnit4.class)
+public class SupplementalMapperTest {
+    private final FakeTransformer transformer = new FakeTransformer();
+    private final FakeDataSupplier src = new FakeDataSupplier();
+
+    @Test
+    public void testSimple() {
+        addExpectedMapping(
+            supplementalData("likelySubtags/likelySubtag[@from=\"Foo\"][@to=\"Bar\"]"),
+            simpleResult("/Foo", "Bar"));
+
+        PathMatcher allPaths = PathMatcher.of("supplementalData");
+        IcuData icuData = SupplementalMapper.process(src, transformer, "name", allPaths);
+
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/Foo", "Bar");
+    }
+
+    @Test
+    public void testFifoLabel() {
+        // Example:
+        // //supplementalData/currencyData/region[@iso3166="(%W)"]/currency[@iso4217="(%W)"]
+        //     ; /CurrencyMap/$1/<FIFO>/id ; values=$2
+        //
+        // Note that the order in which mappings are added does not affect the output, since even
+        // though the "FIFO" mechanism works on encounter-order, the CldrData is sorted before
+        // being transformed (and in this case, is resolved on the currency code USD < USN < USS).
+        addExpectedMapping(
+            supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USN\"]"),
+            simpleResult("/CurrencyMap/US/<FIFO>/id", "USN"));
+        addExpectedMapping(
+            supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USS\"]"),
+            simpleResult("/CurrencyMap/US/<FIFO>/id", "USS"));
+        addExpectedMapping(
+            supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USD\"]"),
+            simpleResult("/CurrencyMap/US/<FIFO>/id", "USD"));
+
+        PathMatcher allPaths = PathMatcher.of("supplementalData");
+        IcuData icuData = SupplementalMapper.process(src, transformer, "name", allPaths);
+
+        assertThat(icuData).getPaths().hasSize(3);
+        assertThat(icuData).hasValuesFor("/CurrencyMap/US/<0000>/id", "USD");
+        assertThat(icuData).hasValuesFor("/CurrencyMap/US/<0001>/id", "USN");
+        assertThat(icuData).hasValuesFor("/CurrencyMap/US/<0002>/id", "USS");
+    }
+
+    @Test
+    public void testPathFilter() {
+        addExpectedMapping(
+            supplementalData("likelySubtags/likelySubtag[@from=\"Foo\"][@to=\"Bar\"]"),
+            simpleResult("/Foo", "Bar"));
+        addExpectedMapping(
+            supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USN\"]"),
+            simpleResult("/CurrencyMap/US/<FIFO>/id", "USN"));
+
+        PathMatcher filter = PathMatcher.of("supplementalData/likelySubtags");
+        IcuData icuData = SupplementalMapper.process(src, transformer, "name", filter);
+
+        assertThat(icuData).getPaths().hasSize(1);
+        assertThat(icuData).hasValuesFor("/Foo", "Bar");
+    }
+
+    private void addExpectedMapping(CldrValue value, Result... results) {
+        src.addSupplementalData(value);
+        transformer.addResults(value, results);
+    }
+
+    private static Result simpleResult(String path, String value) {
+        return FakeResult.of(path, 1, false, value);
+    }
+
+    private static CldrValue supplementalData(String path) {
+        return parseValue("//supplementalData/" + path, "");
+    }
+}
+\ No newline at end of file
author	David Beaumont <dbeaumont@google.com>
	Thu, 19 Sep 2019 13:30:04 +0000 (15:30 +0200)
committer	David Beaumont <david.beaumont+github@gmail.com>
	Wed, 25 Sep 2019 21:37:05 +0000 (23:37 +0200)
tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java		patch \| blob \| history
tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java		patch \| blob \| history
tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java		patch \| blob \| history
tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java		patch \| blob \| history
tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java		patch \| blob \| history
tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java		patch \| blob \| history
tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt		patch \| blob \| history
tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java	[new file with mode: 0644]	patch \| blob
tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java	[new file with mode: 0644]	patch \| blob
tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java		patch \| blob \| history
tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java		patch \| blob \| history
tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapperTest.java	[new file with mode: 0644]	patch \| blob