// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
-import static com.google.common.base.Preconditions.checkArgument;
-
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.NavigableSet;
import java.util.Set;
import java.util.TreeSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
*/
public final class IcuData {
private static final RbPath RB_VERSION = RbPath.of("Version");
- private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");
private final String name;
private final boolean hasFallback;
add(rbPath, rbValue);
}
- public void setVersion(String versionString) {
- add(RB_VERSION, versionString);
- }
-
- public void addResults(ListMultimap<RbPath, PathValueTransformer.Result> resultsByRbPath) {
- for (RbPath rbPath : resultsByRbPath.keySet()) {
- for (PathValueTransformer.Result r : resultsByRbPath.get(rbPath)) {
- if (r.isGrouped()) {
- // Grouped results have all the values in a single value entry.
- add(rbPath, RbValue.of(r.getValues()));
- } else {
- if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) {
- r.getValues().forEach(v -> add(rbPath, RbValue.of(v)));
- } else {
- // Ungrouped results are one value per entry, but might be expanded into
- // grouped results if they are a path referencing a grouped entry.
- r.getValues().forEach(v -> add(rbPath, replacePathValues(v)));
- }
- }
- }
- }
- }
-
/**
- * Replaces an ungrouped CLDR value for the form "/foo/bar" or "/foo/bar[N]" which is assumed
- * to be a reference to an existing value in a resource bundle. Note that the referenced bundle
- * might be grouped (i.e. an array with more than one element).
+ * Sets the value of the "/Version" path to be the given string, replacing any previous value.
*/
- private RbValue replacePathValues(String value) {
- Matcher m = ARRAY_INDEX.matcher(value);
- if (!m.matches()) {
- return RbValue.of(value);
- }
- // The only constraint is that the "path" value starts with a leading '/', but parsing into
- // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the
- // captured value contains '/' characters to represent path delimiters.
- RbPath replacePath = RbPath.parse(m.group(1));
- List<RbValue> replaceValues = get(replacePath);
- checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath);
- // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]").
- int replaceIndex = m.groupCount() > 1 ? Integer.parseInt(m.group(2)) : 0;
- return replaceValues.get(replaceIndex);
+ public void setVersion(String versionString) {
+ replace(RB_VERSION, versionString);
}
/**
return Collections.unmodifiableSet(paths);
}
- /** Returns whether the given path is present in this instance. */
- public boolean contains(RbPath rbPath) {
- return paths.contains(rbPath);
- }
-
- /** Returns whether there are any paths in this instance. */
- public boolean isEmpty() {
- return paths.isEmpty();
- }
-
@Override public String toString() {
StringWriter out = new StringWriter();
PrintWriter w = new PrintWriter(out);
private RbPath getPathFromStack() {
if (pathStack.isEmpty()) {
- return RbPath.empty();
+ return RbPath.of();
}
List<String> segments = new ArrayList<>();
Iterables.addAll(segments, pathStack);
if (segments.get(0).matches("<[0-9]{4}>")) {
segments.remove(0);
}
- return segments.isEmpty() ? RbPath.empty() : RbPath.of(Lists.reverse(segments));
+ return RbPath.of(Lists.reverse(segments));
}
private String getSegment(String segmentOrNull) {
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.google.common.collect.Iterables;
+
/**
* Writes an IcuData object to a text file. A lot of this class was copied directly from the
* original {@code IcuTextWriter} in the CLDR project and has a number of very idiosyncratic
out.print("{");
depth++;
- RbPath lastPath = RbPath.empty();
+ RbPath lastPath = RbPath.of();
for (RbPath path : icuData.getPaths()) {
// Close any blocks up to the common path length. Since paths are all distinct, the
// common length should always be shorter than either path. We add 1 since we must also
onlyValue = values.get(0);
if (onlyValue.isSingleton() && !mustBeArray(false, name, rbPath)) {
// Value has a single element and is not being forced to be an array.
- String onlyElement = onlyValue.getElement(0);
+ String onlyElement = Iterables.getOnlyElement(onlyValue.getElements());
if (quote) {
onlyElement = quoteInside(onlyElement);
}
// Adding a parent locale makes the data non-empty and forces it to be written.
supplementalData.getExplicitParentLocaleOf(splitData.getName())
.ifPresent(p -> splitData.add(RB_PARENT, p));
- if (!splitData.isEmpty() || isBaseLanguage || dir.includeEmpty()) {
+ if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
splitData.setVersion(CldrDataSupplier.getCldrVersionString());
write(splitData, outDir);
writtenLocaleIds.put(dir, id);
private static final CharMatcher UNQUOTED_SEGMENT_CHARS =
QUOTED_SEGMENT_CHARS.and(whitespace().negate());
- private static final RbPath EMPTY = new RbPath(ImmutableList.of());
-
- public static RbPath empty() {
- return EMPTY;
- }
-
+ /**
+ * Returns a path with the specified (possibly empty) sequence of segments. Note that unlike
+ * {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
+ * element constructed by this method.
+ */
public static RbPath of(String... segments) {
return of(Arrays.asList(segments));
}
+ /**
+ * Returns a path with the specified (possibly empty) sequence of segments. Note that unlike
+ * {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
+ * element constructed by this method.
+ */
public static RbPath of(Iterable<String> segments) {
return new RbPath(segments);
}
+ /** Parses the given path string, assuming {@code '/'} as a path separator. */
public static RbPath parse(String path) {
checkArgument(!path.isEmpty(), "cannot parse an empty path string");
// Allow leading '/', but don't allow empty segments anywhere else.
return new RbPath(PATH_SPLITTER.split(path));
}
- static int getCommonPrefixLength(RbPath lhs, RbPath rhs) {
+ /** Returns the common prefix length of two paths (useful when thinking of path hierarchies). */
+ public static int getCommonPrefixLength(RbPath lhs, RbPath rhs) {
int maxLength = Math.min(lhs.length(), rhs.length());
int n = 0;
while (n < maxLength && lhs.getSegment(n).equals(rhs.getSegment(n))) {
private RbPath(Iterable<String> segments) {
this.segments = ImmutableList.copyOf(segments);
+ // Use "this.segments" since the incoming list can have a different hash!
this.hashCode = Objects.hash(this.segments);
for (String segment : this.segments) {
checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments);
}
}
+ /** Returns the number of segments in this path. */
public int length() {
return segments.size();
}
+ /** Returns the Nth segment in this path. */
public String getSegment(int n) {
return segments.get(n);
}
- public RbPath getParent() {
- checkState(length() > 0, "cannot get parent of the empty path");
- return length() > 1 ? new RbPath(segments.subList(0, length() - 1)) : EMPTY;
- }
-
- public boolean isAnonymous() {
- return length() > 0 && segments.get(length() - 1).charAt(0) == '<';
- }
-
+ /** Returns a new path extended at the end by the specified segments. */
public RbPath extendBy(String... parts) {
return new RbPath(Iterables.concat(segments, Arrays.asList(parts)));
}
- public RbPath extendBy(RbPath suffix) {
- return new RbPath(Iterables.concat(segments, suffix.segments));
- }
-
- public RbPath mapSegments(Function<? super String, String> fn) {
- return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
- }
-
+ /** Returns whether this path starts with the specified prefix. */
public boolean startsWith(RbPath prefix) {
return prefix.length() <= length() && matchesSublist(prefix, 0);
}
+ /** Returns whether this path ends with the specified suffix. */
public boolean endsWith(RbPath suffix) {
return suffix.length() <= length() && matchesSublist(suffix, length() - suffix.length());
}
+ /** Returns whether this path contains the specified path. */
public boolean contains(RbPath path) {
int maxOffset = length() - path.length();
for (int i = 0; i <= maxOffset; i++) {
return true;
}
+ // TODO: Remove this and isAnonymous() since they are only called once each, in the same place.
+ /**
+ * Returns the path made up of all but the last segment of this path (so the parent of a
+ * single-segment path has no segments). Fails the {@code checkState()} precondition if this
+ * path has no segments.
+ */
+ public RbPath getParent() {
+ checkState(length() > 0, "cannot get parent of the empty path");
+ return new RbPath(segments.subList(0, length() - 1));
+ }
+
+ /**
+ * Returns whether the last segment of this path starts with {@code '<'}. A path with no
+ * segments is never anonymous.
+ */
+ public boolean isAnonymous() {
+ return length() > 0 && segments.get(length() - 1).charAt(0) == '<';
+ }
+
+ // TODO: Remove this special case code (called exactly once).
+ /**
+ * Returns a new path in which each segment of this path has been replaced by the result of
+ * applying the given function to it (segment order is preserved).
+ */
+ public RbPath mapSegments(Function<? super String, String> fn) {
+ return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
+ }
+
+ // TODO: Remove this in favour of having properly typed paths.
boolean isIntPath() {
String lastElement = segments.get(segments.size() - 1);
return lastElement.endsWith(":int") || lastElement.endsWith(":intvector");
return elements;
}
- /** Returns whether this is a single element value. */
+ /**
+ * Returns whether this is a single element value. Singleton values are treated differently
+ * when writing out ICU data files.
+ */
public boolean isSingleton() {
return elements.size() == 1;
}
- /** Returns the Nth element of this value. */
- public String getElement(int n) {
- return elements.get(n);
- }
-
@Override public int hashCode() {
return Objects.hashCode(elements);
}
@Override public boolean equals(Object obj) {
- return obj instanceof RbValue && elements.equals(((RbValue) obj).elements);
+ return obj instanceof RbValue && elements.equals(((RbValue) obj).elements);
}
@Override public String toString() {
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+
+import com.google.common.collect.ListMultimap;
+
+/**
+ * An abstract parent class for any mappers based on {@code PathValueTransformer}. This ensures
+ * that transformation results are correctly processed when being added to IcuData instances.
+ */
+public abstract class AbstractPathValueMapper {
+    // Matches a value which is a resource bundle path reference with an optional trailing array
+    // index (e.g. "/foo/bar" or "/foo/bar[2]"). Group 1 captures the path (including the leading
+    // '/') and group 2 captures the index digits, or is null if no index was given.
+    private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");
+
+    private final IcuData icuData;
+
+    /**
+     * Creates a mapper which adds its results to a newly created IcuData instance.
+     *
+     * @param name the name passed to the new IcuData instance.
+     * @param hasFallback the fallback flag passed to the new IcuData instance.
+     */
+    AbstractPathValueMapper(String name, boolean hasFallback) {
+        this.icuData = new IcuData(name, hasFallback);
+    }
+
+    /** Implemented by sub-classes to return all results to be added to the IcuData instance. */
+    abstract ListMultimap<RbPath, Result> getResults();
+
+    /**
+     * Adds results to the IcuData instance according to expected {@code PathValueTransformer}
+     * semantics. This method must only be called once per mapper.
+     *
+     * @return the IcuData instance owned by this mapper, populated with the transformed results.
+     * @throws IllegalStateException if the IcuData instance already contains any paths.
+     */
+    final IcuData transform() {
+        checkState(icuData.getPaths().isEmpty(),
+            "transform() method cannot be called multiple times: %s", icuData);
+
+        // This class mostly exists to control the fact that results need to be added in one go
+        // to the IcuData because of how referenced paths are handled. If results could be added in
+        // multiple passes, you could have confusing situations in which values have path
+        // references in them but the referenced paths have not been transformed yet. Forcing the
+        // subclass to implement a single method to generate all results at once ensures that we
+        // control the lifecycle of the data and how results are processed as they are added to
+        // the IcuData.
+        addResults(getResults());
+        return icuData;
+    }
+
+    /**
+     * Adds transformation results on the specified multi-map to this data instance. Results are
+     * handled differently according to whether they are grouped, or represent an alias value. If
+     * the value of an ungrouped result is itself a resource bundle path (including possibly having
+     * an array index) then the referenced value is assumed to be an existing path whose value is
+     * then substituted.
+     */
+    // TODO: Fix this to NOT implicitly rely on the ordering of referenced values.
+    private void addResults(ListMultimap<RbPath, Result> resultsByRbPath) {
+        for (RbPath rbPath : resultsByRbPath.keySet()) {
+            for (Result r : resultsByRbPath.get(rbPath)) {
+                if (r.isGrouped()) {
+                    // Grouped results have all the values in a single value entry.
+                    icuData.add(rbPath, RbValue.of(r.getValues()));
+                } else if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) {
+                    // Alias values are added verbatim (they are never treated as references).
+                    r.getValues().forEach(v -> icuData.add(rbPath, RbValue.of(v)));
+                } else {
+                    // Ungrouped results are one value per entry, but might be expanded into
+                    // grouped results if they are a path referencing a grouped entry.
+                    r.getValues().forEach(v -> icuData.add(rbPath, replacePathValues(v)));
+                }
+            }
+        }
+    }
+
+    /**
+     * Replaces an ungrouped CLDR value of the form "/foo/bar" or "/foo/bar[N]" which is assumed
+     * to be a reference to an existing value in a resource bundle. Note that the referenced bundle
+     * might be grouped (i.e. an array with more than one element).
+     *
+     * @param value the raw value, which may or may not be a path reference.
+     * @return the referenced value if {@code value} is a path reference, otherwise a new value
+     *     wrapping {@code value} unchanged.
+     * @throws IllegalArgumentException if the referenced path is not present in the IcuData.
+     */
+    private RbValue replacePathValues(String value) {
+        Matcher m = ARRAY_INDEX.matcher(value);
+        if (!m.matches()) {
+            return RbValue.of(value);
+        }
+        // The only constraint is that the "path" value starts with a leading '/', but parsing into
+        // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the
+        // captured value contains '/' characters to represent path delimiters.
+        RbPath replacePath = RbPath.parse(m.group(1));
+        List<RbValue> replaceValues = icuData.get(replacePath);
+        checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath);
+        // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]").
+        // Note that groupCount() cannot be used to detect this, since it is the number of groups
+        // in the pattern (always 2 here); an unmatched optional group is reported as null.
+        String indexOrNull = m.group(2);
+        int replaceIndex = indexOrNull != null ? Integer.parseInt(indexOrNull) : 0;
+        return replaceValues.get(replaceIndex);
+    }
+}
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
public static IcuData process(
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
+ return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
+ }
+
+ @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
+ static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
CollationVisitor visitor = new CollationVisitor(localeId);
icuSpecialData.ifPresent(s -> s.accept(ARBITRARY, visitor));
- src.getDataForLocale(localeId, UNRESOLVED).accept(ARBITRARY, visitor);
+ cldrData.accept(ARBITRARY, visitor);
return visitor.icuData;
}
// Super special hack case because the XML data is a bit broken for the root collation
// data (there's an empty <collation> element that's a non-leaf element and thus not
// visited, but we should add an empty sequence to the output data.
+ // TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131)
if (localeId.equals("root")) {
icuData.replace(RB_STANDARD_SEQUENCE, "");
// TODO: Collation versioning probably needs to be improved.
// "short" it can also have other values. This code was copied from CollationMapper
// which has the line;
// isShort = attr.getValue("alt") != null;
+ // TODO: Raise a ticket to examine this.
boolean isShort = COLLATION_RULE_ALT.optionalValueFrom(v).isPresent();
// Note that it's not clear why there's a check for "contains()" here. The code
// from which this was derived is largely undocumented and this check could have
// been overly defensive (perhaps a duplicate key should be an error?).
- if (isShort || !icuData.contains(rbPath)) {
+ if (isShort || !icuData.getPaths().contains(rbPath)) {
RbValue rules = RbValue.of(
LINE_SPLITTER.splitToList(v.getValue()).stream()
.map(CollationMapper::removeComment)
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.LinkedHashMultimap;
+import com.google.common.collect.ListMultimap;
import com.google.common.collect.SetMultimap;
/**
* <p>This is currently driven by the {@code ldml2icu_locale.txt} configuration file via a
* {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation.
*/
-public final class LocaleMapper {
+public final class LocaleMapper extends AbstractPathValueMapper {
// The default calendar (only set is different from inherited parent value).
private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default");
PathValueTransformer transformer,
SupplementalData supplementalData) {
- IcuData icuData = new IcuData(localeId, true);
- // Write out the results into the IcuData class, preserving result grouping and expanding
- // path references as necessary.
- ResultsCollector collector = new ResultsCollector(transformer);
- icuData.addResults(collector.collectResultsFor(localeId, src, icuSpecialData));
+ IcuData icuData = new LocaleMapper(localeId, src, icuSpecialData, transformer).transform();
doDateTimeHack(icuData);
supplementalData.getDefaultCalendar(icuData.getName())
.ifPresent(c -> icuData.add(RB_CALENDAR, c));
}
}
- private static final class ResultsCollector {
- private final PathValueTransformer transformer;
- private final Set<RbPath> validRbPaths = new HashSet<>();
+ private final String localeId;
+ private final CldrDataSupplier src;
+ private final Optional<CldrData> icuSpecialData;
+ private final PathValueTransformer transformer;
- // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
- // each key. The reason is that result comparison is not "consistent with equals", and
- // TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
- // method), and it does this even if using the add() method of the sorted set (this is in
- // fact in violation of the stated behaviour of Set#add).
- private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
+ private final Set<RbPath> validRbPaths = new HashSet<>();
- ResultsCollector(PathValueTransformer transformer) {
- this.transformer = checkNotNull(transformer);
- }
+ // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
+ // each key. The reason is that result comparison is not "consistent with equals", and
+ // TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
+ // method), and it does this even if using the add() method of the sorted set (this is in
+ // fact in violation of the stated behaviour of Set#add).
+ private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
- ImmutableListMultimap<RbPath, Result> collectResultsFor(
- String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
-
- CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED);
- CldrData resolved = src.getDataForLocale(localeId, RESOLVED);
- DynamicVars varFn = p -> {
- CldrValue cldrValue = resolved.get(p);
- return cldrValue != null ? cldrValue.getValue() : null;
- };
-
- collectPaths(unresolved, varFn);
- collectResults(resolved, varFn);
- icuSpecialData.ifPresent(s -> collectSpecials(s, varFn));
-
- ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
- out.orderValuesBy(natural());
- for (RbPath rbPath : resultsByRbPath.keySet()) {
- Set<Result> existingResults = resultsByRbPath.get(rbPath);
- out.putAll(rbPath, existingResults);
- for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
- if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
- out.put(rbPath, fallback);
- }
- }
- }
- return out.build();
- }
+ private LocaleMapper(
+ String localeId,
+ CldrDataSupplier src,
+ Optional<CldrData> icuSpecialData,
+ PathValueTransformer transformer) {
- private void collectPaths(CldrData unresolved, DynamicVars varFn) {
- ValueVisitor collectPaths =
- v -> transformer.transform(v, varFn).forEach(this::collectResultPath);
- unresolved.accept(DTD, collectPaths);
- }
+ super(localeId, true);
+ this.localeId = localeId;
+ this.src = checkNotNull(src);
+ this.icuSpecialData = checkNotNull(icuSpecialData);
+ this.transformer = checkNotNull(transformer);
+ }
- private void collectResultPath(Result result) {
- RbPath rbPath = result.getKey();
- validRbPaths.add(rbPath);
- if (rbPath.isAnonymous()) {
- RbPath parent = rbPath.getParent();
- checkState(!parent.isAnonymous(),
- "anonymous paths should not be nested: %s", rbPath);
- validRbPaths.add(parent);
+ @Override
+ ListMultimap<RbPath, Result> getResults() {
+ CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED);
+ CldrData resolved = src.getDataForLocale(localeId, RESOLVED);
+ DynamicVars varFn = p -> {
+ CldrValue cldrValue = resolved.get(p);
+ return cldrValue != null ? cldrValue.getValue() : null;
+ };
+
+ collectPaths(unresolved, varFn);
+ collectResults(resolved, varFn);
+ icuSpecialData.ifPresent(s -> collectSpecials(s, varFn));
+
+ ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
+ out.orderValuesBy(natural());
+ for (RbPath rbPath : resultsByRbPath.keySet()) {
+ Set<Result> existingResults = resultsByRbPath.get(rbPath);
+ out.putAll(rbPath, existingResults);
+ for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
+ if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
+ out.put(rbPath, fallback);
+ }
}
}
+ return out.build();
+ }
- void collectResults(CldrData resolved, DynamicVars varFn) {
- ValueVisitor collectResults =
- v -> transformer.transform(v, varFn).stream()
- .filter(r -> validRbPaths.contains(r.getKey()))
- .forEach(r -> resultsByRbPath.put(r.getKey(), r));
- resolved.accept(DTD, collectResults);
- }
+ private void collectPaths(CldrData unresolved, DynamicVars varFn) {
+ ValueVisitor collectPaths =
+ v -> transformer.transform(v, varFn).forEach(this::collectResultPath);
+ unresolved.accept(DTD, collectPaths);
+ }
- private void collectSpecials(CldrData cldrData, DynamicVars varFn) {
- cldrData.accept(DTD, v ->
- transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r)));
+ private void collectResultPath(Result result) {
+ RbPath rbPath = result.getKey();
+ validRbPaths.add(rbPath);
+ if (rbPath.isAnonymous()) {
+ RbPath parent = rbPath.getParent();
+ checkState(!parent.isAnonymous(),
+ "anonymous paths should not be nested: %s", rbPath);
+ validRbPaths.add(parent);
}
}
- private LocaleMapper() {}
+ private void collectResults(CldrData resolved, DynamicVars varFn) {
+ ValueVisitor collectResults =
+ v -> transformer.transform(v, varFn).stream()
+ .filter(r -> validRbPaths.contains(r.getKey()))
+ .forEach(r -> resultsByRbPath.put(r.getKey(), r));
+ resolved.accept(DTD, collectResults);
+ }
+
+ private void collectSpecials(CldrData cldrData, DynamicVars varFn) {
+ cldrData.accept(DTD, v ->
+ transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r)));
+ }
}
* <p>This is currently driven by the {@code ldml2icu_supplemental.txt} configuration file via a
* {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation.
*/
-public final class SupplementalMapper {
+public final class SupplementalMapper extends AbstractPathValueMapper {
private static final RbPath RB_FIFO = RbPath.of("<FIFO>");
/**
* @param src the CLDR data supplier to process.
* @param transformer the transformer to match and transform each CLDR path/value pair.
* @param icuName the name for the generated IcuData.
- * @param includePaths a matcher to select the CLDR paths to be transformed.
+ * @param paths a matcher to select the CLDR paths to be transformed.
* @return An IcuData instance containing the specified subset of supplemental data with the
* given ICU name.
*/
// TODO: Improve external data splitting and remove need for a PathMatcher here.
public static IcuData process(
- CldrDataSupplier src, PathValueTransformer transformer, String icuName,
- PathMatcher includePaths) {
- ResultsCollector collector = new ResultsCollector(includePaths, transformer);
- // Write out the results into the IcuData class, preserving result grouping and expanding
- // path references as necessary.
- IcuData icuData = new IcuData(icuName, false);
- icuData.addResults(collector.getResults(src));
- return icuData;
+ CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
+
+ return new SupplementalMapper(src, transformer, icuName, paths).transform();
}
- private static final class ResultsCollector {
- private final PathMatcher pathMatcher;
- private final PathValueTransformer transformer;
+ private final CldrDataSupplier src;
+ private final PathMatcher paths;
+ private final PathValueTransformer transformer;
- // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
- // each key. The reason is that result comparison is not "consistent with equals", and
- // TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
- // method), and it does this even if using the add() method of the sorted set (this is in
- // fact in violation of the stated behaviour of Set#add).
- private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
- private int fifoCounter = 0;
+ // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
+ // each key. The reason is that result comparison is not "consistent with equals", and
+ // TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
+ // method), and it does this even if using the add() method of the sorted set (this is in
+ // fact in violation of the stated behaviour of Set#add).
+ private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
+ private int fifoCounter = 0;
- ResultsCollector(PathMatcher pathMatcher, PathValueTransformer transformer) {
- this.pathMatcher = checkNotNull(pathMatcher);
- this.transformer = checkNotNull(transformer);
- }
+ private SupplementalMapper(
+ CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
- private void visit(CldrValue value) {
- if (pathMatcher.matchesPrefixOf(value.getPath())) {
- for (Result r : transformer.transform(value)) {
- RbPath rbPath = r.getKey();
- if (rbPath.contains(RB_FIFO)) {
- // The fifo counter needs to be formatted with leading zeros for sorting.
- rbPath = rbPath.mapSegments(
- s -> s.equals("<FIFO>") ? String.format("<%04d>", fifoCounter) : s);
- }
- resultsByRbPath.put(rbPath, r);
- }
- fifoCounter++;
- }
- }
+ super(icuName, false);
+ this.src = checkNotNull(src);
+ this.paths = checkNotNull(paths);
+ this.transformer = checkNotNull(transformer);
+ }
- ImmutableListMultimap<RbPath, Result> getResults(CldrDataSupplier supplier) {
- // DTD and NESTED_GROUPING order differ because of how the magic <FIFO> label works (it
- // basically enforces "encounter order" onto things in unlabeled sequences, which matches
- // the old behaviour). If it wouldn't break anything, it might be worth moving to DTD order
- // to remove any lingering implicit dependencies on the CLDR data behaviour.
- CldrData supplementalData = supplier.getDataForType(CldrDataType.SUPPLEMENTAL);
- PathValueTransformer.DynamicVars varFn = p -> {
- CldrValue cldrValue = supplementalData.get(p);
- return cldrValue != null ? cldrValue.getValue() : null;
- };
+ @Override
+ ImmutableListMultimap<RbPath, Result> getResults() {
+ // DTD and NESTED_GROUPING order differ because of how the magic <FIFO> label works (it
+ // basically enforces "encounter order" onto things in unlabeled sequences, which matches
+ // the old behaviour). If it wouldn't break anything, it might be worth moving to DTD order
+ // to remove any lingering implicit dependencies on the CLDR data behaviour.
+ CldrData supplementalData = src.getDataForType(CldrDataType.SUPPLEMENTAL);
+ PathValueTransformer.DynamicVars varFn = p -> {
+ CldrValue cldrValue = supplementalData.get(p);
+ return cldrValue != null ? cldrValue.getValue() : null;
+ };
- supplementalData.accept(NESTED_GROUPING, this::visit);
+ supplementalData.accept(NESTED_GROUPING, this::visit);
- ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
- out.orderValuesBy(natural());
- for (RbPath rbPath : resultsByRbPath.keySet()) {
- Set<Result> existingResults = resultsByRbPath.get(rbPath);
- out.putAll(rbPath, existingResults);
- for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
- if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
- out.put(rbPath, fallback);
- }
+ ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
+ out.orderValuesBy(natural());
+ for (RbPath rbPath : resultsByRbPath.keySet()) {
+ Set<Result> existingResults = resultsByRbPath.get(rbPath);
+ out.putAll(rbPath, existingResults);
+ for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
+ if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
+ out.put(rbPath, fallback);
}
}
- return out.build();
}
+ return out.build();
}
- private SupplementalMapper() {}
+ private void visit(CldrValue value) {
+ if (paths.matchesPrefixOf(value.getPath())) {
+ for (Result r : transformer.transform(value)) {
+ RbPath rbPath = r.getKey();
+ if (rbPath.contains(RB_FIFO)) {
+ // The fifo counter needs to be formatted with leading zeros for sorting.
+ rbPath = rbPath.mapSegments(
+ s -> s.equals("<FIFO>") ? String.format("<%04d>", fifoCounter) : s);
+ }
+ resultsByRbPath.put(rbPath, r);
+ }
+ fifoCounter++;
+ }
+ }
}
import java.util.function.Function;
import org.unicode.cldr.api.AttributeKey;
+import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.ValueVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
+import com.google.common.annotations.VisibleForTesting;
import com.ibm.icu.text.Transliterator;
/**
* @return the IcuData instance to be written to a file.
*/
public static IcuData process(CldrDataSupplier src, Path ruleFileOutputDir) {
- RuleVisitor visitor = new RuleVisitor(p -> {
+ Function<Path, PrintWriter> fileWriterFn = p -> {
Path file = ruleFileOutputDir.resolve(p);
try {
return new PrintWriter(Files.newBufferedWriter(file, CREATE, TRUNCATE_EXISTING));
} catch (IOException e) {
throw new RuntimeException("error opening file: " + file, e);
}
- });
- src.getDataForType(SUPPLEMENTAL).accept(DTD, visitor);
+ };
+ CldrData cldrData = src.getDataForType(SUPPLEMENTAL);
+ return process(cldrData, fileWriterFn);
+ }
+
+ @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
+ static IcuData process(CldrData cldrData, Function<Path, PrintWriter> fileWriterFn) {
+ RuleVisitor visitor = new RuleVisitor(fileWriterFn);
+ cldrData.accept(DTD, visitor);
+ addSpecialCaseValues(visitor.icuData);
return visitor.icuData;
}
RuleVisitor(Function<Path, PrintWriter> outFn) {
this.outFn = checkNotNull(outFn);
icuData.setFileComment("File: root.txt");
-
- // I have _no_ idea what any of this is about, I'm just trying to mimic the original
- // (complex and undocumented) code in "ConvertTransforms.java".
- // TODO: Understand and document each of the cases below.
- icuData.add(RbPath.of("TransliteratorNamePattern"), "{0,choice,0#|1#{1}|2#{1}-{2}}");
- // Note that this quoting of path segments is almost certainly unnecessary. It matches
- // the old "ConvertTransforms" behaviour, but '%' is used elsewhere without quoting, so
- // it seems very likely that it's not needed here.
- // TODO: Once migration done, remove quotes here & check in RbPath for unwanted quotes.
- icuData.add(RbPath.of("\"%Translit%Hex\""), "%Translit%Hex");
- icuData.add(RbPath.of("\"%Translit%UnicodeName\""), "%Translit%UnicodeName");
- icuData.add(RbPath.of("\"%Translit%UnicodeChar\""), "%Translit%UnicodeChar");
- // Special case, where Latin is a no-op.
- icuData.add(RbPath.of("TransliterateLATIN"), RbValue.of("", ""));
- // Some hard-coded special case mappings.
- icuData.add(
- RB_TRANSLITERATOR_IDS.extendBy("Tone-Digit", "alias"),
- "Pinyin-NumericPinyin");
- icuData.add(
- RB_TRANSLITERATOR_IDS.extendBy("Digit-Tone", "alias"),
- "NumericPinyin-Pinyin");
}
@Override public void visit(CldrValue value) {
String status = visibility == Visibility.internal ? "internal" : "file";
Direction dir = TRANSFORM_DIRECTION.valueFrom(value, Direction.class);
+ // TODO: Consider checks for unused data (e.g. forward aliases in a backward rule).
if (dir != Direction.backward) {
String id = getId(source, target, variant);
TRANSFORM_ALIAS.listOfValuesFrom(value)
}
}
+ private static void addSpecialCaseValues(IcuData icuData) {
+ // I have _no_ idea what any of this is about, I'm just trying to mimic the original
+ // (complex and undocumented) code in "ConvertTransforms.java".
+ // TODO: Understand and document each of the cases below.
+ icuData.add(RbPath.of("TransliteratorNamePattern"), "{0,choice,0#|1#{1}|2#{1}-{2}}");
+ // Note that this quoting of path segments is almost certainly unnecessary. It matches
+ // the old "ConvertTransforms" behaviour, but '%' is used elsewhere without quoting, so
+ // it seems very likely that it's not needed here.
+ // TODO: Once migration done, remove quotes here & check in RbPath for unwanted quotes.
+ icuData.add(RbPath.of("\"%Translit%Hex\""), "%Translit%Hex");
+ icuData.add(RbPath.of("\"%Translit%UnicodeName\""), "%Translit%UnicodeName");
+ icuData.add(RbPath.of("\"%Translit%UnicodeChar\""), "%Translit%UnicodeChar");
+ // Special case, where Latin is a no-op.
+ icuData.add(RbPath.of("TransliterateLATIN"), RbValue.of("", ""));
+ // Some hard-coded special case mappings.
+ icuData.add(
+ RB_TRANSLITERATOR_IDS.extendBy("Tone-Digit", "alias"),
+ "Pinyin-NumericPinyin");
+ icuData.add(
+ RB_TRANSLITERATOR_IDS.extendBy("Digit-Tone", "alias"),
+ "NumericPinyin-Pinyin");
+ }
+
+ // It is important to note that this ID contains a '/' but this is a literal in the path
+ // element and does not add an extra layer in the resource bundle path (the use of '/' to
+ // separate path elements is a purely internal detail for things like LocaleMapper and the
+ // regex-based configuration).
private static String getId(String from, String to, Optional<String> variant) {
String baseId = from + "-" + to;
return variant.map(v -> baseId + "/" + v).orElse(baseId);
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
+
+import java.util.Arrays;
+import java.util.Set;
+
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+public class IcuDataTest {
+ @Test
+ public void testSimple() {
+ IcuData icuData = new IcuData("icu-name", true);
+ assertThat(icuData.getName()).isEqualTo("icu-name");
+ assertThat(icuData.hasFallback()).isTrue();
+
+ IcuData otherData = new IcuData("other-name", false);
+ assertThat(otherData.getName()).isEqualTo("other-name");
+ assertThat(otherData.hasFallback()).isFalse();
+ }
+
+ @Test
+ public void testFileComment() {
+ IcuData icuData = new IcuData("icu-name", false);
+ assertThat(icuData.getFileComment()).isEmpty();
+
+ icuData.setFileComment("Hello", "World");
+ assertThat(icuData.getFileComment()).containsExactly("Hello", "World").inOrder();
+
+ icuData.setFileComment(Arrays.asList("Foo", "Bar"));
+ assertThat(icuData.getFileComment()).containsExactly("Foo", "Bar").inOrder();
+
+ icuData.setFileComment(ImmutableList.of());
+ assertThat(icuData.getFileComment()).isEmpty();
+ }
+
+ @Test
+ public void testSetVersion() {
+ IcuData icuData = new IcuData("icu-name", false);
+ icuData.setVersion("VERSION");
+
+ RbPath rbPath = RbPath.of("Version");
+ assertThat(icuData.getPaths()).containsExactly(rbPath);
+ assertThat(icuData.get(rbPath)).isEqualTo(ImmutableList.of(RbValue.of("VERSION")));
+ }
+
+ @Test
+ public void testGetPaths() {
+ IcuData icuData = new IcuData("icu-name", false);
+ // getPaths() is a live view on the data, not a snapshot.
+ Set<RbPath> paths = icuData.getPaths();
+ assertThat(paths).isEmpty();
+
+ RbPath fooBar = RbPath.of("foo", "bar");
+ icuData.add(fooBar, "value1");
+ assertThat(icuData.get(fooBar)).contains(RbValue.of("value1"));
+ assertThat(paths).containsExactly(fooBar);
+ assertThat(paths).hasSize(1);
+
+ RbPath fooBaz = RbPath.of("foo", "baz");
+ icuData.add(fooBaz, "value2");
+ assertThat(icuData.get(fooBaz)).contains(RbValue.of("value2"));
+ assertThat(paths).containsExactly(fooBar, fooBaz).inOrder();
+ assertThat(paths).hasSize(2);
+
+ // Paths is not modifiable.
+ assertThrows(UnsupportedOperationException.class, () -> paths.add(RbPath.of("nope")));
+ assertThrows(UnsupportedOperationException.class, () -> paths.remove(fooBar));
+ assertThrows(UnsupportedOperationException.class, paths::clear);
+ }
+
+ @Test
+ public void addMultiple() {
+ IcuData icuData = new IcuData("icu-name", false);
+ RbPath fooBar = RbPath.of("foo", "bar");
+
+ RbValue value1 = RbValue.of("the", "first", "value");
+ RbValue value2 = RbValue.of("another-value");
+
+ icuData.add(fooBar, value1);
+ assertThat(icuData.get(fooBar)).containsExactly(value1);
+
+ icuData.add(fooBar, "another-value");
+ assertThat(icuData.get(fooBar)).containsExactly(value1, value2).inOrder();
+
+ // It's just a list: values keep insertion order, with no sorting and no deduplication.
+ icuData.add(fooBar, Arrays.asList(value2, value1));
+ assertThat(icuData.get(fooBar)).containsExactly(value1, value2, value2, value1).inOrder();
+ }
+
+ @Test
+ public void replace() {
+ IcuData icuData = new IcuData("icu-name", false);
+ RbPath fooBar = RbPath.of("foo", "bar");
+
+ RbValue value1 = RbValue.of("the", "first", "value");
+ RbValue value2 = RbValue.of("another-value");
+
+ icuData.replace(fooBar, value1);
+ assertThat(icuData.get(fooBar)).containsExactly(value1);
+
+ icuData.replace(fooBar, "another-value");
+ assertThat(icuData.get(fooBar)).containsExactly(value2);
+ }
+}
\ No newline at end of file
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
-import static org.junit.Assert.fail;
import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
}
@Test
- public void testAnyOf() {
- PathMatcher monthMatch = PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]");
- PathMatcher dayMatch = PathMatcher.of("dayWidth[@type=\"narrow\"]/day[@type=*]");
- PathMatcher combined = PathMatcher.anyOf(monthMatch, dayMatch);
+ public void testAnyOf_match() {
+ PathMatcher narrowMonth =
+ PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/months"
+ + "/monthContext[@type=\"format\"]/monthWidth[@type=\"narrow\"]/month[@type=*]");
+ PathMatcher narrowDay =
+ PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/days"
+ + "/dayContext[@type=\"format\"]/dayWidth[@type=\"narrow\"]/day[@type=*]");
+ PathMatcher prefix = PathMatcher.anyOf(narrowMonth, narrowDay);
+
+ assertThat(prefix.matches(monthInfo("gregorian", "format", "narrow", 1))).isTrue();
+ assertThat(prefix.matches(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue();
+
+ assertThat(prefix.matches(monthInfo("hindu", "format", "wide", 1))).isFalse();
+ assertThat(prefix.matches(dayInfo("hindu", "format", "wide", "mon"))).isFalse();
+ }
+
+ @Test
+ public void testAnyOf_suffix() {
+ PathMatcher monthSuffix = PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]");
+ PathMatcher daySuffix = PathMatcher.of("dayWidth[@type=\"narrow\"]/day[@type=*]");
+ PathMatcher suffix = PathMatcher.anyOf(monthSuffix, daySuffix);
- assertThat(combined.matchesSuffixOf(monthInfo("generic", "format", "narrow", 1))).isTrue();
- assertThat(combined.matchesSuffixOf(dayInfo("generic", "format", "narrow", "sun"))).isTrue();
+ assertThat(suffix.matchesSuffixOf(monthInfo("generic", "format", "narrow", 1))).isTrue();
+ assertThat(suffix.matchesSuffixOf(dayInfo("generic", "format", "narrow", "sun"))).isTrue();
- assertThat(combined.matchesSuffixOf(monthInfo("generic", "format", "wide", 1))).isFalse();
- assertThat(combined.matchesSuffixOf(dayInfo("generic", "format", "wide", "mon"))).isFalse();
+ assertThat(suffix.matchesSuffixOf(monthInfo("generic", "format", "wide", 1))).isFalse();
+ assertThat(suffix.matchesSuffixOf(dayInfo("generic", "format", "wide", "mon"))).isFalse();
+ }
+
+ @Test
+ public void testAnyOf_prefix() {
+ PathMatcher monthPrefix =
+ PathMatcher.of("ldml/dates/calendars/calendar[@type=\"gregorian\"]/months");
+ PathMatcher dayPrefix =
+ PathMatcher.of("ldml/dates/calendars/calendar[@type=\"buddhist\"]/days");
+ PathMatcher prefix = PathMatcher.anyOf(monthPrefix, dayPrefix);
+
+ assertThat(prefix.matchesPrefixOf(monthInfo("gregorian", "format", "narrow", 1))).isTrue();
+ assertThat(prefix.matchesPrefixOf(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue();
+
+ assertThat(prefix.matchesPrefixOf(monthInfo("hindu", "format", "wide", 1))).isFalse();
+ assertThat(prefix.matchesPrefixOf(dayInfo("hindu", "format", "wide", "mon"))).isFalse();
}
@Test
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
-import static org.unicode.icu.tool.cldrtoicu.testing.RbPathSubjectFactory.assertThat;
-import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import static com.google.common.truth.Truth.assertThat;
-import static com.google.common.truth.Truth8.assertThat;
+import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
+import static org.unicode.icu.tool.cldrtoicu.testing.RbPathSubjectFactory.assertThat;
import org.junit.Test;
import org.junit.runner.RunWith;
public class RbPathTest {
@Test
public void testEmpty() {
- assertThat(RbPath.empty()).hasSegments();
- assertThat(RbPath.empty()).hasLength(0);
+ assertThat(RbPath.of()).hasSegments();
+ assertThat(RbPath.of()).hasLength(0);
}
@Test
public void testParseVsOf() {
assertThat(RbPath.of("foo", "bar")).hasSegments("foo", "bar");
- assertThat(RbPath.of("foo/bar")).hasSegments("foo/bar");
- assertThat(RbPath.parse("foo/bar")).hasSegments("foo", "bar");
+ assertThat(RbPath.parse("foo/bar/baz")).hasSegments("foo", "bar", "baz");
+ // Allow and ignore leading '/' for legacy reasons.
+ assertThat(RbPath.parse("/foo/bar/baz")).hasSegments("foo", "bar", "baz");
+ assertThat(RbPath.of("foo/bar", "baz")).hasSegments("foo/bar", "baz");
}
@Test
public void testBadArgs() {
assertBadPath("", "empty path string");
- assertBadPath("foo//bar", "empty path segment");
+ assertBadPath("foo//bar", "path segments must not be empty");
assertBadPath("foo/<bar/baz", "mismatched quoting");
assertBadPath("foo/\"bar", "mismatched quoting");
assertBadPath("foo/\"bar\"baz\"", "invalid character");
assertBadPath("foo/bar baz", "invalid character");
}
+ @Test
+ public void testIteration() {
+ RbPath path = RbPath.parse("foo/bar/baz");
+ assertThat(path.length()).isEqualTo(3);
+ assertThat(path.getSegment(0)).isEqualTo("foo");
+ assertThat(path.getSegment(1)).isEqualTo("bar");
+ assertThat(path.getSegment(2)).isEqualTo("baz");
+ }
+
+ @Test
+ public void testExtendBy() {
+ assertThat(RbPath.of("foo").extendBy("bar")).hasSegments("foo", "bar");
+ assertThat(RbPath.of("foo").extendBy("bar/baz")).hasSegments("foo", "bar/baz");
+ assertThat(RbPath.of("foo").extendBy("bar/baz")).isNotEqualTo(RbPath.parse("foo/bar/baz"));
+ }
+
+ @Test
+ public void testStartsWith() {
+ RbPath p = RbPath.of("foo", "bar", "baz");
+ assertThat(p).startsWith(p).isTrue();
+ assertThat(p).startsWith(RbPath.of()).isTrue();
+
+ assertThat(p).startsWith(p.getParent()).isTrue();
+ assertThat(p).startsWith(RbPath.of("foo")).isTrue();
+ assertThat(p).startsWith(RbPath.of("bar")).isFalse();
+ assertThat(p).startsWith(RbPath.of("foo/bar/baz")).isFalse();
+ }
+
+ @Test
+ public void testEndsWith() {
+ RbPath p = RbPath.of("foo", "bar", "baz");
+ assertThat(p).endsWith(p).isTrue();
+ assertThat(p).endsWith(RbPath.of()).isTrue();
+
+ assertThat(p).endsWith(RbPath.of("bar", "baz")).isTrue();
+ assertThat(p).endsWith(RbPath.of("bar")).isFalse();
+ assertThat(p).endsWith(RbPath.of("foo/bar/baz")).isFalse();
+ }
+
+ @Test
+ public void testContains() {
+ RbPath p = RbPath.of("foo", "bar", "baz");
+ assertThat(p).contains(p).isTrue();
+ assertThat(p).contains(RbPath.of()).isTrue();
+
+ assertThat(p).contains(RbPath.of("bar", "baz")).isTrue();
+ assertThat(p).contains(RbPath.of("foo", "bar")).isTrue();
+ assertThat(p).contains(RbPath.of("foo/bar/baz")).isFalse();
+ }
+
+ @Test
+ public void testCommonPrefixLength() {
+ RbPath p = RbPath.of("foo", "bar", "baz");
+ RbPath q = RbPath.of("foo", "bar", "quux");
+ assertThat(RbPath.getCommonPrefixLength(p, q)).isEqualTo(2);
+ assertThat(RbPath.getCommonPrefixLength(p, p)).isEqualTo(3);
+ assertThat(RbPath.getCommonPrefixLength(p, RbPath.of())).isEqualTo(0);
+ // Not a prefix even though it's a suffix of the path.
+ assertThat(RbPath.getCommonPrefixLength(p, RbPath.of("bar", "baz"))).isEqualTo(0);
+ }
+
private static void assertBadPath(String path, String errorSnippet) {
IllegalArgumentException e =
assertThrows(IllegalArgumentException.class, () -> RbPath.parse(path));
.that(regressionData.maximize(id).orElse(null))
.isEqualTo(likelySubtags.maximize(id));
}
-
- // ars currently a special case since it's in the ICU data as an alias, but not in the CLDR
- // data at all. This while it's a structurally valid language code, it cannot be maximized.
- assertThat(regressionData.maximize("ars")).isEmpty();
}
@Test
try {
ltc.transform(id);
} catch (NullPointerException e) {
- System.out.println("--> " + id);
+ // Occurs for sh_CS and sh_YU.
continue;
}
// Need to maximize to work around:
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.Arrays;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+
+@RunWith(JUnit4.class)
+public class Bcp47MapperTest {
+ private static final ImmutableMap<RbPath, RbValue> EXPECTED_ALIAS_MAP = ImmutableMap.of(
+ RbPath.of("bcpTypeAlias", "tz:alias"),
+ RbValue.of("/ICUDATA/timezoneTypes/bcpTypeAlias/tz"),
+ RbPath.of("typeAlias", "timezone:alias"),
+ RbValue.of("/ICUDATA/timezoneTypes/typeAlias/timezone"),
+ RbPath.of("typeMap", "timezone:alias"),
+ RbValue.of("/ICUDATA/timezoneTypes/typeMap/timezone"));
+
+ @Test
+ public void testSimple() {
+ CldrData cldrData = cldrData(
+ simpleType("foo", "one"),
+ simpleType("foo", "two"),
+ simpleType("foo", "three"),
+ simpleType("bar", "four"),
+ simpleType("bar", "five"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+
+ IcuData bcp47Data = icuData.get(0);
+ assertThat(bcp47Data).hasName("keyTypeData");
+ assertThat(bcp47Data).hasFallback(false);
+
+ // Check the number of paths and verify the special injected values.
+ assertThat(bcp47Data).getPaths().hasSize(7 + EXPECTED_ALIAS_MAP.size());
+ EXPECTED_ALIAS_MAP.forEach((p, v) -> assertThat(bcp47Data).hasValuesFor(p, v));
+
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/foo");
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/bar");
+
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/foo/one");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/foo/two");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/foo/three");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/bar/four");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/bar/five");
+
+ IcuData tzData = icuData.get(1);
+ assertThat(tzData).hasName("timezoneTypes");
+ assertThat(tzData).hasFallback(false);
+ assertThat(tzData).getPaths().isEmpty();
+ }
+
+ @Test
+ public void testSimpleTimezone() {
+ CldrData cldrData = cldrData(
+ simpleType("tz", "one"),
+ simpleType("tz", "two"),
+ simpleType("tz", "three"),
+ simpleType("bar", "four"),
+ simpleType("bar", "five"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+
+ IcuData bcp47Data = icuData.get(0);
+ assertThat(bcp47Data).hasName("keyTypeData");
+ assertThat(bcp47Data).hasFallback(false);
+
+ // Check the number of paths and verify the special injected values.
+ assertThat(bcp47Data).getPaths().hasSize(4 + EXPECTED_ALIAS_MAP.size());
+ EXPECTED_ALIAS_MAP.forEach((p, v) -> assertThat(bcp47Data).hasValuesFor(p, v));
+
+ // The key-map is only ever in the main bcp47 data and contains the timezone key.
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/tz");
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/bar");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/bar/four");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/bar/five");
+
+ IcuData tzData = icuData.get(1);
+ assertThat(tzData).hasName("timezoneTypes");
+ assertThat(tzData).hasFallback(false);
+
+ // Only the type-map paths/values are split into the timezone data.
+ assertThat(tzData).getPaths().hasSize(3);
+ assertThat(tzData).hasEmptyValue("/typeMap/tz/one");
+ assertThat(tzData).hasEmptyValue("/typeMap/tz/two");
+ assertThat(tzData).hasEmptyValue("/typeMap/tz/three");
+ }
+
+ @Test
+ public void testKeyAliases() {
+ CldrData cldrData = cldrData(
+ alias("key", "ALIAS", "type"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+ IcuData bcp47Data = icuData.get(0);
+
+ // Key aliases are lower-cased (though it's not entirely obvious as to why).
+ assertThat(bcp47Data).hasValuesFor("/keyMap/alias", "key");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/alias/type");
+ }
+
+ @Test
+ public void testTypeAliases_single() {
+ CldrData cldrData = cldrData(
+ alias("key", null, "type", "main"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+ IcuData bcp47Data = icuData.get(0);
+
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/key");
+ assertThat(bcp47Data).hasValuesFor("/typeMap/key/main", "type");
+ }
+
+ @Test
+ public void testTypeAliases_multiple() {
+ CldrData cldrData = cldrData(
+ alias("key", null, "type", "main", "alias1", "alias2", "alias3"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+ IcuData bcp47Data = icuData.get(0);
+
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/key");
+ assertThat(bcp47Data).hasValuesFor("/typeMap/key/main", "type");
+
+ // Only aliases after the first (main) one go in the typeAlias set.
+ assertThat(bcp47Data).getPaths().doesNotContain(RbPath.parse("typeAlias/key/main"));
+ assertThat(bcp47Data).hasValuesFor("/typeAlias/key/alias1", "main");
+ assertThat(bcp47Data).hasValuesFor("/typeAlias/key/alias2", "main");
+ assertThat(bcp47Data).hasValuesFor("/typeAlias/key/alias3", "main");
+ }
+
+ @Test
+ public void testKeyAndTypeAliases() {
+ CldrData cldrData = cldrData(
+ alias("key", "key-alias", "type", "main", "type-alias"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+ IcuData bcp47Data = icuData.get(0);
+
+ assertThat(bcp47Data).hasValuesFor("/keyMap/key-alias", "key");
+ assertThat(bcp47Data).hasValuesFor("/typeMap/key-alias/main", "type");
+ assertThat(bcp47Data).hasValuesFor("/typeAlias/key-alias/type-alias", "main");
+ }
+
+ @Test
+ public void testPreferredTypeName() {
+ CldrData cldrData = cldrData(
+ deprecated("deprecated-key", true, "type", false, "/preferred/path1"),
+ deprecated("key", false, "deprecated-type", true, "/preferred/path2"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+ IcuData bcp47Data = icuData.get(0);
+
+ assertThat(bcp47Data).hasValuesFor("/bcpTypeAlias/deprecated-key/type", "/preferred/path1");
+ assertThat(bcp47Data).hasValuesFor("/bcpTypeAlias/key/deprecated-type", "/preferred/path2");
+ }
+
+ @Test
+ public void testInfoAttributes() {
+ CldrData cldrData = cldrData(
+ // Deprecated without a replacement.
+ deprecated("deprecated-key", true, "type", false, null),
+ deprecated("key", false, "deprecated-type", true, null),
+ valueType("info-key", "info-type", "value-type"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+ IcuData bcp47Data = icuData.get(0);
+
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/deprecated-key");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/deprecated-key/type");
+ assertThat(bcp47Data).hasValuesFor("/keyInfo/deprecated/deprecated-key", "true");
+
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/key");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/key/deprecated-type");
+ assertThat(bcp47Data).hasValuesFor("/typeInfo/deprecated/key/deprecated-type", "true");
+
+ assertThat(bcp47Data).hasEmptyValue("/keyMap/info-key");
+ assertThat(bcp47Data).hasEmptyValue("/typeMap/info-key/info-type");
+ assertThat(bcp47Data).hasValuesFor("/keyInfo/valueType/info-key", "value-type");
+ }
+
+ // This will hopefully one day be the responsibility of the IcuTextWriter.
+ @Test
+ public void testTimezonePathQuotingForAliases() {
+ CldrData cldrData = cldrData(
+ alias("tz", null, "escaped", "foo/bar", "hello/world"),
+ alias("tz", null, "unescaped", "foo_bar", "hello_world"));
+
+ ImmutableList<IcuData> icuData = Bcp47Mapper.process(cldrData);
+ IcuData tzData = icuData.get(1);
+
+ // For "tz", both the type-map and type-alias paths/values are split into the timezone data.
+ assertThat(tzData).getPaths().hasSize(4);
+ assertThat(tzData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped");
+ assertThat(tzData).hasValuesFor("/typeMap/tz/foo_bar", "unescaped");
+
+ // TODO: Check this is correct! Having foo/bar as the value rather than foo:bar seems wrong.
+ assertThat(tzData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar");
+ assertThat(tzData).hasValuesFor("/typeAlias/tz/hello_world", "foo_bar");
+ }
+
+ private static CldrData cldrData(CldrValue... values) {
+ return CldrDataSupplier.forValues(Arrays.asList(values));
+ }
+
+ private static CldrValue simpleType(String keyName, String typeName) {
+ StringBuilder cldrPath = new StringBuilder("//ldmlBCP47/keyword");
+ cldrPath.append("/key");
+ appendAttribute(cldrPath, "name", keyName);
+ cldrPath.append("/type");
+ appendAttribute(cldrPath, "name", typeName);
+ return CldrValue.parseValue(cldrPath.toString(), "");
+ }
+
+ private static CldrValue alias(
+ String keyName, String keyAlias, String typeName, String... typeAliases) {
+
+ StringBuilder cldrPath = new StringBuilder("//ldmlBCP47/keyword");
+ cldrPath.append("/key");
+ appendAttribute(cldrPath, "name", keyName);
+ if (keyAlias != null) {
+ appendAttribute(cldrPath, "alias", keyAlias);
+ }
+ cldrPath.append("/type");
+ appendAttribute(cldrPath, "name", typeName);
+ if (typeAliases.length > 0) {
+ appendAttribute(cldrPath, "alias", Joiner.on(" ").join(typeAliases));
+ }
+ return CldrValue.parseValue(cldrPath.toString(), "");
+ }
+
+ private static CldrValue deprecated(
+ String keyName,
+ boolean keyDeprecated,
+ String typeName,
+ boolean typeDeprecated,
+ String preferred) {
+
+ StringBuilder cldrPath = new StringBuilder("//ldmlBCP47/keyword");
+ cldrPath.append("/key");
+ appendAttribute(cldrPath, "name", keyName);
+ if (keyDeprecated) {
+ appendAttribute(cldrPath, "deprecated", keyDeprecated);
+ }
+ cldrPath.append("/type");
+ appendAttribute(cldrPath, "name", typeName);
+ if (preferred != null) {
+ appendAttribute(cldrPath, "preferred", preferred);
+ }
+ if (typeDeprecated) {
+ appendAttribute(cldrPath, "deprecated", typeDeprecated);
+ }
+ return CldrValue.parseValue(cldrPath.toString(), "");
+ }
+
+ private static CldrValue valueType(String keyName, String typeName, String valueType) {
+ StringBuilder cldrPath = new StringBuilder("//ldmlBCP47/keyword");
+ cldrPath.append("/key");
+ appendAttribute(cldrPath, "name", keyName);
+ appendAttribute(cldrPath, "valueType", valueType);
+ cldrPath.append("/type");
+ appendAttribute(cldrPath, "name", typeName);
+ return CldrValue.parseValue(cldrPath.toString(), "");
+ }
+
+ private static void appendAttribute(StringBuilder out, String k, Object v) {
+ out.append(String.format("[@%s=\"%s\"]", k, v));
+ }
+}
\ No newline at end of file
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.GRAPHEME;
+import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.SENTENCE;
+import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.LINE_BREAK;
+import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.SENTENCE_BREAK;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.Arrays;
+import java.util.Optional;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+
+import com.google.common.base.Ascii;
+import com.google.common.base.CaseFormat;
+
+@RunWith(JUnit4.class)
+public class BreakIteratorMapperTest {
+ enum SegmentationType {
+ GRAPHEME_CLUSTER_BREAK, LINE_BREAK, SENTENCE_BREAK, WORD_BREAK;
+
+ @Override public String toString() {
+ return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name());
+ }
+ }
+
+ enum BoundaryType {
+ GRAPHEME, WORD, LINE, SENTENCE, TITLE;
+
+ // E.g. "icu:grapheme"
+ @Override public String toString() {
+ return "icu:" + Ascii.toLowerCase(name());
+ }
+ }
+
+ @Test
+ public void testSingleSuppression() {
+ int idx = 0;
+ CldrData cldrData = cldrData(
+ suppression(SENTENCE_BREAK, "L.P.", ++idx),
+ suppression(SENTENCE_BREAK, "Alt.", ++idx),
+ suppression(SENTENCE_BREAK, "Approx.", ++idx));
+
+ IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty());
+
+ assertThat(icuData).getPaths().hasSize(1);
+ assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
+ RbValue.of("L.P."),
+ RbValue.of("Alt."),
+ RbValue.of("Approx."));
+ }
+
+ // In real data, suppression is only a SentenceBreak thing, but we might as well test it for
+ // other types.
+ @Test
+ public void testMultipleSupressionTypes() {
+ int idx = 0;
+ CldrData cldrData = cldrData(
+ suppression(SENTENCE_BREAK, "L.P.", ++idx),
+ suppression(SENTENCE_BREAK, "Alt.", ++idx),
+ suppression(SENTENCE_BREAK, "Approx.", ++idx),
+ suppression(LINE_BREAK, "Foo", ++idx),
+ suppression(LINE_BREAK, "Bar", ++idx),
+ suppression(LINE_BREAK, "Baz", ++idx));
+
+ IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty());
+
+ assertThat(icuData).getPaths().hasSize(2);
+ assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
+ RbValue.of("L.P."),
+ RbValue.of("Alt."),
+ RbValue.of("Approx."));
+ assertThat(icuData).hasValuesFor("/exceptions/LineBreak:array",
+ RbValue.of("Foo"),
+ RbValue.of("Bar"),
+ RbValue.of("Baz"));
+ }
+
+ @Test
+ public void testSpecials_dictionary() {
+ CldrData specials = cldrData(
+ dictionary("foo", "<foo deps>"),
+ dictionary("bar", "<bar deps>"));
+
+ IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials));
+
+ assertThat(icuData).getPaths().hasSize(2);
+ assertThat(icuData).hasValuesFor("/dictionaries/foo:process(dependency)", "<foo deps>");
+ assertThat(icuData).hasValuesFor("/dictionaries/bar:process(dependency)", "<bar deps>");
+ }
+
+ @Test
+ public void testSpecials_boundaries() {
+ CldrData specials = cldrData(
+ boundaries(GRAPHEME, "<grapheme deps>", null),
+ boundaries(SENTENCE, "<sentence deps>", "altName"));
+
+ IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials));
+
+ assertThat(icuData).getPaths().hasSize(2);
+ assertThat(icuData)
+ .hasValuesFor("/boundaries/grapheme:process(dependency)", "<grapheme deps>");
+ assertThat(icuData)
+ .hasValuesFor("/boundaries/sentence_altName:process(dependency)", "<sentence deps>");
+ }
+
+ private static CldrData cldrData(CldrValue... values) {
+ return CldrDataSupplier.forValues(Arrays.asList(values));
+ }
+
+ private static CldrValue suppression(SegmentationType type, String value, int index) {
+ StringBuilder cldrPath = new StringBuilder("//ldml/segmentations");
+ appendAttribute(cldrPath.append("/segmentation"), "type", type);
+ cldrPath.append("/suppressions[@type=\"standard\"]");
+ // Suppression is an ordered element, so needs a sort index.
+ cldrPath.append("/suppression#").append(index);
+ return CldrValue.parseValue(cldrPath.toString(), value);
+ }
+
+ private static CldrValue dictionary(String type, String dependency) {
+ StringBuilder cldrPath = new StringBuilder("//ldml/special/icu:breakIteratorData");
+ cldrPath.append("/icu:dictionaries/icu:dictionary");
+ appendAttribute(cldrPath, "type", type);
+ appendAttribute(cldrPath, "icu:dependency", dependency);
+ return CldrValue.parseValue(cldrPath.toString(), "");
+ }
+
+ private static CldrValue boundaries(BoundaryType type, String dependency, String alt) {
+ StringBuilder cldrPath = new StringBuilder("//ldml/special/icu:breakIteratorData");
+ cldrPath.append("/icu:boundaries/").append(type);
+ appendAttribute(cldrPath, "icu:dependency", dependency);
+ if (alt != null) {
+ appendAttribute(cldrPath, "alt", alt);
+ }
+ return CldrValue.parseValue(cldrPath.toString(), "");
+ }
+
+ private static void appendAttribute(StringBuilder out, String k, Object v) {
+ out.append(String.format("[@%s=\"%s\"]", k, v));
+ }
+}
\ No newline at end of file
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.Arrays;
+import java.util.Optional;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+
+import com.google.common.base.Joiner;
+
+@RunWith(JUnit4.class)
+public class CollationMapperTest {
+ @Test
+ public void testEmpty() {
+ IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.empty());
+ assertThat(icuData).hasName("xx");
+ assertThat(icuData).hasFallback(true);
+ assertThat(icuData).getPaths().isEmpty();
+
+ // Root gets a couple of special paths added to it due to the need to work around a CLDR
+ // data bug.
+ IcuData rootData = CollationMapper.process("root", cldrData(), Optional.empty());
+ assertThat(rootData).hasName("root");
+ assertThat(rootData).hasFallback(true);
+ assertThat(rootData).getPaths().hasSize(2);
+ assertThat(rootData).hasValuesFor("/collations/standard/Version", getCldrVersionString());
+ assertThat(rootData).hasEmptyValue("/collations/standard/Sequence");
+ }
+
+ @Test
+ public void testDefault() {
+ CldrData cldrData =
+ cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value"));
+
+ IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
+ assertThat(icuData).getPaths().hasSize(1);
+ assertThat(icuData).hasValuesFor("/collations/default", "any value");
+ }
+
+ // This tests legacy behaviour which mimics the original converter code. There's no promise
+ // that it's semantically correct though.
+    @Test
+    public void testLastAltRuleOverridesExisting() {
+        // Paths are processed in DTD order, in which the path with no "alt" attribute comes
+        // after everything else; nevertheless the expected result is the second "alt" rule,
+        // which overwrites the first one. It's not even clear there should ever be two alt
+        // paths, or what the paths mean (the original code seems to suggest it's looking for
+        // the "short" alternate form, but the "alt" attribute can have more than the value
+        // "short"...)
+        CldrValue firstAlt = collationRule("foo", "alt1", "First alt rule");
+        CldrValue secondAlt = collationRule("foo", "alt2", "Second alt rule");
+        CldrValue noAlt = collationRule("foo", null, "First rule");
+        CldrData input = cldrData(firstAlt, secondAlt, noAlt);
+
+        IcuData mapped = CollationMapper.process("xx", input, Optional.empty());
+        assertThat(mapped).getPaths().hasSize(2);
+        String cldrVersion = getCldrVersionString();
+        assertThat(mapped).hasValuesFor("/collations/foo/Version", cldrVersion);
+        assertThat(mapped).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
+    }
+
+    @Test
+    public void testCommentAndWhitespaceStripping() {
+        // Of the six input lines only the two that are neither comments nor empty are expected
+        // in the output, and they are expected without their surrounding spaces.
+        CldrValue rule = collationRule("foo", null,
+            "# Comments are stripped",
+            "",
+            " # As are empty lines and leading/trailing spaces",
+            " Here is a value ",
+            "# And more comments to be stripped",
+            "And another value");
+        CldrData input = cldrData(rule);
+
+        IcuData mapped = CollationMapper.process("xx", input, Optional.empty());
+        assertThat(mapped).hasValuesFor("/collations/foo/Sequence",
+            "Here is a value",
+            "And another value");
+    }
+
+ // Just in case anything weird happens with non-BMP char sequences:
+ // <collation type='emoji'>
+ // <cr><![CDATA[
+ // # START AUTOGENERATED EMOJI ORDER
+ // & [last primary ignorable]<<*🦰🦱🦳🦲🏻🏼🏽🏾🏿
+ // & [before 1]\uFDD1€
+ // <*😀😃😄😁😆😅🤣😂🙂🙃😉😊😇
+ // <*🥰😍🤩😘😗☺😚😙
+ // <*😋😛😜🤪😝🤑
+ // ...
+    @Test
+    public void testEmoji() {
+        // Raw rule lines: a leading comment line followed by five rule lines, each of which
+        // starts with a space and contains surrogate pairs (non-BMP characters).
+        String[] inputLines = {
+            " # START AUTOGENERATED EMOJI ORDER",
+            " & [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
+                + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
+            " & [before 1]\uFDD1€",
+            " <*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
+                + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
+                + "\uD83D\uDE07",
+            " <*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17☺\uD83D\uDE1A"
+                + "\uD83D\uDE19",
+            " <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11",
+        };
+        // Expected output: the comment line is gone and the leading space of each rule line is
+        // removed; the characters themselves are unchanged.
+        String[] expectedLines = {
+            "& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
+                + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
+            "& [before 1]\uFDD1€",
+            "<*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
+                + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
+                + "\uD83D\uDE07",
+            "<*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17☺\uD83D\uDE1A"
+                + "\uD83D\uDE19",
+            "<*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11",
+        };
+        CldrData input = cldrData(collationRule("emoji", null, inputLines));
+
+        IcuData mapped = CollationMapper.process("xx", input, Optional.empty());
+
+        assertThat(mapped).getPaths().hasSize(2);
+        assertThat(mapped).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
+        assertThat(mapped).hasValuesFor("/collations/emoji/Sequence", expectedLines);
+    }
+
+    @Test
+    public void testSpecials() {
+        // The special paths carry their payload in an attribute; the element value is empty.
+        CldrValue ucaRules = CldrValue.parseValue(
+            "//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", "");
+        CldrValue depends = CldrValue.parseValue(
+            "//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", "");
+        CldrData specials = cldrData(ucaRules, depends);
+
+        // The main CLDR data is empty, so both expected paths come from the specials alone.
+        IcuData mapped = CollationMapper.process("xx", cldrData(), Optional.of(specials));
+        assertThat(mapped).getPaths().hasSize(2);
+        assertThat(mapped).hasValuesFor("UCARules:process(uca_rules)", "special rule");
+        assertThat(mapped).hasValuesFor("depends:process(dependency)", "special deps");
+    }
+
+    /** Builds a {@link CldrData} instance from the given values via the data supplier. */
+    private static CldrData cldrData(CldrValue... values) {
+        return CldrDataSupplier.forValues(
+            Arrays.asList(values));
+    }
+
+    /**
+     * Creates a collation rule value at the path
+     * {@code //ldml/collations/collation[@type="..."]/cr}, adding an "alt" attribute to the
+     * final element only when {@code alt} is non-null.
+     *
+     * @param type the value of the collation "type" attribute.
+     * @param alt the value of the optional "alt" attribute, or null to omit it.
+     * @param lines the lines of the rule, which are joined with newlines to form the value.
+     */
+    private static CldrValue collationRule(String type, String alt, String... lines) {
+        StringBuilder path = new StringBuilder("//ldml/collations/collation");
+        appendAttribute(path, "type", type);
+        path.append("/cr");
+        if (alt != null) {
+            appendAttribute(path, "alt", alt);
+        }
+        String ruleText = Joiner.on('\n').join(lines);
+        return CldrValue.parseValue(path.toString(), ruleText);
+    }
+
+    /** Appends an attribute of the form {@code [@k="v"]} to the given path buffer. */
+    private static void appendAttribute(StringBuilder out, String k, Object v) {
+        String attribute = String.format("[@%s=\"%s\"]", k, v);
+        out.append(attribute);
+    }
+}
\ No newline at end of file
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.AFTERNOON1;
+import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.EVENING1;
+import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.MIDNIGHT;
+import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.MORNING1;
+import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.NIGHT1;
+import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.NOON;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+
+import com.google.common.base.Ascii;
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+@RunWith(JUnit4.class)
+public class DayPeriodsMapperTest {
+    // A subset of rule types for testing. The string form of each constant is its lower-case
+    // name, so constants can be formatted directly into attribute values.
+    enum RuleType {
+        MORNING1, NOON, AFTERNOON1, EVENING1, NIGHT1, MIDNIGHT;
+
+        @Override
+        public String toString() {
+            String constantName = name();
+            return Ascii.toLowerCase(constantName);
+        }
+    }
+
+    // Possible rule names (these are used as attribute names on the rule element, with the
+    // time as the attribute value). The string form is the lower-case constant name.
+    enum RuleName {
+        AT, BEFORE, FROM;
+
+        @Override
+        public String toString() {
+            String constantName = name();
+            return Ascii.toLowerCase(constantName);
+        }
+    }
+
+    @Test
+    public void testSimple() {
+        // Every rule is declared for the same group of locales, so all three locales are
+        // expected to refer to one shared rule set.
+        Set<String> enLocales = ImmutableSet.of("en_GB", "en_AU", "en_NZ");
+        CldrData input = cldrData(
+            dayPeriodRule(enLocales, MORNING1, isBetween("04:00", "12:00")),
+            dayPeriodRule(enLocales, NOON, isAt("12:00")),
+            dayPeriodRule(enLocales, AFTERNOON1, isBetween("12:00", "18:00")),
+            dayPeriodRule(enLocales, EVENING1, isBetween("18:00", "21:00")),
+            dayPeriodRule(enLocales, NIGHT1, isBetween("21:00", "04:00")),
+            dayPeriodRule(enLocales, MIDNIGHT, isAt("00:00")));
+
+        IcuData mapped = DayPeriodsMapper.process(input);
+
+        assertThat(mapped).hasName("dayPeriods");
+        assertThat(mapped).hasFallback(false);
+
+        // Locale to rule set assignment.
+        assertThat(mapped).hasValuesFor("/locales/en_AU", "set1");
+        assertThat(mapped).hasValuesFor("/locales/en_GB", "set1");
+        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set1");
+
+        // One path per rule type and rule name.
+        assertThat(mapped).hasValuesFor("/rules/set1/morning1/from", "04:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/morning1/before", "12:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/noon/at", "12:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/afternoon1/from", "12:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/afternoon1/before", "18:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/evening1/from", "18:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/evening1/before", "21:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/night1/from", "21:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/night1/before", "04:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/midnight/at", "00:00");
+    }
+
+    @Test
+    public void testMultipleRuleSets() {
+        Set<String> gbOnly = ImmutableSet.of("en_GB");
+        Set<String> auAndNz = ImmutableSet.of("en_AU", "en_NZ");
+        CldrData input = cldrData(
+            dayPeriodRule(gbOnly, MORNING1, isBetween("04:00", "12:00")),
+            dayPeriodRule(gbOnly, NOON, isAt("12:00")),
+            dayPeriodRule(auAndNz, MORNING1, isBetween("06:00", "13:00")),
+            dayPeriodRule(auAndNz, NOON, isAt("13:00")));
+
+        IcuData mapped = DayPeriodsMapper.process(input);
+
+        // The set numbering is reversed compared to the order of the input paths. This is
+        // because visitation requires nested path ordering, which is achieved by lexicographical
+        // ordering of path strings ("en_AU" < "en_GB"). This is an implementation detail however
+        // and might one day change. If this were switched to use DTD order, then it would be
+        // stable (but also affect the ordering of paths in the released ICU data).
+        assertThat(mapped).hasValuesFor("/locales/en_AU", "set1");
+        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set1");
+        assertThat(mapped).hasValuesFor("/rules/set1/morning1/from", "06:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/morning1/before", "13:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/noon/at", "13:00");
+
+        assertThat(mapped).hasValuesFor("/locales/en_GB", "set2");
+        assertThat(mapped).hasValuesFor("/rules/set2/morning1/from", "04:00");
+        assertThat(mapped).hasValuesFor("/rules/set2/morning1/before", "12:00");
+        assertThat(mapped).hasValuesFor("/rules/set2/noon/at", "12:00");
+    }
+
+    @Test
+    public void testRulesetLabels() {
+        Set<String> gbOnly = ImmutableSet.of("en_GB");
+        // The mapper implicitly assumes that the ruleset label is the same for all of the rules
+        // of any given locale (since it comes from the parent element), so both rules below use
+        // the same label.
+        String label = "foo";
+        CldrData input = cldrData(
+            dayPeriodRule(gbOnly, MORNING1, isBetween("04:00", "12:00"), label),
+            dayPeriodRule(gbOnly, NOON, isAt("12:00"), label));
+
+        IcuData mapped = DayPeriodsMapper.process(input);
+
+        // The label is expected as a suffix of the "locales" path element only.
+        assertThat(mapped).hasValuesFor("/locales_foo/en_GB", "set1");
+        assertThat(mapped).hasValuesFor("/rules/set1/morning1/from", "04:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/morning1/before", "12:00");
+        assertThat(mapped).hasValuesFor("/rules/set1/noon/at", "12:00");
+    }
+
+ // Just demonstrating that the mapper does no data validation.
+    @Test
+    public void testNoDataValidation() {
+        // Neither the locale IDs nor the "times" below are meaningful values.
+        Set<String> bogusLocales = ImmutableSet.of("foo", "bar");
+        CldrData input = cldrData(
+            dayPeriodRule(bogusLocales, MORNING1, isBetween("start", "end")),
+            dayPeriodRule(bogusLocales, NOON, isAt("moment")));
+
+        IcuData mapped = DayPeriodsMapper.process(input);
+
+        // Both rules share one group of locales, so a single rule set is expected and the
+        // attribute values are expected to be passed through exactly as given.
+        assertThat(mapped).hasValuesFor("/locales/foo", "set1");
+        assertThat(mapped).hasValuesFor("/locales/bar", "set1");
+        assertThat(mapped).hasValuesFor("/rules/set1/morning1/from", "start");
+        assertThat(mapped).hasValuesFor("/rules/set1/morning1/before", "end");
+        assertThat(mapped).hasValuesFor("/rules/set1/noon/at", "moment");
+    }
+
+    /** Builds a {@link CldrData} instance from the given values via the data supplier. */
+    private static CldrData cldrData(CldrValue... values) {
+        return CldrDataSupplier.forValues(
+            Arrays.asList(values));
+    }
+
+    /** Creates a day period rule whose rule set has no label (no "type" attribute). */
+    private static CldrValue dayPeriodRule(
+        Set<String> locales, RuleType type, Map<RuleName, String> rules) {
+
+        String noLabel = null;
+        return dayPeriodRule(locales, type, rules, noLabel);
+    }
+
+    /**
+     * Creates an empty-valued day period rule at the path
+     * {@code //supplementalData/dayPeriodRuleSet/dayPeriodRules/dayPeriodRule}.
+     *
+     * @param locales the locale IDs, space-joined into the "locales" attribute.
+     * @param type the rule type, written to the "type" attribute of the rule element.
+     * @param rules rule names and their values, each written as an attribute of the rule
+     *     element in the map's iteration order.
+     * @param label the "type" attribute of the rule set element, or null to omit it.
+     */
+    private static CldrValue dayPeriodRule(
+        Set<String> locales, RuleType type, Map<RuleName, String> rules, String label) {
+
+        StringBuilder cldrPath = new StringBuilder("//supplementalData/dayPeriodRuleSet");
+        if (label != null) {
+            appendAttribute(cldrPath, "type", label);
+        }
+        appendAttribute(cldrPath.append("/dayPeriodRules"), "locales", Joiner.on(' ').join(locales));
+        appendAttribute(cldrPath.append("/dayPeriodRule"), "type", type);
+        // Use the shared helper rather than repeating its format string here, so there is a
+        // single definition of the attribute syntax in this class.
+        rules.forEach((name, time) -> appendAttribute(cldrPath, name.toString(), time));
+        return CldrValue.parseValue(cldrPath.toString(), "");
+    }
+
+    /** Returns the single-entry rule map {@code {AT: time}}. */
+    private static Map<RuleName, String> isAt(String time) {
+        return ImmutableMap.of(
+            RuleName.AT, time);
+    }
+
+    /** Returns the two-entry rule map {@code {FROM: from, BEFORE: to}}. */
+    private static Map<RuleName, String> isBetween(String from, String to) {
+        return ImmutableMap.of(
+            RuleName.FROM, from,
+            RuleName.BEFORE, to);
+    }
+
+    /** Appends an attribute of the form {@code [@k="v"]} to the given path buffer. */
+    private static void appendAttribute(StringBuilder out, String k, Object v) {
+        String attribute = String.format("[@%s=\"%s\"]", k, v);
+        out.append(attribute);
+    }
+}
\ No newline at end of file
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.FEW;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.MANY;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.ONE;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.OTHER;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.TWO;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.ZERO;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.Arrays;
+import java.util.Set;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+
+import com.google.common.base.Ascii;
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableSet;
+
+@RunWith(JUnit4.class)
+public class PluralRangesMapperTest {
+    // Plural counts used in this test as the values of the "start", "end" and "result"
+    // attributes. The string form of each constant is its lower-case name.
+    enum PluralCount {
+        ZERO, ONE, TWO, FEW, MANY, OTHER;
+
+        @Override
+        public String toString() {
+            String constantName = name();
+            return Ascii.toLowerCase(constantName);
+        }
+    }
+
+    @Test
+    public void testSimple() {
+        Set<String> enLocales = ImmutableSet.of("en_GB", "en_NZ");
+        CldrData input = cldrData(
+            pluralRange(enLocales, ZERO, ONE, ZERO),
+            pluralRange(enLocales, ZERO, FEW, FEW),
+            pluralRange(enLocales, ONE, TWO, OTHER),
+            pluralRange(enLocales, ONE, MANY, MANY));
+
+        IcuData mapped = PluralRangesMapper.process(input);
+
+        assertThat(mapped).hasName("pluralRanges");
+        assertThat(mapped).hasFallback(false);
+
+        // Both locales share the one rule set.
+        assertThat(mapped).hasValuesFor("/locales/en_GB", "set00");
+        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set00");
+
+        // The order of the ranges differs from the input order above. Ordering is based on
+        // incoming CLDR path ordering, which is reordered by virtue of being processed in
+        // "nested grouping" order. This should probably be made to use DTD order to make output
+        // more isolated once it's clear that this doesn't affect output.
+        assertThat(mapped)
+            .hasValuesFor("/rules/set00",
+                RbValue.of("one", "many", "many"),
+                RbValue.of("one", "two", "other"),
+                RbValue.of("zero", "few", "few"),
+                RbValue.of("zero", "one", "zero"));
+    }
+
+    @Test
+    public void testMultipleSets() {
+        Set<String> gbOnly = ImmutableSet.of("en_GB");
+        Set<String> auOnly = ImmutableSet.of("en_AU");
+        CldrData input = cldrData(
+            pluralRange(gbOnly, ZERO, ONE, ZERO),
+            pluralRange(gbOnly, ZERO, FEW, FEW),
+            pluralRange(auOnly, ONE, TWO, OTHER),
+            pluralRange(auOnly, ONE, MANY, MANY));
+
+        IcuData mapped = PluralRangesMapper.process(input);
+
+        assertThat(mapped).hasName("pluralRanges");
+        assertThat(mapped).hasFallback(false);
+
+        // Although "en_GB" comes first in the input, "en_AU" is expected to get the first set.
+        assertThat(mapped).hasValuesFor("/locales/en_AU", "set00");
+        assertThat(mapped)
+            .hasValuesFor("/rules/set00",
+                RbValue.of("one", "many", "many"),
+                RbValue.of("one", "two", "other"));
+
+        assertThat(mapped).hasValuesFor("/locales/en_GB", "set01");
+        assertThat(mapped)
+            .hasValuesFor("/rules/set01",
+                RbValue.of("zero", "few", "few"),
+                RbValue.of("zero", "one", "zero"));
+    }
+
+    /** Builds a {@link CldrData} instance from the given values via the data supplier. */
+    private static CldrData cldrData(CldrValue... values) {
+        return CldrDataSupplier.forValues(
+            Arrays.asList(values));
+    }
+
+    /**
+     * Creates an empty-valued plural range at the path
+     * {@code //supplementalData/plurals/pluralRanges/pluralRange}.
+     *
+     * @param locales the locale IDs, space-joined into the "locales" attribute.
+     * @param start the value of the "start" attribute.
+     * @param end the value of the "end" attribute.
+     * @param result the value of the "result" attribute.
+     */
+    private static CldrValue pluralRange(
+        Set<String> locales, PluralCount start, PluralCount end, PluralCount result) {
+
+        String localeList = Joiner.on(' ').join(locales);
+        StringBuilder path = new StringBuilder("//supplementalData/plurals/pluralRanges");
+        appendAttribute(path, "locales", localeList);
+        path.append("/pluralRange");
+        appendAttribute(path, "start", start);
+        appendAttribute(path, "end", end);
+        appendAttribute(path, "result", result);
+        return CldrValue.parseValue(path.toString(), "");
+    }
+
+    /** Appends an attribute of the form {@code [@k="v"]} to the given path buffer. */
+    private static void appendAttribute(StringBuilder out, String k, Object v) {
+        String attribute = String.format("[@%s=\"%s\"]", k, v);
+        out.append(attribute);
+    }
+}
\ No newline at end of file
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.FEW;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.MANY;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.ONE;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.OTHER;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.TWO;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.ZERO;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralType.CARDINAL;
+import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralType.ORDINAL;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.Arrays;
+import java.util.Set;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+
+import com.google.common.base.Ascii;
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableSet;
+
+@RunWith(JUnit4.class)
+public class PluralsMapperTest {
+    // Plural rule types, used as the value of the "type" attribute. The string form of each
+    // constant is its lower-case name.
+    enum PluralType {
+        ORDINAL, CARDINAL;
+
+        @Override
+        public String toString() {
+            String constantName = name();
+            return Ascii.toLowerCase(constantName);
+        }
+    }
+
+    // Plural counts, used as the value of the "count" attribute. The string form of each
+    // constant is its lower-case name.
+    enum PluralCount {
+        ZERO, ONE, TWO, FEW, MANY, OTHER;
+
+        @Override
+        public String toString() {
+            String constantName = name();
+            return Ascii.toLowerCase(constantName);
+        }
+    }
+
+    @Test
+    public void testSimple() {
+        Set<String> enLocales = ImmutableSet.of("en_GB", "en_NZ");
+        // Ordinal values are plain and cardinal values are wrapped in "!" so that the two rule
+        // sets can be told apart in the output.
+        CldrData input = cldrData(
+            pluralRule(ORDINAL, enLocales, ZERO, "zero"),
+            pluralRule(ORDINAL, enLocales, ONE, "one"),
+            pluralRule(ORDINAL, enLocales, TWO, "two"),
+            pluralRule(ORDINAL, enLocales, FEW, "few"),
+            pluralRule(ORDINAL, enLocales, MANY, "many"),
+            pluralRule(ORDINAL, enLocales, OTHER, "other"),
+
+            pluralRule(CARDINAL, enLocales, ZERO, "!zero!"),
+            pluralRule(CARDINAL, enLocales, ONE, "!one!"),
+            pluralRule(CARDINAL, enLocales, TWO, "!two!"),
+            pluralRule(CARDINAL, enLocales, FEW, "!few!"),
+            pluralRule(CARDINAL, enLocales, MANY, "!many!"),
+            pluralRule(CARDINAL, enLocales, OTHER, "!other!"));
+
+        IcuData mapped = PluralsMapper.process(input);
+
+        assertThat(mapped).hasName("plurals");
+        assertThat(mapped).hasFallback(false);
+
+        // Cardinals are assigned first, regardless of the CLDR path order (this could change).
+        assertThat(mapped).hasValuesFor("/locales/en_GB", "set0");
+        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set0");
+        assertThat(mapped).hasValuesFor("/locales_ordinals/en_GB", "set1");
+        assertThat(mapped).hasValuesFor("/locales_ordinals/en_NZ", "set1");
+
+        // The cardinal rules.
+        assertThat(mapped).hasValuesFor("/rules/set0/zero", "!zero!");
+        assertThat(mapped).hasValuesFor("/rules/set0/one", "!one!");
+        assertThat(mapped).hasValuesFor("/rules/set0/two", "!two!");
+        assertThat(mapped).hasValuesFor("/rules/set0/few", "!few!");
+        assertThat(mapped).hasValuesFor("/rules/set0/many", "!many!");
+        assertThat(mapped).hasValuesFor("/rules/set0/other", "!other!");
+
+        // The ordinal rules.
+        assertThat(mapped).hasValuesFor("/rules/set1/zero", "zero");
+        assertThat(mapped).hasValuesFor("/rules/set1/one", "one");
+        assertThat(mapped).hasValuesFor("/rules/set1/two", "two");
+        assertThat(mapped).hasValuesFor("/rules/set1/few", "few");
+        assertThat(mapped).hasValuesFor("/rules/set1/many", "many");
+        assertThat(mapped).hasValuesFor("/rules/set1/other", "other");
+    }
+
+    @Test
+    public void testGroupDeduplication_subsets() {
+        Set<String> gbOnly = ImmutableSet.of("en_GB");
+        Set<String> nzOnly = ImmutableSet.of("en_NZ");
+        // The rules for en_GB are a strict subset of those for en_NZ (which adds "few"), but
+        // this does not trigger deduplication.
+        CldrData input = cldrData(
+            pluralRule(CARDINAL, gbOnly, ZERO, "zero"),
+            pluralRule(CARDINAL, gbOnly, ONE, "one"),
+            pluralRule(CARDINAL, gbOnly, TWO, "two"),
+
+            pluralRule(CARDINAL, nzOnly, ZERO, "zero"),
+            pluralRule(CARDINAL, nzOnly, ONE, "one"),
+            pluralRule(CARDINAL, nzOnly, TWO, "two"),
+            pluralRule(CARDINAL, nzOnly, FEW, "few"));
+
+        IcuData mapped = PluralsMapper.process(input);
+
+        // Each locale gets its own set.
+        assertThat(mapped).hasValuesFor("/locales/en_GB", "set0");
+        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set1");
+
+        assertThat(mapped).hasValuesFor("/rules/set0/zero", "zero");
+        assertThat(mapped).hasValuesFor("/rules/set0/one", "one");
+        assertThat(mapped).hasValuesFor("/rules/set0/two", "two");
+
+        assertThat(mapped).hasValuesFor("/rules/set1/zero", "zero");
+        assertThat(mapped).hasValuesFor("/rules/set1/one", "one");
+        assertThat(mapped).hasValuesFor("/rules/set1/two", "two");
+        assertThat(mapped).hasValuesFor("/rules/set1/few", "few");
+    }
+
+    @Test
+    public void testGroupDeduplication_type() {
+        Set<String> gbOnly = ImmutableSet.of("en_GB");
+        // The same three rules are given twice, once per plural type.
+        CldrData input = cldrData(
+            pluralRule(CARDINAL, gbOnly, ZERO, "zero"),
+            pluralRule(CARDINAL, gbOnly, ONE, "one"),
+            pluralRule(CARDINAL, gbOnly, TWO, "two"),
+
+            pluralRule(ORDINAL, gbOnly, ZERO, "zero"),
+            pluralRule(ORDINAL, gbOnly, ONE, "one"),
+            pluralRule(ORDINAL, gbOnly, TWO, "two"));
+
+        IcuData mapped = PluralsMapper.process(input);
+
+        // Group is deduplicated: both the cardinal and the ordinal locale paths refer to "set0".
+        assertThat(mapped).hasValuesFor("/locales/en_GB", "set0");
+        assertThat(mapped).hasValuesFor("/locales_ordinals/en_GB", "set0");
+
+        assertThat(mapped).hasValuesFor("/rules/set0/zero", "zero");
+        assertThat(mapped).hasValuesFor("/rules/set0/one", "one");
+        assertThat(mapped).hasValuesFor("/rules/set0/two", "two");
+    }
+
+
+    @Test
+    public void testGroupDeduplication_locales() {
+        Set<String> gbOnly = ImmutableSet.of("en_GB");
+        Set<String> nzOnly = ImmutableSet.of("en_NZ");
+        // The same three rules are given twice, once per locale.
+        CldrData input = cldrData(
+            pluralRule(CARDINAL, gbOnly, ZERO, "zero"),
+            pluralRule(CARDINAL, gbOnly, ONE, "one"),
+            pluralRule(CARDINAL, gbOnly, TWO, "two"),
+
+            pluralRule(CARDINAL, nzOnly, ZERO, "zero"),
+            pluralRule(CARDINAL, nzOnly, ONE, "one"),
+            pluralRule(CARDINAL, nzOnly, TWO, "two"));
+
+        IcuData mapped = PluralsMapper.process(input);
+
+        // Group is deduplicated: both locales refer to "set0".
+        assertThat(mapped).hasValuesFor("/locales/en_GB", "set0");
+        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set0");
+
+        assertThat(mapped).hasValuesFor("/rules/set0/zero", "zero");
+        assertThat(mapped).hasValuesFor("/rules/set0/one", "one");
+        assertThat(mapped).hasValuesFor("/rules/set0/two", "two");
+    }
+
+    /** Builds a {@link CldrData} instance from the given values via the data supplier. */
+    private static CldrData cldrData(CldrValue... values) {
+        return CldrDataSupplier.forValues(
+            Arrays.asList(values));
+    }
+
+    /**
+     * Creates a plural rule value at the path
+     * {@code //supplementalData/plurals/pluralRules/pluralRule}.
+     *
+     * @param type the value of the "type" attribute of the "plurals" element.
+     * @param locales the locale IDs, space-joined into the "locales" attribute.
+     * @param count the value of the "count" attribute of the rule element.
+     * @param value the value of the rule.
+     */
+    private static CldrValue pluralRule(
+        PluralType type, Set<String> locales, PluralCount count, String value) {
+
+        String localeList = Joiner.on(' ').join(locales);
+        StringBuilder path = new StringBuilder("//supplementalData/plurals");
+        appendAttribute(path, "type", type);
+        path.append("/pluralRules");
+        appendAttribute(path, "locales", localeList);
+        path.append("/pluralRule");
+        appendAttribute(path, "count", count);
+        return CldrValue.parseValue(path.toString(), value);
+    }
+
+    /** Appends an attribute of the form {@code [@k="v"]} to the given path buffer. */
+    private static void appendAttribute(StringBuilder out, String k, Object v) {
+        String attribute = String.format("[@%s=\"%s\"]", k, v);
+        out.append(attribute);
+    }
+}
\ No newline at end of file
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Access.PRIVATE;
+import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Access.PUBLIC;
+import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.DURATION_RULES;
+import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.ORDINAL_RULES;
+import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.SPELLOUT_RULES;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.util.Arrays;
+import java.util.Optional;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+
+import com.google.common.base.Ascii;
+import com.google.common.base.CaseFormat;
+
+@RunWith(JUnit4.class)
+public class RbnfMapperTest {
+    // Ruleset access levels, used as the value of the "access" attribute. The string form of
+    // each constant is its lower-case name.
+    enum Access {
+        PUBLIC, PRIVATE;
+
+        @Override
+        public String toString() {
+            String constantName = name();
+            return Ascii.toLowerCase(constantName);
+        }
+    }
+
+    // IMPORTANT: The ldml.dtd only defines 3 groups:
+    //   NumberingSystemRules, OrdinalRules, SpelloutRules
+    // but the "specials" files used by ICU introduce additional group names (e.g. DurationRules)
+    // which are strictly speaking invalid according to the DTD.
+    enum Group {
+        NUMBERING_SYSTEM_RULES, ORDINAL_RULES, SPELLOUT_RULES, DURATION_RULES;
+
+        @Override
+        public String toString() {
+            // Upper camel case is needed: it's "NumberingSystemRules" not "numberingSystemRules".
+            String constantName = name();
+            return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, constantName);
+        }
+    }
+
+    @Test
+    public void testSingleRuleset() {
+        // The final argument is the sort index of each rule within the ruleset.
+        CldrData input = cldrData(
+            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", 1),
+            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", 2),
+            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", 3));
+
+        IcuData mapped = RbnfMapper.process("en", input, Optional.empty());
+
+        assertThat(mapped).hasValuesFor("/RBNFRules/SpelloutRules",
+            // Double-% prefix for "private" access.
+            RbValue.of("%%2d-year:"),
+            RbValue.of("0: hundred;"),
+            RbValue.of("1: oh-=%first-set=;"),
+            RbValue.of("10: =%first-set=;"));
+    }
+
+
+    @Test
+    public void testMultipleRulesets() {
+        // Note that input order of these paths shouldn't matter since they are ordered (and thus
+        // grouped) by DTD order (relative order matters for values in the same set, but values
+        // do not have to be grouped together). The final argument is the rule's sort index.
+        CldrData input = cldrData(
+            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", 1),
+            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", 2),
+            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", 3),
+            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", 4),
+            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", 5),
+            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", 6),
+            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", 7),
+            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", 8));
+
+        IcuData mapped = RbnfMapper.process("en", input, Optional.empty());
+
+        assertThat(mapped).hasValuesFor("/RBNFRules/SpelloutRules",
+            // Single-% prefix for "public" access.
+            RbValue.of("%first-set:"),
+            RbValue.of("-x: one;"),
+            RbValue.of("Inf: two;"),
+            RbValue.of("NaN: three;"),
+            RbValue.of("0: four;"),
+            // Each "heading" appears once at the start of the section.
+            RbValue.of("%second-set:"),
+            RbValue.of("-x: five;"),
+            RbValue.of("Inf: six;"),
+            RbValue.of("NaN: seven;"),
+            RbValue.of("0: eight;"));
+    }
+
+    @Test
+    public void testSpecials() {
+        // The specials use a group name (DurationRules) that the main data does not. The final
+        // argument of each rule is its sort index, which restarts at 1 for each data set.
+        CldrData specials = cldrData(
+            rbnfRule(DURATION_RULES, "min", PRIVATE, "0", "0 minutes; 1 minute; =0= minutes;", 1),
+            rbnfRule(DURATION_RULES, "hr", PRIVATE, "0", "0 hours; 1 hour; =0= hours;", 2),
+            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", 3),
+            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", 4),
+            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600", "=%%hr-min-sec=;", 5));
+
+        CldrData input = cldrData(
+            rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", 1),
+            rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0",
+                "=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", 2));
+
+        IcuData mapped = RbnfMapper.process("en", input, Optional.of(specials));
+
+        // Rules from the main data.
+        assertThat(mapped).hasValuesFor("/RBNFRules/OrdinalRules",
+            RbValue.of("%digits-ordinal:"),
+            RbValue.of("-x: \\u2212>>;"),
+            RbValue.of("0: =#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;"));
+
+        // Rules from the specials. The headings are sorted in the output
+        // ("hr" < "in-numerals" < "min").
+        assertThat(mapped).hasValuesFor("/RBNFRules/DurationRules",
+            RbValue.of("%%hr:"),
+            RbValue.of("0: 0 hours; 1 hour; =0= hours;"),
+            RbValue.of("%in-numerals:"),
+            RbValue.of("0: =0= sec.;"),
+            RbValue.of("60: =%%min-sec=;"),
+            RbValue.of("3600: =%%hr-min-sec=;"),
+            RbValue.of("%%min:"),
+            RbValue.of("0: 0 minutes; 1 minute; =0= minutes;"));
+    }
+
+ // Note that while this is testing the escaping behaviour, the implementation was largely
+ // derived from a mostly undocumented method in the previous converter, and while it behaves
+ // the same, it's not entirely obvious why some of the special cases really exist.
+    @Test
+    public void testEscaping() {
+        // One rule per special case: a backslash, arrow characters, a non-ASCII BMP character
+        // and a non-BMP character (given here as a surrogate pair).
+        CldrData input = cldrData(
+            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", 1),
+            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", 2),
+            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", 3),
+            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", 4));
+
+        IcuData mapped = RbnfMapper.process("en", input, Optional.empty());
+
+        // Expected: the backslash is doubled, arrows become ASCII angle brackets and the
+        // non-ASCII characters become 4-digit or 8-digit hex escape sequences.
+        assertThat(mapped).hasValuesFor("/RBNFRules/SpelloutRules",
+            RbValue.of("%escaping:"),
+            RbValue.of("k1: \\\\ Backslash"),
+            RbValue.of("k2: << Arrows >>"),
+            RbValue.of("k3: \\u00DC Umlaut"),
+            RbValue.of("k4: \\U0001F603 Smiley"));
+    }
+
+    /** Builds a {@link CldrData} instance from the given values via the data supplier. */
+    private static CldrData cldrData(CldrValue... values) {
+        return CldrDataSupplier.forValues(
+            Arrays.asList(values));
+    }
+
+ // Both ruleset and rbnfrule are "ordered" elements, but to mimic the XML below, it's the
+ // rbnfrule which needs to have an incrementing sort index:
+ //
+ // <ruleset type="<set-type>" access="<access>">
+ // <rbnfrule value="<key-1>">value-1</rbnfrule>
+ // <rbnfrule value="<key-2>">value-2</rbnfrule>
+ // <rbnfrule value="<key-3>">value-3</rbnfrule>
+ // </ruleset>
+    /**
+     * Creates an RBNF rule value at the path
+     * {@code //ldml/rbnf/rulesetGrouping/ruleset/rbnfrule}.
+     *
+     * @param group the value of the "type" attribute of the grouping element.
+     * @param setType the value of the "type" attribute of the ruleset element.
+     * @param access the value of the "access" attribute of the ruleset element.
+     * @param key the value of the "value" attribute of the rule element.
+     * @param value the value of the rule.
+     * @param ruleIndex the sort index appended to the rule element as "#N".
+     */
+    private static CldrValue rbnfRule(
+        Group group, String setType, Access access, String key, String value, int ruleIndex) {
+
+        StringBuilder path = new StringBuilder("//ldml/rbnf/rulesetGrouping");
+        appendAttribute(path, "type", group);
+        path.append("/ruleset");
+        appendAttribute(path, "type", setType);
+        appendAttribute(path, "access", access);
+        path.append("/rbnfrule#");
+        path.append(ruleIndex);
+        appendAttribute(path, "value", key);
+        return CldrValue.parseValue(path.toString(), value);
+    }
+
+    /** Appends an attribute of the form {@code [@k="v"]} to the given path buffer. */
+    private static void appendAttribute(StringBuilder out, String k, Object v) {
+        String attribute = String.format("[@%s=\"%s\"]", k, v);
+        out.append(attribute);
+    }
+}
\ No newline at end of file
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.mapper;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.truth.Truth.assertThat;
+import static java.util.stream.Collectors.joining;
+import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.BACKWARD;
+import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.BOTH;
+import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.FORWARD;
+import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Visibility.EXTERNAL;
+import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Visibility.INTERNAL;
+import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.function.Function;
+import java.util.stream.Stream;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+
+import com.google.common.base.Ascii;
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Tests for the transforms mapper, checking both the ICU data it returns and the contents of
+ * the rule files it writes (captured in memory via {@link #wrap(Map)}).
+ */
+@RunWith(JUnit4.class)
+public class TransformsMapperTest {
+
+    // Lines expected at the top of every generated rule file (note the leading BOM).
+    private static final ImmutableList<String> FILE_HEADER = ImmutableList.of(
+        "\uFEFF# © 2016 and later: Unicode, Inc. and others.",
+        "# License & terms of use: http://www.unicode.org/copyright.html#License",
+        "#");
+
+    // Number of paths present in the ICU data even when no CLDR data is given
+    // (see testDefaultContent() for the list).
+    private static final int DEFAULT_PATH_COUNT = 7;
+
+    /** Transform direction; toString() gives the lower-case form used in CLDR attributes. */
+    enum Direction {
+        FORWARD, BACKWARD, BOTH;
+
+        @Override public String toString() {
+            return Ascii.toLowerCase(name());
+        }
+    }
+
+    /** Transform visibility; toString() gives the lower-case form used in CLDR attributes. */
+    enum Visibility {
+        INTERNAL, EXTERNAL;
+
+        @Override public String toString() {
+            return Ascii.toLowerCase(name());
+        }
+    }
+
+    /** With no CLDR input, only the default paths exist and no files are written. */
+    @Test
+    public void testDefaultContent() {
+        Map<String, String> fileMap = new TreeMap<>();
+        IcuData icuData = TransformsMapper.process(cldrData(), wrap(fileMap));
+
+        assertThat(fileMap).isEmpty();
+
+        assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT);
+        assertThat(icuData).hasValuesFor("/\"%Translit%Hex\"", "%Translit%Hex");
+        assertThat(icuData).hasValuesFor("/\"%Translit%UnicodeChar\"", "%Translit%UnicodeChar");
+        assertThat(icuData).hasValuesFor("/\"%Translit%UnicodeName\"", "%Translit%UnicodeName");
+        assertThat(icuData)
+            .hasValuesFor("/RuleBasedTransliteratorIDs/Digit-Tone/alias", "NumericPinyin-Pinyin");
+        assertThat(icuData)
+            .hasValuesFor("/RuleBasedTransliteratorIDs/Tone-Digit/alias", "Pinyin-NumericPinyin");
+        assertThat(icuData).hasValuesFor("TransliterateLATIN", "", "");
+        assertThat(icuData)
+            .hasValuesFor("TransliteratorNamePattern", "{0,choice,0#|1#{1}|2#{1}-{2}}");
+    }
+
+    /** A forward-only transform adds one alias path per alias plus direction and resource. */
+    @Test
+    public void testForward() {
+        int idx = 0;
+        CldrData cldrData =
+            cldrData(oneWay("foo", "bar", FORWARD, null, INTERNAL, "first second third", ++idx));
+
+        Map<String, String> fileMap = new TreeMap<>();
+        IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap));
+
+        // 3 aliases + direction + resource.
+        assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 5);
+        assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/first/alias", "foo-bar");
+        assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/second/alias", "foo-bar");
+        assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/third/alias", "foo-bar");
+        assertThat(icuData)
+            .hasValuesFor("RuleBasedTransliteratorIDs/foo-bar/internal/direction", "FORWARD");
+        assertThat(icuData)
+            .hasValuesFor(
+                "RuleBasedTransliteratorIDs/foo-bar/internal/resource:process(transliterator)",
+                "foo_bar.txt");
+
+        assertThat(fileMap).hasSize(1);
+        assertThat(fileMap).containsEntry("foo_bar.txt", headerPlusLines(
+            "# File: foo_bar.txt",
+            "# Generated from CLDR",
+            "#",
+            "",
+            "foo --> bar [internal]:",
+            "first second third"));
+    }
+
+    /** A backward-only transform with a variant is keyed as "bar-foo/variant" and reversed. */
+    @Test
+    public void testBackward() {
+        int idx = 0;
+        CldrData cldrData =
+            cldrData(oneWay("foo", "bar", BACKWARD, "variant", EXTERNAL, "one two three", ++idx));
+
+        Map<String, String> fileMap = new TreeMap<>();
+        IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap));
+
+        assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 5);
+        assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/one/alias", "bar-foo/variant");
+        assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/two/alias", "bar-foo/variant");
+        assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/three/alias", "bar-foo/variant");
+
+        // Since the variant uses a '/' in the path element (not a path separator) we cannot just
+        // parse a string to get the expected path, so we do it the "hard way".
+        RbPath prefix = RbPath.of("RuleBasedTransliteratorIDs", "bar-foo/variant", "file");
+        assertThat(icuData).hasValuesFor(prefix.extendBy("direction"), "REVERSE");
+        assertThat(icuData)
+            .hasValuesFor(prefix.extendBy("resource:process(transliterator)"), "foo_bar_variant.txt");
+
+        assertThat(fileMap).hasSize(1);
+        assertThat(fileMap).containsEntry("foo_bar_variant.txt", headerPlusLines(
+            "# File: foo_bar_variant.txt",
+            "# Generated from CLDR",
+            "#",
+            "",
+            "foo <-- bar [external]:",
+            "one two three"));
+    }
+
+    /** A bidirectional transform produces entries for both directions sharing one file. */
+    @Test
+    public void testBoth() {
+        int idx = 0;
+        CldrData cldrData = cldrData(
+            both("foo", "bar", null, INTERNAL, "forward-alias", "backward-alias", ++idx));
+
+        Map<String, String> fileMap = new TreeMap<>();
+        IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap));
+
+        // 3 for each direction.
+        assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 6);
+
+        // Both directions.
+        assertThat(icuData)
+            .hasValuesFor("RuleBasedTransliteratorIDs/foo-bar/internal/direction", "FORWARD");
+        assertThat(icuData)
+            .hasValuesFor("RuleBasedTransliteratorIDs/bar-foo/internal/direction", "REVERSE");
+
+        // Both aliases.
+        assertThat(icuData)
+            .hasValuesFor("RuleBasedTransliteratorIDs/forward-alias/alias", "foo-bar");
+        assertThat(icuData)
+            .hasValuesFor("RuleBasedTransliteratorIDs/backward-alias/alias", "bar-foo");
+
+        // But the file is the same (obvious really since there's only one).
+        assertThat(icuData).hasValuesFor(
+            "RuleBasedTransliteratorIDs/foo-bar/internal/resource:process(transliterator)",
+            "foo_bar.txt");
+        assertThat(icuData).hasValuesFor(
+            "RuleBasedTransliteratorIDs/bar-foo/internal/resource:process(transliterator)",
+            "foo_bar.txt");
+
+        assertThat(fileMap).hasSize(1);
+        assertThat(fileMap).containsEntry("foo_bar.txt", headerPlusLines(
+            "# File: foo_bar.txt",
+            "# Generated from CLDR",
+            "#",
+            "",
+            "foo <-> bar [internal]:",
+            "forward-alias",
+            "backward-alias"));
+    }
+
+    // Returns the expected file contents: the standard header followed by the given lines,
+    // newline separated. Static because it uses no instance state (like the other helpers).
+    private static String headerPlusLines(String... lines) {
+        // For now the files always contain a blank line at the end (to match legacy behaviour) but
+        // this can, and probably should be changed.
+        return Stream
+            .concat(FILE_HEADER.stream(), Arrays.stream(lines))
+            .collect(joining("\n", "", "\n\n"));
+    }
+
+    // Builds a CldrData instance from the given values (passed on as a list in argument order).
+    private static CldrData cldrData(CldrValue... values) {
+        return CldrDataSupplier.forValues(Arrays.asList(values));
+    }
+
+    // Creates a single-direction transform rule. The alias attribute name depends on the
+    // direction ("alias" for FORWARD, otherwise "backwardAlias"), and the alias string doubles
+    // as the rule body, which is what the tests expect to find in the written file.
+    private static CldrValue oneWay(
+        String src, String dst, Direction dir, String var, Visibility vis, String alias, int idx) {
+
+        checkArgument(dir != BOTH, "use both() for bidirectional transforms");
+        StringBuilder cldrPath = new StringBuilder("//supplementalData/transforms/transform");
+        appendAttribute(cldrPath, "source", src);
+        appendAttribute(cldrPath, "target", dst);
+        appendAttribute(cldrPath, "direction", dir);
+        if (var != null) {
+            appendAttribute(cldrPath, "variant", var);
+        }
+        appendAttribute(cldrPath, "visibility", vis);
+        appendAttribute(cldrPath, dir == FORWARD ? "alias" : "backwardAlias", alias);
+        cldrPath.append("/tRule#").append(idx);
+
+        String arrow = dir == FORWARD ? "-->" : "<--";
+        return CldrValue.parseValue(
+            cldrPath.toString(),
+            String.format("%s %s %s [%s]:\n%s", src, arrow, dst, vis, alias));
+    }
+
+    // Creates a bidirectional transform rule with both a forward and a backward alias. Both
+    // aliases are also written into the rule body (one per line).
+    private static CldrValue both(
+        String src, String dst, String var, Visibility vis, String alias, String backAlias, int idx) {
+
+        StringBuilder cldrPath = new StringBuilder("//supplementalData/transforms/transform");
+        appendAttribute(cldrPath, "source", src);
+        appendAttribute(cldrPath, "target", dst);
+        appendAttribute(cldrPath, "direction", BOTH);
+        if (var != null) {
+            appendAttribute(cldrPath, "variant", var);
+        }
+        appendAttribute(cldrPath, "visibility", vis);
+        appendAttribute(cldrPath, "alias", alias);
+        appendAttribute(cldrPath, "backwardAlias", backAlias);
+        cldrPath.append("/tRule#").append(idx);
+
+        return CldrValue.parseValue(
+            cldrPath.toString(),
+            String.format("%s <-> %s [%s]:\n%s\n%s", src, dst, vis, alias, backAlias));
+    }
+
+    // Appends a path attribute of the form [@k="v"] to the given buffer.
+    private static void appendAttribute(StringBuilder out, String k, Object v) {
+        out.append(String.format("[@%s=\"%s\"]", k, v));
+    }
+
+    // Returns a function which, for each requested path, hands out a PrintWriter that buffers
+    // everything written to it in memory. When the writer is closed, the buffered text is
+    // stored in the given map, keyed by path.toString().
+    private static Function<Path, PrintWriter> wrap(Map<String, String> data) {
+        return path -> {
+            Writer writer = new Writer() {
+                private final StringWriter buffer = new StringWriter();
+
+                @Override public void write(char[] chars, int offset, int length) {
+                    buffer.write(chars, offset, length);
+                }
+
+                @Override public void flush() {
+                    buffer.flush();
+                }
+
+                @Override public void close() throws IOException {
+                    buffer.close();
+                    // Publish the captured contents only once the writer is closed.
+                    data.put(path.toString(), buffer.toString());
+                }
+            };
+            return new PrintWriter(writer);
+        };
+    }
+}
\ No newline at end of file
import java.util.List;
-import javax.annotation.concurrent.Immutable;
-
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
/**
* Tests for the regex transformer class. Note that in most cases, the rules used here are taken
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.testing;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrDataType;
+import org.unicode.cldr.api.CldrDraftStatus;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.cldr.api.CldrValue;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Table;
+import com.google.common.collect.TreeBasedTable;
+
+/**
+ * Fake data supplier for testing (especially the path value mappers).
+ *
+ * <p>Locale data is held in two tables keyed by locale ID and path: one for "unresolved" data
+ * and one for "resolved" data. Non-locale data is held in insertion order.
+ */
+public final class FakeDataSupplier extends CldrDataSupplier {
+    private final Map<CldrPath, CldrValue> nonLocaleData = new LinkedHashMap<>();
+    private final Table<String, CldrPath, CldrValue> unresolvedData = TreeBasedTable.create();
+    private final Table<String, CldrPath, CldrValue> resolvedData = TreeBasedTable.create();
+
+    /**
+     * Adds values to both the unresolved and resolved data for a locale, replacing any
+     * existing values with the same path.
+     *
+     * @param localeId the locale to add values for.
+     * @param values the values to add.
+     * @return this instance (for chaining).
+     */
+    public FakeDataSupplier addLocaleData(String localeId, CldrValue... values) {
+        for (CldrValue v : values) {
+            unresolvedData.put(localeId, v.getPath(), v);
+            resolvedData.put(localeId, v.getPath(), v);
+        }
+        return this;
+    }
+
+    /**
+     * Adds values to only the resolved data for a locale.
+     *
+     * @param localeId the locale to add values for.
+     * @param values the values to add.
+     * @return this instance (for chaining).
+     * @throws IllegalArgumentException if the resolved data for the locale already has a value
+     *     for one of the given paths (the existing value is left untouched).
+     */
+    public FakeDataSupplier addInheritedData(String localeId, CldrValue... values) {
+        for (CldrValue v : values) {
+            // Check before inserting so a failed call never overwrites existing data.
+            checkArgument(!resolvedData.contains(localeId, v.getPath()),
+                "path already present in resolved CLDR data: %s", v.getPath());
+            resolvedData.put(localeId, v.getPath(), v);
+        }
+        return this;
+    }
+
+    /**
+     * Adds non-locale values, replacing any existing values with the same path.
+     *
+     * @param values the values to add.
+     * @return this instance (for chaining).
+     */
+    public FakeDataSupplier addSupplementalData(CldrValue... values) {
+        for (CldrValue v : values) {
+            nonLocaleData.put(v.getPath(), v);
+        }
+        return this;
+    }
+
+    /** Returns the unresolved data for UNRESOLVED, and the resolved data for anything else. */
+    @Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
+        Table<String, CldrPath, CldrValue> data =
+            resolution == CldrResolution.UNRESOLVED ? unresolvedData : resolvedData;
+        return CldrDataSupplier.forValues(data.row(localeId).values());
+    }
+
+    /** Returns the non-locale values whose path has the given data type. */
+    @Override public CldrData getDataForType(CldrDataType type) {
+        return CldrDataSupplier.forValues(
+            Iterables.filter(nonLocaleData.values(), v -> v.getPath().getDataType() == type));
+    }
+
+    /** Returns an unmodifiable view of the locale IDs which have resolved data. */
+    @Override public Set<String> getAvailableLocaleIds() {
+        return Collections.unmodifiableSet(resolvedData.rowKeySet());
+    }
+
+    /** Not supported by this fake; always throws {@link UnsupportedOperationException}. */
+    @Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus cldrDraftStatus) {
+        throw new UnsupportedOperationException("not supported in fake data supplier");
+    }
+}
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.testing;
+
+import static com.google.common.base.Preconditions.checkState;
+
+import java.util.Comparator;
+import java.util.Objects;
+
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * A fake result, primarily for testing mappers. This implementation does not "play well" with
+ * other result implementations.
+ */
+public final class FakeResult extends Result {
+ private static final Comparator<FakeResult> ORDERING =
+ Comparator.comparing(FakeResult::getKey)
+ .thenComparing(r -> r.index)
+ .thenComparing(r -> r.isFallback);
+
+ /**
+ * Returns a primary result. Care must be taken to ensure that multiple "equal()" results are
+ * not used in the same test (results are equal if their path and index are equal, and they
+ * share the same fallback state).
+ *
+ * @param path the path of the result
+ * @param index the sort index of the result (to distinguish paths with the same path).
+ * @param isGrouped whether values in the result should be grouped into a separate sub-array.
+ * @param values the result values.
+ */
+ public static Result of(String path, int index, boolean isGrouped, String... values) {
+ return new FakeResult(
+ RbPath.parse(path), ImmutableList.copyOf(values), isGrouped, index, false);
+ }
+
+ /**
+ * Returns a fallback result. Note that currently fallbacks are never "grouped".
+ *
+ * @param path the path of the result
+ * @param index the sort index of the result (to distinguish paths with the same path).
+ * @param values the result values.
+ */
+ public static Result fallback(String path, int index, String... values) {
+ return new FakeResult(RbPath.parse(path), ImmutableList.copyOf(values), false, index, true);
+ }
+
+ private final boolean grouped;
+ private final ImmutableList<String> values;
+ private final boolean isFallback;
+ private final int index;
+
+ private FakeResult(
+ RbPath path, ImmutableList<String> values, boolean grouped, int index, boolean isFallback) {
+ super(path);
+ this.grouped = grouped;
+ this.values = values;
+ this.isFallback = isFallback;
+ this.index = index;
+ }
+
+ boolean isFallback() {
+ return isFallback;
+ }
+
+ @Override public boolean isGrouped() {
+ return grouped;
+ }
+
+ @Override public ImmutableList<String> getValues() {
+ return values;
+ }
+
+ @Override public boolean isFallbackFor(Result r) {
+ FakeResult other = (FakeResult) r;
+ return isFallback && !other.isFallback
+ && getKey().equals(r.getKey())
+ && index == (other).index;
+ }
+
+ @Override public int compareTo(Result other) {
+ int signum = ORDERING.compare(this, (FakeResult) other);
+ checkState(signum != 0 || this == other,
+ "equivalent but non-identical results found in test data: %s / %s", this, other);
+ return signum;
+ }
+
+ // We really don't want to pretend to support mixing implementations of Result in tests.
+ @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
+ @Override public boolean equals(Object obj) {
+ FakeResult other = (FakeResult) obj;
+ boolean isEqual = getKey().equals(other.getKey())
+ && index == other.index
+ && isFallback == other.isFallback;
+ checkState(!isEqual || this == other,
+ "equivalent but non-identical results found in test data: %s / %s", this, other);
+ return isEqual;
+ }
+
+ @Override public int hashCode() {
+ return Objects.hash(getKey(), index, isFallback);
+ }
+}
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.testing;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+
+import com.google.common.collect.ImmutableList;
+
+public final class FakeTransformer extends PathValueTransformer {
+ private final Map<CldrValue, ImmutableList<Result>> resultMap = new HashMap<>();
+ private final Map<RbPath, ImmutableList<Result>> fallbackMap = new HashMap<>();
+
+ public void addResults(CldrValue value, Result... results) {
+ resultMap.put(value, ImmutableList.copyOf(results));
+ }
+
+ public void addFallbacks(String path, Result... results) {
+ fallbackMap.put(RbPath.parse(path), ImmutableList.copyOf(results));
+ }
+
+ @Override public ImmutableList<Result> transform(CldrValue value) {
+ return resultMap.getOrDefault(value, ImmutableList.of());
+ }
+
+ @Override public ImmutableList<Result> transform(CldrValue value, DynamicVars ignored) {
+ return resultMap.getOrDefault(value, ImmutableList.of());
+ }
+
+ @Override public ImmutableList<Result> getFallbackResultsFor(RbPath key, DynamicVars ignored) {
+ return fallbackMap.getOrDefault(key, ImmutableList.of());
+ }
+}
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.testing;
+
+import java.util.List;
+
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+
+import com.google.common.truth.FailureMetadata;
+import com.google.common.truth.IterableSubject;
+import com.google.common.truth.Subject;
+
+/** Truth subject for making assertions about {@link IcuData} instances in tests. */
+public final class IcuDataSubject extends Subject {
+ private final IcuData actual;
+
+ protected IcuDataSubject(FailureMetadata metadata, IcuData actual) {
+ super(metadata, actual);
+ this.actual = actual;
+ }
+
+ /** Asserts that {@code getName()} of the data equals the given name. */
+ public void hasName(String name) {
+ check("getName()").that(actual.getName()).isEqualTo(name);
+ }
+
+ /** Asserts that {@code hasFallback()} of the data equals the given flag. */
+ public void hasFallback(boolean fallback) {
+ check("hasFallback()").that(actual.hasFallback()).isEqualTo(fallback);
+ }
+
+ /** Returns an iterable subject over {@code getPaths()} for further assertions. */
+ public IterableSubject getPaths() {
+ return check("getPaths()").that(actual.getPaths());
+ }
+
+ /** Asserts that the only value for the (parsed) path is {@code RbValue.of("")}. */
+ public void hasEmptyValue(String rbPath) {
+ hasValuesFor(rbPath, RbValue.of(""));
+ }
+
+ /**
+ * Asserts the values for the (parsed) path, where the expected value is built by passing all
+ * the given strings to {@code RbValue.of()} (presumably giving one value with many elements).
+ */
+ public void hasValuesFor(String rbPath, String... values) {
+ hasValuesFor(rbPath, RbValue.of(values));
+ }
+
+ /** Asserts that the (parsed) path has exactly the given values, in order. */
+ public void hasValuesFor(String rbPath, RbValue... values) {
+ hasValuesFor(RbPath.parse(rbPath), values);
+ }
+
+ /**
+ * As {@link #hasValuesFor(String, String...)} but for an existing path instance (use this if
+ * the path cannot be expressed as a parsable string).
+ */
+ public void hasValuesFor(RbPath p, String... values) {
+ hasValuesFor(p, RbValue.of(values));
+ }
+
+ /**
+ * Asserts that the data has a (non-null) value list for the path and that it contains exactly
+ * the given values, in order.
+ */
+ public void hasValuesFor(RbPath p, RbValue... values) {
+ List<RbValue> rbValues = actual.get(p);
+ check("get('%s')", p).that(rbValues).isNotNull();
+ check("get('%s')", p).that(rbValues).containsExactlyElementsIn(values).inOrder();
+ }
+}
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.testing;
+
+import org.unicode.icu.tool.cldrtoicu.IcuData;
+
+import com.google.common.truth.FailureMetadata;
+import com.google.common.truth.Subject;
+import com.google.common.truth.Truth;
+
+/**
+ * Truth subject factory for asserting about ICU data instances (makes tests much more
+ * readable). Tests normally use the static {@link #assertThat(IcuData)} entry point.
+ */
+public final class IcuDataSubjectFactory implements Subject.Factory<IcuDataSubject, IcuData> {
+ /** Starts an assertion chain about the given ICU data. */
+ public static IcuDataSubject assertThat(IcuData result) {
+ return Truth.assertAbout(new IcuDataSubjectFactory()).that(result);
+ }
+
+ @Override
+ public IcuDataSubject createSubject(FailureMetadata failureMetadata, IcuData that) {
+ return new IcuDataSubject(failureMetadata, that);
+ }
+
+ // Package-private: instances are only created via assertThat() above.
+ IcuDataSubjectFactory() {}
+}
\ No newline at end of file
import static com.google.common.base.Preconditions.checkArgument;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+
+import com.google.common.truth.BooleanSubject;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
-import org.unicode.icu.tool.cldrtoicu.RbPath;
public final class RbPathSubject extends Subject {
// For use when chaining from other subjects.
checkArgument(n >= 0, "invalid path length: %s", n);
check("length()").that(actual.length()).isEqualTo(n);
}
+
+ /** Returns a boolean subject for whether the actual path starts with the given path. */
+ public final BooleanSubject startsWith(RbPath path) {
+ return check("startsWith('%s')", path).that(actual.startsWith(path));
+ }
+
+ /** Returns a boolean subject for whether the actual path ends with the given path. */
+ public final BooleanSubject endsWith(RbPath path) {
+ return check("endsWith('%s')", path).that(actual.endsWith(path));
+ }
+
+ /** Returns a boolean subject for whether the actual path contains the given path. */
+ public final BooleanSubject contains(RbPath path) {
+ return check("contains('%s')", path).that(actual.contains(path));
+ }
}
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
+import org.unicode.icu.tool.cldrtoicu.RbPath;
+
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import com.google.common.truth.Truth;
-import org.unicode.icu.tool.cldrtoicu.RbPath;
/** Truth subject for asserting about resource bundle paths (makes tests much more readable). */
public final class RbPathSubjectFactory implements Subject.Factory<RbPathSubject, RbPath> {
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.testing;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+
+import com.google.common.truth.FailureMetadata;
+import com.google.common.truth.Subject;
+
+/** Truth subject for making assertions about resource bundle values in tests. */
+public final class RbValueSubject extends Subject {
+ // For use when chaining from other subjects.
+ public static Factory<RbValueSubject, RbValue> rbValues() {
+ return RbValueSubject::new;
+ }
+
+ private final RbValue actual;
+
+ protected RbValueSubject(FailureMetadata metadata, RbValue actual) {
+ super(metadata, actual);
+ this.actual = actual;
+ }
+
+ /** Asserts that the value has exactly one element, equal to the given string. */
+ public final void hasValue(String value) {
+ check("getElements()").that(actual.getElements()).containsExactly(value);
+ }
+
+ /** Asserts that the elements of the value are exactly the given strings, in order. */
+ public final void hasValues(String... values) {
+ check("getElements()").that(actual.getElements()).containsExactlyElementsIn(values).inOrder();
+ }
+
+ /**
+ * Asserts that the value has the given number of elements.
+ *
+ * @throws IllegalArgumentException if {@code n} is not positive.
+ */
+ public final void hasSize(int n) {
+ checkArgument(n > 0, "invalid element count: %s", n);
+ check("getElements().size()").that(actual.getElements().size()).isEqualTo(n);
+ }
+}
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.testing;
+
+import org.unicode.icu.tool.cldrtoicu.RbValue;
+
+import com.google.common.truth.FailureMetadata;
+import com.google.common.truth.Subject;
+import com.google.common.truth.Truth;
+
+/**
+ * Truth subject factory for asserting about resource bundle values (makes tests much more
+ * readable). Tests normally use the static {@link #assertThat(RbValue)} entry point.
+ */
+public final class RbValueSubjectFactory implements Subject.Factory<RbValueSubject, RbValue> {
+ /** Starts an assertion chain about the given resource bundle value. */
+ public static RbValueSubject assertThat(RbValue result) {
+ return Truth.assertAbout(new RbValueSubjectFactory()).that(result);
+ }
+
+ @Override
+ public RbValueSubject createSubject(FailureMetadata failureMetadata, RbValue that) {
+ return new RbValueSubject(failureMetadata, that);
+ }
+
+ // Package-private: instances are only created via assertThat() above.
+ RbValueSubjectFactory() {}
+}
\ No newline at end of file
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
+import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
+
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import com.google.common.truth.Truth;
-import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
/** Truth subject for asserting about transformation results (makes tests much more readable). */
public class ResultSubjectFactory implements Subject.Factory<ResultSubject, Result> {