From: David Beaumont Date: Mon, 15 Jun 2020 21:56:23 +0000 (+0000) Subject: ICU-21149 Adding a helper to allow simpler debugging of mappers. X-Git-Tag: cldr/2020-09-22~182 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=03bb079d3f990d5b0a61f7c2e5c8e5b18df1f016;p=icu ICU-21149 Adding a helper to allow simpler debugging of mappers. --- diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/DebugWriter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/DebugWriter.java new file mode 100644 index 00000000000..72994e1e0b3 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/DebugWriter.java @@ -0,0 +1,138 @@ +package org.unicode.icu.tool.cldrtoicu; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.function.Function; +import java.util.logging.Formatter; +import java.util.logging.LogManager; +import java.util.logging.LogRecord; + +import org.unicode.cldr.api.CldrDataSupplier; + +import com.google.common.base.CharMatcher; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.common.io.CharStreams; + +/** + * Utility to allow any "mapper" class to trivially support a main method and useful + * logging behaviour to help avoid the need for ad-hoc logging via {@code System.out}. + * + *

In most cases a mapping class can just have a {@code main} method like: + *

{@code
+ *   // Arguments:  []
+ *   public static void main(String[] args) throws IOException {
+ *       DebugWriter.writeForDebugging(args, MapperClass::process);
+ *   }
+ * }
+ * + *

Note however that running this still requires that the {@code CLDR_DIR} system + * property is set. + */ +public final class DebugWriter { + private static final String PACKAGE_ROOT = "org.unicode.icu.tool.cldrtoicu"; + private static final String PACKAGE_PREFIX = PACKAGE_ROOT + "."; + + /** + * Writes the IcuData generated by the given function using the default {@code CLDR_DIR} + * system property. + * + *

This is a helper method to make it easy for each mapper to have its own main + * method for debugging, and it should not be used directly by {@code LdmlConverter}. + */ + public static void writeForDebugging(String[] args, Function fn) + throws IOException { + writeMultipleForDebugging(args, src -> ImmutableList.of(fn.apply(src))); + } + + /** + * Writes the IcuData generated by the given function using the default {@code CLDR_DIR} + * system property. + * + *

This is a helper method to make it easy for each mapper to have its own main + * method for debugging, and it should not be used directly by {@code LdmlConverter}. + */ + public static void writeMultipleForDebugging( + String[] args, Function> fn) + throws IOException { + String cldrPath = System.getProperty("CLDR_DIR", System.getenv("CLDR_DIR")); + checkState(cldrPath != null, + "required 'CLDR_DIR' system property or environment variable not set"); + checkArgument(args.length >= 1, "expected output directory"); + Path outDir = Paths.get(args[0]); + String logLevel = (args.length == 2) ? args[1] : "OFF"; + + String loggerConfig = Joiner.on("\n").join( + "handlers = java.util.logging.ConsoleHandler", + "java.util.logging.ConsoleHandler.level = ALL", + "java.util.logging.ConsoleHandler.encoding = UTF-8", + "java.util.logging.ConsoleHandler.formatter = " + LogFormatter.class.getName(), + "", + PACKAGE_ROOT + ".level = " + logLevel); + LogManager.getLogManager() + .readConfiguration(new ByteArrayInputStream(loggerConfig.getBytes(UTF_8))); + + Files.createDirectories(outDir); + CldrDataSupplier src = CldrDataSupplier.forCldrFilesIn(Paths.get(cldrPath)); + ImmutableList header = readLinesFromResource("/ldml2icu_header.txt"); + for (IcuData icuData : fn.apply(src)) { + IcuTextWriter.writeToFile(icuData, outDir, header, true); + } + } + + private static ImmutableList readLinesFromResource(String name) { + try (InputStream in = DebugWriter.class.getResourceAsStream(name)) { + return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8))); + } catch (IOException e) { + throw new RuntimeException("cannot read resource: " + name, e); + } + } + + // Format is "#: " since this is a fairly + // small code base and keeping logs concise is helpful. + // This is only public because it has to be reflectively instantiated. + public static final class LogFormatter extends Formatter { + private static final CharMatcher SEPARATORS = CharMatcher.anyOf("$#"); + + @Override + public String format(LogRecord logRecord) { + String message = String.format("%s#%s: %s\n", + localClassName(logRecord.getSourceClassName()), + plainMethodName(logRecord.getSourceMethodName()), + logRecord.getMessage()); + if (logRecord.getThrown() != null) { + message += logRecord.getThrown() + "\n"; + } + return message; + } + + // Since everything is in the same base package, elide that (if present). + private String localClassName(String className) { + return className.startsWith(PACKAGE_PREFIX) + ? className.substring(className.lastIndexOf(".") + 1) + : className; + } + + // Trim method names to remove things like lambda prefixes and anonymous + // class suffixes (these add noise to every log and aren't that useful). + private String plainMethodName(String methodName) { + if (methodName.startsWith("lambda$")) { + methodName = methodName.substring("lambda$".length()); + } + if (SEPARATORS.matchesAnyOf(methodName)) { + methodName = methodName.substring(0, SEPARATORS.indexIn(methodName)); + } + return methodName; + } + } +}