+++ /dev/null
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-/*
-**********************************************************************
-* Copyright (c) 2002-2010, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Mark Davis
-**********************************************************************
-*/
-package com.ibm.icu.dev.test.cldr;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-import org.junit.Ignore;
-import org.junit.Test;
-import org.xml.sax.Attributes;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.text.DateFormat;
-import com.ibm.icu.text.NumberFormat;
-import com.ibm.icu.text.SimpleDateFormat;
-import com.ibm.icu.text.UTF16;
-import com.ibm.icu.text.UnicodeSet;
-import com.ibm.icu.util.Currency;
-import com.ibm.icu.util.TimeZone;
-import com.ibm.icu.util.ULocale;
-
-/**
- * This is a test file that takes in the CLDR XML test files and test against
- * ICU4J. This test file is used to verify that ICU4J is implemented correctly.
- * As it stands, the test generates all the errors to the console by logging it.
- * The logging is only possible if "-v" or verbose is set as an argument.
- * This will allow users to know what problems occurred within CLDR and ICU.
- * Collator was disabled in this test file and therefore will be skipped.
- *
- * Instructions:
- * 1) In order for this to work correctly, you must download the latest CLDR data
- * in the form of XML. You must also set the CLDR directory using:
- * -DCLDR_DIRECTORY=<top level of cldr>
- * 2) You may also consider increasing the memory using -Xmx512m.
- * 3) For speed purposes, you may consider creating a temporary directory for the
- * CLDR cache using:
- * -DCLDR_DTD_CACHE=<cldr cache directory>
- * 4) You may use other environment variables to narrow down your tests using:
- * -DXML_MATCH=".*"
- * -DXML_MATCH="de.*" (or whatever regex you want) to just test certain locales.
- * -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc.
- * -DZONE_MATCH="(?!America/Argentina).*"
- * -DZONE_MATCH=".*Moscow.*" (to only test certain zones)
-
- * @author medavis
- * @author John Huan Vu (johnvu@us.ibm.com)
- */
-public class TestCLDRVsICU extends TestFmwk {
- static final boolean DEBUG = false;
-
- // ULocale uLocale = ULocale.ENGLISH;
- // Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
- // static PrintWriter log;
- SAXParser SAX;
- static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;
- static String CLDR_DIRECTORY;
- static {
- System.out.println();
- LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
- TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
- ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*");
-
- // CLDR_DIRECTORY is where all the CLDR XML test files are located
- // WARNING: THIS IS TEMPORARY DIRECTORY UNTIL THE FILES ARE STRAIGHTENED OUT
- CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");
- System.out.println();
- }
-
- private static Matcher getEnvironmentRegex(String key, String defaultValue) {
- return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");
- }
-
- private static String getEnvironmentString(String key, String defaultValue) {
- String temp = System.getProperty(key);
- if (temp == null)
- temp = defaultValue;
- else
- System.out.print("-D" + key + "=\"" + temp + "\" ");
- return temp;
- }
-
- Set allLocales = new TreeSet();
-
- // TODO(junit): seems to be failing with missing locales - maybe rewrite as parameterized
- @Ignore
- @Test
- public void TestFiles() throws SAXException, IOException {
- // only get ICU's locales
- Set s = new TreeSet();
- addLocales(NumberFormat.getAvailableULocales(), s);
- addLocales(DateFormat.getAvailableULocales(), s);
-
- // johnvu: Collator was originally disabled
- // addLocales(Collator.getAvailableULocales(), s);
-
- // filter, to make tracking down bugs easier
- for (Iterator it = s.iterator(); it.hasNext();) {
- String locale = (String) it.next();
- if (!LOCALE_MATCH.reset(locale).matches())
- continue;
- _test(locale);
- }
- }
-
- public void addLocales(ULocale[] list, Collection s) {
- for (int i = 0; i < list.length; ++i) {
- allLocales.add(list[i].toString());
- s.add(list[i].getLanguage());
- }
- }
-
- public String getLanguage(ULocale uLocale) {
- String result = uLocale.getLanguage();
- String script = uLocale.getScript();
- if (script.length() != 0)
- result += "_" + script;
- return result;
- }
-
- private void _test(String localeName) throws SAXException, IOException {
- // uLocale = new ULocale(localeName);
- // oLocale = uLocale.toLocale();
-
- File f = new File(CLDR_DIRECTORY, "test/" + localeName + ".xml");
- logln("Testing " + f.getCanonicalPath());
- SAX.parse(f, DEFAULT_HANDLER);
- }
-
- private static class ToHex {
- public String transliterate(String in) {
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < in.length(); ++i) {
- char c = in.charAt(i);
- sb.append("\\u");
- if (c < 1000) {
- sb.append('0');
- if (c < 100) {
- sb.append('0');
- if (c < 10) {
- sb.append('0');
- }
- }
- }
- sb.append(Integer.toHexString((int) c));
- }
- return sb.toString();
- }
- }
-
- // static Transliterator toUnicode = Transliterator.getInstance("any-hex");
- private static final ToHex toUnicode = new ToHex();
-
- static public String showString(String in) {
- return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";
- }
-
- // ============ SAX Handler Infrastructure ============
-
- abstract public class Handler {
- Map settings = new TreeMap();
- String name;
- List currentLocales = new ArrayList();
- int failures = 0;
-
- void setName(String name) {
- this.name = name;
- }
-
- void set(String attributeName, String attributeValue) {
- // if (DEBUG) logln(attributeName + " => " + attributeValue);
- settings.put(attributeName, attributeValue);
- }
-
- void checkResult(String value) {
- if (settings.get("draft").equals("unconfirmed") || settings.get("draft").equals("provisional")) {
- return; // skip draft
- }
- ULocale ul = new ULocale("xx");
- try {
- for (int i = 0; i < currentLocales.size(); ++i) {
- ul = (ULocale) currentLocales.get(i);
- // loglnSAX(" Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
- handleResult(ul, value);
- if (failures != 0) {
- errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH)
- + ")");
- failures = 0;
- }
- }
- } catch (Exception e) {
- StringWriter sw = new StringWriter();
- PrintWriter pw = new PrintWriter(sw);
- e.printStackTrace(pw);
- pw.flush();
- errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());
- }
- }
-
- public void loglnSAX(String message) {
- String temp = message + "\t[" + name;
- for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
- String attributeName = (String) it.next();
- String attributeValue = (String) settings.get(attributeName);
- temp += " " + attributeName + "=<" + attributeValue + ">";
- }
- logln(temp + "]");
- }
-
- int lookupValue(Object x, Object[] list) {
- for (int i = 0; i < list.length; ++i) {
- if (x.equals(list[i]))
- return i;
- }
- loglnSAX("Unknown String: " + x);
- return -1;
- }
-
- abstract void handleResult(ULocale currentLocale, String value) throws Exception;
-
- /**
- * @param attributes
- */
- public void setAttributes(Attributes attributes) {
- String localeList = attributes.getValue("locales");
- String[] currentLocaleString = new String[50];
- com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);
- currentLocales.clear();
- for (int i = 0; i < currentLocaleString.length; ++i) {
- if (currentLocaleString[i].length() == 0)
- continue;
- if (allLocales.contains("")) {
- logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);
- continue;
- }
- currentLocales.add(new ULocale(currentLocaleString[i]));
- }
- if (DEBUG)
- logln("Setting locales: " + currentLocales);
- }
- }
-
- public Handler getHandler(String name, Attributes attributes) {
- if (DEBUG)
- logln("Creating Handler: " + name);
- Handler result = (Handler) RegisteredHandlers.get(name);
- if (result == null)
- logln("Unexpected test type: " + name);
- else {
- result.setAttributes(attributes);
- }
- return result;
- }
-
- public void addHandler(String name, Handler handler) {
- if (!TEST_MATCH.reset(name).matches())
- handler = new NullHandler();
- handler.setName(name);
- RegisteredHandlers.put(name, handler);
- }
-
- Map RegisteredHandlers = new HashMap();
-
- class NullHandler extends Handler {
- void handleResult(ULocale currentLocale, String value) throws Exception {
- }
- }
-
- // ============ Statics for Date/Number Support ============
-
- static TimeZone utc = TimeZone.getTimeZone("GMT");
- static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
- {
- iso.setTimeZone(utc);
- }
-
- static int[] DateFormatValues = { -1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL };
-
- // The following are different data format types that are part of the parameters in CLDR
- static String[] DateFormatNames = { "none", "short", "medium", "long", "full" };
-
- // The following are different number types that are part of the parameters in CLDR
- static String[] NumberNames = { "standard", "integer", "decimal", "percent", "scientific", "GBP" };
-
-
- // ============ Handler for Collation ============
- static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
-
- static String remove(String in, UnicodeSet toRemove) {
- int cp;
- StringBuffer result = new StringBuffer();
- for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
- cp = UTF16.charAt(in, i);
- if (!toRemove.contains(cp))
- UTF16.append(result, cp);
- }
- return result.toString();
- }
-
- {
- // johnvu: Collator was originally disabled
- // TODO (dougfelt) move this test
- /*
- addHandler("collation", new Handler() {
- public void handleResult(ULocale currentLocale, String value) {
- Collator col = Collator.getInstance(currentLocale);
- String lastLine = "";
- int count = 0;
- for (int pos = 0; pos < value.length();) {
- int nextPos = value.indexOf('\n', pos);
- if (nextPos < 0)
- nextPos = value.length();
- String line = value.substring(pos, nextPos);
- line = remove(line, controlsAndSpace); HACK for SAX
- if (line.trim().length() != 0) { HACK for SAX
- int comp = col.compare(lastLine, line);
- if (comp > 0) {
- failures++;
- errln("\tLine " + (count + 1) + "\tFailure: "
- + showString(lastLine) + " should be leq "
- + showString(line));
- } else if (DEBUG) {
- logln("OK: " + line);
- }
- lastLine = line;
- }
- pos = nextPos + 1;
- count++;
- }
- }
- });
- */
-
- // ============ Handler for Numbers ============
- addHandler("number", new Handler() {
- public void handleResult(ULocale locale, String result) {
- NumberFormat nf = null;
- double v = Double.NaN;
- for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
- String attributeName = (String) it.next();
- String attributeValue = (String) settings.get(attributeName);
-
- // Checks if the attribute name is a draft and whether
- // or not it has been approved / contributed by CLDR yet
- // otherwise, skips it because it is most likely rejected by ICU
- if (attributeName.equals("draft")) {
- if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
- break;
- }
- continue;
- }
-
- // Update the value to be checked
- if (attributeName.equals("input")) {
- v = Double.parseDouble(attributeValue);
- continue;
- }
-
- // At this point, it must be a numberType
- int index = lookupValue(attributeValue, NumberNames);
-
- if (DEBUG)
- logln("Getting number format for " + locale);
- switch (index) {
- case 0:
- nf = NumberFormat.getInstance(locale);
- break;
- case 1:
- nf = NumberFormat.getIntegerInstance(locale);
- break;
- case 2:
- nf = NumberFormat.getNumberInstance(locale);
- break;
- case 3:
- nf = NumberFormat.getPercentInstance(locale);
- break;
- case 4:
- nf = NumberFormat.getScientificInstance(locale);
- break;
- default:
- nf = NumberFormat.getCurrencyInstance(locale);
- nf.setCurrency(Currency.getInstance(attributeValue));
- break;
- }
- String temp = nf.format(v).trim();
- result = result.trim(); // HACK because of SAX
- if (!temp.equals(result)) {
- logln("Number: Locale: " + locale +
- "\n\tType: " + attributeValue +
- "\n\tDraft: " + settings.get("draft") +
- "\n\tCLDR: <" + result + ">" +
- "\n\tICU: <" + temp + ">");
- }
-
- }
- }
- });
-
- // ============ Handler for Dates ============
- addHandler("date", new Handler() {
- public void handleResult(ULocale locale, String result) throws ParseException {
- int dateFormat = 0;
- int timeFormat = 0;
- Date date = new Date();
- boolean approved = true;
-
- for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
- String attributeName = (String) it.next();
- String attributeValue = (String) settings.get(attributeName);
-
- // Checks if the attribute name is a draft and whether
- // or not it has been approved / contributed by CLDR yet
- // otherwise, skips it because it is most likely rejected by ICU
- if (attributeName.equals("draft")) {
- if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
- approved = false;
- break;
- }
- continue;
- }
-
- // Update the value to be checked
- if (attributeName.equals("input")) {
- date = iso.parse(attributeValue);
- continue;
- }
- // At this point, it must be either dateType or timeType
- int index = lookupValue(attributeValue, DateFormatNames);
- if (attributeName.equals("dateType"))
- dateFormat = index;
- else if (attributeName.equals("timeType"))
- timeFormat = index;
-
- }
-
- // The attribute value must be approved in order to be checked,
- // if it hasn't been approved, it shouldn't be checked if it
- // matches with ICU
- if (approved) {
- SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);
- dt.setTimeZone(utc);
- String temp = dt.format(date).trim();
- result = result.trim(); // HACK because of SAX
- if (!temp.equals(result)) {
- logln("DateTime: Locale: " + locale +
- "\n\tDate: " + DateFormatNames[dateFormat] +
- "\n\tTime: " + DateFormatNames[timeFormat] +
- "\n\tDraft: " + settings.get("draft") +
- "\n\tCLDR: <" + result + "> " +
- "\n\tICU: <" + temp + ">");
- }
- }
- }
-
- private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {
- if (DEBUG)
- logln("Getting date/time format for " + locale);
- if (DEBUG && "ar_EG".equals(locale.toString())) {
- logln("debug here");
- }
- DateFormat dt;
- if (dateFormat == 0) {
- dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);
- if (DEBUG)
- System.out.print("getTimeInstance");
- } else if (timeFormat == 0) {
- dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);
- if (DEBUG)
- System.out.print("getDateInstance");
- } else {
- dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat],
- locale);
- if (DEBUG)
- System.out.print("getDateTimeInstance");
- }
- if (DEBUG)
- logln("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat) dt).toPattern());
- return (SimpleDateFormat) dt;
- }
- });
-
- // ============ Handler for Zones ============
- addHandler("zoneFields", new Handler() {
- String date = "";
- String zone = "";
- String parse = "";
- String pattern = "";
-
- public void handleResult(ULocale locale, String result) throws ParseException {
- for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
- String attributeName = (String) it.next();
- String attributeValue = (String) settings.get(attributeName);
- if (attributeName.equals("date")) {
- date = attributeValue;
- } else if (attributeName.equals("field")) {
- pattern = attributeValue;
- } else if (attributeName.equals("zone")) {
- zone = attributeValue;
- } else if (attributeName.equals("parse")) {
- parse = attributeValue;
- }
- }
-
- if (!ZONE_MATCH.reset(zone).matches()) return;
- Date dateValue = iso.parse(date);
- SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
- field.setTimeZone(TimeZone.getTimeZone(zone));
- String temp = field.format(dateValue).trim();
- // SKIP PARSE FOR NOW
- result = result.trim(); // HACK because of SAX
- if (!temp.equals(result)) {
- temp = field.format(dateValue).trim(); // call again for debugging
- logln("Zone Format: Locale: " + locale
- + "\n\tZone: " + zone
- + "\n\tDate: " + date
- + "\n\tField: " + pattern
- + "\n\tParse: " + parse
- + "\n\tDraft: " + settings.get("draft")
- + "\n\tCLDR: <" + result
- + ">\n\tICU: <" + temp + ">");
- }
- }
- });
- }
-
- // ============ Gorp for SAX ============
-
- {
- try {
- SAXParserFactory factory = SAXParserFactory.newInstance();
- factory.setValidating(true);
- SAX = factory.newSAXParser();
- } catch (Exception e) {
- throw new IllegalArgumentException("SAXParserFacotry was unable to start.");
- }
- }
-
- DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
- static final boolean DEBUG = false;
- StringBuffer lastChars = new StringBuffer();
- // boolean justPopped = false;
- Handler handler;
-
- public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
- // data.put(new ContextStack(contextStack), lastChars);
- // lastChars = "";
- try {
- if (qName.equals("cldrTest")) {
- // skip
- } else if (qName.equals("result") && handler != null) {
- for (int i = 0; i < attributes.getLength(); ++i) {
- handler.set(attributes.getQName(i), attributes.getValue(i));
- }
- } else {
- handler = getHandler(qName, attributes);
- // handler.set("locale", uLocale.toString());
- }
- // if (DEBUG) logln("startElement:\t" + contextStack);
- // justPopped = false;
- } catch (RuntimeException e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- public void endElement(String uri, String localName, String qName) throws SAXException {
- try {
- // if (DEBUG) logln("endElement:\t" + contextStack);
- if (qName.equals("result") && handler != null) {
- handler.checkResult(lastChars.toString());
- } else if (qName.length() != 0) {
- // logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
- }
- lastChars.setLength(0);
- // justPopped = true;
- } catch (RuntimeException e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- // Have to hack around the fact that the character data might be in pieces
- public void characters(char[] ch, int start, int length) throws SAXException {
- try {
- String value = new String(ch, start, length);
- if (DEBUG)
- logln("characters:\t" + value);
- lastChars.append(value);
- // justPopped = false;
- } catch (RuntimeException e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- // just for debugging
-
- public void notationDecl(String name, String publicId, String systemId) throws SAXException {
- logln("notationDecl: " + name + ", " + publicId + ", " + systemId);
- }
-
- public void processingInstruction(String target, String data) throws SAXException {
- logln("processingInstruction: " + target + ", " + data);
- }
-
- public void skippedEntity(String name) throws SAXException {
- logln("skippedEntity: " + name);
- }
-
- public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName)
- throws SAXException {
- logln("unparsedEntityDecl: " + name + ", " + publicId + ", " + systemId + ", " + notationName);
- }
- };
-}