ICU-8856 text file line endings fixed for icu4j

author Abhinav Gupta <mail@abhinavg.net>

Wed, 28 Sep 2011 21:16:24 +0000 (21:16 +0000)

committer Abhinav Gupta <mail@abhinavg.net>

Wed, 28 Sep 2011 21:16:24 +0000 (21:16 +0000)
author Abhinav Gupta <mail@abhinavg.net>
Wed, 28 Sep 2011 21:16:24 +0000 (21:16 +0000)
committer Abhinav Gupta <mail@abhinavg.net>
Wed, 28 Sep 2011 21:16:24 +0000 (21:16 +0000)
diff --git a/.gitattributes b/.gitattributes

index 2d4fed26a729c99af7e4a231b591dfc5fe8add0c..e26f5860ef498bfcfe7523b48dccae16b0c69336 100644 (file)
--- a/.gitattributes
+++ b/.gitattributes
@@ -701,10 +701,6 @@ icu4j/main/tests/translit/.externalToolBuilders/copy-translit-test-data.launch -
  icu4j/main/tests/translit/.settings/org.eclipse.core.resources.prefs -text
  icu4j/main/tests/translit/.settings/org.eclipse.jdt.core.prefs -text
  icu4j/main/tests/translit/.settings/org.eclipse.jdt.ui.prefs -text
-icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java -text
-icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java -text
-icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java -text
-icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java -text
  icu4j/main/tests/translit/translit-tests-build.launch -text
  icu4j/manifest.stub -text
  icu4j/tools/build/.settings/org.eclipse.core.resources.prefs -text
diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java

index 78597117b37ad6ba3c9c1e256304ad960f71fd26..737bf659aab1d41c701af1aafce5ce6e9ced2e97 100644 (file)
--- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java
+++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java
@@ -1 +1,118 @@
-/*\r *******************************************************************************\r * Copyright (C) 2011, International Business Machines Corporation and         *\r * others. All Rights Reserved.                                                *\r *******************************************************************************\r */\rpackage com.ibm.icu.dev.test.translit;\r\rimport java.util.List;\r\rimport com.ibm.icu.dev.test.TestFmwk;\rimport com.ibm.icu.dev.test.util.ICUPropertyFactory;\rimport com.ibm.icu.dev.test.util.UnicodeProperty;\rimport com.ibm.icu.dev.test.util.UnicodeProperty.Factory;\rimport com.ibm.icu.dev.test.util.UnicodePropertySymbolTable;\rimport com.ibm.icu.text.UnicodeSet;\r\r/**\r * @author markdavis\r *\r */\rpublic class TestUnicodeProperty extends TestFmwk{\r    public static void main(String[] args) {\r        new TestUnicodeProperty().run(args);\r    }\r    static final UnicodeSet casedLetter = new UnicodeSet("[:gc=cased letter:]");\r    static final UnicodeSet letter = new UnicodeSet("[:gc=L:]");\r\r\r    public void TestBasic() {\r        Factory factory = ICUPropertyFactory.make();\r        UnicodeProperty property = factory.getProperty("gc");\r        List values = property.getAvailableValues();\r        assertTrue("Values contain GC values", values.contains("Unassigned"));\r        final UnicodeSet lu = property.getSet("Lu");\r        if (!assertTrue("Gc=L contains 'A'", lu.contains('A'))) {\r            errln("Contents:\t" + lu.complement().complement().toPattern(false));\r        }\r    }\r\r    public void TestSymbolTable() {\r        Factory factory = ICUPropertyFactory.make();\r        UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);\r        UnicodeSet.setDefaultXSymbolTable(upst);\r        try {\r            final UnicodeSet luSet = new UnicodeSet("[:gc=L:]");\r            assertTrue("Gc=L contains 'A'", luSet.contains('A'));\r            assertTrue("Gc=L contains 'Z'", luSet.contains('Z'));\r     
       assertFalse("Gc=L contains 'a'", luSet.contains('1'));\r            UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");\r            assertEquals("gc=lc are equal", casedLetter, casedLetter2);\r        } finally {\r            // restore the world\r            UnicodeSet.setDefaultXSymbolTable(null);\r        }\r    }\r\r    public void TestSymbolTable2() {\r        Factory factory = new MyUnicodePropertyFactory();\r        UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);\r        UnicodeSet.setDefaultXSymbolTable(upst);\r        try {\r            final UnicodeSet luSet = new UnicodeSet("[:gc=L:]");\r            assertFalse("Gc=L contains 'A'", luSet.contains('A'));\r            if (!assertTrue("Gc=L contains 'Z'", luSet.contains('Z'))) {\r                errln("Contents:\t" + luSet.complement().complement().toPattern(false));\r            }\r            assertFalse("Gc=L contains 'a'", luSet.contains('1'));\r            UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");\r            assertNotEquals("gc=lc should not be equal", casedLetter, casedLetter2);\r        } finally {\r            // restore the world\r            UnicodeSet.setDefaultXSymbolTable(null);\r        }\r    }\r\r\r    /**\r     * For testing, override to set A-M to Cn.\r     */\r    static class MyUnicodeGCProperty extends UnicodeProperty.SimpleProperty {\r        UnicodeProperty icuProperty = ICUPropertyFactory.make().getProperty("Gc");\r        {\r            setName(icuProperty.getName());\r            setType(icuProperty.getType());\r        }\r        @Override\r        protected String _getValue(int codepoint) {\r            if (codepoint >= 'A' && codepoint <= 'M') {\r                return "Unassigned";\r            } else {\r                return icuProperty.getValue(codepoint);\r            }\r        }\r        @Override\r        protected List _getValueAliases(String valueAlias, List result) {\r            return 
icuProperty.getValueAliases(valueAlias, result);\r        }\r        @Override\r        public List _getNameAliases(List result) {\r            return icuProperty.getNameAliases();\r        }\r    }\r\r    /**\r     * For testing, override to set A-Z to Cn.\r     */\r    static class MyUnicodePropertyFactory extends ICUPropertyFactory {\r        private MyUnicodePropertyFactory() {\r            add(new MyUnicodeGCProperty());\r        }\r    }\r\r    static class MyUnicodePropertySymbolTable extends UnicodePropertySymbolTable {\r        public MyUnicodePropertySymbolTable(Factory factory) {\r            super(factory);\r        }\r    }\r}\r
-\ No newline at end of file
+/* *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and         *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.translit;
+
+import java.util.List;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.test.util.ICUPropertyFactory;
+import com.ibm.icu.dev.test.util.UnicodeProperty;
+import com.ibm.icu.dev.test.util.UnicodeProperty.Factory;
+import com.ibm.icu.dev.test.util.UnicodePropertySymbolTable;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author markdavis
+ *
+ */
+public class TestUnicodeProperty extends TestFmwk{
+    public static void main(String[] args) {
+        new TestUnicodeProperty().run(args);
+    }
+    static final UnicodeSet casedLetter = new UnicodeSet("[:gc=cased letter:]");
+    static final UnicodeSet letter = new UnicodeSet("[:gc=L:]");
+
+
+    public void TestBasic() {
+        Factory factory = ICUPropertyFactory.make();
+        UnicodeProperty property = factory.getProperty("gc");
+        List values = property.getAvailableValues();
+        assertTrue("Values contain GC values", values.contains("Unassigned"));
+        final UnicodeSet lu = property.getSet("Lu");
+        if (!assertTrue("Gc=L contains 'A'", lu.contains('A'))) {
+            errln("Contents:\t" + lu.complement().complement().toPattern(false));
+        }
+    }
+
+    public void TestSymbolTable() {
+        Factory factory = ICUPropertyFactory.make();
+        UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);
+        UnicodeSet.setDefaultXSymbolTable(upst);
+        try {
+            final UnicodeSet luSet = new UnicodeSet("[:gc=L:]");
+            assertTrue("Gc=L contains 'A'", luSet.contains('A'));
+            assertTrue("Gc=L contains 'Z'", luSet.contains('Z'));
+            assertFalse("Gc=L contains 'a'", luSet.contains('1'));
+            UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");
+            assertEquals("gc=lc are equal", casedLetter, casedLetter2);
+        } finally {
+            // restore the world
+            UnicodeSet.setDefaultXSymbolTable(null);
+        }
+    }
+
+    public void TestSymbolTable2() {
+        Factory factory = new MyUnicodePropertyFactory();
+        UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);
+        UnicodeSet.setDefaultXSymbolTable(upst);
+        try {
+            final UnicodeSet luSet = new UnicodeSet("[:gc=L:]");
+            assertFalse("Gc=L contains 'A'", luSet.contains('A'));
+            if (!assertTrue("Gc=L contains 'Z'", luSet.contains('Z'))) {
+                errln("Contents:\t" + luSet.complement().complement().toPattern(false));
+            }
+            assertFalse("Gc=L contains 'a'", luSet.contains('1'));
+            UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");
+            assertNotEquals("gc=lc should not be equal", casedLetter, casedLetter2);
+        } finally {
+            // restore the world
+            UnicodeSet.setDefaultXSymbolTable(null);
+        }
+    }
+
+
+    /**
+     * For testing, override to set A-M to Cn.
+     */
+    static class MyUnicodeGCProperty extends UnicodeProperty.SimpleProperty {
+        UnicodeProperty icuProperty = ICUPropertyFactory.make().getProperty("Gc");
+        {
+            setName(icuProperty.getName());
+            setType(icuProperty.getType());
+        }
+        @Override
+        protected String _getValue(int codepoint) {
+            if (codepoint >= 'A' && codepoint <= 'M') {
+                return "Unassigned";
+            } else {
+                return icuProperty.getValue(codepoint);
+            }
+        }
+        @Override
+        protected List _getValueAliases(String valueAlias, List result) {
+            return icuProperty.getValueAliases(valueAlias, result);
+        }
+        @Override
+        public List _getNameAliases(List result) {
+            return icuProperty.getNameAliases();
+        }
+    }
+
+    /**
+     * For testing, a factory that adds MyUnicodeGCProperty, which sets A-M to Cn.
+     */
+    static class MyUnicodePropertyFactory extends ICUPropertyFactory {
+        private MyUnicodePropertyFactory() {
+            add(new MyUnicodeGCProperty());
+        }
+    }
+
+    static class MyUnicodePropertySymbolTable extends UnicodePropertySymbolTable {
+        public MyUnicodePropertySymbolTable(Factory factory) {
+            super(factory);
+        }
+    }
+}
diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java

index dc803bfe6b3db2f6953c0795d33e8fa2191c2cf7..461e5f529d432513d05aa835622b35d84a03eb0e 100644 (file)
--- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java
+++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java
@@ -1 +1,54 @@
-/*\r *******************************************************************************\r * Copyright (C) 2011, International Business Machines Corporation and         *\r * others. All Rights Reserved.                                                *\r *******************************************************************************\r */\rpackage com.ibm.icu.dev.test.util;\r\rimport com.ibm.icu.dev.test.util.UnicodeTransform.Type;\rimport com.ibm.icu.lang.UCharacter;\rimport com.ibm.icu.text.Normalizer2;\rimport com.ibm.icu.text.Normalizer2.Mode;\r\r/**\r * @author markdavis\r *\r */\rpublic class IcuUnicodeNormalizerFactory implements UnicodeTransform.Factory {\r\r    public UnicodeTransform getInstance(Type type) {\r        switch (type) {\r        case NFC: case NFKC:\r            return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type.toString(), Mode.COMPOSE));\r        case NFD: case NFKD:\r            return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type == Type.NFD ? "NFC" : "NFKC", Mode.DECOMPOSE));\r        case CASEFOLD:\r            return new CaseFolder();\r        default:\r            throw new IllegalArgumentException();\r        }\r    }\r\r    private static class CaseFolder extends UnicodeTransform {\r        @Override\r        public String transform(String source) {\r            return UCharacter.foldCase(source.toString(), true);\r        }\r    }\r\r    private static class IcuUnicodeNormalizer extends UnicodeTransform {\r        private Normalizer2 normalizer;\r\r        private IcuUnicodeNormalizer(Normalizer2 normalizer) {\r            this.normalizer = normalizer;\r        }\r\r        public String transform(String src) {\r            return normalizer.normalize(src);\r        }\r\r        public boolean isTransformed(String s) {\r            return normalizer.isNormalized(s);\r        }\r    }\r}\r
-\ No newline at end of file
+/* *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and         *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.util;
+
+import com.ibm.icu.dev.test.util.UnicodeTransform.Type;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.Normalizer2;
+import com.ibm.icu.text.Normalizer2.Mode;
+
+/**
+ * @author markdavis
+ *
+ */
+public class IcuUnicodeNormalizerFactory implements UnicodeTransform.Factory {
+
+    public UnicodeTransform getInstance(Type type) {
+        switch (type) {
+        case NFC: case NFKC:
+            return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type.toString(), Mode.COMPOSE));
+        case NFD: case NFKD:
+            return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type == Type.NFD ? "NFC" : "NFKC", Mode.DECOMPOSE));
+        case CASEFOLD:
+            return new CaseFolder();
+        default:
+            throw new IllegalArgumentException();
+        }
+    }
+
+    private static class CaseFolder extends UnicodeTransform {
+        @Override
+        public String transform(String source) {
+            return UCharacter.foldCase(source.toString(), true);
+        }
+    }
+
+    private static class IcuUnicodeNormalizer extends UnicodeTransform {
+        private Normalizer2 normalizer;
+
+        private IcuUnicodeNormalizer(Normalizer2 normalizer) {
+            this.normalizer = normalizer;
+        }
+
+        public String transform(String src) {
+            return normalizer.normalize(src);
+        }
+
+        public boolean isTransformed(String s) {
+            return normalizer.isNormalized(s);
+        }
+    }
+}
diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java

index 5e39d3889c481af951a55d601ed39dcc68ce1682..b709eda18a156725675a164db98b656da1d83d44 100644 (file)
--- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java
+++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java
@@ -1 +1,249 @@
-/*\r *******************************************************************************\r * Copyright (C) 1996-2011, Google, International Business Machines Corporation and    *\r * others. All Rights Reserved.                                                *\r *******************************************************************************\r */\rpackage com.ibm.icu.dev.test.util;\r\rimport java.util.Comparator;\rimport java.util.HashMap;\rimport java.util.List;\rimport java.util.Locale;\rimport java.util.Set;\r\rimport com.ibm.icu.dev.test.util.UnicodeProperty.PatternMatcher;\rimport com.ibm.icu.impl.UnicodeRegex;\rimport com.ibm.icu.text.UTF16;\rimport com.ibm.icu.text.UnicodeSet;\r\r/**\r * Allows for overriding the parsing of UnicodeSet property patterns.\r * <p>\r * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the\r * Unassigned characters (gc=Cn) are different than in ICU other than in ICU, you MUST call\r * {@code UnicodeProperty.ResetCacheProperties} afterwards. 
If you then call {@code UnicodeSet.setDefaultXSymbolTable}\r * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.\r * \r * @author markdavis\r */\rpublic class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable {\r    UnicodeRegex unicodeRegex;\r    final UnicodeProperty.Factory factory;\r\r    public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) {\r      unicodeRegex = new UnicodeRegex().setSymbolTable(this);\r      this.factory = factory;\r    }\r\r\r    //    public boolean applyPropertyAlias0(String propertyName,\r    //            String propertyValue, UnicodeSet result) {\r    //      if (!propertyName.contains("*")) {\r    //        return applyPropertyAlias(propertyName, propertyValue, result);\r    //      }\r    //      String[] propertyNames = propertyName.split("[*]");\r    //      for (int i = propertyNames.length - 1; i >= 0; ++i) {\r    //        String pname = propertyNames[i];\r    //        \r    //      }\r    //      return null;\r    //    }\r\r    public boolean applyPropertyAlias(String propertyName,\r            String propertyValue, UnicodeSet result) {\r      boolean status = false;\r      boolean invert = false;\r      int posNotEqual = propertyName.indexOf('\u2260');\r      int posColon = propertyName.indexOf(':');\r      if (posNotEqual >= 0 || posColon >= 0) {\r          if (posNotEqual < 0) posNotEqual = propertyName.length();\r          if (posColon < 0) posColon = propertyName.length();\r          int opPos = posNotEqual < posColon ? posNotEqual : posColon;\r          propertyValue = propertyValue.length() == 0 ? 
propertyName.substring(opPos+1) \r                  : propertyName.substring(opPos+1) + "=" + propertyValue;\r          propertyName = propertyName.substring(0,opPos);\r          if (posNotEqual < posColon) {\r              invert = true;\r          }\r      }\r      if (propertyName.endsWith("!")) {\r        propertyName = propertyName.substring(0, propertyName.length() - 1);\r        invert = !invert;\r      }\r      propertyValue = propertyValue.trim();\r      if (propertyValue.length() != 0) {\r        status = applyPropertyAlias0(propertyName, propertyValue, result);\r      } else {\r        try {\r          status = applyPropertyAlias0("gc", propertyName, result);\r        } catch (Exception e) {};\r        if (!status) {\r          try {\r            status = applyPropertyAlias0("sc", propertyName, result);\r          } catch (Exception e) {};\r          if (!status) {\r            try {\r              status = applyPropertyAlias0(propertyName, "Yes", result);\r            } catch (Exception e) {};\r            if (!status) {\r              status = applyPropertyAlias0(propertyName, "", result);\r            }\r          }\r        }\r      }\r      if (status && invert) {\r        result.complement();\r      }\r      return status;\r    }\r\r    static final HashMap<String,String[]> GC_REMAP = new HashMap();\r    {\r        GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" "));\r        GC_REMAP.put("other", GC_REMAP.get("c"));\r        \r        GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" "));\r        GC_REMAP.put("letter", GC_REMAP.get("l"));\r        \r        GC_REMAP.put("lc", "Ll Lt Lu".split(" "));\r        GC_REMAP.put("casedletter", GC_REMAP.get("lc"));\r        \r        GC_REMAP.put("m", "Mc Me Mn".split(" "));\r        GC_REMAP.put("mark", GC_REMAP.get("m"));\r        \r        GC_REMAP.put("n", "Nd Nl No".split(" "));\r        GC_REMAP.put("number", GC_REMAP.get("n"));\r        \r        GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" "));\r        
GC_REMAP.put("punctuation", GC_REMAP.get("p"));\r        GC_REMAP.put("punct", GC_REMAP.get("p"));\r        \r        GC_REMAP.put("s", "Sc Sk Sm So".split(" "));\r        GC_REMAP.put("symbol", GC_REMAP.get("s"));\r        \r        GC_REMAP.put("z", "Zl Zp Zs".split(" "));\r        GC_REMAP.put("separator", GC_REMAP.get("z"));\r    }\r    \r    public boolean applyPropertyAlias0(String propertyName,\r            String propertyValue, UnicodeSet result) {\r      result.clear();\r      UnicodeProperty prop = factory.getProperty(propertyName);\r      String canonicalName = prop.getName();\r      boolean isAge = UnicodeProperty.equalNames("Age", canonicalName);\r\r      // Hack for special GC values\r      if (canonicalName.equals("General_Category")) {\r          String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue));\r          if (parts != null) {\r              for (String part : parts) {\r                  prop.getSet(part, result);\r              }\r              return true;\r          }\r      }\r\r      PatternMatcher patternMatcher = null;\r      if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) {\r        String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1));\r        patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);\r      }\r      UnicodeProperty otherProperty = null;\r      boolean testCp = false;\r      if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) {\r        String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim();\r        if ("cp".equalsIgnoreCase(otherPropName)) {\r          testCp = true;\r        } else {\r          otherProperty = factory.getProperty(otherPropName);\r        }\r      }\r      if (prop != null) {\r        UnicodeSet set;\r        if (testCp) {\r          set = new UnicodeSet();\r          for (int i = 0; i <= 0x10FFFF; ++i) {\r 
           if (UnicodeProperty.equals(i, prop.getValue(i))) {\r              set.add(i);\r            }\r          }\r        } else if (otherProperty != null) {\r          set = new UnicodeSet();\r          for (int i = 0; i <= 0x10FFFF; ++i) {\r            String v1 = prop.getValue(i);\r            String v2 = otherProperty.getValue(i);\r            if (UnicodeProperty.equals(v1, v2)) {\r              set.add(i);\r            }\r          }\r        } else if (patternMatcher == null) {\r          if (!isValid(prop, propertyValue)) {\r            throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName\r                    + " must be in "\r                    + prop.getAvailableValues() + " or in " + prop.getValueAliases());\r          }\r          if (isAge) {\r            set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq));\r          } else {\r            set = prop.getSet(propertyValue);\r          }\r        } else if (isAge) {\r          set = new UnicodeSet();\r          List<String> values = prop.getAvailableValues();\r          for (String value : values) {\r            if (patternMatcher.matches(value)) {\r              for (String other : values) {\r                if (other.compareTo(value) <= 0) {\r                  set.addAll(prop.getSet(other));\r                }\r              }\r            }\r          }\r        } else {\r          set = prop.getSet(patternMatcher);\r        }\r        result.addAll(set);\r        return true;\r      }\r      throw new IllegalArgumentException("Illegal property: " + propertyName);\r    }\r\r    \r\r    private boolean isValid(UnicodeProperty prop, String propertyValue) {\r//      if (prop.getName().equals("General_Category")) {\r//        if (propertyValue)\r//      }\r      return prop.isValidValue(propertyValue);\r    }\r\r    public enum Relation {less, leq, equal, geq, greater}\r\r    public static class ComparisonMatcher implements 
PatternMatcher {\r        Relation relation;\r        static Comparator comparator = new UTF16.StringComparator(true, false,0);\r\r        String pattern;\r\r        public ComparisonMatcher(String pattern, Relation comparator) {\r          this.relation = comparator;\r          this.pattern = pattern;\r        }\r\r        public boolean matches(Object value) {\r          int comp = comparator.compare(pattern, value.toString());\r          switch (relation) {\r          case less: return comp < 0;\r          case leq: return comp <= 0;\r          default: return comp == 0;\r          case geq: return comp >= 0;\r          case greater: return comp > 0;\r          }\r        }\r\r        public PatternMatcher set(String pattern) {\r          this.pattern = pattern;\r          return this;\r        }\r      }\r  }
-\ No newline at end of file
+/* *******************************************************************************
+ * Copyright (C) 1996-2011, Google, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.util;
+
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+
+import com.ibm.icu.dev.test.util.UnicodeProperty.PatternMatcher;
+import com.ibm.icu.impl.UnicodeRegex;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Allows for overriding the parsing of UnicodeSet property patterns.
+ * <p>
+ * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the
+ * Unassigned characters (gc=Cn) differ from those in ICU, you MUST call
+ * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
+ * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
+ * 
+ * @author markdavis
+ */
+public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable {
+    UnicodeRegex unicodeRegex;
+    final UnicodeProperty.Factory factory;
+
+    public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) {
+      unicodeRegex = new UnicodeRegex().setSymbolTable(this);
+      this.factory = factory;
+    }
+
+
+    //    public boolean applyPropertyAlias0(String propertyName,
+    //            String propertyValue, UnicodeSet result) {
+    //      if (!propertyName.contains("*")) {
+    //        return applyPropertyAlias(propertyName, propertyValue, result);
+    //      }
+    //      String[] propertyNames = propertyName.split("[*]");
+    //      for (int i = propertyNames.length - 1; i >= 0; ++i) {
+    //        String pname = propertyNames[i];
+    //        
+    //      }
+    //      return null;
+    //    }
+
+    public boolean applyPropertyAlias(String propertyName,
+            String propertyValue, UnicodeSet result) {
+      boolean status = false;
+      boolean invert = false;
+      int posNotEqual = propertyName.indexOf('\u2260');
+      int posColon = propertyName.indexOf(':');
+      if (posNotEqual >= 0 || posColon >= 0) {
+          if (posNotEqual < 0) posNotEqual = propertyName.length();
+          if (posColon < 0) posColon = propertyName.length();
+          int opPos = posNotEqual < posColon ? posNotEqual : posColon;
+          propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1) 
+                  : propertyName.substring(opPos+1) + "=" + propertyValue;
+          propertyName = propertyName.substring(0,opPos);
+          if (posNotEqual < posColon) {
+              invert = true;
+          }
+      }
+      if (propertyName.endsWith("!")) {
+        propertyName = propertyName.substring(0, propertyName.length() - 1);
+        invert = !invert;
+      }
+      propertyValue = propertyValue.trim();
+      if (propertyValue.length() != 0) {
+        status = applyPropertyAlias0(propertyName, propertyValue, result);
+      } else {
+        try {
+          status = applyPropertyAlias0("gc", propertyName, result);
+        } catch (Exception e) {};
+        if (!status) {
+          try {
+            status = applyPropertyAlias0("sc", propertyName, result);
+          } catch (Exception e) {};
+          if (!status) {
+            try {
+              status = applyPropertyAlias0(propertyName, "Yes", result);
+            } catch (Exception e) {};
+            if (!status) {
+              status = applyPropertyAlias0(propertyName, "", result);
+            }
+          }
+        }
+      }
+      if (status && invert) {
+        result.complement();
+      }
+      return status;
+    }
+
+    static final HashMap<String,String[]> GC_REMAP = new HashMap();
+    {
+        GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" "));
+        GC_REMAP.put("other", GC_REMAP.get("c"));
+        
+        GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" "));
+        GC_REMAP.put("letter", GC_REMAP.get("l"));
+        
+        GC_REMAP.put("lc", "Ll Lt Lu".split(" "));
+        GC_REMAP.put("casedletter", GC_REMAP.get("lc"));
+        
+        GC_REMAP.put("m", "Mc Me Mn".split(" "));
+        GC_REMAP.put("mark", GC_REMAP.get("m"));
+        
+        GC_REMAP.put("n", "Nd Nl No".split(" "));
+        GC_REMAP.put("number", GC_REMAP.get("n"));
+        
+        GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" "));
+        GC_REMAP.put("punctuation", GC_REMAP.get("p"));
+        GC_REMAP.put("punct", GC_REMAP.get("p"));
+        
+        GC_REMAP.put("s", "Sc Sk Sm So".split(" "));
+        GC_REMAP.put("symbol", GC_REMAP.get("s"));
+        
+        GC_REMAP.put("z", "Zl Zp Zs".split(" "));
+        GC_REMAP.put("separator", GC_REMAP.get("z"));
+    }
+    
+    /**
+     * Fills {@code result} with the code points whose value for {@code propertyName}
+     * matches {@code propertyValue}. {@code result} is cleared before anything else happens.
+     * <p>
+     * Forms of {@code propertyValue} recognized here:
+     * <ul>
+     * <li>for General_Category, a value whose skeleton is a key of {@code GC_REMAP}:
+     *     the union of the sets of the mapped values;</li>
+     * <li>{@code /regex/}: all property values accepted by the regex, after it has been
+     *     passed through {@code unicodeRegex.transform};</li>
+     * <li>{@code @cp@}: code points for which
+     *     {@code UnicodeProperty.equals(codePoint, value)} holds;</li>
+     * <li>{@code @other@}: code points whose value equals their value for the property
+     *     named {@code other};</li>
+     * <li>anything else: a literal value, which must pass {@code isValid}.</li>
+     * </ul>
+     * For the Age property a value also selects the values that sort at or below it
+     * (via {@link ComparisonMatcher} with {@link Relation#geq} for literals, and via
+     * {@code String.compareTo} for regex matches).
+     *
+     * @param propertyName  name or alias of the property, resolved through the factory
+     * @param propertyValue value, regex or cross-property reference as described above
+     * @param result        set that receives the matching code points
+     * @return always {@code true}; failures are reported by exception
+     * @throws IllegalArgumentException if the factory does not know {@code propertyName},
+     *         or if a literal {@code propertyValue} is not valid for the property
+     */
+    public boolean applyPropertyAlias0(String propertyName,
+            String propertyValue, UnicodeSet result) {
+      result.clear();
+      UnicodeProperty prop = factory.getProperty(propertyName);
+      if (prop == null) {
+        // Check before the first dereference of prop: previously prop.getName() ran first,
+        // so an unknown property surfaced as a NullPointerException instead of this error.
+        throw new IllegalArgumentException("Illegal property: " + propertyName);
+      }
+      String canonicalName = prop.getName();
+      boolean isAge = UnicodeProperty.equalNames("Age", canonicalName);
+
+      // Hack for special GC values
+      if (canonicalName.equals("General_Category")) {
+          String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue));
+          if (parts != null) {
+              for (String part : parts) {
+                  // getSet(value, result) accumulates into result, giving the union of all parts.
+                  prop.getSet(part, result);
+              }
+              return true;
+          }
+      }
+
+      // "/.../" : the value is a regular expression over property values.
+      PatternMatcher patternMatcher = null;
+      if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
+        String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1));
+        patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
+      }
+
+      // "@...@" : compare against another property, or against the code point itself for "cp".
+      UnicodeProperty otherProperty = null;
+      boolean testCp = false;
+      if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) {
+        String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim();
+        if ("cp".equalsIgnoreCase(otherPropName)) {
+          testCp = true;
+        } else {
+          // NOTE(review): an unknown name leaves otherProperty null, so the value then falls
+          // through to the literal-value branch below - confirm that this is intended.
+          otherProperty = factory.getProperty(otherPropName);
+        }
+      }
+
+      UnicodeSet set;
+      if (testCp) {
+        set = new UnicodeSet();
+        for (int i = 0; i <= 0x10FFFF; ++i) {
+          if (UnicodeProperty.equals(i, prop.getValue(i))) {
+            set.add(i);
+          }
+        }
+      } else if (otherProperty != null) {
+        set = new UnicodeSet();
+        for (int i = 0; i <= 0x10FFFF; ++i) {
+          String v1 = prop.getValue(i);
+          String v2 = otherProperty.getValue(i);
+          if (UnicodeProperty.equals(v1, v2)) {
+            set.add(i);
+          }
+        }
+      } else if (patternMatcher == null) {
+        if (!isValid(prop, propertyValue)) {
+          throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName
+                  + " must be in "
+                  + prop.getAvailableValues() + " or in " + prop.getValueAliases());
+        }
+        if (isAge) {
+          // geq is evaluated as "requested value >= candidate value", see ComparisonMatcher.
+          set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq));
+        } else {
+          set = prop.getSet(propertyValue);
+        }
+      } else if (isAge) {
+        set = new UnicodeSet();
+        List<String> values = prop.getAvailableValues();
+        for (String value : values) {
+          if (patternMatcher.matches(value)) {
+            // Each matching age also pulls in every value that compares at or below it.
+            for (String other : values) {
+              if (other.compareTo(value) <= 0) {
+                set.addAll(prop.getSet(other));
+              }
+            }
+          }
+        }
+      } else {
+        set = prop.getSet(patternMatcher);
+      }
+      result.addAll(set);
+      return true;
+    }
+
+    
+
+    /**
+     * Tells whether {@code propertyValue} is an acceptable literal value for {@code prop}.
+     * The decision is delegated entirely to {@code prop.isValidValue}; no property gets
+     * special treatment here.
+     *
+     * @param prop          property whose value space is consulted
+     * @param propertyValue candidate value
+     * @return the answer given by {@code prop.isValidValue(propertyValue)}
+     */
+    private boolean isValid(UnicodeProperty prop, String propertyValue) {
+      final boolean accepted = prop.isValidValue(propertyValue);
+      return accepted;
+    }
+
+    /**
+     * Ordering relations understood by {@link ComparisonMatcher}; each one is evaluated
+     * as "pattern RELATION value".
+     */
+    public enum Relation {
+        less,
+        leq,
+        equal,
+        geq,
+        greater
+    }
+
+    /**
+     * Matcher that accepts a value according to how the stored pattern compares with it.
+     * The comparison is always made as {@code comparator.compare(pattern, value.toString())},
+     * so the relation reads "pattern RELATION value": {@code geq}, for example, accepts
+     * every value that the pattern is greater than or equal to.
+     */
+    public static class ComparisonMatcher implements PatternMatcher {
+        /** Comparator shared by all instances. */
+        static Comparator comparator = new UTF16.StringComparator(true, false,0);
+
+        /** Relation that must hold between the pattern and a candidate value. */
+        Relation relation;
+
+        /** Left-hand operand of every comparison. */
+        String pattern;
+
+        /**
+         * @param pattern    string that candidate values are compared against
+         * @param comparator the relation to test (despite its name, not a {@code Comparator})
+         */
+        public ComparisonMatcher(String pattern, Relation comparator) {
+            this.pattern = pattern;
+            this.relation = comparator;
+        }
+
+        /**
+         * @param value candidate; only its {@code toString()} form is compared
+         * @return whether the configured relation holds between the pattern and the value
+         */
+        public boolean matches(Object value) {
+            final int order = comparator.compare(pattern, value.toString());
+            final boolean accepted;
+            switch (relation) {
+                case less:
+                    accepted = order < 0;
+                    break;
+                case leq:
+                    accepted = order <= 0;
+                    break;
+                case geq:
+                    accepted = order >= 0;
+                    break;
+                case greater:
+                    accepted = order > 0;
+                    break;
+                default:
+                    // Covers Relation.equal.
+                    accepted = order == 0;
+                    break;
+            }
+            return accepted;
+        }
+
+        /**
+         * Replaces the pattern that later calls to {@code matches} compare against.
+         *
+         * @return this matcher
+         */
+        public PatternMatcher set(String pattern) {
+            this.pattern = pattern;
+            return this;
+        }
+    }
+  }
+\ No newline at end of file
diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java

index 69359ecd03d1b8807d5fabfbb74574c842be2a72..0728b3e1cf269619a0af51d8b3b100d40493c7e5 100644 (file)
--- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java
+++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java
@@ -1 +1,58 @@
-/*\r *******************************************************************************\r * Copyright (C) 2011, Google, International Business Machines Corporation and         *\r * others. All Rights Reserved.                                                *\r *******************************************************************************\r */\rpackage com.ibm.icu.dev.test.util;\r\rimport com.ibm.icu.text.Transform;\rimport com.ibm.icu.text.UTF16;\r\r/**\r * Simple wrapping for normalizer that allows for both the standard ICU normalizer, and one built directly from the UCD.\r */\rpublic abstract class UnicodeTransform implements Transform<String,String> {\r    public enum Type {\r        NFD, NFC, NFKD, NFKC, CASEFOLD\r    }\r    \r    public interface Factory {\r        public UnicodeTransform getInstance(Type type);\r    }\r    \r    private static Factory factory = new IcuUnicodeNormalizerFactory();\r    \r    public static synchronized Factory getFactory() {\r        return factory;\r    }\r\r    public static synchronized void setFactory(Factory factory) {\r        UnicodeTransform.factory = factory;\r    }\r\r    public static synchronized UnicodeTransform getInstance(Type type) {\r        return factory.getInstance(type);\r    }\r    \r    public abstract String transform(String source);\r    \r    /**\r     * Can be overridden for performance.\r     */\r    public boolean isTransformed(String source) {\r        return source.equals(transform(source));\r    }\r    /**\r     * Can be overridden for performance.\r     */\r    public String transform(int source) {\r        return transform(UTF16.valueOf(source));\r    }\r    /**\r     * Can be overridden for performance.\r     */\r    public boolean isTransformed(int source) {\r        return isTransformed(UTF16.valueOf(source));\r    }\r}\r\r
-\ No newline at end of file
+/* *******************************************************************************
+ * Copyright (C) 2011, Google, International Business Machines Corporation and         *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.util;
+
+import com.ibm.icu.text.Transform;
+import com.ibm.icu.text.UTF16;
+
+/**
+ * Thin wrapper around a normalizer-style string transform, so that callers can work with
+ * either the standard ICU normalizer or one built directly from the UCD. Instances are
+ * handed out by a replaceable static {@link Factory}.
+ */
+public abstract class UnicodeTransform implements Transform<String,String> {
+    /** The kinds of transform a {@link Factory} can be asked for. */
+    public enum Type {
+        NFD, NFC, NFKD, NFKC, CASEFOLD
+    }
+
+    /** Source of {@link UnicodeTransform} instances, one per requested {@link Type}. */
+    public interface Factory {
+        public UnicodeTransform getInstance(Type type);
+    }
+
+    /** Factory currently in use; starts out as an {@code IcuUnicodeNormalizerFactory}. */
+    private static Factory factory = new IcuUnicodeNormalizerFactory();
+
+    /** @return the factory currently installed */
+    public static synchronized Factory getFactory() {
+        return factory;
+    }
+
+    /**
+     * Installs the factory used by later {@link #getInstance(Type)} calls.
+     *
+     * @param factory the replacement factory
+     */
+    public static synchronized void setFactory(Factory factory) {
+        UnicodeTransform.factory = factory;
+    }
+
+    /**
+     * @param type the kind of transform wanted
+     * @return whatever the installed factory returns for {@code type}
+     */
+    public static synchronized UnicodeTransform getInstance(Type type) {
+        final Factory installed = factory;
+        return installed.getInstance(type);
+    }
+
+    /**
+     * Applies the transform to a string.
+     *
+     * @param source text to transform
+     * @return the transformed text
+     */
+    public abstract String transform(String source);
+
+    /**
+     * Tells whether {@code source} is left unchanged by {@link #transform(String)}.
+     * Can be overridden for performance.
+     */
+    public boolean isTransformed(String source) {
+        final String transformed = transform(source);
+        return source.equals(transformed);
+    }
+
+    /**
+     * Transforms a single code point, given as an int, by way of its string form.
+     * Can be overridden for performance.
+     */
+    public String transform(int source) {
+        final String asString = UTF16.valueOf(source);
+        return transform(asString);
+    }
+
+    /**
+     * Tells whether a single code point is left unchanged by the transform.
+     * Can be overridden for performance.
+     */
+    public boolean isTransformed(int source) {
+        final String asString = UTF16.valueOf(source);
+        return isTransformed(asString);
+    }
+}
+
author	Abhinav Gupta <mail@abhinavg.net>
	Wed, 28 Sep 2011 21:16:24 +0000 (21:16 +0000)
committer	Abhinav Gupta <mail@abhinavg.net>
	Wed, 28 Sep 2011 21:16:24 +0000 (21:16 +0000)
.gitattributes		patch \| blob \| history
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java		patch \| blob \| history
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java		patch \| blob \| history
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java		patch \| blob \| history
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java		patch \| blob \| history