From: Abhinav Gupta Date: Wed, 28 Sep 2011 21:16:24 +0000 (+0000) Subject: ICU-8856 text file line endings fixed for icu4j X-Git-Tag: milestone-59-0-1~4470 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=deca5a81970bc3460e0f6727aaf2498d01df2ccb;p=icu ICU-8856 text file line endings fixed for icu4j X-SVN-Rev: 30741 --- diff --git a/.gitattributes b/.gitattributes index 2d4fed26a72..e26f5860ef4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -701,10 +701,6 @@ icu4j/main/tests/translit/.externalToolBuilders/copy-translit-test-data.launch - icu4j/main/tests/translit/.settings/org.eclipse.core.resources.prefs -text icu4j/main/tests/translit/.settings/org.eclipse.jdt.core.prefs -text icu4j/main/tests/translit/.settings/org.eclipse.jdt.ui.prefs -text -icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java -text -icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java -text -icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java -text -icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java -text icu4j/main/tests/translit/translit-tests-build.launch -text icu4j/manifest.stub -text icu4j/tools/build/.settings/org.eclipse.core.resources.prefs -text diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java index 78597117b37..737bf659aab 100644 --- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java +++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java @@ -1 +1,118 @@ -/* ******************************************************************************* * Copyright (C) 2011, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.translit; import java.util.List; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.dev.test.util.ICUPropertyFactory; import com.ibm.icu.dev.test.util.UnicodeProperty; import com.ibm.icu.dev.test.util.UnicodeProperty.Factory; import com.ibm.icu.dev.test.util.UnicodePropertySymbolTable; import com.ibm.icu.text.UnicodeSet; /** * @author markdavis * */ public class TestUnicodeProperty extends TestFmwk{ public static void main(String[] args) { new TestUnicodeProperty().run(args); } static final UnicodeSet casedLetter = new UnicodeSet("[:gc=cased letter:]"); static final UnicodeSet letter = new UnicodeSet("[:gc=L:]"); public void TestBasic() { Factory factory = ICUPropertyFactory.make(); UnicodeProperty property = factory.getProperty("gc"); List values = property.getAvailableValues(); assertTrue("Values contain GC values", values.contains("Unassigned")); final UnicodeSet lu = property.getSet("Lu"); if (!assertTrue("Gc=L contains 'A'", lu.contains('A'))) { errln("Contents:\t" + lu.complement().complement().toPattern(false)); } } public void TestSymbolTable() { Factory factory = ICUPropertyFactory.make(); UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory); UnicodeSet.setDefaultXSymbolTable(upst); try { final UnicodeSet luSet = new UnicodeSet("[:gc=L:]"); assertTrue("Gc=L contains 'A'", luSet.contains('A')); assertTrue("Gc=L contains 'Z'", luSet.contains('Z')); assertFalse("Gc=L contains 'a'", luSet.contains('1')); UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]"); assertEquals("gc=lc are equal", casedLetter, casedLetter2); } finally { // restore the world UnicodeSet.setDefaultXSymbolTable(null); } } public void TestSymbolTable2() { Factory factory = new MyUnicodePropertyFactory(); UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory); UnicodeSet.setDefaultXSymbolTable(upst); try { final UnicodeSet luSet = new UnicodeSet("[:gc=L:]"); assertFalse("Gc=L contains 'A'", luSet.contains('A')); if (!assertTrue("Gc=L contains 'Z'", luSet.contains('Z'))) { errln("Contents:\t" + luSet.complement().complement().toPattern(false)); } assertFalse("Gc=L contains 'a'", luSet.contains('1')); UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]"); assertNotEquals("gc=lc should not be equal", casedLetter, casedLetter2); } finally { // restore the world UnicodeSet.setDefaultXSymbolTable(null); } } /** * For testing, override to set A-M to Cn. */ static class MyUnicodeGCProperty extends UnicodeProperty.SimpleProperty { UnicodeProperty icuProperty = ICUPropertyFactory.make().getProperty("Gc"); { setName(icuProperty.getName()); setType(icuProperty.getType()); } @Override protected String _getValue(int codepoint) { if (codepoint >= 'A' && codepoint <= 'M') { return "Unassigned"; } else { return icuProperty.getValue(codepoint); } } @Override protected List _getValueAliases(String valueAlias, List result) { return icuProperty.getValueAliases(valueAlias, result); } @Override public List _getNameAliases(List result) { return icuProperty.getNameAliases(); } } /** * For testing, override to set A-Z to Cn. */ static class MyUnicodePropertyFactory extends ICUPropertyFactory { private MyUnicodePropertyFactory() { add(new MyUnicodeGCProperty()); } } static class MyUnicodePropertySymbolTable extends UnicodePropertySymbolTable { public MyUnicodePropertySymbolTable(Factory factory) { super(factory); } } } \ No newline at end of file +/* ******************************************************************************* + * Copyright (C) 2011, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.test.translit; + +import java.util.List; + +import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.dev.test.util.ICUPropertyFactory; +import com.ibm.icu.dev.test.util.UnicodeProperty; +import com.ibm.icu.dev.test.util.UnicodeProperty.Factory; +import com.ibm.icu.dev.test.util.UnicodePropertySymbolTable; +import com.ibm.icu.text.UnicodeSet; + +/** + * @author markdavis + * + */ +public class TestUnicodeProperty extends TestFmwk{ + public static void main(String[] args) { + new TestUnicodeProperty().run(args); + } + static final UnicodeSet casedLetter = new UnicodeSet("[:gc=cased letter:]"); + static final UnicodeSet letter = new UnicodeSet("[:gc=L:]"); + + + public void TestBasic() { + Factory factory = ICUPropertyFactory.make(); + UnicodeProperty property = factory.getProperty("gc"); + List values = property.getAvailableValues(); + assertTrue("Values contain GC values", values.contains("Unassigned")); + final UnicodeSet lu = property.getSet("Lu"); + if (!assertTrue("Gc=L contains 'A'", lu.contains('A'))) { + errln("Contents:\t" + lu.complement().complement().toPattern(false)); + } + } + + public void TestSymbolTable() { + Factory factory = ICUPropertyFactory.make(); + UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory); + UnicodeSet.setDefaultXSymbolTable(upst); + try { + final UnicodeSet luSet = new UnicodeSet("[:gc=L:]"); + assertTrue("Gc=L contains 'A'", luSet.contains('A')); + assertTrue("Gc=L contains 'Z'", luSet.contains('Z')); + assertFalse("Gc=L contains 'a'", luSet.contains('1')); + UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]"); + assertEquals("gc=lc are equal", casedLetter, casedLetter2); + } finally { + // restore the world + UnicodeSet.setDefaultXSymbolTable(null); + } + } + + public void TestSymbolTable2() { + Factory factory = new MyUnicodePropertyFactory(); + UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory); + UnicodeSet.setDefaultXSymbolTable(upst); + try { + final UnicodeSet luSet = new UnicodeSet("[:gc=L:]"); + assertFalse("Gc=L contains 'A'", luSet.contains('A')); + if (!assertTrue("Gc=L contains 'Z'", luSet.contains('Z'))) { + errln("Contents:\t" + luSet.complement().complement().toPattern(false)); + } + assertFalse("Gc=L contains 'a'", luSet.contains('1')); + UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]"); + assertNotEquals("gc=lc should not be equal", casedLetter, casedLetter2); + } finally { + // restore the world + UnicodeSet.setDefaultXSymbolTable(null); + } + } + + + /** + * For testing, override to set A-M to Cn. + */ + static class MyUnicodeGCProperty extends UnicodeProperty.SimpleProperty { + UnicodeProperty icuProperty = ICUPropertyFactory.make().getProperty("Gc"); + { + setName(icuProperty.getName()); + setType(icuProperty.getType()); + } + @Override + protected String _getValue(int codepoint) { + if (codepoint >= 'A' && codepoint <= 'M') { + return "Unassigned"; + } else { + return icuProperty.getValue(codepoint); + } + } + @Override + protected List _getValueAliases(String valueAlias, List result) { + return icuProperty.getValueAliases(valueAlias, result); + } + @Override + public List _getNameAliases(List result) { + return icuProperty.getNameAliases(); + } + } + + /** + * For testing, override to set A-Z to Cn. + */ + static class MyUnicodePropertyFactory extends ICUPropertyFactory { + private MyUnicodePropertyFactory() { + add(new MyUnicodeGCProperty()); + } + } + + static class MyUnicodePropertySymbolTable extends UnicodePropertySymbolTable { + public MyUnicodePropertySymbolTable(Factory factory) { + super(factory); + } + } +} diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java index dc803bfe6b3..461e5f529d4 100644 --- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java +++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java @@ -1 +1,54 @@ -/* ******************************************************************************* * Copyright (C) 2011, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.util; import com.ibm.icu.dev.test.util.UnicodeTransform.Type; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.Normalizer2; import com.ibm.icu.text.Normalizer2.Mode; /** * @author markdavis * */ public class IcuUnicodeNormalizerFactory implements UnicodeTransform.Factory { public UnicodeTransform getInstance(Type type) { switch (type) { case NFC: case NFKC: return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type.toString(), Mode.COMPOSE)); case NFD: case NFKD: return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type == Type.NFD ? "NFC" : "NFKC", Mode.DECOMPOSE)); case CASEFOLD: return new CaseFolder(); default: throw new IllegalArgumentException(); } } private static class CaseFolder extends UnicodeTransform { @Override public String transform(String source) { return UCharacter.foldCase(source.toString(), true); } } private static class IcuUnicodeNormalizer extends UnicodeTransform { private Normalizer2 normalizer; private IcuUnicodeNormalizer(Normalizer2 normalizer) { this.normalizer = normalizer; } public String transform(String src) { return normalizer.normalize(src); } public boolean isTransformed(String s) { return normalizer.isNormalized(s); } } } \ No newline at end of file +/* ******************************************************************************* + * Copyright (C) 2011, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.test.util; + +import com.ibm.icu.dev.test.util.UnicodeTransform.Type; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.Normalizer2; +import com.ibm.icu.text.Normalizer2.Mode; + +/** + * @author markdavis + * + */ +public class IcuUnicodeNormalizerFactory implements UnicodeTransform.Factory { + + public UnicodeTransform getInstance(Type type) { + switch (type) { + case NFC: case NFKC: + return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type.toString(), Mode.COMPOSE)); + case NFD: case NFKD: + return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type == Type.NFD ? "NFC" : "NFKC", Mode.DECOMPOSE)); + case CASEFOLD: + return new CaseFolder(); + default: + throw new IllegalArgumentException(); + } + } + + private static class CaseFolder extends UnicodeTransform { + @Override + public String transform(String source) { + return UCharacter.foldCase(source.toString(), true); + } + } + + private static class IcuUnicodeNormalizer extends UnicodeTransform { + private Normalizer2 normalizer; + + private IcuUnicodeNormalizer(Normalizer2 normalizer) { + this.normalizer = normalizer; + } + + public String transform(String src) { + return normalizer.normalize(src); + } + + public boolean isTransformed(String s) { + return normalizer.isNormalized(s); + } + } +} diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java index 5e39d3889c4..b709eda18a1 100644 --- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java +++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java @@ -1 +1,249 @@ -/* ******************************************************************************* * Copyright (C) 1996-2011, Google, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.util; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Set; import com.ibm.icu.dev.test.util.UnicodeProperty.PatternMatcher; import com.ibm.icu.impl.UnicodeRegex; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; /** * Allows for overriding the parsing of UnicodeSet property patterns. *

* WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the * Unassigned characters (gc=Cn) are different than in ICU other than in ICU, you MUST call * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable} * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}. * * @author markdavis */ public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable { UnicodeRegex unicodeRegex; final UnicodeProperty.Factory factory; public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) { unicodeRegex = new UnicodeRegex().setSymbolTable(this); this.factory = factory; } // public boolean applyPropertyAlias0(String propertyName, // String propertyValue, UnicodeSet result) { // if (!propertyName.contains("*")) { // return applyPropertyAlias(propertyName, propertyValue, result); // } // String[] propertyNames = propertyName.split("[*]"); // for (int i = propertyNames.length - 1; i >= 0; ++i) { // String pname = propertyNames[i]; // // } // return null; // } public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) { boolean status = false; boolean invert = false; int posNotEqual = propertyName.indexOf('\u2260'); int posColon = propertyName.indexOf(':'); if (posNotEqual >= 0 || posColon >= 0) { if (posNotEqual < 0) posNotEqual = propertyName.length(); if (posColon < 0) posColon = propertyName.length(); int opPos = posNotEqual < posColon ? posNotEqual : posColon; propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1) : propertyName.substring(opPos+1) + "=" + propertyValue; propertyName = propertyName.substring(0,opPos); if (posNotEqual < posColon) { invert = true; } } if (propertyName.endsWith("!")) { propertyName = propertyName.substring(0, propertyName.length() - 1); invert = !invert; } propertyValue = propertyValue.trim(); if (propertyValue.length() != 0) { status = applyPropertyAlias0(propertyName, propertyValue, result); } else { try { status = applyPropertyAlias0("gc", propertyName, result); } catch (Exception e) {}; if (!status) { try { status = applyPropertyAlias0("sc", propertyName, result); } catch (Exception e) {}; if (!status) { try { status = applyPropertyAlias0(propertyName, "Yes", result); } catch (Exception e) {}; if (!status) { status = applyPropertyAlias0(propertyName, "", result); } } } } if (status && invert) { result.complement(); } return status; } static final HashMap GC_REMAP = new HashMap(); { GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" ")); GC_REMAP.put("other", GC_REMAP.get("c")); GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" ")); GC_REMAP.put("letter", GC_REMAP.get("l")); GC_REMAP.put("lc", "Ll Lt Lu".split(" ")); GC_REMAP.put("casedletter", GC_REMAP.get("lc")); GC_REMAP.put("m", "Mc Me Mn".split(" ")); GC_REMAP.put("mark", GC_REMAP.get("m")); GC_REMAP.put("n", "Nd Nl No".split(" ")); GC_REMAP.put("number", GC_REMAP.get("n")); GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" ")); GC_REMAP.put("punctuation", GC_REMAP.get("p")); GC_REMAP.put("punct", GC_REMAP.get("p")); GC_REMAP.put("s", "Sc Sk Sm So".split(" ")); GC_REMAP.put("symbol", GC_REMAP.get("s")); GC_REMAP.put("z", "Zl Zp Zs".split(" ")); GC_REMAP.put("separator", GC_REMAP.get("z")); } public boolean applyPropertyAlias0(String propertyName, String propertyValue, UnicodeSet result) { result.clear(); UnicodeProperty prop = factory.getProperty(propertyName); String canonicalName = prop.getName(); boolean isAge = UnicodeProperty.equalNames("Age", canonicalName); // Hack for special GC values if (canonicalName.equals("General_Category")) { String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue)); if (parts != null) { for (String part : parts) { prop.getSet(part, result); } return true; } } PatternMatcher patternMatcher = null; if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) { String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1)); patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex); } UnicodeProperty otherProperty = null; boolean testCp = false; if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) { String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim(); if ("cp".equalsIgnoreCase(otherPropName)) { testCp = true; } else { otherProperty = factory.getProperty(otherPropName); } } if (prop != null) { UnicodeSet set; if (testCp) { set = new UnicodeSet(); for (int i = 0; i <= 0x10FFFF; ++i) { if (UnicodeProperty.equals(i, prop.getValue(i))) { set.add(i); } } } else if (otherProperty != null) { set = new UnicodeSet(); for (int i = 0; i <= 0x10FFFF; ++i) { String v1 = prop.getValue(i); String v2 = otherProperty.getValue(i); if (UnicodeProperty.equals(v1, v2)) { set.add(i); } } } else if (patternMatcher == null) { if (!isValid(prop, propertyValue)) { throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName + " must be in " + prop.getAvailableValues() + " or in " + prop.getValueAliases()); } if (isAge) { set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq)); } else { set = prop.getSet(propertyValue); } } else if (isAge) { set = new UnicodeSet(); List values = prop.getAvailableValues(); for (String value : values) { if (patternMatcher.matches(value)) { for (String other : values) { if (other.compareTo(value) <= 0) { set.addAll(prop.getSet(other)); } } } } } else { set = prop.getSet(patternMatcher); } result.addAll(set); return true; } throw new IllegalArgumentException("Illegal property: " + propertyName); } private boolean isValid(UnicodeProperty prop, String propertyValue) { // if (prop.getName().equals("General_Category")) { // if (propertyValue) // } return prop.isValidValue(propertyValue); } public enum Relation {less, leq, equal, geq, greater} public static class ComparisonMatcher implements PatternMatcher { Relation relation; static Comparator comparator = new UTF16.StringComparator(true, false,0); String pattern; public ComparisonMatcher(String pattern, Relation comparator) { this.relation = comparator; this.pattern = pattern; } public boolean matches(Object value) { int comp = comparator.compare(pattern, value.toString()); switch (relation) { case less: return comp < 0; case leq: return comp <= 0; default: return comp == 0; case geq: return comp >= 0; case greater: return comp > 0; } } public PatternMatcher set(String pattern) { this.pattern = pattern; return this; } } } \ No newline at end of file +/* ******************************************************************************* + * Copyright (C) 1996-2011, Google, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.test.util; + +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Set; + +import com.ibm.icu.dev.test.util.UnicodeProperty.PatternMatcher; +import com.ibm.icu.impl.UnicodeRegex; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * Allows for overriding the parsing of UnicodeSet property patterns. + *

+ * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the + * Unassigned characters (gc=Cn) are different than in ICU other than in ICU, you MUST call + * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable} + * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}. + * + * @author markdavis + */ +public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable { + UnicodeRegex unicodeRegex; + final UnicodeProperty.Factory factory; + + public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) { + unicodeRegex = new UnicodeRegex().setSymbolTable(this); + this.factory = factory; + } + + + // public boolean applyPropertyAlias0(String propertyName, + // String propertyValue, UnicodeSet result) { + // if (!propertyName.contains("*")) { + // return applyPropertyAlias(propertyName, propertyValue, result); + // } + // String[] propertyNames = propertyName.split("[*]"); + // for (int i = propertyNames.length - 1; i >= 0; ++i) { + // String pname = propertyNames[i]; + // + // } + // return null; + // } + + public boolean applyPropertyAlias(String propertyName, + String propertyValue, UnicodeSet result) { + boolean status = false; + boolean invert = false; + int posNotEqual = propertyName.indexOf('\u2260'); + int posColon = propertyName.indexOf(':'); + if (posNotEqual >= 0 || posColon >= 0) { + if (posNotEqual < 0) posNotEqual = propertyName.length(); + if (posColon < 0) posColon = propertyName.length(); + int opPos = posNotEqual < posColon ? posNotEqual : posColon; + propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1) + : propertyName.substring(opPos+1) + "=" + propertyValue; + propertyName = propertyName.substring(0,opPos); + if (posNotEqual < posColon) { + invert = true; + } + } + if (propertyName.endsWith("!")) { + propertyName = propertyName.substring(0, propertyName.length() - 1); + invert = !invert; + } + propertyValue = propertyValue.trim(); + if (propertyValue.length() != 0) { + status = applyPropertyAlias0(propertyName, propertyValue, result); + } else { + try { + status = applyPropertyAlias0("gc", propertyName, result); + } catch (Exception e) {}; + if (!status) { + try { + status = applyPropertyAlias0("sc", propertyName, result); + } catch (Exception e) {}; + if (!status) { + try { + status = applyPropertyAlias0(propertyName, "Yes", result); + } catch (Exception e) {}; + if (!status) { + status = applyPropertyAlias0(propertyName, "", result); + } + } + } + } + if (status && invert) { + result.complement(); + } + return status; + } + + static final HashMap GC_REMAP = new HashMap(); + { + GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" ")); + GC_REMAP.put("other", GC_REMAP.get("c")); + + GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" ")); + GC_REMAP.put("letter", GC_REMAP.get("l")); + + GC_REMAP.put("lc", "Ll Lt Lu".split(" ")); + GC_REMAP.put("casedletter", GC_REMAP.get("lc")); + + GC_REMAP.put("m", "Mc Me Mn".split(" ")); + GC_REMAP.put("mark", GC_REMAP.get("m")); + + GC_REMAP.put("n", "Nd Nl No".split(" ")); + GC_REMAP.put("number", GC_REMAP.get("n")); + + GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" ")); + GC_REMAP.put("punctuation", GC_REMAP.get("p")); + GC_REMAP.put("punct", GC_REMAP.get("p")); + + GC_REMAP.put("s", "Sc Sk Sm So".split(" ")); + GC_REMAP.put("symbol", GC_REMAP.get("s")); + + GC_REMAP.put("z", "Zl Zp Zs".split(" ")); + GC_REMAP.put("separator", GC_REMAP.get("z")); + } + + public boolean applyPropertyAlias0(String propertyName, + String propertyValue, UnicodeSet result) { + result.clear(); + UnicodeProperty prop = factory.getProperty(propertyName); + String canonicalName = prop.getName(); + boolean isAge = UnicodeProperty.equalNames("Age", canonicalName); + + // Hack for special GC values + if (canonicalName.equals("General_Category")) { + String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue)); + if (parts != null) { + for (String part : parts) { + prop.getSet(part, result); + } + return true; + } + } + + PatternMatcher patternMatcher = null; + if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) { + String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1)); + patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex); + } + UnicodeProperty otherProperty = null; + boolean testCp = false; + if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) { + String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim(); + if ("cp".equalsIgnoreCase(otherPropName)) { + testCp = true; + } else { + otherProperty = factory.getProperty(otherPropName); + } + } + if (prop != null) { + UnicodeSet set; + if (testCp) { + set = new UnicodeSet(); + for (int i = 0; i <= 0x10FFFF; ++i) { + if (UnicodeProperty.equals(i, prop.getValue(i))) { + set.add(i); + } + } + } else if (otherProperty != null) { + set = new UnicodeSet(); + for (int i = 0; i <= 0x10FFFF; ++i) { + String v1 = prop.getValue(i); + String v2 = otherProperty.getValue(i); + if (UnicodeProperty.equals(v1, v2)) { + set.add(i); + } + } + } else if (patternMatcher == null) { + if (!isValid(prop, propertyValue)) { + throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName + + " must be in " + + prop.getAvailableValues() + " or in " + prop.getValueAliases()); + } + if (isAge) { + set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq)); + } else { + set = prop.getSet(propertyValue); + } + } else if (isAge) { + set = new UnicodeSet(); + List values = prop.getAvailableValues(); + for (String value : values) { + if (patternMatcher.matches(value)) { + for (String other : values) { + if (other.compareTo(value) <= 0) { + set.addAll(prop.getSet(other)); + } + } + } + } + } else { + set = prop.getSet(patternMatcher); + } + result.addAll(set); + return true; + } + throw new IllegalArgumentException("Illegal property: " + propertyName); + } + + + + private boolean isValid(UnicodeProperty prop, String propertyValue) { +// if (prop.getName().equals("General_Category")) { +// if (propertyValue) +// } + return prop.isValidValue(propertyValue); + } + + public enum Relation {less, leq, equal, geq, greater} + + public static class ComparisonMatcher implements PatternMatcher { + Relation relation; + static Comparator comparator = new UTF16.StringComparator(true, false,0); + + String pattern; + + public ComparisonMatcher(String pattern, Relation comparator) { + this.relation = comparator; + this.pattern = pattern; + } + + public boolean matches(Object value) { + int comp = comparator.compare(pattern, value.toString()); + switch (relation) { + case less: return comp < 0; + case leq: return comp <= 0; + default: return comp == 0; + case geq: return comp >= 0; + case greater: return comp > 0; + } + } + + public PatternMatcher set(String pattern) { + this.pattern = pattern; + return this; + } + } + } \ No newline at end of file diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java index 69359ecd03d..0728b3e1cf2 100644 --- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java +++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java @@ -1 +1,58 @@ -/* ******************************************************************************* * Copyright (C) 2011, Google, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.util; import com.ibm.icu.text.Transform; import com.ibm.icu.text.UTF16; /** * Simple wrapping for normalizer that allows for both the standard ICU normalizer, and one built directly from the UCD. */ public abstract class UnicodeTransform implements Transform { public enum Type { NFD, NFC, NFKD, NFKC, CASEFOLD } public interface Factory { public UnicodeTransform getInstance(Type type); } private static Factory factory = new IcuUnicodeNormalizerFactory(); public static synchronized Factory getFactory() { return factory; } public static synchronized void setFactory(Factory factory) { UnicodeTransform.factory = factory; } public static synchronized UnicodeTransform getInstance(Type type) { return factory.getInstance(type); } public abstract String transform(String source); /** * Can be overridden for performance. */ public boolean isTransformed(String source) { return source.equals(transform(source)); } /** * Can be overridden for performance. */ public String transform(int source) { return transform(UTF16.valueOf(source)); } /** * Can be overridden for performance. */ public boolean isTransformed(int source) { return isTransformed(UTF16.valueOf(source)); } } \ No newline at end of file +/* ******************************************************************************* + * Copyright (C) 2011, Google, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.test.util; + +import com.ibm.icu.text.Transform; +import com.ibm.icu.text.UTF16; + +/** + * Simple wrapping for normalizer that allows for both the standard ICU normalizer, and one built directly from the UCD. + */ +public abstract class UnicodeTransform implements Transform { + public enum Type { + NFD, NFC, NFKD, NFKC, CASEFOLD + } + + public interface Factory { + public UnicodeTransform getInstance(Type type); + } + + private static Factory factory = new IcuUnicodeNormalizerFactory(); + + public static synchronized Factory getFactory() { + return factory; + } + + public static synchronized void setFactory(Factory factory) { + UnicodeTransform.factory = factory; + } + + public static synchronized UnicodeTransform getInstance(Type type) { + return factory.getInstance(type); + } + + public abstract String transform(String source); + + /** + * Can be overridden for performance. + */ + public boolean isTransformed(String source) { + return source.equals(transform(source)); + } + /** + * Can be overridden for performance. + */ + public String transform(int source) { + return transform(UTF16.valueOf(source)); + } + /** + * Can be overridden for performance. + */ + public boolean isTransformed(int source) { + return isTransformed(UTF16.valueOf(source)); + } +} +