From f6622ab2f16fd3dc114a1114996d019c85df8fac Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Thu, 19 Mar 2020 01:57:29 +0000 Subject: [PATCH] ICU-21016 Special handling of Spanish and Hebrew list format until CLDR get the data See #1043 --- icu4c/source/i18n/listformatter.cpp | 226 ++++++++++++++++-- icu4c/source/i18n/unicode/listformatter.h | 6 +- icu4c/source/test/depstest/dependencies.txt | 2 +- .../test/intltest/listformattertest.cpp | 80 ++++++- .../source/test/intltest/listformattertest.h | 1 + .../src/com/ibm/icu/text/ListFormatter.java | 133 ++++++++++- .../dev/test/format/ListFormatterTest.java | 62 +++++ 7 files changed, 479 insertions(+), 31 deletions(-) diff --git a/icu4c/source/i18n/listformatter.cpp b/icu4c/source/i18n/listformatter.cpp index f8945d55155..b9065e8796d 100644 --- a/icu4c/source/i18n/listformatter.cpp +++ b/icu4c/source/i18n/listformatter.cpp @@ -21,6 +21,7 @@ #include "unicode/listformatter.h" #include "unicode/simpleformatter.h" #include "unicode/ulistformatter.h" +#include "unicode/uscript.h" #include "fphdlimp.h" #include "mutex.h" #include "hash.h" @@ -35,34 +36,203 @@ U_NAMESPACE_BEGIN -struct ListFormatInternal : public UMemory { +namespace { + +class PatternHandler : public UObject { +public: + PatternHandler(const UnicodeString& two, const UnicodeString& end, UErrorCode& errorCode) : + twoPattern(two, 2, 2, errorCode), + endPattern(end, 2, 2, errorCode) { } + + PatternHandler(const SimpleFormatter& two, const SimpleFormatter& end) : + twoPattern(two), + endPattern(end) { } + + virtual ~PatternHandler(); + + virtual PatternHandler* clone() const { return new PatternHandler(twoPattern, endPattern); } + + virtual const SimpleFormatter& getTwoPattern(const UnicodeString&) const { + return twoPattern; + } + + virtual const SimpleFormatter& getEndPattern(const UnicodeString&) const { + return endPattern; + } + +protected: SimpleFormatter twoPattern; + SimpleFormatter endPattern; +}; + +PatternHandler::~PatternHandler() { +} + +class 
ContextualHandler : public PatternHandler { +public: + ContextualHandler(bool (*testFunc)(const UnicodeString& text), + const UnicodeString& thenTwo, + const UnicodeString& elseTwo, + const UnicodeString& thenEnd, + const UnicodeString& elseEnd, + UErrorCode& errorCode) : + PatternHandler(elseTwo, elseEnd, errorCode), + test(testFunc), + thenTwoPattern(thenTwo, 2, 2, errorCode), + thenEndPattern(thenEnd, 2, 2, errorCode) { } + + ContextualHandler(bool (*testFunc)(const UnicodeString& text), + const SimpleFormatter& thenTwo, SimpleFormatter elseTwo, + const SimpleFormatter& thenEnd, SimpleFormatter elseEnd) : + PatternHandler(elseTwo, elseEnd), + test(testFunc), + thenTwoPattern(thenTwo), + thenEndPattern(thenEnd) { } + + ~ContextualHandler() override; + + PatternHandler* clone() const override { + return new ContextualHandler( + test, thenTwoPattern, twoPattern, thenEndPattern, endPattern); + } + + const SimpleFormatter& getTwoPattern( + const UnicodeString& text) const override { + return (test)(text) ? thenTwoPattern : twoPattern; + } + + const SimpleFormatter& getEndPattern( + const UnicodeString& text) const override { + return (test)(text) ? thenEndPattern : endPattern; + } + +private: + bool (*test)(const UnicodeString&); + SimpleFormatter thenTwoPattern; + SimpleFormatter thenEndPattern; +}; + +ContextualHandler::~ContextualHandler() { +} + +static const char16_t *spanishY = u"{0} y {1}"; +static const char16_t *spanishE = u"{0} e {1}"; +static const char16_t *spanishO = u"{0} o {1}"; +static const char16_t *spanishU = u"{0} u {1}"; +static const char16_t *hebrewVav = u"{0} \u05D5{1}"; +static const char16_t *hebrewVavDash = u"{0} \u05D5-{1}"; + +// Condition to change to e. +// Starts with "hi" or "i" but not with "hie" nor "hia" +static bool shouldChangeToE(const UnicodeString& text) { + int32_t len = text.length(); + if (len == 0) { return false; } + // Case insensitive match hi but not hie nor hia. 
+ if ((text[0] == u'h' || text[0] == u'H') && + ((len > 1) && (text[1] == u'i' || text[1] == u'I')) && + ((len == 2) || !(text[2] == u'a' || text[2] == u'A' || text[2] == u'e' || text[2] == u'E'))) { + return true; + } + // Case insensitive for "start with i" + if (text[0] == u'i' || text[0] == u'I') { return true; } + return false; +} + +// Condition to change to u. +// Starts with "o", "ho", and "8". Also "11" by itself or followed by a space. +// re: ^((o|ho|8).*|11( .*)?)$ +static bool shouldChangeToU(const UnicodeString& text) { + int32_t len = text.length(); + if (len == 0) { return false; } + // Case insensitive match o.* and 8.* + if (text[0] == u'o' || text[0] == u'O' || text[0] == u'8') { return true; } + // Case insensitive match ho.* + if ((text[0] == u'h' || text[0] == u'H') && + ((len > 1) && (text[1] == 'o' || text[1] == u'O'))) { + return true; + } + // match "^11$" and "^11 .*" + if ((len >= 2) && text[0] == u'1' && text[1] == u'1' && (len == 2 || text[2] == u' ')) { return true; } + return false; +} + +// Condition to change to VAV followed by a dash. +// Starts with a character whose script is not Hebrew. +static bool shouldChangeToVavDash(const UnicodeString& text) { + if (text.isEmpty()) { return false; } + UErrorCode status = U_ZERO_ERROR; + return uscript_getScript(text.char32At(0), &status) != USCRIPT_HEBREW; +} + +PatternHandler* createPatternHandler( + const char* lang, const UnicodeString& two, const UnicodeString& end, + UErrorCode& status) { + if (uprv_strcmp(lang, "es") == 0) { + // Spanish + UnicodeString spanishYStr(TRUE, spanishY, -1); + bool twoIsY = two == spanishYStr; + bool endIsY = end == spanishYStr; + if (twoIsY || endIsY) { + UnicodeString replacement(TRUE, spanishE, -1); + return new ContextualHandler( + shouldChangeToE, + twoIsY ? replacement : two, two, + endIsY ? 
replacement : end, end, status); + } + UnicodeString spanishOStr(TRUE, spanishO, -1); + bool twoIsO = two == spanishOStr; + bool endIsO = end == spanishOStr; + if (twoIsO || endIsO) { + UnicodeString replacement(TRUE, spanishU, -1); + return new ContextualHandler( + shouldChangeToU, + twoIsO ? replacement : two, two, + endIsO ? replacement : end, end, status); + } + } else if (uprv_strcmp(lang, "he") == 0 || uprv_strcmp(lang, "iw") == 0) { + // Hebrew + UnicodeString hebrewVavStr(TRUE, hebrewVav, -1); + bool twoIsVav = two == hebrewVavStr; + bool endIsVav = end == hebrewVavStr; + if (twoIsVav || endIsVav) { + UnicodeString replacement(TRUE, hebrewVavDash, -1); + return new ContextualHandler( + shouldChangeToVavDash, + twoIsVav ? replacement : two, two, + endIsVav ? replacement : end, end, status); + } + } + return new PatternHandler(two, end, status); +} + +} // namespace + +struct ListFormatInternal : public UMemory { SimpleFormatter startPattern; SimpleFormatter middlePattern; - SimpleFormatter endPattern; + LocalPointer patternHandler; ListFormatInternal( const UnicodeString& two, const UnicodeString& start, const UnicodeString& middle, const UnicodeString& end, + const Locale& locale, UErrorCode &errorCode) : - twoPattern(two, 2, 2, errorCode), startPattern(start, 2, 2, errorCode), middlePattern(middle, 2, 2, errorCode), - endPattern(end, 2, 2, errorCode) {} + patternHandler(createPatternHandler(locale.getLanguage(), two, end, errorCode), errorCode) { } ListFormatInternal(const ListFormatData &data, UErrorCode &errorCode) : - twoPattern(data.twoPattern, errorCode), startPattern(data.startPattern, errorCode), middlePattern(data.middlePattern, errorCode), - endPattern(data.endPattern, errorCode) { } + patternHandler(createPatternHandler( + data.locale.getLanguage(), data.twoPattern, data.endPattern, errorCode), errorCode) { } ListFormatInternal(const ListFormatInternal &other) : - twoPattern(other.twoPattern), startPattern(other.startPattern), 
middlePattern(other.middlePattern), - endPattern(other.endPattern) { } + patternHandler(other.patternHandler->clone()) { } }; @@ -322,7 +492,8 @@ ListFormatInternal* ListFormatter::loadListFormatInternal( errorCode = U_MISSING_RESOURCE_ERROR; return nullptr; } - ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, errorCode); + + ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, locale, errorCode); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -524,16 +695,29 @@ UnicodeString& ListFormatter::format_( // for n items, there are 2 * (n + 1) boundary including 0 and the upper // edge. MaybeStackArray offsets((handler != nullptr) ? 2 * (nItems + 1): 0); - joinStringsAndReplace( - nItems == 2 ? data->twoPattern : data->startPattern, - result, - items[1], - result, - index == 1, - offset, - &offsetFirst, - &offsetSecond, - errorCode); + if (nItems == 2) { + joinStringsAndReplace( + data->patternHandler->getTwoPattern(items[1]), + result, + items[1], + result, + index == 1, + offset, + &offsetFirst, + &offsetSecond, + errorCode); + } else { + joinStringsAndReplace( + data->startPattern, + result, + items[1], + result, + index == 1, + offset, + &offsetFirst, + &offsetSecond, + errorCode); + } if (handler != nullptr) { offsets[0] = 0; prefixLength += offsetFirst; @@ -557,7 +741,7 @@ UnicodeString& ListFormatter::format_( } } joinStringsAndReplace( - data->endPattern, + data->patternHandler->getEndPattern(items[nItems - 1]), result, items[nItems - 1], result, @@ -612,5 +796,5 @@ UnicodeString& ListFormatter::format_( #endif return appendTo; } - + U_NAMESPACE_END diff --git a/icu4c/source/i18n/unicode/listformatter.h b/icu4c/source/i18n/unicode/listformatter.h index 59be1cb073b..9e886660aeb 100644 --- a/icu4c/source/i18n/unicode/listformatter.h +++ b/icu4c/source/i18n/unicode/listformatter.h @@ -50,9 +50,11 @@ struct ListFormatData : public UMemory { 
UnicodeString startPattern; UnicodeString middlePattern; UnicodeString endPattern; + Locale locale; - ListFormatData(const UnicodeString& two, const UnicodeString& start, const UnicodeString& middle, const UnicodeString& end) : - twoPattern(two), startPattern(start), middlePattern(middle), endPattern(end) {} + ListFormatData(const UnicodeString& two, const UnicodeString& start, const UnicodeString& middle, const UnicodeString& end, + const Locale& loc) : + twoPattern(two), startPattern(start), middlePattern(middle), endPattern(end), locale(loc) {} }; /** \endcond */ diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt index 1d726b6ea32..8437b4e3f64 100644 --- a/icu4c/source/test/depstest/dependencies.txt +++ b/icu4c/source/test/depstest/dependencies.txt @@ -945,7 +945,7 @@ group: dayperiodrules group: listformatter listformatter.o ulistformatter.o deps - resourcebundle simpleformatter format uclean_i18n formatted_value_iterimpl + uchar resourcebundle simpleformatter format uclean_i18n formatted_value_iterimpl group: double_conversion double-conversion-bignum.o double-conversion-double-to-string.o diff --git a/icu4c/source/test/intltest/listformattertest.cpp b/icu4c/source/test/intltest/listformattertest.cpp index f22d8a57c24..c57c8f5e1a9 100644 --- a/icu4c/source/test/intltest/listformattertest.cpp +++ b/icu4c/source/test/intltest/listformattertest.cpp @@ -47,6 +47,7 @@ void ListFormatterTest::runIndexedTest(int32_t index, UBool exec, TESTCASE_AUTO(TestDifferentStyles); TESTCASE_AUTO(TestBadStylesFail); TESTCASE_AUTO(TestCreateStyled); + TESTCASE_AUTO(TestContextual); TESTCASE_AUTO_END; } @@ -473,8 +474,9 @@ void ListFormatterTest::TestOutOfOrderPatterns() { }; IcuTestErrorCode errorCode(*this, "TestOutOfOrderPatterns()"); + Locale locale("en"); ListFormatData data("{1} after {0}", "{1} after the first {0}", - "{1} after {0}", "{1} in the last after {0}"); + "{1} after {0}", "{1} in the last after {0}", locale); 
ListFormatter formatter(data, errorCode); UnicodeString input1[] = {one}; @@ -622,4 +624,80 @@ void ListFormatterTest::TestCreateStyled() { } } +void ListFormatterTest::TestContextual() { + IcuTestErrorCode status(*this, "TestContextual"); + std::vector es = { "es", "es_419" , "es_PY", "es_DO" }; + std::vector he = { "he", "he_IL", "iw", "iw_IL" }; + UListFormatterWidth widths [] = { + ULISTFMT_WIDTH_WIDE, ULISTFMT_WIDTH_SHORT, ULISTFMT_WIDTH_NARROW + }; + struct TestCase { + std::vector locales; + UListFormatterType type; + const char16_t* expected; + const char16_t* data1; + const char16_t* data2; + const char16_t* data3; + } cases[] = { + { es, ULISTFMT_TYPE_AND, u"fascinante e increíblemente", + u"fascinante", u"increíblemente", nullptr }, + { es, ULISTFMT_TYPE_AND, u"Comunicaciones Industriales e IIoT", + u"Comunicaciones Industriales", u"IIoT", nullptr }, + { es, ULISTFMT_TYPE_AND, u"España e Italia", u"España", u"Italia", nullptr }, + { es, ULISTFMT_TYPE_AND, u"hijas intrépidas e hijos solidarios", + u"hijas intrépidas", u"hijos solidarios", nullptr }, + { es, ULISTFMT_TYPE_AND, u"a un hombre e hirieron a otro", + u"a un hombre", u"hirieron a otro", nullptr }, + { es, ULISTFMT_TYPE_AND, u"hija e hijo", u"hija", u"hijo", nullptr }, + { es, ULISTFMT_TYPE_AND, u"esposa, hija e hijo", u"esposa", u"hija", u"hijo" }, + // For 'y' exception + { es, ULISTFMT_TYPE_AND, u"oro y hierro", u"oro", u"hierro", nullptr }, + { es, ULISTFMT_TYPE_AND, u"agua y hielo", u"agua", u"hielo", nullptr }, + { es, ULISTFMT_TYPE_AND, u"colágeno y hialurónico", u"colágeno", u"hialurónico", nullptr }, + + { es, ULISTFMT_TYPE_OR, u"desierto u oasis", u"desierto", u"oasis", nullptr }, + { es, ULISTFMT_TYPE_OR, u"oasis, desierto u océano", u"oasis", u"desierto", u"océano" }, + { es, ULISTFMT_TYPE_OR, u"7 u 8", u"7", u"8", nullptr }, + { es, ULISTFMT_TYPE_OR, u"7 u 80", u"7", u"80", nullptr }, + { es, ULISTFMT_TYPE_OR, u"7 u 800", u"7", u"800", nullptr }, + { es, ULISTFMT_TYPE_OR, u"6, 7 u 
8", u"6", u"7", u"8" }, + { es, ULISTFMT_TYPE_OR, u"10 u 11", u"10", u"11", nullptr }, + { es, ULISTFMT_TYPE_OR, u"10 o 111", u"10", u"111", nullptr }, + { es, ULISTFMT_TYPE_OR, u"10 o 11.2", u"10", u"11.2", nullptr }, + { es, ULISTFMT_TYPE_OR, u"9, 10 u 11", u"9", u"10", u"11" }, + + { he, ULISTFMT_TYPE_AND, u"a, b ו-c", u"a", u"b", u"c" }, + { he, ULISTFMT_TYPE_AND, u"a ו-b", u"a", u"b", nullptr }, + { he, ULISTFMT_TYPE_AND, u"1, 2 ו-3", u"1", u"2", u"3" }, + { he, ULISTFMT_TYPE_AND, u"1 ו-2", u"1", u"2", nullptr }, + { he, ULISTFMT_TYPE_AND, u"אהבה ומקווה", u"אהבה", u"מקווה", nullptr }, + { he, ULISTFMT_TYPE_AND, u"אהבה, מקווה ואמונה", u"אהבה", u"מקווה", u"אמונה" }, + }; + for (auto width : widths) { + for (auto cas : cases) { + for (auto locale : cas.locales) { + LocalPointer fmt( + ListFormatter::createInstance(locale.c_str(), cas.type, width, status), + status); + if (status.errIfFailureAndReset()) { + continue; + } + UnicodeString message = UnicodeString(u"TestContextual loc=") + + locale.c_str() + u" type=" + + Int64ToUnicodeString(cas.type) + u" width=" + + Int64ToUnicodeString(width); + if (cas.data3 == nullptr) { + const UnicodeString inputs2[] = { cas.data1, cas.data2 }; + FormattedList result = fmt->formatStringsToValue(inputs2, UPRV_LENGTHOF(inputs2), status); + assertEquals(message, cas.expected, result.toTempString(status)); + } else { + const UnicodeString inputs3[] = { cas.data1, cas.data2, cas.data3 }; + FormattedList result = fmt->formatStringsToValue(inputs3, UPRV_LENGTHOF(inputs3), status); + assertEquals(message, cas.expected, result.toTempString(status)); + } + } + } + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/listformattertest.h b/icu4c/source/test/intltest/listformattertest.h index f16dd23905d..9c7a5dd20d6 100644 --- a/icu4c/source/test/intltest/listformattertest.h +++ b/icu4c/source/test/intltest/listformattertest.h @@ -54,6 +54,7 @@ class ListFormatterTest : public IntlTestWithFieldPosition { 
void TestDifferentStyles(); void TestBadStylesFail(); void TestCreateStyled(); + void TestContextual(); private: void CheckFormatting( diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java index d288e217dce..6e7dbf7905e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java @@ -16,6 +16,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.Locale; +import java.util.regex.Pattern; import com.ibm.icu.impl.FormattedStringBuilder; import com.ibm.icu.impl.FormattedValueStringBuilderImpl; @@ -39,12 +40,16 @@ import com.ibm.icu.util.UResourceBundle; */ final public class ListFormatter { // Compiled SimpleFormatter patterns. - private final String two; private final String start; private final String middle; - private final String end; private final ULocale locale; + private interface PatternHandler { + public String getTwoPattern(String text); + public String getEndPattern(String text); + } + private final PatternHandler patternHandler; + /** * Indicates the style of Listformatter * TODO(ICU-20888): Remove this in ICU 68. 
@@ -371,11 +376,10 @@ final public class ListFormatter { } private ListFormatter(String two, String start, String middle, String end, ULocale locale) { - this.two = two; this.start = start; this.middle = middle; - this.end = end; this.locale = locale; + this.patternHandler = createPatternHandler(two, end); } private static String compilePattern(String pattern, StringBuilder sb) { @@ -526,14 +530,131 @@ final public class ListFormatter { case 1: return new FormattedListBuilder(it.next(), needsFields); case 2: - return new FormattedListBuilder(it.next(), needsFields).append(two, it.next(), 1); + Object first = it.next(); + Object second = it.next(); + return new FormattedListBuilder(first, needsFields) + .append(patternHandler.getTwoPattern(String.valueOf(second)), second, 1); } FormattedListBuilder builder = new FormattedListBuilder(it.next(), needsFields); builder.append(start, it.next(), 1); for (int idx = 2; idx < count - 1; ++idx) { builder.append(middle, it.next(), idx); } - return builder.append(end, it.next(), count - 1); + Object last = it.next(); + return builder.append(patternHandler.getEndPattern(String.valueOf(last)), last, count - 1); + } + + // A static handler just returns the pattern without considering the input text. + private class StaticHandler implements PatternHandler { + StaticHandler(String two, String end) { + twoPattern = two; + endPattern = end; + } + + @Override + public String getTwoPattern(String text) { return twoPattern; } + + @Override + public String getEndPattern(String text) { return endPattern; } + + private final String twoPattern; + private final String endPattern; + } + + // A contextual handler returns one of the two patterns depending on whether the text matched the regexp. 
+ private class ContextualHandler implements PatternHandler { + ContextualHandler(Pattern regexp, String thenTwo, String elseTwo, String thenEnd, String elseEnd) { + this.regexp = regexp; + thenTwoPattern = thenTwo; + elseTwoPattern = elseTwo; + thenEndPattern = thenEnd; + elseEndPattern = elseEnd; + } + + @Override + public String getTwoPattern(String text) { + if(regexp.matcher(text).matches()) { + return thenTwoPattern; + } else { + return elseTwoPattern; + } + } + + @Override + public String getEndPattern(String text) { + if(regexp.matcher(text).matches()) { + return thenEndPattern; + } else { + return elseEndPattern; + } + } + + private final Pattern regexp; + private final String thenTwoPattern; + private final String elseTwoPattern; + private final String thenEndPattern; + private final String elseEndPattern; + + } + + // Pattern in the ICU Data in which y might be replaced by e. + private static final String compiledY = compilePattern("{0} y {1}", new StringBuilder()); + + // The new pattern, which replaces y with e. + private static final String compiledE = compilePattern("{0} e {1}", new StringBuilder()); + + // Pattern in the ICU Data in which o might be replaced by u. + private static final String compiledO = compilePattern("{0} o {1}", new StringBuilder()); + + // The new pattern, which replaces o with u. + private static final String compiledU = compilePattern("{0} u {1}", new StringBuilder()); + + // Condition to change to e. + // Starts with "hi" or "i" but not with "hie" nor "hia". + private static final Pattern changeToE = Pattern.compile("(i.*|hi|hi[^ae].*)", Pattern.CASE_INSENSITIVE); + + // Condition to change to u. + // Starts with "o", "ho", and "8". Also "11" by itself. + private static final Pattern changeToU = Pattern.compile("((o|ho|8).*|11)", Pattern.CASE_INSENSITIVE); + + // Pattern in the ICU Data which might need to add a DASH after VAV. 
+ private static final String compiledVav = compilePattern("{0} \u05D5{1}", new StringBuilder()); + + // Pattern to add a DASH after VAV. + private static final String compiledVavDash = compilePattern("{0} \u05D5-{1}", new StringBuilder()); + + // Condition to change to VAV followed by a dash. + // Starts with a character outside the Hebrew block. + private static final Pattern changeToVavDash = Pattern.compile("^[\\P{InHebrew}].*$"); + + // A factory function to create the pattern handler based on the locale. + // Handles the special cases of Spanish and Hebrew. + private PatternHandler createPatternHandler(String two, String end) { + if (this.locale != null) { + String language = this.locale.getLanguage(); + if (language.equals("es")) { + boolean twoIsY = two.equals(compiledY); + boolean endIsY = end.equals(compiledY); + if (twoIsY || endIsY) { + return new ContextualHandler( + changeToE, twoIsY ? compiledE : two, two, endIsY ? compiledE : end, end); + } + boolean twoIsO = two.equals(compiledO); + boolean endIsO = end.equals(compiledO); + if (twoIsO || endIsO) { + return new ContextualHandler( + changeToU, twoIsO ? compiledU : two, two, endIsO ? compiledU : end, end); + } + } else if (language.equals("he") || language.equals("iw")) { + boolean twoIsVav = two.equals(compiledVav); + boolean endIsVav = end.equals(compiledVav); + if (twoIsVav || endIsVav) { + return new ContextualHandler(changeToVavDash, + twoIsVav ? compiledVavDash : two, two, endIsVav ? 
compiledVavDash : end, end); + } + } + } + return new StaticHandler(two, end); } /** diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java index 8a1d30a678b..186a4196030 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java @@ -10,6 +10,7 @@ package com.ibm.icu.dev.test.format; import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Locale; import org.junit.Test; @@ -292,4 +293,65 @@ public class ListFormatterTest extends TestFmwk { assertEquals(message, expected, result); } } + + @Test + public void TestContextual() { + String [] es = { "es", "es_419", "es_PY", "es_DO" }; + String [] he = { "he", "he_IL", "iw", "iw_IL" }; + Width[] widths = {Width.WIDE, Width.SHORT, Width.NARROW}; + Object[][] cases = { + { es, Type.AND, "fascinante e incre\u00EDblemente", "fascinante", "incre\u00EDblemente"}, + { es, Type.AND, "Comunicaciones Industriales e IIoT", "Comunicaciones Industriales", "IIoT"}, + { es, Type.AND, "Espa\u00F1a e Italia", "Espa\u00F1a", "Italia"}, + { es, Type.AND, "hijas intr\u00E9pidas e hijos solidarios", "hijas intr\u00E9pidas", "hijos solidarios"}, + { es, Type.AND, "a un hombre e hirieron a otro", "a un hombre", "hirieron a otro"}, + { es, Type.AND, "hija e hijo", "hija", "hijo"}, + { es, Type.AND, "esposa, hija e hijo", "esposa", "hija", "hijo"}, + // For 'y' exception + { es, Type.AND, "oro y hierro", "oro", "hierro"}, + { es, Type.AND, "agua y hielo", "agua", "hielo"}, + { es, Type.AND, "col\u00E1geno y hialur\u00F3nico", "col\u00E1geno", "hialur\u00F3nico"}, + + { es, Type.OR, "desierto u oasis", "desierto", "oasis"}, + { es, Type.OR, "oasis, desierto u océano", "oasis", "desierto", "océano"}, + { es, Type.OR, "7 u 8", "7", "8"}, + { es, Type.OR, "7 u 80", "7", "80"}, + { es, 
Type.OR, "7 u 800", "7", "800"}, + { es, Type.OR, "6, 7 u 8", "6", "7", "8"}, + { es, Type.OR, "10 u 11", "10", "11"}, + { es, Type.OR, "10 o 111", "10", "111"}, + { es, Type.OR, "10 o 11.2", "10", "11.2"}, + { es, Type.OR, "9, 10 u 11", "9", "10", "11"}, + + { he, Type.AND, "a, b \u05D5-c", "a", "b", "c" }, + { he, Type.AND, "a \u05D5-b", "a", "b" }, + { he, Type.AND, "1, 2 \u05D5-3", "1", "2", "3" }, + { he, Type.AND, "1 \u05D5-2", "1", "2" }, + { he, Type.AND, "\u05D0\u05D4\u05D1\u05D4 \u05D5\u05DE\u05E7\u05D5\u05D5\u05D4", + "\u05D0\u05D4\u05D1\u05D4", "\u05DE\u05E7\u05D5\u05D5\u05D4" }, + { he, Type.AND, "\u05D0\u05D4\u05D1\u05D4, \u05DE\u05E7\u05D5\u05D5\u05D4 \u05D5\u05D0\u05DE\u05D5\u05E0\u05D4", + "\u05D0\u05D4\u05D1\u05D4", "\u05DE\u05E7\u05D5\u05D5\u05D4", "\u05D0\u05DE\u05D5\u05E0\u05D4" }, + }; + for (Width width : widths) { + for (Object[] cas : cases) { + String [] locales = (String[]) cas[0]; + Type type = (Type) cas[1]; + String expected = (String) cas[2]; + for (String locale : locales) { + ULocale uloc = new ULocale(locale); + List inputs = Arrays.asList(cas).subList(3, cas.length); + ListFormatter fmt = ListFormatter.getInstance(uloc, type, width); + String message = "TestContextual uloc=" + + uloc + " type=" + + type + " width=" + + width + "data="; + for (Object i : inputs) { + message += i + ","; + } + String result = fmt.format(inputs); + assertEquals(message, expected, result); + } + } + } + } } -- 2.40.0