ICU-21016 Special handling of Spanish and Hebrew list format until CLDR get the data

author Frank Tang <ftang@chromium.org>

Thu, 19 Mar 2020 01:57:29 +0000 (01:57 +0000)

committer Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>

Fri, 20 Mar 2020 02:36:15 +0000 (19:36 -0700)
author Frank Tang <ftang@chromium.org>
Thu, 19 Mar 2020 01:57:29 +0000 (01:57 +0000)
committer Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>
Fri, 20 Mar 2020 02:36:15 +0000 (19:36 -0700)
diff --git a/icu4c/source/i18n/listformatter.cpp b/icu4c/source/i18n/listformatter.cpp

index f8945d55155174dcd17c08c0df5bff5f2224e74d..b9065e8796d5d51bb788890c3f118626314c9ca7 100644 (file)
--- a/icu4c/source/i18n/listformatter.cpp
+++ b/icu4c/source/i18n/listformatter.cpp
@@ -21,6 +21,7 @@
  #include "unicode/listformatter.h"
  #include "unicode/simpleformatter.h"
  #include "unicode/ulistformatter.h"
+#include "unicode/uscript.h"
  #include "fphdlimp.h"
  #include "mutex.h"
  #include "hash.h"
@@ -35,34 +36,203 @@
  
  U_NAMESPACE_BEGIN
  
-struct ListFormatInternal : public UMemory {
+namespace {
+
+class PatternHandler : public UObject {
+public:
+    PatternHandler(const UnicodeString& two, const UnicodeString& end, UErrorCode& errorCode) :
+        twoPattern(two, 2, 2, errorCode),
+        endPattern(end, 2, 2, errorCode) {  }
+
+    PatternHandler(const SimpleFormatter& two, const SimpleFormatter& end) :
+        twoPattern(two),
+        endPattern(end) { }
+
+    virtual ~PatternHandler();
+
+    virtual PatternHandler* clone() const { return new PatternHandler(twoPattern, endPattern); }
+
+    virtual const SimpleFormatter& getTwoPattern(const UnicodeString&) const {
+        return twoPattern;
+    }
+
+    virtual const SimpleFormatter& getEndPattern(const UnicodeString&) const {
+        return endPattern;
+    }
+
+protected:
      SimpleFormatter twoPattern;
+    SimpleFormatter endPattern;
+};
+
+PatternHandler::~PatternHandler() {
+}
+
+class ContextualHandler : public PatternHandler {
+public:
+    ContextualHandler(bool (*testFunc)(const UnicodeString& text),
+                      const UnicodeString& thenTwo,
+                      const UnicodeString& elseTwo,
+                      const UnicodeString& thenEnd,
+                      const UnicodeString& elseEnd,
+                      UErrorCode& errorCode) :
+        PatternHandler(elseTwo, elseEnd, errorCode),
+        test(testFunc),
+        thenTwoPattern(thenTwo, 2, 2, errorCode),
+        thenEndPattern(thenEnd, 2, 2, errorCode) {  }
+
+    ContextualHandler(bool (*testFunc)(const UnicodeString& text),
+                      const SimpleFormatter& thenTwo, SimpleFormatter elseTwo,
+                      const SimpleFormatter& thenEnd, SimpleFormatter elseEnd) :
+      PatternHandler(elseTwo, elseEnd),
+      test(testFunc),
+      thenTwoPattern(thenTwo),
+      thenEndPattern(thenEnd) { }
+
+    ~ContextualHandler() override;
+
+    PatternHandler* clone() const override {
+        return new ContextualHandler(
+            test, thenTwoPattern, twoPattern, thenEndPattern, endPattern);
+    }
+
+    const SimpleFormatter& getTwoPattern(
+        const UnicodeString& text) const override {
+        return (test)(text) ? thenTwoPattern : twoPattern;
+    }
+
+    const SimpleFormatter& getEndPattern(
+        const UnicodeString& text) const override {
+        return (test)(text) ? thenEndPattern : endPattern;
+    }
+
+private:
+    bool (*test)(const UnicodeString&);
+    SimpleFormatter thenTwoPattern;
+    SimpleFormatter thenEndPattern;
+};
+
+ContextualHandler::~ContextualHandler() {
+}
+
+static const char16_t *spanishY = u"{0} y {1}";
+static const char16_t *spanishE = u"{0} e {1}";
+static const char16_t *spanishO = u"{0} o {1}";
+static const char16_t *spanishU = u"{0} u {1}";
+static const char16_t *hebrewVav = u"{0} \u05D5{1}";
+static const char16_t *hebrewVavDash = u"{0} \u05D5-{1}";
+
+// Condition to change to e.
+// Starts with "hi" or "i" but not with "hie" nor "hia"
+static bool shouldChangeToE(const UnicodeString& text) {
+    int32_t len = text.length();
+    if (len == 0) { return false; }
+    // Case insensitive match hi but not hie nor hia.
+    if ((text[0] == u'h' || text[0] == u'H') &&
+            ((len > 1) && (text[1] == u'i' || text[1] == u'I')) &&
+            ((len == 2) || !(text[2] == u'a' || text[2] == u'A' || text[2] == u'e' || text[2] == u'E'))) {
+        return true;
+    }
+    // Case insensitive for "start with i"
+    if (text[0] == u'i' || text[0] == u'I') { return true; }
+    return false;
+}
+
+// Condition to change to u.
+// Starts with "o", "ho", or "8". Also "11" by itself or followed by a space.
+// re: ^((o|ho|8).*|11( .*)?)$
+static bool shouldChangeToU(const UnicodeString& text) {
+    int32_t len = text.length();
+    if (len == 0) { return false; }
+    // Case insensitive match o.* and 8.*
+    if (text[0] == u'o' || text[0] == u'O' || text[0] == u'8') { return true; }
+    // Case insensitive match ho.*
+    if ((text[0] == u'h' || text[0] == u'H') &&
+            ((len > 1) && (text[1] == 'o' || text[1] == u'O'))) {
+        return true;
+    }
+    // match "^11$" and "^11 .*"
+    if ((len >= 2) && text[0] == u'1' && text[1] == u'1' && (len == 2 || text[2] == u' ')) { return true; }
+    return false;
+}
+
+// Condition to change to VAV followed by a dash.
+// Starts with a non-Hebrew character.
+static bool shouldChangeToVavDash(const UnicodeString& text) {
+    if (text.isEmpty()) { return false; }
+    UErrorCode status = U_ZERO_ERROR;
+    return uscript_getScript(text.char32At(0), &status) != USCRIPT_HEBREW;
+}
+
+PatternHandler* createPatternHandler(
+        const char* lang, const UnicodeString& two, const UnicodeString& end,
+    UErrorCode& status) {
+    if (uprv_strcmp(lang, "es") == 0) {
+        // Spanish
+        UnicodeString spanishYStr(TRUE, spanishY, -1);
+        bool twoIsY = two == spanishYStr;
+        bool endIsY = end == spanishYStr;
+        if (twoIsY || endIsY) {
+            UnicodeString replacement(TRUE, spanishE, -1);
+            return new ContextualHandler(
+                shouldChangeToE,
+                twoIsY ? replacement : two, two,
+                endIsY ? replacement : end, end, status);
+        }
+        UnicodeString spanishOStr(TRUE, spanishO, -1);
+        bool twoIsO = two == spanishOStr;
+        bool endIsO = end == spanishOStr;
+        if (twoIsO || endIsO) {
+            UnicodeString replacement(TRUE, spanishU, -1);
+            return new ContextualHandler(
+                shouldChangeToU,
+                twoIsO ? replacement : two, two,
+                endIsO ? replacement : end, end, status);
+        }
+    } else if (uprv_strcmp(lang, "he") == 0 || uprv_strcmp(lang, "iw") == 0) {
+        // Hebrew
+        UnicodeString hebrewVavStr(TRUE, hebrewVav, -1);
+        bool twoIsVav = two == hebrewVavStr;
+        bool endIsVav = end == hebrewVavStr;
+        if (twoIsVav || endIsVav) {
+            UnicodeString replacement(TRUE, hebrewVavDash, -1);
+            return new ContextualHandler(
+                shouldChangeToVavDash,
+                twoIsVav ? replacement : two, two,
+                endIsVav ? replacement : end, end, status);
+        }
+    }
+    return new PatternHandler(two, end, status);
+}
+
+}  // namespace
+
+struct ListFormatInternal : public UMemory {
      SimpleFormatter startPattern;
      SimpleFormatter middlePattern;
-    SimpleFormatter endPattern;
+    LocalPointer<PatternHandler> patternHandler;
  
  ListFormatInternal(
          const UnicodeString& two,
          const UnicodeString& start,
          const UnicodeString& middle,
          const UnicodeString& end,
+        const Locale& locale,
          UErrorCode &errorCode) :
-        twoPattern(two, 2, 2, errorCode),
          startPattern(start, 2, 2, errorCode),
          middlePattern(middle, 2, 2, errorCode),
-        endPattern(end, 2, 2, errorCode) {}
+        patternHandler(createPatternHandler(locale.getLanguage(), two, end, errorCode), errorCode) { }
  
  ListFormatInternal(const ListFormatData &data, UErrorCode &errorCode) :
-        twoPattern(data.twoPattern, errorCode),
          startPattern(data.startPattern, errorCode),
          middlePattern(data.middlePattern, errorCode),
-        endPattern(data.endPattern, errorCode) { }
+        patternHandler(createPatternHandler(
+            data.locale.getLanguage(), data.twoPattern, data.endPattern, errorCode), errorCode) { }
  
  ListFormatInternal(const ListFormatInternal &other) :
-    twoPattern(other.twoPattern),
      startPattern(other.startPattern),
      middlePattern(other.middlePattern),
-    endPattern(other.endPattern) { }
+    patternHandler(other.patternHandler->clone()) { }
  };
  
  
@@ -322,7 +492,8 @@ ListFormatInternal* ListFormatter::loadListFormatInternal(
          errorCode = U_MISSING_RESOURCE_ERROR;
          return nullptr;
      }
-    ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, errorCode);
+
+    ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, locale, errorCode);
      if (result == nullptr) {
          errorCode = U_MEMORY_ALLOCATION_ERROR;
          return nullptr;
@@ -524,16 +695,29 @@ UnicodeString& ListFormatter::format_(
      // for n items, there are 2 * (n + 1) boundary including 0 and the upper
      // edge.
      MaybeStackArray<int32_t, 10> offsets((handler != nullptr) ? 2 * (nItems + 1): 0);
-    joinStringsAndReplace(
-            nItems == 2 ? data->twoPattern : data->startPattern,
-            result,
-            items[1],
-            result,
-            index == 1,
-            offset,
-            &offsetFirst,
-            &offsetSecond,
-            errorCode);
+    if (nItems == 2) {
+        joinStringsAndReplace(
+                data->patternHandler->getTwoPattern(items[1]),
+                result,
+                items[1],
+                result,
+                index == 1,
+                offset,
+                &offsetFirst,
+                &offsetSecond,
+                errorCode);
+    } else {
+        joinStringsAndReplace(
+                data->startPattern,
+                result,
+                items[1],
+                result,
+                index == 1,
+                offset,
+                &offsetFirst,
+                &offsetSecond,
+                errorCode);
+    }
      if (handler != nullptr) {
          offsets[0] = 0;
          prefixLength += offsetFirst;
@@ -557,7 +741,7 @@ UnicodeString& ListFormatter::format_(
              }
          }
          joinStringsAndReplace(
-                data->endPattern,
+                data->patternHandler->getEndPattern(items[nItems - 1]),
                  result,
                  items[nItems - 1],
                  result,
@@ -612,5 +796,5 @@ UnicodeString& ListFormatter::format_(
  #endif  
      return appendTo;
  }
-
+ 
  U_NAMESPACE_END
diff --git a/icu4c/source/i18n/unicode/listformatter.h b/icu4c/source/i18n/unicode/listformatter.h

index 59be1cb073b16661ffc834656349c412f3335cf0..9e886660aeb3d87fc4193ae12e353228cf1568d6 100644 (file)
--- a/icu4c/source/i18n/unicode/listformatter.h
+++ b/icu4c/source/i18n/unicode/listformatter.h
@@ -50,9 +50,11 @@ struct ListFormatData : public UMemory {
      UnicodeString startPattern;
      UnicodeString middlePattern;
      UnicodeString endPattern;
+    Locale locale;
  
-  ListFormatData(const UnicodeString& two, const UnicodeString& start, const UnicodeString& middle, const UnicodeString& end) :
-      twoPattern(two), startPattern(start), middlePattern(middle), endPattern(end) {}
+  ListFormatData(const UnicodeString& two, const UnicodeString& start, const UnicodeString& middle, const UnicodeString& end,
+                 const Locale& loc) :
+      twoPattern(two), startPattern(start), middlePattern(middle), endPattern(end), locale(loc) {}
  };
  /** \endcond */
  
diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt

index 1d726b6ea32a6c61b53a363213b14ddc857e7d66..8437b4e3f6453672166fca3de9627cefbca66d37 100644 (file)
--- a/icu4c/source/test/depstest/dependencies.txt
+++ b/icu4c/source/test/depstest/dependencies.txt
@@ -945,7 +945,7 @@ group: dayperiodrules
  group: listformatter
      listformatter.o ulistformatter.o
    deps
-    resourcebundle simpleformatter format uclean_i18n formatted_value_iterimpl
+    uchar resourcebundle simpleformatter format uclean_i18n formatted_value_iterimpl
  
  group: double_conversion
      double-conversion-bignum.o double-conversion-double-to-string.o
diff --git a/icu4c/source/test/intltest/listformattertest.cpp b/icu4c/source/test/intltest/listformattertest.cpp

index f22d8a57c24849eefb9111b57a8c0395d39c3d61..c57c8f5e1a9e1fcda05acce97fedc5cdc21c9d0c 100644 (file)
--- a/icu4c/source/test/intltest/listformattertest.cpp
+++ b/icu4c/source/test/intltest/listformattertest.cpp
@@ -47,6 +47,7 @@ void ListFormatterTest::runIndexedTest(int32_t index, UBool exec,
      TESTCASE_AUTO(TestDifferentStyles);
      TESTCASE_AUTO(TestBadStylesFail);
      TESTCASE_AUTO(TestCreateStyled);
+    TESTCASE_AUTO(TestContextual);
      TESTCASE_AUTO_END;
  }
  
@@ -473,8 +474,9 @@ void ListFormatterTest::TestOutOfOrderPatterns() {
      };
  
      IcuTestErrorCode errorCode(*this, "TestOutOfOrderPatterns()");
+    Locale locale("en");
      ListFormatData data("{1} after {0}", "{1} after the first {0}",
-                        "{1} after {0}", "{1} in the last after {0}");
+                        "{1} after {0}", "{1} in the last after {0}", locale);
      ListFormatter formatter(data, errorCode);
  
      UnicodeString input1[] = {one};
@@ -622,4 +624,80 @@ void ListFormatterTest::TestCreateStyled() {
      }
  }
  
+void ListFormatterTest::TestContextual() {
+    IcuTestErrorCode status(*this, "TestContextual");
+    std::vector<std::string> es = { "es", "es_419" , "es_PY", "es_DO" };
+    std::vector<std::string> he = { "he", "he_IL", "iw", "iw_IL" };
+    UListFormatterWidth widths [] = {
+        ULISTFMT_WIDTH_WIDE, ULISTFMT_WIDTH_SHORT, ULISTFMT_WIDTH_NARROW
+    };
+    struct TestCase {
+        std::vector<std::string> locales;
+        UListFormatterType type;
+        const char16_t* expected;
+        const char16_t* data1;
+        const char16_t* data2;
+        const char16_t* data3;
+    } cases[] = {
+        { es, ULISTFMT_TYPE_AND, u"fascinante e increíblemente",
+          u"fascinante",                     u"increíblemente",       nullptr },
+        { es, ULISTFMT_TYPE_AND, u"Comunicaciones Industriales e IIoT",
+          u"Comunicaciones Industriales",    u"IIoT",                 nullptr },
+        { es, ULISTFMT_TYPE_AND, u"España e Italia",         u"España",   u"Italia",      nullptr },
+        { es, ULISTFMT_TYPE_AND, u"hijas intrépidas e hijos solidarios",
+          u"hijas intrépidas",               u"hijos solidarios",     nullptr },
+        { es, ULISTFMT_TYPE_AND, u"a un hombre e hirieron a otro",
+          u"a un hombre",                    u"hirieron a otro",      nullptr },
+        { es, ULISTFMT_TYPE_AND, u"hija e hijo",             u"hija",     u"hijo",        nullptr },
+        { es, ULISTFMT_TYPE_AND, u"esposa, hija e hijo",     u"esposa",   u"hija",        u"hijo" },
+        // For 'y' exception
+        { es, ULISTFMT_TYPE_AND, u"oro y hierro",            u"oro",      u"hierro",      nullptr },
+        { es, ULISTFMT_TYPE_AND, u"agua y hielo",            u"agua",     u"hielo",       nullptr },
+        { es, ULISTFMT_TYPE_AND, u"colágeno y hialurónico",  u"colágeno", u"hialurónico", nullptr },
+
+        { es, ULISTFMT_TYPE_OR, u"desierto u oasis",         u"desierto", u"oasis",       nullptr },
+        { es, ULISTFMT_TYPE_OR, u"oasis, desierto u océano", u"oasis",    u"desierto",    u"océano" },
+        { es, ULISTFMT_TYPE_OR, u"7 u 8",                    u"7",        u"8",           nullptr },
+        { es, ULISTFMT_TYPE_OR, u"7 u 80",                   u"7",        u"80",          nullptr },
+        { es, ULISTFMT_TYPE_OR, u"7 u 800",                  u"7",        u"800",         nullptr },
+        { es, ULISTFMT_TYPE_OR, u"6, 7 u 8",                 u"6",        u"7",           u"8" },
+        { es, ULISTFMT_TYPE_OR, u"10 u 11",                  u"10",       u"11",          nullptr },
+        { es, ULISTFMT_TYPE_OR, u"10 o 111",                 u"10",       u"111",         nullptr },
+        { es, ULISTFMT_TYPE_OR, u"10 o 11.2",                u"10",       u"11.2",        nullptr },
+        { es, ULISTFMT_TYPE_OR, u"9, 10 u 11",               u"9",        u"10",          u"11" },
+
+        { he, ULISTFMT_TYPE_AND, u"a, b ו-c",               u"a",      u"b",      u"c" },
+        { he, ULISTFMT_TYPE_AND, u"a ו-b",                  u"a",      u"b",      nullptr },
+        { he, ULISTFMT_TYPE_AND, u"1, 2 ו-3",               u"1",      u"2",      u"3" },
+        { he, ULISTFMT_TYPE_AND, u"1 ו-2",                  u"1",      u"2",      nullptr },
+        { he, ULISTFMT_TYPE_AND, u"אהבה ומקווה",            u"אהבה",   u"מקווה",  nullptr },
+        { he, ULISTFMT_TYPE_AND, u"אהבה, מקווה ואמונה",     u"אהבה",   u"מקווה",  u"אמונה" },
+    };
+    for (auto width : widths) {
+        for (auto cas : cases) {
+            for (auto locale : cas.locales) {
+                LocalPointer<ListFormatter> fmt(
+                    ListFormatter::createInstance(locale.c_str(), cas.type, width, status),
+                    status);
+                if (status.errIfFailureAndReset()) {
+                    continue;
+                }
+                UnicodeString message = UnicodeString(u"TestContextual loc=")
+                    + locale.c_str() + u" type="
+                    + Int64ToUnicodeString(cas.type) + u" width="
+                    + Int64ToUnicodeString(width);
+                if (cas.data3 == nullptr) {
+                    const UnicodeString inputs2[] = { cas.data1, cas.data2 };
+                    FormattedList result = fmt->formatStringsToValue(inputs2, UPRV_LENGTHOF(inputs2), status);
+                    assertEquals(message, cas.expected, result.toTempString(status));
+                } else {
+                    const UnicodeString inputs3[] = { cas.data1, cas.data2, cas.data3 };
+                    FormattedList result = fmt->formatStringsToValue(inputs3, UPRV_LENGTHOF(inputs3), status);
+                    assertEquals(message, cas.expected, result.toTempString(status));
+                }
+            }
+        }
+    }
+}
+
  #endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icu4c/source/test/intltest/listformattertest.h b/icu4c/source/test/intltest/listformattertest.h

index f16dd23905d101788befb742f444a54944fb4341..9c7a5dd20d678a3ade93f033bbdbdae22ebe6777 100644 (file)
--- a/icu4c/source/test/intltest/listformattertest.h
+++ b/icu4c/source/test/intltest/listformattertest.h
@@ -54,6 +54,7 @@ class ListFormatterTest : public IntlTestWithFieldPosition {
      void TestDifferentStyles();
      void TestBadStylesFail();
      void TestCreateStyled();
+    void TestContextual();
  
    private:
      void CheckFormatting(
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java

index d288e217dce50485322712db5dfc084518d541ba..6e7dbf7905e97a2ebee24edc1d2249c8c6fcb363 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java
@@ -16,6 +16,7 @@ import java.util.Arrays;
  import java.util.Collection;
  import java.util.Iterator;
  import java.util.Locale;
+import java.util.regex.Pattern;
  
  import com.ibm.icu.impl.FormattedStringBuilder;
  import com.ibm.icu.impl.FormattedValueStringBuilderImpl;
@@ -39,12 +40,16 @@ import com.ibm.icu.util.UResourceBundle;
   */
  final public class ListFormatter {
      // Compiled SimpleFormatter patterns.
-    private final String two;
      private final String start;
      private final String middle;
-    private final String end;
      private final ULocale locale;
  
+    private interface PatternHandler {
+        public String getTwoPattern(String text);
+        public String getEndPattern(String text);
+    }
+    private final PatternHandler patternHandler;
+
      /**
       * Indicates the style of Listformatter
       * TODO(ICU-20888): Remove this in ICU 68.
@@ -371,11 +376,10 @@ final public class ListFormatter {
      }
  
      private ListFormatter(String two, String start, String middle, String end, ULocale locale) {
-        this.two = two;
          this.start = start;
          this.middle = middle;
-        this.end = end;
          this.locale = locale;
+        this.patternHandler = createPatternHandler(two, end);
      }
  
      private static String compilePattern(String pattern, StringBuilder sb) {
@@ -526,14 +530,131 @@ final public class ListFormatter {
          case 1:
              return new FormattedListBuilder(it.next(), needsFields);
          case 2:
-            return new FormattedListBuilder(it.next(), needsFields).append(two, it.next(), 1);
+            Object first = it.next();
+            Object second = it.next();
+            return new FormattedListBuilder(first, needsFields)
+                .append(patternHandler.getTwoPattern(String.valueOf(second)), second, 1);
          }
          FormattedListBuilder builder = new FormattedListBuilder(it.next(), needsFields);
          builder.append(start, it.next(), 1);
          for (int idx = 2; idx < count - 1; ++idx) {
              builder.append(middle, it.next(), idx);
          }
-        return builder.append(end, it.next(), count - 1);
+        Object last = it.next();
+        return builder.append(patternHandler.getEndPattern(String.valueOf(last)), last, count - 1);
+    }
+
+    // A static handler just returns the pattern without considering the input text.
+    private class StaticHandler implements PatternHandler {
+        StaticHandler(String two, String end) {
+            twoPattern = two;
+            endPattern = end;
+        }
+
+        @Override
+        public String getTwoPattern(String text) { return twoPattern; }
+
+        @Override
+        public String getEndPattern(String text) { return endPattern; }
+
+        private final String twoPattern;
+        private final String endPattern;
+    }
+
+    // A contextual handler returns one of the two patterns depending on whether the text matched the regexp.
+    private class ContextualHandler implements PatternHandler {
+        ContextualHandler(Pattern regexp, String thenTwo, String elseTwo, String thenEnd, String elseEnd) {
+            this.regexp = regexp;
+            thenTwoPattern = thenTwo;
+            elseTwoPattern = elseTwo;
+            thenEndPattern = thenEnd;
+            elseEndPattern = elseEnd;
+        }
+
+        @Override
+        public String getTwoPattern(String text) {
+            if(regexp.matcher(text).matches()) {
+                return thenTwoPattern;
+            } else {
+                return elseTwoPattern;
+            }
+        }
+
+        @Override
+        public String getEndPattern(String text) {
+            if(regexp.matcher(text).matches()) {
+                return thenEndPattern;
+            } else {
+                return elseEndPattern;
+            }
+        }
+
+        private final Pattern regexp;
+        private final String thenTwoPattern;
+        private final String elseTwoPattern;
+        private final String thenEndPattern;
+        private final String elseEndPattern;
+
+    }
+
+    // Pattern in the ICU data in which "y" might be replaced by "e".
+    private static final String compiledY = compilePattern("{0} y {1}", new StringBuilder());
+
+    // The new pattern, which replaces "y" with "e".
+    private static final String compiledE = compilePattern("{0} e {1}", new StringBuilder());
+
+    // Pattern in the ICU data in which "o" might be replaced by "u".
+    private static final String compiledO = compilePattern("{0} o {1}", new StringBuilder());
+
+    // The new pattern, which replaces "o" with "u".
+    private static final String compiledU = compilePattern("{0} u {1}", new StringBuilder());
+
+    // Condition to change to e.
+    // Starts with "hi" or "i" but not with "hie" nor "hia"
+    private static final Pattern changeToE = Pattern.compile("(i.*|hi|hi[^ae].*)", Pattern.CASE_INSENSITIVE);
+
+    // Condition to change to u.
+    // Starts with "o", "ho", and "8". Also "11" by itself.
+    private static final Pattern changeToU = Pattern.compile("((o|ho|8).*|11)", Pattern.CASE_INSENSITIVE);
+
+    // Pattern in the ICU Data which might need to add a DASH after VAV.
+    private static final String compiledVav = compilePattern("{0} \u05D5{1}", new StringBuilder());
+
+    // Pattern to add a DASH after VAV.
+    private static final String compiledVavDash = compilePattern("{0} \u05D5-{1}", new StringBuilder());
+
+    // Condition to change to VAV followed by a dash.
+    // Starts with non Hebrew letter.
+    private static final Pattern changeToVavDash = Pattern.compile("^[\\P{InHebrew}].*$");
+
+    // A factory function to create a pattern handler based on the locale.
+    // Handles the special cases of Spanish and Hebrew.
+    private PatternHandler createPatternHandler(String two, String end) {
+        if (this.locale != null) {
+            String language = this.locale.getLanguage();
+            if (language.equals("es")) {
+                boolean twoIsY = two.equals(compiledY);
+                boolean endIsY = end.equals(compiledY);
+                if (twoIsY || endIsY) {
+                    return new ContextualHandler(
+                        changeToE, twoIsY ? compiledE : two, two, endIsY ? compiledE : end, end);
+                }
+                boolean twoIsO = two.equals(compiledO);
+                boolean endIsO = end.equals(compiledO);
+                if (twoIsO || endIsO) {
+                    return new ContextualHandler(
+                        changeToU, twoIsO ? compiledU : two, two, endIsO ? compiledU : end, end);
+                }
+            } else if (language.equals("he") || language.equals("iw")) {
+                boolean twoIsVav = two.equals(compiledVav);
+                boolean endIsVav = end.equals(compiledVav);
+                if (twoIsVav || endIsVav) {
+                    return new ContextualHandler(changeToVavDash,
+                        twoIsVav ? compiledVavDash : two, two, endIsVav ? compiledVavDash : end, end);
+                }
+            }
+        }
+        return new StaticHandler(two, end);
      }
  
      /**
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java

index 8a1d30a678bccf4d0bb9df294d4d92052d1096bf..186a4196030a4d6119bc3e88baf566f02c5be8b2 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java
@@ -10,6 +10,7 @@ package com.ibm.icu.dev.test.format;
  
  import java.util.ArrayList;
  import java.util.Arrays;
+import java.util.List;
  import java.util.Locale;
  
  import org.junit.Test;
@@ -292,4 +293,65 @@ public class ListFormatterTest extends TestFmwk {
              assertEquals(message, expected, result);
          }
      }
+
+    @Test
+    public void TestContextual() {
+        String [] es = { "es", "es_419", "es_PY", "es_DO" };
+        String [] he = { "he", "he_IL", "iw", "iw_IL" };
+        Width[] widths = {Width.WIDE, Width.SHORT, Width.NARROW};
+        Object[][] cases = {
+            { es, Type.AND, "fascinante e incre\u00EDblemente", "fascinante", "incre\u00EDblemente"},
+            { es, Type.AND, "Comunicaciones Industriales e IIoT", "Comunicaciones Industriales", "IIoT"},
+            { es, Type.AND, "Espa\u00F1a e Italia", "Espa\u00F1a", "Italia"},
+            { es, Type.AND, "hijas intr\u00E9pidas e hijos solidarios", "hijas intr\u00E9pidas", "hijos solidarios"},
+            { es, Type.AND, "a un hombre e hirieron a otro", "a un hombre", "hirieron a otro"},
+            { es, Type.AND, "hija e hijo", "hija", "hijo"},
+            { es, Type.AND, "esposa, hija e hijo", "esposa", "hija", "hijo"},
+            // For 'y' exception
+            { es, Type.AND, "oro y hierro", "oro", "hierro"},
+            { es, Type.AND, "agua y hielo", "agua", "hielo"},
+            { es, Type.AND, "col\u00E1geno y hialur\u00F3nico", "col\u00E1geno", "hialur\u00F3nico"},
+
+            { es, Type.OR, "desierto u oasis", "desierto", "oasis"},
+            { es, Type.OR, "oasis, desierto u océano", "oasis", "desierto", "océano"},
+            { es, Type.OR, "7 u 8", "7", "8"},
+            { es, Type.OR, "7 u 80", "7", "80"},
+            { es, Type.OR, "7 u 800", "7", "800"},
+            { es, Type.OR, "6, 7 u 8", "6", "7", "8"},
+            { es, Type.OR, "10 u 11", "10", "11"},
+            { es, Type.OR, "10 o 111", "10", "111"},
+            { es, Type.OR, "10 o 11.2", "10", "11.2"},
+            { es, Type.OR, "9, 10 u 11", "9", "10", "11"},
+
+            { he, Type.AND, "a, b \u05D5-c", "a", "b", "c" },
+            { he, Type.AND, "a \u05D5-b", "a", "b" },
+            { he, Type.AND, "1, 2 \u05D5-3", "1", "2", "3" },
+            { he, Type.AND, "1 \u05D5-2", "1", "2" },
+            { he, Type.AND, "\u05D0\u05D4\u05D1\u05D4 \u05D5\u05DE\u05E7\u05D5\u05D5\u05D4",
+              "\u05D0\u05D4\u05D1\u05D4", "\u05DE\u05E7\u05D5\u05D5\u05D4" },
+            { he, Type.AND, "\u05D0\u05D4\u05D1\u05D4, \u05DE\u05E7\u05D5\u05D5\u05D4 \u05D5\u05D0\u05DE\u05D5\u05E0\u05D4",
+              "\u05D0\u05D4\u05D1\u05D4", "\u05DE\u05E7\u05D5\u05D5\u05D4", "\u05D0\u05DE\u05D5\u05E0\u05D4" },
+        };
+        for (Width width : widths) {
+            for (Object[] cas : cases) {
+                String [] locales = (String[]) cas[0];
+                Type type = (Type) cas[1];
+                String expected = (String) cas[2];
+                for (String locale : locales) {
+                    ULocale uloc = new ULocale(locale);
+                    List inputs = Arrays.asList(cas).subList(3, cas.length);
+                    ListFormatter fmt = ListFormatter.getInstance(uloc, type, width);
+                    String message = "TestContextual uloc="
+                        + uloc + " type="
+                        + type + " width="
+                        + width + "data=";
+                    for (Object i : inputs) {
+                        message += i + ",";
+                    }
+                    String result = fmt.format(inputs);
+                    assertEquals(message, expected, result);
+                }
+            }
+        }
+    }
  }
author	Frank Tang <ftang@chromium.org>
	Thu, 19 Mar 2020 01:57:29 +0000 (01:57 +0000)
committer	Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>
	Fri, 20 Mar 2020 02:36:15 +0000 (19:36 -0700)
icu4c/source/i18n/listformatter.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/listformatter.h		patch \| blob \| history
icu4c/source/test/depstest/dependencies.txt		patch \| blob \| history
icu4c/source/test/intltest/listformattertest.cpp		patch \| blob \| history
icu4c/source/test/intltest/listformattertest.h		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java		patch \| blob \| history