ICU-22265 Update PersonNameFormatter and its associated classes so that the behavior...

author Rich Gillam <richard_gillam@apple.com>

Sat, 4 Mar 2023 04:19:14 +0000 (20:19 -0800)

committer Rich Gillam <62772518+richgillam@users.noreply.github.com>

Wed, 8 Mar 2023 21:56:17 +0000 (13:56 -0800)
author Rich Gillam <richard_gillam@apple.com>
Sat, 4 Mar 2023 04:19:14 +0000 (20:19 -0800)
committer Rich Gillam <62772518+richgillam@users.noreply.github.com>
Wed, 8 Mar 2023 21:56:17 +0000 (13:56 -0800)
diff --git a/icu4j/build.xml b/icu4j/build.xml

index b9027dbc7a19158e3eb4814298ee836a989351b6..7424c739559949b62c86f36989e776ca393216c9 100644 (file)
--- a/icu4j/build.xml
+++ b/icu4j/build.xml
@@ -621,6 +621,7 @@
              <junit-fileset>
                  <fileset dir="${icu4j.core-tests.dir}/${bin.dir}">
                      <patternset refid="test-classes-patternset"/>
+                    <exclude name="**/ExhaustivePersonNameFormatterTest*" />
                  </fileset>
              </junit-fileset>
          </icu-junit>
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java

index 6fb2062003651d2f30075410da0a7197b7c28f2f..2cb3bb5094d406b709618cb12d80004113833ef6 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java
@@ -5,11 +5,7 @@ package com.ibm.icu.impl.personname;
  import static com.ibm.icu.util.UResourceBundle.ARRAY;
  import static com.ibm.icu.util.UResourceBundle.STRING;
  
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Locale;
-import java.util.Set;
+import java.util.*;
  
  import com.ibm.icu.impl.ICUData;
  import com.ibm.icu.impl.ICUResourceBundle;
@@ -32,6 +28,7 @@ public class PersonNameFormatterImpl {
      private final String initialSequencePattern;
      private final boolean capitalizeSurname;
      private final String foreignSpaceReplacement;
+    private final String nativeSpaceReplacement;
      private final boolean formatterLocaleUsesSpaces;
      private final PersonNameFormatter.Length length;
      private final PersonNameFormatter.Usage usage;
@@ -58,6 +55,7 @@ public class PersonNameFormatterImpl {
          this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence");
          this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement");
          this.formatterLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(locale.getLanguage());
+        this.nativeSpaceReplacement = formatterLocaleUsesSpaces ? " " : "";
  
          // asjust for combinations of parameters that don't make sense in practice
          if (usage == PersonNameFormatter.Usage.MONOGRAM) {
@@ -113,6 +111,7 @@ public class PersonNameFormatterImpl {
          initialSequencePattern = "{0} {1}";
          capitalizeSurname = false;
          foreignSpaceReplacement = " ";
+        nativeSpaceReplacement = " ";
          formatterLocaleUsesSpaces = true;
  
          // then, set values for the fields we actually care about
@@ -121,33 +120,43 @@ public class PersonNameFormatterImpl {
  
      }
  
+    @Override
+    public String toString() {
+        return "PersonNameFormatter: " + displayOrder + "-" + length + "-" + usage + "-" + formality + ", " + locale;
+    }
+
      public String formatToString(PersonName name) {
          // TODO: Should probably return a FormattedPersonName object
  
-        // if the formatter is for a language that doesn't use spaces between words and the name is from a language
-        // that does, create a formatter for the NAME'S locale and use THAT to format the name
-        Locale nameLocale = getNameLocale(name);
-        boolean nameLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(nameLocale.getLanguage());
-        if (!formatterLocaleUsesSpaces && nameLocaleUsesSpaces) {
-            PersonNameFormatterImpl nativeFormatter = new PersonNameFormatterImpl(nameLocale, this.length,
+        if (!nameScriptMatchesLocale(name, this.locale)) {
+            Locale nameLocale = getNameLocale(name);
+            PersonNameFormatterImpl nameLocaleFormatter = new PersonNameFormatterImpl(nameLocale, this.length,
                      this.usage, this.formality, this.displayOrder, this.capitalizeSurname);
-            String result = nativeFormatter.formatToString(name);
-
-            // BUT, if the name is actually written in the formatter locale's script, replace any spaces in the name
-            // with the foreignSpaceReplacement character
-            if (!foreignSpaceReplacement.equals(" ") && scriptMatchesLocale(result, this.locale)) {
-                result = result.replace(" ", this.foreignSpaceReplacement);
-            }
-            return result;
+            return nameLocaleFormatter.formatToString(name);
          }
  
-        // if we get down to here, we're just doing normal formatting-- if we have both given-first and surname-first
-        // rules, choose which one to use based on the name's locale and preferred field order
+        String result = null;
+        Locale nameLocale = getNameLocale(name);
+
+        // choose the GN-first or SN-first pattern based on the name itself and use that to format it
          if (snFirstPatterns == null || nameIsGnFirst(name)) {
-            return getBestPattern(gnFirstPatterns, name).format(name);
+            result = getBestPattern(gnFirstPatterns, name).format(name);
          } else {
-            return getBestPattern(snFirstPatterns, name).format(name);
+            result = getBestPattern(snFirstPatterns, name).format(name);
+        }
+
+        // if either of the space-replacement characters is something other than a space,
+        // check to see if the name locale's language matches the formatter locale's language.
+        // If they match, replace all spaces with the native space-replacement character,
+        // and if they don't, replace all spaces with the foreign space-replacement character
+        if (!nativeSpaceReplacement.equals(" ") || !foreignSpaceReplacement.equals(" ")) {
+            if (localesMatch(nameLocale, this.locale)) {
+                result = result.replace(" ", nativeSpaceReplacement);
+            } else {
+                result = result.replace(" ", foreignSpaceReplacement);
+            }
          }
+        return result;
      }
  
      public Locale getLocale() {
@@ -175,7 +184,7 @@ public class PersonNameFormatterImpl {
          return capitalizeSurname;
      }
  
-    private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "th", "yue", "km", "lo"));
+    private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "yue", "km", "lo", "my"));
  
      /**
       * Returns the value of the resource, as a string array.
@@ -297,15 +306,20 @@ public class PersonNameFormatterImpl {
      }
  
      /**
-     * Returns true if the script of `s` is one of the default scripts for `locale`.
-     * This function only checks the script of the first character whose script isn't "common,"
-     * so it probably won't work right on mixed-script strings.
+     * Returns true if the characters in the name match one of the scripts for the specified locale.
       */
-    private boolean scriptMatchesLocale(String s, Locale locale) {
-        int[] localeScripts = UScript.getCode(locale);
+    private boolean nameScriptMatchesLocale(PersonName name, Locale formatterLocale) {
+        // Rather than exhaustively checking all the fields in the name, we just check the given-name
+        // and surname fields, giving preference to the script of the given name if they're different
+        // (we concatenate them, given name first, into one string for simplicity).  The "name script"
+        // is the script of the first character we find whose script isn't "common".  If that script
+        // is one of the scripts used by the specified locale, we have a match.
+        String nameText = name.getFieldValue(PersonName.NameField.GIVEN, Collections.emptySet())
+                + name.getFieldValue(PersonName.NameField.SURNAME, Collections.emptySet());
+        int[] localeScripts = UScript.getCode(formatterLocale);
          int stringScript = UScript.COMMON;
-        for (int i = 0; stringScript == UScript.COMMON && i < s.length(); i++) {
-            char c = s.charAt(i);
+        for (int i = 0; stringScript == UScript.COMMON && i < nameText.length(); i++) {
+            char c = nameText.charAt(i);
              stringScript = UScript.getScript(c);
          }
  
@@ -316,4 +330,24 @@ public class PersonNameFormatterImpl {
          }
          return false;
      }
+
+    /**
+     * Returns true if the two locales should be considered equivalent for space-replacement purposes.
+     */
+    private boolean localesMatch(Locale nameLocale, Locale formatterLocale) {
+        String nameLanguage = nameLocale.getLanguage();
+        String formatterLanguage = formatterLocale.getLanguage();
+
+        if (nameLanguage.equals(formatterLanguage)) {
+            return true;
+        }
+
+        // HACK to make Japanese and Chinese names use the native format and native space replacement
+        // (do we want to do something more general here?)
+        if ((nameLanguage.equals("ja") || nameLanguage.equals("zh")) && (formatterLanguage.equals("ja") || formatterLanguage.equals("zh"))) {
+            return true;
+        }
+
+        return false;
+    }
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java

index e91edcb02486018f1cdc5d7c7cea778936bbee0e..409fe83c4ce48546d2c81acc9320a2b57386a2d8 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java
@@ -2,13 +2,7 @@
  // License & terms of use: http://www.unicode.org/copyright.html
  package com.ibm.icu.impl.personname;
  
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.StringTokenizer;
+import java.util.*;
  
  import com.ibm.icu.text.PersonName;
  
@@ -27,6 +21,11 @@ class PersonNamePattern {
          return result;
      }
  
+    @Override
+    public String toString() {
+        return patternText;
+    }
+
      private PersonNamePattern(String patternText, PersonNameFormatterImpl formatterImpl) {
          this.patternText = patternText;
  
@@ -88,6 +87,11 @@ class PersonNamePattern {
          StringBuilder textBefore = new StringBuilder();
          StringBuilder textAfter = new StringBuilder();
  
+        // if the name doesn't have a surname field and the pattern doesn't have a given-name field
+        // (or only has a given-name initial), we actually format a modified version of the name
+        // object in which the given-name value is returned as the surname instead
+        name = hackNameForEmptyFields(name);
+
          // the logic below attempts to implement the following algorithm:
          // - If one or more fields at the beginning of the name are empty, also skip all literal text
          //   from the beginning of the name up to the first populated field.
@@ -148,7 +152,7 @@ class PersonNamePattern {
      public int numEmptyFields(PersonName name) {
          int result = 0;
          for (Element element : patternElements) {
-            result += element.isPopulated(name) ? 0 : 1;
+            result += (!element.isLiteral() && !element.isPopulated(name)) ? 1 : 0;
          }
          return result;
      }
@@ -161,6 +165,11 @@ class PersonNamePattern {
       * @param s2 The literal text after the omitted field.
       */
      private String coalesce(StringBuilder s1, StringBuilder s2) {
+        // if the contents of s2 occur at the end of s1, we just use s1
+        if (endsWith(s1, s2)) {
+            s2.setLength(0);
+        }
+
          // get the range of non-whitespace characters at the beginning of s1
          int p1 = 0;
          while (p1 < s1.length() && !Character.isWhitespace(s1.charAt(p1))) {
@@ -191,6 +200,45 @@ class PersonNamePattern {
          return result;
      }
  
+    /**
+     * Returns true if s1 ends with s2.
+     */
+    private boolean endsWith(StringBuilder s1, StringBuilder s2) {
+        int p1 = s1.length() - 1;
+        int p2 = s2.length() - 1;
+
+        while (p1 >= 0 && p2 >= 0 && s1.charAt(p1) == s2.charAt(p2)) {
+            --p1;
+            --p2;
+        }
+        return p2 < 0;
+    }
+
+    private PersonName hackNameForEmptyFields(PersonName originalName) {
+        // this is a hack to deal with mononyms (name objects that don't have both a given name and a surname)--
+        // if the name object has a given-name field but not a surname field and the pattern either doesn't
+        // have a given-name field or only has "{given-initial}", we return a PersonName object that will
+        // return the value of the given-name field when asked for the value of the surname field and that
+        // will return null when asked for the value of the given-name field (all other field values and
+        // properties of the underlying object are returned unchanged)
+        PersonName result = originalName;
+        if (originalName.getFieldValue(PersonName.NameField.SURNAME, Collections.emptySet()) == null) {
+            boolean patternHasNonInitialGivenName = false;
+            for (PersonNamePattern.Element element : patternElements) {
+                if (!element.isLiteral()
+                        && ((NameFieldImpl)element).fieldID == PersonName.NameField.GIVEN
+                        && !((NameFieldImpl)element).modifiers.containsKey(PersonName.FieldModifier.INITIAL)) {
+                    patternHasNonInitialGivenName = true;
+                    break;
+                }
+            }
+            if (!patternHasNonInitialGivenName) {
+                return new GivenToSurnamePersonName(originalName);
+            }
+        }
+        return result;
+    }
+
      /**
       * A single element in a NamePattern.  This is either a name field or a range of literal text.
       */
@@ -210,6 +258,11 @@ class PersonNamePattern {
              this.text = text;
          }
  
+        @Override
+        public String toString() {
+            return text;
+        }
+
          public boolean isLiteral() {
              return true;
          }
@@ -250,6 +303,19 @@ class PersonNamePattern {
              }
          }
  
+        @Override
+        public String toString() {
+            StringBuilder sb = new StringBuilder();
+            sb.append("{");
+            sb.append(fieldID);
+            for (PersonName.FieldModifier modifier : modifiers.keySet()) {
+                sb.append("-");
+                sb.append(modifier.toString());
+            }
+            sb.append("}");
+            return sb.toString();
+        }
+
          public boolean isLiteral() {
              return false;
          }
@@ -266,10 +332,48 @@ class PersonNamePattern {
          }
  
          public boolean isPopulated(PersonName name) {
-            // just check whether the unmodified field contains a value
-            Set<PersonName.FieldModifier> modifierIDs = new HashSet<>();
-            String fieldValue = name.getFieldValue(fieldID, modifierIDs);
-            return fieldValue != null && !fieldValue.isEmpty();
+            String result = this.format(name);
+            return result != null && ! result.isEmpty();
+        }
+    }
+
+    /**
+     * Internal class used when formatting a mononym (a PersonName object that only has
+     * a given-name field).  If the name doesn't have a surname field and the pattern
+     * doesn't have a given-name field (or only has one that produces an initial), we
+     * use this class to behave as though the value supplied in the given-name field
+     * had instead been supplied in the surname field.
+     */
+    private static class GivenToSurnamePersonName implements PersonName {
+        private PersonName underlyingPersonName;
+
+        public GivenToSurnamePersonName(PersonName underlyingPersonName) {
+            this.underlyingPersonName = underlyingPersonName;
+        }
+
+        @Override
+        public String toString() {
+            return "Inverted version os " + underlyingPersonName.toString();
+        }
+        @Override
+        public Locale getNameLocale() {
+            return underlyingPersonName.getNameLocale();
+        }
+
+        @Override
+        public PreferredOrder getPreferredOrder() {
+            return underlyingPersonName.getPreferredOrder();
+        }
+
+        @Override
+        public String getFieldValue(NameField identifier, Set<FieldModifier> modifiers) {
+            if (identifier == NameField.SURNAME) {
+                return underlyingPersonName.getFieldValue(NameField.GIVEN, modifiers);
+            } else if (identifier == NameField.GIVEN) {
+                return null;
+            } else {
+                return underlyingPersonName.getFieldValue(identifier, modifiers);
+            }
          }
      }
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java

index 8cd0a5e1fc8ef7fcedc8b4f63db9fee5f2f29819..517e2565f5da5a3eeecc3665b9827b6a49b395f0 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java
@@ -324,4 +324,13 @@ public class PersonNameFormatter {
      public PersonNameFormatter(Locale locale, String[] patterns) {
          this.impl = new PersonNameFormatterImpl(locale, patterns);
      }
+
+    /**
+     * @internal For debugging only!
+     * @deprecated This API is for debugging only.
+     */
+    @Override
+    public String toString() {
+        return impl.toString();
+    }
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java

index f1139da8a937532d72518055e43216b2050b0f35..5ece4156472b58c725f19618757dda7fc99db2bf 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java
@@ -94,9 +94,16 @@ public class SimplePersonName implements PersonName {
              if (fieldValues.get("surname") == null) {
                  String surnamePrefix = fieldValues.get("surname-prefix");
                  String surnameCore = fieldValues.get("surname-core");
+
+                StringBuilder sb = new StringBuilder();
                  if (surnamePrefix != null && surnameCore != null) {
                      fieldValues.put("surname", surnamePrefix + " " + surnameCore);
+                } else if (surnamePrefix != null) {
+                    fieldValues.put("surname", surnamePrefix);
+                } else if (surnameCore != null) {
+                    fieldValues.put("surname", surnameCore);
                  }
+                // if both "surname-prefix" and "surname-core" are empty, don't fill in "surname" either
              }
  
              return new SimplePersonName(locale, preferredOrder, fieldValues);
@@ -202,6 +209,23 @@ public class SimplePersonName implements PersonName {
          return result;
      }
  
+    /**
+     * @internal Debugging only!
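+     * @return A comma-separated list of "key=value" pairs for the name's fields, followed by ",locale=" and the name's locale.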
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        for (String key : fieldValues.keySet()) {
+            if (sb.length() > 0) {
+                sb.append(",");
+            }
+            sb.append(key + "=" + fieldValues.get(key));
+        }
+        sb.append(",locale=" + nameLocale);
+        return sb.toString();
+    }
+
      private static String makeModifiedFieldName(NameField fieldName,
                                                  Collection<FieldModifier> modifiers) {
          StringBuilder result = new StringBuilder();
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ExhaustivePersonNameFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ExhaustivePersonNameFormatterTest.java

new file mode 100644 (file)

index 0000000..2bed7b3
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ExhaustivePersonNameFormatterTest.java
@@ -0,0 +1,223 @@
+// © 2023 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package com.ibm.icu.dev.test.format;
+
+import com.ibm.icu.text.PersonName;
+import com.ibm.icu.text.PersonNameFormatter;
+import com.ibm.icu.text.SimplePersonName;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * This is a test designed to parse the files generated by GeneratePersonNameTestData.java in
+ * the CLDR project.  It takes one command-line parameter-- the path to the directory that
+ * contains the test files (common/testData/personNameTest in the CLDR source tree).
+ * This isn't set up as a unit test because of the dependency on the CLDR files (I didn't
+ * want to copy all of those over into the ICU tree) and because I thought the test would
+ * take too long to run.
+ */
+public class ExhaustivePersonNameFormatterTest {
+    public static void main(String[] args) throws IOException {
+        if (args.length < 1) {
+            throw new IllegalArgumentException("No data file directory specified!");
+        }
+
+        String dataFilePath = args[0];
+        File dataFileDir = new File(dataFilePath);
+
+        if (!dataFileDir.isDirectory()) {
+            throw new IllegalArgumentException(dataFilePath + " is not a directory!");
+        }
+
+        int filesWithErrors = 0;
+        int filesWithoutErrors = 0;
+        int skippedFiles = 0;
+        int totalErrors = 0;
+
+        for (String filename : dataFileDir.list()) {
+            File dataFile = new File(dataFileDir, filename);
+            if (dataFile.isDirectory() || !filename.endsWith(".txt")) {
+                System.out.println("Skipping " + filename + "...");
+                continue;
+            }
+            String[] FILENAMES_TO_SKIP = {"gaa.txt", "dsb.txt", "syr.txt", "hsb.txt", "lij.txt"};
+            if (Arrays.asList(FILENAMES_TO_SKIP).contains(filename)) {
+                // extra check to narrow down the files for debugging
+                System.out.println("Skipping " + filename + "...");
+                ++skippedFiles;
+                continue;
+            }
+            int testErrors = runTest(dataFile);
+            if (testErrors == 0) {
+                ++filesWithoutErrors;
+            } else {
+                ++filesWithErrors;
+                totalErrors += testErrors;
+            }
+        }
+
+        System.out.println();
+        System.out.println("Files without errors: " + filesWithoutErrors);
+        System.out.println("Files with errors: " + filesWithErrors);
+        if (skippedFiles > 0) {
+            System.out.println("Skipped files: " + skippedFiles);
+        }
+        System.out.println("Total number of errors: " + totalErrors);
+    }
+
+    private static int runTest(File testFile) throws IOException {
+        LineNumberReader in = new LineNumberReader(new InputStreamReader(new FileInputStream(testFile)));
+        String line = null;
+        PersonNameTester tester = new PersonNameTester(testFile.getName());
+
+        do {
+            line = in.readLine();
+            tester.processLine(line, in.getLineNumber());
+        } while (line != null);
+
+        System.out.println(testFile.getAbsolutePath() + " had " + tester.getErrorCount() + " errors");
+        return tester.getErrorCount();
+    }
+
+    private static class PersonNameTester {
+        SimplePersonName name = null;
+        SimplePersonName.Builder nameBuilder = null;
+        String expectedResult = null;
+        Locale formatterLocale = null;
+        int errorCount = 0;
+
+        public PersonNameTester(String testFileName) {
+            formatterLocale = Locale.forLanguageTag(testFileName.substring(0, testFileName.length() - ".txt".length()).replace('_', '-'));
+        }
+
+        public void processLine(String line, int lineNumber) {
+            if (line == null || line.isEmpty() || line.startsWith("#")) {
+                return;
+            }
+
+            String[] lineFields = line.split(";");
+            String opcode = lineFields[0].trim();
+            String[] parameters = Arrays.copyOfRange(lineFields,1, lineFields.length);
+
+            processCommand(opcode, parameters, lineNumber);
+        }
+
+        public int getErrorCount() {
+            return errorCount;
+        }
+
+        private void processCommand(String opcode, String[] parameters, int lineNumber) {
+            if (opcode.equals("enum")) {
+                processEnumLine();
+            } else if (opcode.equals("name")) {
+                processNameLine(parameters, lineNumber);
+            } else if (opcode.equals("expectedResult")) {
+                processExpectedResultLine(parameters, lineNumber);
+            } else if (opcode.equals("parameters")) {
+                processParametersLine(parameters, lineNumber);
+            } else if (opcode.equals("endName")) {
+                processEndNameLine();
+            } else {
+                System.err.println("Unknown command '" + opcode + "' at line " + lineNumber);
+            }
+        }
+
+        private void processEnumLine() {
+            // this test isn't actually going to do anything with "enum" lines
+        }
+
+        private void processNameLine(String[] parameters, int lineNumber) {
+            if (checkState(name == null, "name", lineNumber)
+                    && checkNumParams(parameters, 2, "name", lineNumber)) {
+                if (nameBuilder == null) {
+                    nameBuilder = SimplePersonName.builder();
+                }
+
+                String fieldName = parameters[0].trim();
+                String fieldValue = parameters[1].trim();
+
+                if (fieldName.equals("locale")) {
+                    nameBuilder.setLocale(Locale.forLanguageTag(fieldValue.replace("_", "-")));
+                } else {
+                    String[] fieldNamePieces = fieldName.split("-");
+                    PersonName.NameField nameField = PersonName.NameField.forString(fieldNamePieces[0]);
+                    List<PersonName.FieldModifier> fieldModifiers = new ArrayList<>();
+                    for (int i = 1; i < fieldNamePieces.length; i++) {
+                        fieldModifiers.add(PersonName.FieldModifier.forString(fieldNamePieces[i]));
+                    }
+                    nameBuilder.addField(nameField, fieldModifiers, fieldValue);
+                }
+            }
+        }
+
+        private void processExpectedResultLine(String[] parameters, int lineNumber) {
+            if (checkState(name != null || nameBuilder != null, "expectedResult", lineNumber)
+                    && checkNumParams(parameters, 1, "expectedResult", lineNumber)) {
+                if (name == null) {
+                    name = nameBuilder.build();
+                    nameBuilder = null;
+                }
+                expectedResult = parameters[0].trim();
+            }
+        }
+
+        private void processParametersLine(String[] parameters, int lineNumber) {
+            if (checkState(name != null && expectedResult != null, "parameters", lineNumber)
+                    && checkNumParams(parameters, 4, "parameters", lineNumber)) {
+                String optionsStr = parameters[0].trim();
+                String lengthStr = parameters[1].trim();
+                String usageStr = parameters[2].trim();
+                String formalityStr = parameters[3].trim();
+
+                PersonNameFormatter.Builder builder = PersonNameFormatter.builder();
+                builder.setLocale(formatterLocale);
+                if (optionsStr.equals("sorting")) {
+                    builder.setDisplayOrder(PersonNameFormatter.DisplayOrder.SORTING);
+                }
+                builder.setLength(PersonNameFormatter.Length.valueOf(lengthStr.toUpperCase()));
+                builder.setUsage(PersonNameFormatter.Usage.valueOf(usageStr.toUpperCase()));
+                builder.setFormality(PersonNameFormatter.Formality.valueOf(formalityStr.toUpperCase()));
+
+                PersonNameFormatter formatter = builder.build();
+                String actualResult = formatter.formatToString(name);
+
+                checkResult(actualResult, lineNumber);
+            }
+        }
+
+        private void processEndNameLine() {
+            name = null;
+            expectedResult = null;
+            nameBuilder = null;
+        }
+
+        private boolean checkNumParams(String[] parameters, int expectedLength, String opcode, int lineNumber) {
+            boolean result = parameters.length == expectedLength;
+            if (!result) {
+                reportError("'" + opcode + "' line doesn't have " + expectedLength + " parameters", lineNumber);
+            }
+            return result;
+        }
+
+        private boolean checkState(boolean state, String opcode, int lineNumber) {
+            if (!state) {
+                reportError("Misplaced '" + opcode + "' line", lineNumber);
+            }
+            return state;
+        }
+
+        private boolean checkResult(String actualResult, int lineNumber) {
+            boolean result = expectedResult.equals(actualResult);
+            if (!result) {
+                reportError("Expected '" + expectedResult + "', got '" + actualResult + "'", lineNumber);
+            }
+            return result;
+        }
+
+        private void reportError(String error, int lineNumber) {
+            System.out.println("    " + error + " at line " + lineNumber);
+            ++errorCount;
+        }
+    }
+}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java

index d4ce68ab9b00a1f167d9d423d1a8b1d2c71a5195..234e7d7ff226f488b9a03401babd0d8e57b77e25 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java
@@ -218,6 +218,15 @@ public class PersonNameFormatterTest extends TestFmwk{
                  { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "DEFAULT", "", "WVDP" },
                  { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "DEFAULT", "", "WVDP" },
              }),
+            new NameAndTestCases("locale=en_US,given=John,surname-core=Smith", new String[][] {
+                // if the PersonName object just fills in the "surname-core" field, treat it as the "surname" field
+                { "en_US", "LONG",   "REFERRING",  "FORMAL", "DEFAULT", "", "John Smith" },
+                { "en_US", "LONG",   "REFERRING",  "INFORMAL", "DEFAULT", "", "John Smith" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL", "DEFAULT", "", "John Smith" },
+                { "en_US", "MEDIUM", "REFERRING",  "INFORMAL", "DEFAULT", "", "John Smith" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL", "DEFAULT", "", "J. Smith" },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "DEFAULT", "", "John S." },
+            }),
          }, false);
      }
  
@@ -226,8 +235,8 @@ public class PersonNameFormatterTest extends TestFmwk{
          executeTestCases(new NameAndTestCases[]{
              new NameAndTestCases("locale=en_US,given=George,given2=Herbert Walker,surname=Bush", new String[][] {
                  { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "George Herbert Walker Bush" },
-                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "DEFAULT", "", "George H. W. Bush" },
-                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "DEFAULT", "", "G. H. W. Bush" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "DEFAULT", "", "George H.W. Bush" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "DEFAULT", "", "G.H.W. Bush" },
                  { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "DEFAULT", "", "George B." },
                  { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "DEFAULT", "", "GHB" },
                  { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "DEFAULT", "", "GB" },
@@ -236,19 +245,19 @@ public class PersonNameFormatterTest extends TestFmwk{
                  { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "Ralph Vaughan Williams" },
                  { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "DEFAULT", "", "Ralph Vaughan Williams" },
                  { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "DEFAULT", "", "R. Vaughan Williams" },
-                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "DEFAULT", "", "Ralph V. W." },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "DEFAULT", "", "Ralph V.W." },
                  { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "DEFAULT", "", "RV" },
                  { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "DEFAULT", "", "RV" },
              }),
              new NameAndTestCases("locale=en_US,given=John Paul,given2=Stephen David George,surname=Smith", new String[][] {
                  { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "John Paul Stephen David George Smith" },
-                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "DEFAULT", "", "John Paul S. D. G. Smith" },
-                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "DEFAULT", "", "J. P. S. D. G. Smith" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "DEFAULT", "", "John Paul S.D.G. Smith" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "DEFAULT", "", "J.P.S.D.G. Smith" },
                  { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "DEFAULT", "", "John Paul S." },
                  { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "DEFAULT", "", "JSS" },
                  { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "DEFAULT", "", "JS" },
              }),
-        }, true);
+        }, false);
      }
  
      @Test
@@ -299,8 +308,8 @@ public class PersonNameFormatterTest extends TestFmwk{
      public void TestNameOrder() {
          executeTestCases(new NameAndTestCases[]{
              // the name's locale is used to determine the field order.  For the English name formatter, if the
-            // name is English, the order is GN first.  If it's Japanese, it's SN first.  This is true whether the
-            // Japanese name is written in Latin letters or Han characters
+            // name is English, the order is GN first.  If it's Japanese, it's SN first.  And if the name is written
+            // in Japanese characters, we just use the Japanese formatter.
              new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe", new String[][] {
                  { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "Shinzo Abe" },
              }),
@@ -308,7 +317,7 @@ public class PersonNameFormatterTest extends TestFmwk{
                  { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "Abe Shinzo" },
              }),
              new NameAndTestCases("locale=ja_JP,given=晋三,surname=安倍", new String[][] {
-                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "安倍 晋三" },
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "安倍晋三" },
              }),
  
              // the name can also declare its order directly, with the optional "preferredOrder" field.  If it does this,
@@ -342,35 +351,48 @@ public class PersonNameFormatterTest extends TestFmwk{
      @Test
      public void TestNameSpacing() {
          executeTestCases(new NameAndTestCases[]{
-            // if the formatter locale uses spaces, the result will use its formats (complete with spaces),
-            // regardless of locale
+            // if the name uses the same characters as the formatter locale, even if the name locale doesn't
+            // match (i.e., the name is transliterated), we use the formatter's format and the name's
+            // field order
              new NameAndTestCases("locale=ja_JP,given=Hayao,surname=Miyazaki", new String[][] {
                  { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "Miyazaki Hayao" },
              }),
-            new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] {
-                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "宮崎 駿" },
-            }),
  
-            // if the formatter locale doesn't use spaces and the name's locale doesn't either, just use
-            // the native formatter
+            // if the name is in a script the formatter's locale doesn't use, we just use a formatter for
+            // whatever the name locale is
              new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] {
-                { "ja_JP", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "宮崎駿" },
-                { "zh_CN", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "宮崎 駿" },
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "宮崎駿" },
              }),
-
-            // if the formatter locale doesn't use spaces and the name's locale does, use the name locale's formatter,
-            // but if the name is still using the formatter locale's script, use the native formatter's
-            // "foreign space replacement" character instead of spaces
              new NameAndTestCases("locale=en_US,given=Albert,surname=Einstein", new String[][] {
                  { "ja_JP", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "Albert Einstein" },
                  { "zh_CN", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "Albert Einstein" },
              }),
+
+            // if the name is in a script the formatter's locale does use, we use it, but if the name locale's
+            // language doesn't match the formatter locale's language, we replace any spaces in the result
+            // with the foreignSpaceReplacement character
              new NameAndTestCases("locale=en_US,given=アルベルト,surname=アインシュタイン", new String[][] {
                  { "ja_JP", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "アルベルト・アインシュタイン" },
              }),
              new NameAndTestCases("locale=en_US,given=阿尔伯特,surname=爱因斯坦", new String[][] {
                  { "zh_CN", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "阿尔伯特·爱因斯坦" },
              }),
+
+            // if the name's script and locale both match the formatter, we format as normal, but replace
+            // any spaces in the result with the nativeSpaceReplacement character (which, for Japanese,
+            // is the empty string, giving us the name without spaces)
+            new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] {
+                { "ja_JP", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "宮崎駿" },
+                { "zh_CN", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "宮崎駿" },
+            }),
+            // (Thai, despite not using spaces between words, DOES use spaces between the given name and surname)
+            new NameAndTestCases("locale=th_TH,given=ไอริณ,surname=กล้าหาญ", new String[][] {
+                { "th_TH", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "ไอริณ กล้าหาญ" },
+            }),
+            // (Lao, on the other hand, does NOT put a space between the given name and surname)
+            new NameAndTestCases("locale=lo_LA,given=ໄອຣີນ,surname=ແອດເລີ", new String[][] {
+                { "lo_LA", "LONG",   "REFERRING",  "FORMAL",   "DEFAULT", "", "ໄອຣີນແອດເລີ" },
+            }),
          }, false);
      }
  
@@ -379,16 +401,38 @@ public class PersonNameFormatterTest extends TestFmwk{
          executeTestCases(new NameAndTestCases[]{
              // here, we're leaving out the locale on the name object.  In the first case, we
              // see the Latin letters and assume English, giving us GN-first ordering.  In the
-            // second, we see the Han characters and guess Japanese, giving us SN-first ordering.
+            // second, we see the Han characters and guess Japanese, giving us SN-first ordering
+            // (and the Japanese format with no space between the fields).
              new NameAndTestCases("given=Hayao,surname=Miyazaki", new String[][]{
                      {"en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "Hayao Miyazaki"},
              }),
              new NameAndTestCases("given=駿,surname=宮崎", new String[][]{
-                    {"en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎 駿"},
+                    {"en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎駿"},
              }),
          }, false);
      }
  
+    @Test
+    public void TestMissingSurname() {
+        executeTestCases(new NameAndTestCases[]{
+                // test handling of mononyms: names that only have a given name.  Formatting patterns that only
+                // use the surname field will display as empty (or, in some of the examples below, with just
+                // the title) unless we do something special.  The special thing we do is that when the pattern
+                // has no given-name field and the name object has no surname field, we behave as though the
+                // contents of the given-name field are in the surname field.  (Note that this only happens
+                // for the "given" and "surname" fields; "given2" and "surname2" don't have this logic.)
+                new NameAndTestCases("title=Ms.,given=Zendaya", new String[][]{
+                        {"en_US", "MEDIUM", "ADDRESSING", "FORMAL",   "DEFAULT", "", "Ms. Zendaya"},
+                        {"en_US", "SHORT",  "ADDRESSING", "FORMAL",   "DEFAULT", "", "Ms. Zendaya"},
+                        {"en_US", "MEDIUM", "ADDRESSING", "INFORMAL", "DEFAULT", "", "Zendaya"},
+                        {"en_US", "SHORT",  "ADDRESSING", "INFORMAL", "DEFAULT", "", "Zendaya"},
+                        {"en_US", "SHORT",  "MONOGRAM",   "FORMAL",   "DEFAULT", "", "Z"},
+                        {"en_US", "SHORT",  "REFERRING",  "FORMAL",   "DEFAULT", "", "Zendaya"},
+                        {"en_US", "SHORT",  "REFERRING",  "FORMAL",   "SORTING", "", "Zendaya"},
+                }),
+        }, false);
+    }
+
      @Test
      public void TestLiteralTextElision2() {
          // a more extensive text of the literal text elision logic
author	Rich Gillam <richard_gillam@apple.com>
	Sat, 4 Mar 2023 04:19:14 +0000 (20:19 -0800)
committer	Rich Gillam <62772518+richgillam@users.noreply.github.com>
	Wed, 8 Mar 2023 21:56:17 +0000 (13:56 -0800)
icu4j/build.xml		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ExhaustivePersonNameFormatterTest.java	[new file with mode: 0644]	patch \| blob
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java		patch \| blob \| history