ICU-22081 PersonNameFormatter tech preview

author Rich Gillam <62772518+richgillam@users.noreply.github.com>

Fri, 12 Aug 2022 23:07:52 +0000 (16:07 -0700)

committer Rich Gillam <62772518+richgillam@users.noreply.github.com>

Thu, 1 Sep 2022 20:36:05 +0000 (13:36 -0700)
author Rich Gillam <62772518+richgillam@users.noreply.github.com>
Fri, 12 Aug 2022 23:07:52 +0000 (16:07 -0700)
committer Rich Gillam <62772518+richgillam@users.noreply.github.com>
Thu, 1 Sep 2022 20:36:05 +0000 (13:36 -0700)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/FieldModifierImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/FieldModifierImpl.java

new file mode 100644 (file)

index 0000000..257704a
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/FieldModifierImpl.java
@@ -0,0 +1,156 @@
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package com.ibm.icu.impl.personname;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.CaseMap;
+import com.ibm.icu.text.PersonNameFormatter;
+import com.ibm.icu.text.SimpleFormatter;
+import com.ibm.icu.util.ULocale;
+
+import java.util.StringTokenizer;
+
+/**
+ * Parent class for classes that implement field-modifier behavior.
+ */
+abstract class FieldModifierImpl {
+    public abstract String modifyField(String fieldValue);
+
+    public static FieldModifierImpl forName(PersonNameFormatter.FieldModifier modifierID, PersonNameFormatterImpl formatterImpl) {
+        switch (modifierID) {
+            case INFORMAL:
+                return NOOP_MODIFIER;
+            case PREFIX:
+                return NULL_MODIFIER;
+            case CORE:
+                return NOOP_MODIFIER;
+            case ALL_CAPS:
+                return new AllCapsModifier(formatterImpl.getLocale());
+            case INITIAL_CAP:
+                return new InitialCapModifier(formatterImpl.getLocale());
+            case INITIAL:
+                return new InitialModifier(formatterImpl.getInitialPattern(), formatterImpl.getInitialSequencePattern());
+            case MONOGRAM:
+                return MONOGRAM_MODIFIER;
+            default:
+                throw new IllegalArgumentException("Invalid modifier ID " + modifierID);
+        }
+    }
+
+    /**
+     * A field modifier that just returns the field value unmodified.  This is used to implement the default
+     * behavior of the "informal" and "core" modifiers ("real" informal or core variants have to be supplied or
+     * calculated by the PersonName object).  The implementation keeps no state; forName() hands out this one
+     * shared instance for both the INFORMAL and CORE modifier IDs.
+     */
+    private static final FieldModifierImpl NOOP_MODIFIER = new FieldModifierImpl() {
+        @Override
+        public String modifyField(String fieldValue) {
+            return fieldValue;
+        }
+    };
+
+    /**
+     * A field modifier that just returns the empty string.  This is used to implement the default behavior of the
+     * "prefix" modifier ("real" prefix variants have to be supplied or calculated by the PersonName object).
+     * The incoming field value is ignored entirely; forName() hands out this one shared instance for the
+     * PREFIX modifier ID.
+     */
+    private static final FieldModifierImpl NULL_MODIFIER = new FieldModifierImpl() {
+        @Override
+        public String modifyField(String fieldValue) {
+            return "";
+        }
+    };
+
+    /**
+     * A field modifier that returns the field value converted to ALL CAPS.  This is the default behavior
+     * for the "allCaps" modifier.
+     */
+    private static class AllCapsModifier extends FieldModifierImpl {
+        private final ULocale locale;
+
+        public AllCapsModifier(ULocale locale) {
+            this.locale = locale;
+        }
+
+        @Override
+        public String modifyField(String fieldValue) {
+            return UCharacter.toUpperCase(locale, fieldValue);
+        }
+    }
+
+    /**
+     * A field modifier that title-cases the field value.  This is the default behavior of the "initialCap"
+     * modifier.
+     * NOTE(review): this comment used to say that the first letter of EACH WORD is capitalized, but the
+     * case map below is built with wholeString().noLowercase().  Per the CaseMap.Title documentation, that
+     * appears to titlecase the string as a whole (only its first letter) and to leave every other character
+     * as it is, so later words in a multi-word field value would not be capitalized -- confirm which
+     * behavior is intended.
+     */
+    private static class InitialCapModifier extends FieldModifierImpl {
+        private final ULocale locale;
+        private static final CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE = CaseMap.toTitle().wholeString().noLowercase();
+
+        public InitialCapModifier(ULocale locale) {
+            this.locale = locale;
+        }
+
+        @Override
+        public String modifyField(String fieldValue) {
+            return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, fieldValue);
+        }
+    }
+
+    /**
+     * A field modifier that returns the field value converted into one or more initials.  This is the first grapheme
+     * cluster of each word in the field value, modified using the initialPattern/initial resource value from the
+     * locale data, and strung together using the initialPattern/initialSequence resource value from the locale data.
+     * (In English, these patterns put periods after each initial and connect them with spaces.)
+     * This is default behavior of the "initial" modifier.
+     */
+    private static class InitialModifier extends FieldModifierImpl {
+        private final SimpleFormatter initialFormatter;
+        private final SimpleFormatter initialSequenceFormatter;
+
+        public InitialModifier(String initialPattern, String initialSequencePattern) {
+            this.initialFormatter = SimpleFormatter.compile(initialPattern);
+            this.initialSequenceFormatter = SimpleFormatter.compile(initialSequencePattern);
+        }
+
+        @Override
+        public String modifyField(String fieldValue) {
+            String result = null;
+            StringTokenizer tok = new StringTokenizer(fieldValue, " ");
+            while (tok.hasMoreTokens()) {
+                String curInitial = getFirstGrapheme(tok.nextToken());
+                if (result == null) {
+                    result = initialFormatter.format(curInitial);
+                } else {
+                    result = initialSequenceFormatter.format(result, initialFormatter.format(curInitial));
+                }
+            }
+            return result;
+        }
+    }
+
+    /**
+     * A field modifier that simply returns the first grapheme cluster in the field value
+     * (or the empty string, if the field value is itself empty).
+     * This is the default implementation of the "monogram" modifier.
+     */
+    private static final FieldModifierImpl MONOGRAM_MODIFIER = new FieldModifierImpl() {
+        @Override
+        public String modifyField(String fieldValue) {
+            return getFirstGrapheme(fieldValue);
+        }
+    };
+
+    /**
+     * Extracts the first grapheme cluster of a string.
+     * @param s The string to examine.
+     * @return The leading grapheme cluster of s, or the empty string if s is empty.
+     */
+    private static String getFirstGrapheme(String s) {
+        if (!s.isEmpty()) {
+            // (currently, no locale overrides the grapheme-break rules, so "root" is used instead of a
+            // caller-supplied locale)
+            BreakIterator graphemes = BreakIterator.getCharacterInstance(ULocale.ROOT);
+            graphemes.setText(s);
+            int firstBoundary = graphemes.next();
+            return s.substring(0, firstBoundary);
+        }
+
+        // nothing to extract; answering here avoids a StringIndexOutOfBoundsException from substring()
+        return "";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java

new file mode 100644 (file)

index 0000000..877c627
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java
@@ -0,0 +1,251 @@
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package com.ibm.icu.impl.personname;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.FormattedValue;
+import com.ibm.icu.text.PersonNameFormatter;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+
+import java.util.*;
+
+import static com.ibm.icu.util.UResourceBundle.ARRAY;
+import static com.ibm.icu.util.UResourceBundle.STRING;
+
+/**
+ * Actual implementation class for PersonNameFormatter.
+ */
+public class PersonNameFormatterImpl {
+    private final ULocale locale;
+    private final PersonNamePattern[] gnFirstPatterns;
+    private final PersonNamePattern[] snFirstPatterns;
+    private final Set<String> gnFirstLocales;
+    private final Set<String> snFirstLocales;
+    private final String initialPattern;
+    private final String initialSequencePattern;
+    private final boolean capitalizeSurname;
+    private final String foreignSpaceReplacement;
+    private final boolean formatterLocaleUsesSpaces;
+    private final PersonNameFormatter.Length length;
+    private final PersonNameFormatter.Usage usage;
+    private final PersonNameFormatter.Formality formality;
+    private final Set<PersonNameFormatter.Options> options;
+
+    public PersonNameFormatterImpl(ULocale locale,
+                                   PersonNameFormatter.Length length,
+                                   PersonNameFormatter.Usage usage,
+                                   PersonNameFormatter.Formality formality,
+                                   Set<PersonNameFormatter.Options> options) {
+        // null for `options` is the same as the empty set
+        if (options == null) {
+            options = new HashSet<>();
+        }
+
+        // save off our creation parameters (these are only used if we have to create a second formatter)
+        this.length = length;
+        this.usage = usage;
+        this.formality = formality;
+        this.options = options;
+
+        // load simple property values from the resource bundle (or the options set)
+        ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, locale);
+        this.locale = locale;
+        this.initialPattern = rb.getStringWithFallback("personNames/initialPattern/initial");
+        this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence");
+        this.capitalizeSurname = options.contains(PersonNameFormatter.Options.SURNAME_ALLCAPS);
+        this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement");
+        this.formatterLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(locale.getLanguage());
+
+        // asjust for combinations of parameters that don't make sense in practice
+        if (usage == PersonNameFormatter.Usage.MONOGRAM) {
+            // we don't support SORTING in conjunction with MONOGRAM; if the caller passes in SORTING, remove it from
+            // the options list
+            options.remove(PersonNameFormatter.Options.SORTING);
+        } else if (options.contains(PersonNameFormatter.Options.SORTING)) {
+            // we only support SORTING in conjunction with REFERRING; if the caller passes in ADDRESSING, treat it
+            // the same as REFERRING
+            usage = PersonNameFormatter.Usage.REFERRING;
+        }
+
+        // load the actual formatting patterns-- since we don't know the name order until formatting time (it can be
+        // different for different names), load patterns for both GN-first and SN-first names.  (If the user has
+        // specified SORTING, we don't need to do this-- we just load the "sorting" patterns and ignore the name's order.)
+        final String RESOURCE_PATH_PREFIX = "personNames/namePattern/";
+        String resourceNameBody = length.toString().toLowerCase() + "-" + usage.toString().toLowerCase() + "-"
+                + formality.toString().toLowerCase();
+        if (!options.contains(PersonNameFormatter.Options.SORTING)) {
+            ICUResourceBundle gnFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "givenFirst-" + resourceNameBody);
+            ICUResourceBundle snFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "surnameFirst-" + resourceNameBody);
+
+            gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(gnFirstResource), this);
+            snFirstPatterns = PersonNamePattern.makePatterns(asStringArray(snFirstResource), this);
+
+            gnFirstLocales = new HashSet<>();
+            Collections.addAll(gnFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/givenFirst")));
+            snFirstLocales = new HashSet<>();
+            Collections.addAll(snFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/surnameFirst")));
+        } else {
+            ICUResourceBundle patternResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "sorting-" + resourceNameBody);
+
+            gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(patternResource), this);
+            snFirstPatterns = null;
+            gnFirstLocales = null;
+            snFirstLocales = null;
+        }
+    }
+
+    public String format(PersonNameFormatter.PersonName name) {
+        // TODO: Should probably return a FormattedPersonName object
+
+        // if the formatter is for a language that doesn't use spaces between words and the name is from a language
+        // that does, create a formatter for the NAME'S locale and use THAT to format the name
+        ULocale nameLocale = name.getNameLocale();
+        boolean nameLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(nameLocale.getLanguage());
+        if (!formatterLocaleUsesSpaces && nameLocaleUsesSpaces) {
+            PersonNameFormatterImpl nativeFormatter = new PersonNameFormatterImpl(nameLocale, this.length,
+                    this.usage, this.formality, this.options);
+            String result = nativeFormatter.format(name);
+
+            // BUT, if the name is actually written in the formatter locale's script, replace any spaces in the name
+            // with the foreignSpaceReplacement character
+            if (!foreignSpaceReplacement.equals(" ") && scriptMatchesLocale(result, this.locale)) {
+                result = result.replace(" ", this.foreignSpaceReplacement);
+            }
+            return result;
+        }
+
+        // if we get down to here, we're just doing normal formatting-- if we have both GN-first and SN-first rules,
+        // choose which one to use based on the name's locale and preferred field order
+        if (snFirstPatterns == null || nameIsGnFirst(name)) {
+            return getBestPattern(gnFirstPatterns, name).format(name);
+        } else {
+            return getBestPattern(snFirstPatterns, name).format(name);
+        }
+    }
+
+    public ULocale getLocale() {
+        return locale;
+    }
+
+    public String getInitialPattern() {
+        return initialPattern;
+    }
+
+    public String getInitialSequencePattern() {
+        return initialSequencePattern;
+    }
+
+    public boolean shouldCapitalizeSurname() {
+        return capitalizeSurname;
+    }
+
+    private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "th", "yue"));
+
+    /**
+     * Converts a resource's value into an array of strings.
+     * @param resource An ICUResourceBundle of type STRING or ARRAY.  An ARRAY resource is returned as a
+     *                 string array; a STRING resource is wrapped in a one-element array.
+     * @return The resource's value, as an array of Strings.
+     * @throws IllegalStateException If the resource is of any other type.
+     */
+    private String[] asStringArray(ICUResourceBundle resource) {
+        int resourceType = resource.getType();
+        if (resourceType == ARRAY) {
+            return resource.getStringArray();
+        }
+        if (resourceType == STRING) {
+            String onlyValue = resource.getString();
+            return new String[] { onlyValue };
+        }
+        throw new IllegalStateException("Unsupported resource type " + resourceType);
+    }
+
+    /**
+     * Decides which field order to use for a name.  An explicit preferredOrder field on the name wins;
+     * otherwise the name's locale (and then its successively shorter parent locale IDs) is looked up in
+     * the formatter locale's givenFirst and surnameFirst locale lists.
+     * @param name The name to be formatted.
+     * @return If true, use GN-first order to format the name; if false, use SN-first order.
+     * @throws IllegalArgumentException If the name's preferredOrder field has an unrecognized value.
+     */
+    private boolean nameIsGnFirst(PersonNameFormatter.PersonName name) {
+        // an order declared by the name itself overrides any locale-based calculation
+        Set<PersonNameFormatter.FieldModifier> noModifiers = new HashSet<>();
+        String declaredOrder = name.getFieldValue(PersonNameFormatter.NameField.PREFERRED_ORDER, noModifiers);
+        if (declaredOrder != null) {
+            if (declaredOrder.equals("givenFirst")) {
+                return true;
+            }
+            if (declaredOrder.equals("surnameFirst")) {
+                return false;
+            }
+            throw new IllegalArgumentException("Illegal preferredOrder value " + declaredOrder);
+        }
+
+        // walk from the name's full locale ID toward shorter ones, chopping off one "_"-separated
+        // subtag at a time, until one of the two lists claims the ID
+        String candidate = name.getNameLocale().toString();
+        do {
+            if (gnFirstLocales.contains(candidate)) {
+                return true;
+            }
+            if (snFirstLocales.contains(candidate)) {
+                return false;
+            }
+
+            int lastUnderbarPos = candidate.lastIndexOf('_');
+            candidate = (lastUnderbarPos >= 0) ? candidate.substring(0, lastUnderbarPos) : "root";
+        } while (!candidate.equals("root"));
+
+        // the walk stops as soon as it arrives at "root" (so "root" is only ever looked up when it is the
+        // ID the walk started with); a name whose locale matched nothing is treated as GN-first
+        return true;
+    }
+
+    /**
+     * Picks the pattern that fits a name best: the one with the highest numPopulatedFields() count
+     * and, among patterns tied on that, the one with the lowest numEmptyFields() count.  When patterns
+     * are tied on both counts, the earliest one in the array wins.
+     * @param patterns The candidate patterns.
+     * @param name The name to be formatted.
+     * @return The chosen pattern.
+     */
+    private PersonNamePattern getBestPattern(PersonNamePattern[] patterns, PersonNameFormatter.PersonName name) {
+        // with a single candidate there is nothing to compare
+        if (patterns.length == 1) {
+            return patterns[0];
+        }
+
+        PersonNamePattern winner = null;
+        int winnerPopulated = 0;
+        int winnerEmpty = Integer.MAX_VALUE;
+
+        for (int i = 0; i < patterns.length; i++) {
+            PersonNamePattern candidate = patterns[i];
+            int populated = candidate.numPopulatedFields(name);
+            int empty = candidate.numEmptyFields(name);
+
+            boolean morePopulated = populated > winnerPopulated;
+            boolean tiedButFewerEmpty = populated == winnerPopulated && empty < winnerEmpty;
+            if (morePopulated || tiedButFewerEmpty) {
+                winnerPopulated = populated;
+                winnerEmpty = empty;
+                winner = candidate;
+            }
+        }
+        return winner;
+    }
+
+    /**
+     * Returns true if the script of `s` is one of the default scripts for `locale`.
+     * This function only checks the script of the first character whose script isn't "common,"
+     * so it probably won't work right on mixed-script strings.
+     * The string is examined one code point (not one UTF-16 code unit) at a time, so characters outside
+     * the Basic Multilingual Plane are classified by their real script rather than as lone surrogates.
+     * @param s The string to check.
+     * @param locale The locale whose scripts are compared against.
+     * @return True if the script found in `s` is among the locale's scripts; false otherwise, including
+     *         when no scripts can be determined for the locale.
+     */
+    private boolean scriptMatchesLocale(String s, ULocale locale) {
+        int[] localeScripts = UScript.getCode(locale);
+        if (localeScripts == null) {
+            // UScript.getCode() is documented to return null when no script code can be found
+            return false;
+        }
+
+        int stringScript = UScript.COMMON;
+        for (int i = 0; stringScript == UScript.COMMON && i < s.length(); ) {
+            int codePoint = s.codePointAt(i);
+            stringScript = UScript.getScript(codePoint);
+            // step over both halves of a surrogate pair
+            i += Character.charCount(codePoint);
+        }
+
+        for (int localeScript : localeScripts) {
+            if (localeScript == stringScript) {
+                return true;
+            }
+        }
+        return false;
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java

new file mode 100644 (file)

index 0000000..c7d547f
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java
@@ -0,0 +1,269 @@
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package com.ibm.icu.impl.personname;
+
+import com.ibm.icu.text.PersonNameFormatter;
+
+import java.util.*;
+
+/**
+ * A single name formatting pattern, corresponding to a single namePattern element in CLDR.
+ */
+class PersonNamePattern {
+    private String patternText; // for debugging
+    private Element[] patternElements;
+
+    public static PersonNamePattern[] makePatterns(String[] patternText, PersonNameFormatterImpl formatterImpl) {
+        PersonNamePattern[] result = new PersonNamePattern[patternText.length];
+        for (int i = 0; i < patternText.length; i++) {
+            result[i] = new PersonNamePattern(patternText[i], formatterImpl);
+        }
+        return result;
+    }
+
+    /**
+     * Parses a pattern string into a sequence of literal-text and name-field elements.  Text between
+     * braces is a field specification; everything else is literal text.  A backslash causes the character
+     * after it to be taken literally.
+     * @param patternText The pattern string to parse.
+     * @param formatterImpl The formatter this pattern belongs to; passed on to the name-field elements.
+     * @throws IllegalArgumentException If the braces in the pattern are nested, unmatched, empty,
+     *                                  or left unclosed at the end of the pattern.
+     */
+    private PersonNamePattern(String patternText, PersonNameFormatterImpl formatterImpl) {
+        this.patternText = patternText;
+
+        List<Element> elements = new ArrayList<>();
+        boolean inField = false;
+        boolean inEscape = false;
+        StringBuilder workingString = new StringBuilder();
+        for (int i = 0; i < patternText.length(); i++) {
+            char c = patternText.charAt(i);
+
+            if (inEscape) {
+                // the character after a backslash is always taken literally
+                workingString.append(c);
+                inEscape = false;
+            } else {
+                switch (c) {
+                    case '\\':
+                        inEscape = true;
+                        break;
+                    case '{':
+                        if (!inField) {
+                            // the text accumulated so far is a run of literal text
+                            if (workingString.length() > 0) {
+                                elements.add(new LiteralText(workingString.toString()));
+                                workingString.setLength(0);
+                            }
+                            inField = true;
+                        } else {
+                            throw new IllegalArgumentException("Nested braces are not allowed in name patterns");
+                        }
+                        break;
+                    case '}':
+                        if (inField) {
+                            // the text accumulated since the opening brace is a field specification
+                            if (workingString.length() > 0) {
+                                elements.add(new NameFieldImpl(workingString.toString(), formatterImpl));
+                                workingString.setLength(0);
+                            } else {
+                                throw new IllegalArgumentException("No field name inside braces");
+                            }
+                            inField = false;
+                        } else {
+                            throw new IllegalArgumentException("Unmatched closing brace in literal text");
+                        }
+                        break;
+                    default:
+                        workingString.append(c);
+                        break;
+                }
+            }
+        }
+
+        // a field that is still open here was never closed; report it like the other brace errors rather
+        // than letting the field specification leak into the output as literal text
+        if (inField) {
+            throw new IllegalArgumentException("Unmatched opening brace in name pattern");
+        }
+        if (workingString.length() > 0) {
+            elements.add(new LiteralText(workingString.toString()));
+        }
+        this.patternElements = elements.toArray(new Element[0]);
+    }
+
+    public String format(PersonNameFormatter.PersonName name) {
+        StringBuilder result = new StringBuilder();
+        boolean seenLeadingField = false;
+        boolean seenEmptyLeadingField = false;
+        boolean seenEmptyField = false;
+        StringBuilder textBefore = new StringBuilder();
+        StringBuilder textAfter = new StringBuilder();
+
+        // the logic below attempts to implement the following algorithm:
+        // - If one or more fields at the beginning of the name are empty, also skip all literal text
+        //   from the beginning of the name up to the first populated field.
+        // - If one or more fields at the end of the name are empty, also skip all literal text from
+        //   the last populated field to the end of the name.
+        // - If one or more contiguous fields in the middle of the name are empty, skip the literal text
+        //   between them, omit characters from the literal text on either side of the empty fields up to
+        //   the first space on either side, and make sure that the resulting literal text doesn't end up
+        //   with two spaces in a row.
+        for (Element element : patternElements) {
+            if (element.isLiteral()) {
+                if (seenEmptyLeadingField) {
+                    // do nothing; throw away the literal text
+                } else if (seenEmptyField) {
+                    textAfter.append(element.format(name));
+                } else {
+                    textBefore.append(element.format(name));
+                }
+            } else {
+                String fieldText = element.format(name);
+                if (fieldText == null || fieldText.isEmpty()) {
+                    if (!seenLeadingField) {
+                        seenEmptyLeadingField = true;
+                        textBefore.setLength(0);
+                    } else {
+                        seenEmptyField = true;
+                        textAfter.setLength(0);
+                    }
+                } else {
+                    seenLeadingField = true;
+                    seenEmptyLeadingField = false;
+                    if (seenEmptyField) {
+                        result.append(coalesce(textBefore, textAfter));
+                        result.append(fieldText);
+                        seenEmptyField = false;
+                    } else {
+                        result.append(textBefore);
+                        textBefore.setLength(0);
+                        result.append(element.format(name));
+                    }
+                }
+            }
+        }
+        if (!seenEmptyField) {
+            result.append(textBefore);
+        }
+        return result.toString();
+    }
+
+    public int numPopulatedFields(PersonNameFormatter.PersonName name) {
+        int result = 0;
+        for (Element element : patternElements) {
+            result += element.isPopulated(name) ? 1 : 0;
+        }
+        return result;
+    }
+
+    public int numEmptyFields(PersonNameFormatter.PersonName name) {
+        int result = 0;
+        for (Element element : patternElements) {
+            result += element.isPopulated(name) ? 0 : 1;
+        }
+        return result;
+    }
+
+    /**
+     * Joins the literal text found on either side of an omitted field.  What survives is the run of
+     * non-whitespace characters at the start of s1, the run of non-whitespace characters at the end of
+     * s2, and a single whitespace character between them (taken from s1 if it has one, otherwise from
+     * s2 if it has one).  Both builders are emptied as a side effect.
+     * @param s1 The literal text before the omitted field.
+     * @param s2 The literal text after the omitted field.
+     * @return The coalesced literal text.
+     */
+    private String coalesce(StringBuilder s1, StringBuilder s2) {
+        // measure the leading run of non-whitespace characters in s1
+        int beforeLength = s1.length();
+        int keepFromBefore = 0;
+        while (keepFromBefore < beforeLength && !Character.isWhitespace(s1.charAt(keepFromBefore))) {
+            keepFromBefore++;
+        }
+
+        // find where the trailing run of non-whitespace characters in s2 starts
+        int keepFromAfter = s2.length();
+        while (keepFromAfter > 0 && !Character.isWhitespace(s2.charAt(keepFromAfter - 1))) {
+            keepFromAfter--;
+        }
+
+        // keep exactly one whitespace character: the one that follows the run in s1 or, when s1 has
+        // none, the one that precedes the run in s2
+        if (keepFromBefore < beforeLength) {
+            keepFromBefore++;
+        } else if (keepFromAfter > 0) {
+            keepFromAfter--;
+        }
+
+        String joined = s1.substring(0, keepFromBefore) + s2.substring(keepFromAfter);
+
+        // empty both buffers here so that format() doesn't have to
+        s1.setLength(0);
+        s2.setLength(0);
+
+        return joined;
+    }
+
+    /**
+     * A single element in a NamePattern.  This is either a name field or a range of literal text.
+     * isLiteral() tells the two kinds apart (true for literal text, false for a name field);
+     * format() returns the text the element contributes for the given name (the literal text itself,
+     * or the name field's value after modifiers, which may be null or empty); and isPopulated()
+     * returns true only for a name field whose unmodified value in the given name is non-null and
+     * non-empty (it is always false for literal text).
+     */
+    private interface Element {
+        boolean isLiteral();
+        String format(PersonNameFormatter.PersonName name);
+        boolean isPopulated(PersonNameFormatter.PersonName name);
+    }
+
+    /**
+     * Literal text from a name pattern.
+     */
+    private static class LiteralText implements Element {
+        private String text;
+
+        public LiteralText(String text) {
+            this.text = text;
+        }
+
+        public boolean isLiteral() {
+            return true;
+        }
+
+        public String format(PersonNameFormatter.PersonName name) {
+            return text;
+        }
+
+        public boolean isPopulated(PersonNameFormatter.PersonName name) {
+            return false;
+        }
+    }
+
+    /**
+     * An actual name field in a NamePattern (i.e., the stuff represented in the pattern by text
+     * in braces).  This class actually handles fetching the value for the field out of a
+     * PersonName object and applying any modifiers to it.
+     */
+    private static class NameFieldImpl implements Element {
+        private PersonNameFormatter.NameField fieldID;
+        private Map<PersonNameFormatter.FieldModifier, FieldModifierImpl> modifiers;
+
+        public NameFieldImpl(String fieldNameAndModifiers, PersonNameFormatterImpl formatterImpl) {
+            List<PersonNameFormatter.FieldModifier> modifierIDs = new ArrayList<>();
+            StringTokenizer tok = new StringTokenizer(fieldNameAndModifiers, "-");
+
+            this.fieldID = PersonNameFormatter.NameField.forString(tok.nextToken());
+            while (tok.hasMoreTokens()) {
+                modifierIDs.add(PersonNameFormatter.FieldModifier.forString(tok.nextToken()));
+            }
+            if (this.fieldID == PersonNameFormatter.NameField.SURNAME && formatterImpl.shouldCapitalizeSurname()) {
+                modifierIDs.add(PersonNameFormatter.FieldModifier.ALL_CAPS);
+            }
+
+            this.modifiers = new HashMap<>();
+            for (PersonNameFormatter.FieldModifier modifierID : modifierIDs) {
+                this.modifiers.put(modifierID, FieldModifierImpl.forName(modifierID, formatterImpl));
+            }
+        }
+
+        public boolean isLiteral() {
+            return false;
+        }
+
+        public String format(PersonNameFormatter.PersonName name) {
+            Set<PersonNameFormatter.FieldModifier> modifierIDs = new HashSet<>(modifiers.keySet());
+            String result = name.getFieldValue(fieldID, modifierIDs);
+            if (result != null) {
+                for (PersonNameFormatter.FieldModifier modifierID : modifierIDs) {
+                    result = modifiers.get(modifierID).modifyField(result);
+                }
+            }
+            return result;
+        }
+
+        public boolean isPopulated(PersonNameFormatter.PersonName name) {
+            // just check whether the unmodified field contains a value
+            Set<PersonNameFormatter.FieldModifier> modifierIDs = new HashSet<>();
+            String fieldValue = name.getFieldValue(fieldID, modifierIDs);
+            return fieldValue != null && !fieldValue.isEmpty();
+        }
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java

new file mode 100644 (file)

index 0000000..160d32c
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java
@@ -0,0 +1,370 @@
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package com.ibm.icu.text;
+
+import com.ibm.icu.impl.personname.PersonNameFormatterImpl;
+import com.ibm.icu.util.ULocale;
+
+import java.util.Set;
+
+/**
+ * A class for formatting names of people.  Takes raw name data for a person and renders it into a string according to
+ * the caller's specifications, taking into account how people's names are rendered in the caller's locale.
+ *
+ * The Length, Usage, and Formality options can be used to get a wide variety of results.  In English, they would
+ * produce results along these lines:
+ *
+ * |        | REFERRING             | REFERRING    | ADDRESSING | ADDRESSING | MONOGRAM | MONOGRAM |
+ * |        | FORMAL                | INFORMAL     | FORMAL     | INFORMAL   | FORMAL   | INFORMAL |
+ * |--------|-----------------------|--------------|------------|------------|----------|----------|
+ * | LONG   | James Earl Carter Jr. | Jimmy Carter | Mr. Carter | Jimmy      | JEC      | JC       |
+ * | MEDIUM | James E. Carter Jr.   | Jimmy Carter | Mr. Carter | Jimmy      | C        | J        |
+ * | SHORT  | J. E. Carter          | Jimmy Carter | Mr. Carter | Jimmy      | C        | J        |
+ *
+ * @internal
+ */
+public class PersonNameFormatter {
+    //==============================================================================
+    // Parameters that control formatting behavior
+
+    /**
+     * Specifies the desired length of the formatted name.
+     * @internal
+     */
+    public enum Length {
+        /**
+         * The longest name length.  Generally uses most of the fields in the name object.
+         * @internal
+         */
+        LONG,
+
+        /**
+         * The most typical name length.  Generally includes the given name and surname, but generally
+         * not most of the other fields.
+         * @internal
+         */
+        MEDIUM,
+
+        /**
+         * A shortened name.  Skips most fields and may abbreviate some name fields to just their initials.
+         * When Formality is INFORMAL, may only include one field.
+         * @internal
+         */
+        SHORT
+    }
+
+    /**
+     * Specifies the intended usage of the formatted name.
+     * @internal
+     */
+    public enum Usage {
+        /**
+         * Used when the name is going to be used to address the user directly: "Turn left here, John."
+         * @internal
+         */
+        ADDRESSING,
+
+        /**
+         * Used in general cases, when the name is used to refer to somebody else.
+         * @internal
+         */
+        REFERRING,
+
+        /**
+         * Used to generate monograms, short 1 to 3-character versions of the name suitable for use in things
+         * like chat avatars.  In English, this is usually the person's initials, but this isn't true in all
+         * languages.  When the caller specifies Usage.MONOGRAM, the Length parameter can be used to get different
+         * lengths of monograms: Length.SHORT is generally a single letter; Length.LONG may be as many as three or four.
+         * @internal
+         */
+        MONOGRAM
+    }
+
+    /**
+     * Specifies the intended formality of the formatted name.
+     * @internal
+     */
+    public enum Formality {
+        /**
+         * The more formal version of the name.  In English, this might be "James Earl Carter Jr."
+         * where the informal version would be "Jimmy Carter".
+         * @internal
+         */
+        FORMAL,
+
+        /**
+         * The more informal version of the name.  In English, this might omit fields or use the "informal" variant
+         * of the given name.
+         * @internal
+         */
+        INFORMAL
+    }
+
+    /**
+     * Additional options to customize the behavior of the formatter.
+     * @internal
+     */
+    public enum Options {
+        /**
+         * Causes the formatter to generate results suitable for inclusion in a sorted list.  For languages that
+         * put the given name first, this generally means moving the surname to the beginning of the string, with
+         * a comma between it and the rest of the name: e.g., "Carter, James E. Jr.".
+         * @internal
+         */
+        SORTING,
+
+        /**
+         * Requests that the surname in the formatted result be rendered in ALL CAPS.  This is often done with
+         * Japanese names to highlight which name is the surname.
+         * @internal
+         */
+        SURNAME_ALLCAPS
+    }
+
+    //==============================================================================
+    // Identifiers used to request field values from the PersonName object
+
+    /**
+     * Identifiers for the name fields supported by the PersonName object.
+     * @internal
+     */
+    public enum NameField {
+        /**
+         * Contains titles and other words that precede the actual name, such as "Mr."
+         * @internal
+         */
+        PREFIX("prefix"),
+
+        /**
+         * The given name.  May contain more than one token.
+         * @internal
+         */
+        GIVEN("given"),
+
+        /**
+         * Additional given names.  (In English, this is usually the "middle name" and
+         * may contain more than one word.)
+         * @internal
+         */
+        GIVEN2("given2"),
+
+        /**
+         * The surname.  In Spanish, this is the patronymic surname.
+         * @internal
+         */
+        SURNAME("surname"),
+
+        /**
+         * Additional surnames.  This is only used in a few languages, such as Spanish,
+         * where it is the matronymic surname.  (In most languages, multiple surnames all
+         * just go in the SURNAME field.)
+         * @internal
+         */
+        SURNAME2("surname2"),
+
+        /**
+         * Generational and professional qualifiers that generally follow the actual name,
+         * such as "Jr." or "M.D."
+         * @internal
+         */
+        SUFFIX("suffix"),
+
+        /**
+         * The preferred field order for the name.  PersonName objects generally shouldn't provide
+         * this field, allowing the PersonNameFormatter to deduce the proper field order based on
+         * the locales of the name of the formatter.  But this can be used to force a particular
+         * field order, generally in cases where the deduction logic in PersonNameFormatter would
+         * guess wrong.  When used, the only valid values are "givenFirst" and "surnameFirst".
+         * @internal
+         */
+        PREFERRED_ORDER("preferredOrder");
+
+        private final String name;
+
+        private NameField(String name) {
+            this.name = name;
+        }
+
+        /**
+         * Returns the NameField's display name.
+         * @internal
+         */
+        @Override
+        public String toString() {
+            return name;
+        }
+
+        /**
+         * Returns the appropriate NameField for its display name.
+         * @internal
+         */
+        public static NameField forString(String name) {
+            for (NameField field : values()) {
+                if (field.name.equals(name)) {
+                    return field;
+                }
+            }
+            throw new IllegalArgumentException("Invalid field name " + name);
+        }
+    }
+
+    /**
+     * Identifiers for the name field modifiers supported by the PersonName and PersonNameFormatter objects.
+     * @internal
+     */
+    public enum FieldModifier {
+        /**
+         * Requests an "informal" variant of the field, generally a nickname of some type:
+         * if "given" is "James", "given-informal" might be "Jimmy".  Only applied to the "given"
+         * field.  If the PersonName object doesn't apply this modifier, PersonNameFormatter just
+         * uses the unmodified version of "given".
+         * @internal
+         */
+        INFORMAL("informal"),
+
+        /**
+         * If the field contains a main word with one or more separate prefixes, such as
+         * "van den Hul", this requests just the prefixes ("van den").  Only applied to the "surname"
+         * field.  If the PersonName object doesn't apply this modifier, PersonNameFormatter
+         * assumes there are no prefixes.
+         * @internal
+         */
+        PREFIX("prefix"),
+
+        /**
+         * If the field contains a main word with one or more separate prefixes, such as
+         * "van den Hul", this requests just the main word ("Hul").  Only applied to the "surname"
+         * field.  If the implementing class doesn't apply this modifier, PersonNameFormatter
+         * assumes the entire "surname" field is the "core".
+         * @internal
+         */
+        CORE("core"),
+
+        /**
+         * Requests an initial for the specified field.  PersonNameFormatter will do
+         * this algorithmically, but a PersonName object can apply this modifier itself if it wants
+         * different initial-generation logic (or stores the initial separately).
+         * @internal
+         */
+        INITIAL("initial"),
+
+        /**
+         * Requests an initial for the specified field, suitable for use in a monogram
+         * (this usually differs from "initial" in that "initial" adds a period and "monogram" doesn't).
+         * PersonNameFormatter will do this algorithmically, but a PersonName object can apply
+         * this modifier itself if it wants different monogram-generation logic.
+         * @internal
+         */
+        MONOGRAM("monogram"),
+
+        /**
+         * Requests the field value converted to ALL CAPS.  PersonName objects
+         * generally won't need to handle this modifier themselves.
+         * @internal
+         */
+        ALL_CAPS("allCaps"),
+
+        /**
+         * Requests the field value with the first letter of each word capitalized.
+         * A PersonName object might handle this modifier itself to capitalize words more
+         * selectively.
+         * @internal
+         */
+        INITIAL_CAP("initialCap");
+
+        private final String name;
+
+        private FieldModifier(String name) {
+            this.name = name;
+        }
+
+        /**
+         * Returns the FieldModifier's display name.
+         * @internal
+         */
+        @Override
+        public String toString() {
+            return name;
+        }
+
+        /**
+         * Returns the appropriate fieldModifier for its display name.
+         * @internal
+         */
+        public static FieldModifier forString(String name) {
+            for (FieldModifier modifier : values()) {
+                if (modifier.name.equals(name)) {
+                    return modifier;
+                }
+            }
+            throw new IllegalArgumentException("Invalid modifier name " + name);
+        }
+    }
+
+    //==============================================================================
+    // The PersonName object
+
+    /**
+     * An object used to provide name data to the PersonNameFormatter for formatting.
+     * Clients can implement this interface to talk directly to some other subsystem
+     * that actually contains the name data (instead of having to copy it into a separate
+     * object just for formatting) or to override the default modifier behavior described
+     * above.  A concrete SimplePersonName object that does store the field values directly
+     * is provided.
+     * @internal
+     * @see SimplePersonName
+     */
+    public interface PersonName {
+        /**
+         * Returns the locale of the name-- that is, the language or country of origin for the person being named.
+         * @return The name's locale.
+         * @internal
+         */
+        public ULocale getNameLocale();
+
+        /**
+         * Returns one field of the name, possibly in a modified form.
+         * @param identifier The identifier of the requested field.
+         * @param modifiers An **IN/OUT** parameter that specifies modifiers to apply to the basic field value.
+         *                  An implementing class can choose to handle or ignore any modifiers; it should modify
+         *                  this parameter so that on exit, it contains only the requested modifiers that it
+         *                  DIDN'T handle.  Because the implementation may remove elements, callers must pass
+         *                  a modifiable set.
+         * @return The value of the requested field, optionally modified by some or all of the requested modifiers, or
+         * null if the requested field isn't present in the name.
+         * @internal
+         */
+        public String getFieldValue(NameField identifier, Set<FieldModifier> modifiers);
+    }
+
+    private final PersonNameFormatterImpl impl;
+
+    //==============================================================================
+    // Public API on PersonNameFormatter
+
+    /**
+     * Creates a PersonNameFormatter for the given locale and formatting parameters.
+     * @param locale The locale that names are to be formatted for.
+     * @param length How long the formatted name should be.
+     * @param usage How the formatted name is going to be used.
+     * @param formality How formal the formatted name should be.
+     * @param options A set of additional formatting options.  May be null.
+     * @see Length
+     * @see Usage
+     * @see Formality
+     * @see Options
+     * @internal
+     */
+    public PersonNameFormatter(ULocale locale, Length length, Usage usage, Formality formality, Set<Options> options) {
+        // all of the real work is delegated to the implementation object
+        impl = new PersonNameFormatterImpl(locale, length, usage, formality, options);
+    }
+
+    /**
+     * Formats a name.
+     * @param name A PersonName object that supplies individual field values (optionally, with modifiers applied)
+     *             to the formatter for formatting.
+     * @return The name, formatted according to the locale and other parameters passed to the formatter's constructor.
+     * @internal
+     */
+    public String format(PersonName name) {
+        // TODO: Should probably return a FormattedPersonName object
+        return impl.format(name);
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java

new file mode 100644 (file)

index 0000000..5ebea7a
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java
@@ -0,0 +1,163 @@
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package com.ibm.icu.text;
+
+import com.ibm.icu.util.ULocale;
+
+import java.util.*;
+
+/**
+ * A concrete implementation of PersonNameFormatter.PersonName that simply stores the field
+ * values in a Map.
+ *
+ * A caller can store both raw field values (such as "given") and modified field values (such as "given-informal")
+ * in a SimplePersonName.  But beyond storing and returning modified field values provided to it by the caller,
+ * SimplePersonName relies on the PersonNameFormatter's default handling of field modifiers.
+ * @internal
+ */
+public class SimplePersonName implements PersonNameFormatter.PersonName {
+    /**
+     * Simple constructor.
+     * @param nameLocale The locale of the name (i.e., its ethnic or national origin).
+     * @param fieldValues A Map mapping from field names to field values.  The field names
+     *                    are the values returned by NameField.toString().  The map's contents
+     *                    are copied, so later changes to the caller's map don't affect this object.
+     * @internal
+     */
+    public SimplePersonName(ULocale nameLocale, Map<String, String> fieldValues) {
+        this.nameLocale = nameLocale;
+        this.fieldValues = new HashMap<>(fieldValues);
+    }
+
+    /**
+     * A constructor that parses the locale ID and field values out of a single String.  This constructor
+     * is really intended only for the use of the PersonNameFormatter unit tests.
+     * @param keysAndValues A single string containing the locale ID and field values, organized as
+     *                      comma-separated key-value pairs, with each key separated from its value by
+     *                      an equal sign.  The key "locale" supplies the locale ID; every other key is a
+     *                      field name, as returned by NameField.toString(), optionally followed by a
+     *                      hyphen-delimited set of modifier names, as returned by FieldModifier.toString().
+     * @throws IllegalArgumentException if an entry doesn't contain an equal sign.
+     * @internal
+     */
+    public SimplePersonName(String keysAndValues) {
+        this.fieldValues = new HashMap<>();
+
+        ULocale parsedLocale = null;
+        StringTokenizer entries = new StringTokenizer(keysAndValues, ",");
+        while (entries.hasMoreTokens()) {
+            String keyValuePair = entries.nextToken();
+            int separatorPos = keyValuePair.indexOf('=');
+            if (separatorPos < 0) {
+                throw new IllegalArgumentException("No = found in name field entry");
+            }
+            String key = keyValuePair.substring(0, separatorPos);
+            String value = keyValuePair.substring(separatorPos + 1);
+
+            // "locale" isn't a name field-- it sets the name's locale instead
+            if ("locale".equals(key)) {
+                parsedLocale = new ULocale(value);
+            } else {
+                this.fieldValues.put(key, value);
+            }
+        }
+        this.nameLocale = parsedLocale;
+
+        // "surname" gets special treatment: when it's missing but both of its pieces were supplied,
+        // synthesize it by joining "surname-prefix" and "surname-core" with a space
+        String surnamePrefix = this.fieldValues.get("surname-prefix");
+        String surnameCore = this.fieldValues.get("surname-core");
+        boolean surnameMissing = this.fieldValues.get("surname") == null;
+        if (surnameMissing && surnamePrefix != null && surnameCore != null) {
+            this.fieldValues.put("surname", surnamePrefix + " " + surnameCore);
+        }
+    }
+
+    /**
+     * Returns the locale of the name-- that is, the language or country of origin for the person being named.
+     * @return The name's locale.  This is null if no locale was supplied at construction time (the
+     * String-based constructor leaves it null when the string has no "locale" entry).
+     * @internal
+     */
+    @Override
+    public ULocale getNameLocale() {
+        return nameLocale;
+    }
+
+    /**
+     * Returns one field of the name, possibly in a modified form.  This class can store modified versions of fields,
+     * provided at construction time, and this function will return them.  Otherwise, it ignores modifiers and
+     * relies on PersonNameFormat's default modifier handling.
+     * @param nameField The identifier of the requested field.
+     * @param modifiers An **IN/OUT** parameter that specifies modifiers to apply to the basic field value.
+     *                  On return, this list will contain any modifiers that this object didn't handle.  This class
+     *                  will always return this set unmodified, unless a modified version of the requested field
+     *                  was provided at construction time.
+     * @return The value of the requested field, optionally modified by some or all of the requested modifiers, or
+     * null if the requested field isn't present in the name.
+     * @internal
+     */
+    @Override
+    public String getFieldValue(PersonNameFormatter.NameField nameField, Set<PersonNameFormatter.FieldModifier> modifiers) {
+        // first look for the fully modified name in the internal table
+        String fieldName = nameField.toString();
+        String result = fieldValues.get(makeModifiedFieldName(nameField, modifiers));
+        if (result != null) {
+            modifiers.clear();
+            return result;
+        }
+
+        // if we don't find it, check the fully unmodified name.  If it's not there, nothing else will be
+        result = fieldValues.get(fieldName);
+        if (result == null) {
+            return null;
+        } else if (modifiers.size() == 1) {
+            // and if it IS there and there's only one modifier, we're done
+            return result;
+        }
+
+        // but if there are two or more modifiers, then we have to go through the whole list of fields and look for the best match
+        String winningKey = fieldName;
+        int winningScore = 0;
+        for (String key : fieldValues.keySet()) {
+            if (key.startsWith(fieldName)) {
+                Set<PersonNameFormatter.FieldModifier> keyModifiers = makeModifiersFromName(key);
+                if (modifiers.containsAll(keyModifiers)) {
+                    if (keyModifiers.size() > winningScore || (keyModifiers.size() == winningScore && key.compareTo(winningKey) < 0)) {
+                        winningKey = key;
+                        winningScore = keyModifiers.size();
+                    }
+                }
+            }
+        }
+        result = fieldValues.get(winningKey);
+        modifiers.removeAll(makeModifiersFromName(winningKey));
+        return result;
+    }
+
+    /**
+     * Builds the key under which a modified field value is stored: the field name followed by
+     * the modifier names, each preceded by a hyphen.  The modifier names are put in sorted order.
+     */
+    private static String makeModifiedFieldName(PersonNameFormatter.NameField fieldName,
+                                                Collection<PersonNameFormatter.FieldModifier> modifiers) {
+        // sort the modifier names so that the key doesn't depend on the collection's iteration order
+        SortedSet<String> orderedNames = new TreeSet<>();
+        for (PersonNameFormatter.FieldModifier modifier : modifiers) {
+            orderedNames.add(modifier.toString());
+        }
+
+        StringBuilder key = new StringBuilder(String.valueOf(fieldName));
+        for (String modifierName : orderedNames) {
+            key.append('-').append(modifierName);
+        }
+        return key.toString();
+    }
+
+    /**
+     * Extracts the set of modifiers from a (possibly) modified field name such as "given-informal".
+     * @param modifiedName A field name, optionally followed by hyphen-delimited modifier names.
+     * @return The modifiers named after the field name; empty if there are none.
+     * @throws IllegalArgumentException if one of the modifier names isn't a valid FieldModifier name.
+     * @throws NoSuchElementException if modifiedName contains no tokens at all (e.g., it's empty).
+     */
+    private static Set<PersonNameFormatter.FieldModifier> makeModifiersFromName(String modifiedName) {
+        StringTokenizer tok = new StringTokenizer(modifiedName, "-");
+        tok.nextToken(); // the first token is the field name, not a modifier-- throw it away
+        Set<PersonNameFormatter.FieldModifier> result = new HashSet<>();
+        while (tok.hasMoreTokens()) {
+            result.add(PersonNameFormatter.FieldModifier.forString(tok.nextToken()));
+        }
+        return result;
+    }
+
+    private final ULocale nameLocale;
+    private final Map<String, String> fieldValues;
+}
+\ No newline at end of file
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java

new file mode 100644 (file)

index 0000000..1046d19
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java
@@ -0,0 +1,341 @@
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package com.ibm.icu.dev.test.format;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.text.PersonNameFormatter;
+import com.ibm.icu.text.SimplePersonName;
+import com.ibm.icu.util.ULocale;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.*;
+
+@RunWith(JUnit4.class)
+public class PersonNameFormatterTest extends TestFmwk{
+    /**
+     * Pairs one name with the test cases to run against it.  nameFields is the key/value string
+     * handed to the SimplePersonName(String) constructor; each row of testCases holds, in order,
+     * the formatter locale, length, usage, formality, a comma-separated options list, and the
+     * expected formatting result.
+     */
+    private static class NameAndTestCases {
+        public String nameFields;
+        public String[][] testCases;
+
+        public NameAndTestCases(String nameFields, String[][] testCases) {
+            this.nameFields = nameFields;
+            this.testCases = testCases;
+        }
+    }
+
+    /**
+     * Formats every name with the formatter described by each of its test cases and checks the result.
+     * @param namesAndTestCases The names to format, each with its own list of test cases.
+     * @param forDebugging If true, the names and formatting results are printed to System.out instead
+     *                     of being compared against the expected results.
+     */
+    private void executeTestCases(NameAndTestCases[] namesAndTestCases, boolean forDebugging) {
+        for (NameAndTestCases group : namesAndTestCases) {
+            SimplePersonName name = new SimplePersonName(group.nameFields);
+            if (forDebugging) {
+                System.out.println(group.nameFields);
+            }
+
+            for (String[] row : group.testCases) {
+                // columns: locale, length, usage, formality, options, expected result
+                ULocale locale = new ULocale(row[0]);
+                PersonNameFormatter.Length length = PersonNameFormatter.Length.valueOf(row[1]);
+                PersonNameFormatter.Usage usage = PersonNameFormatter.Usage.valueOf(row[2]);
+                PersonNameFormatter.Formality formality = PersonNameFormatter.Formality.valueOf(row[3]);
+                Set<PersonNameFormatter.Options> options = makeOptionsSet(row[4]);
+                String expected = row[5];
+
+                String actual = new PersonNameFormatter(locale, length, usage, formality, options).format(name);
+
+                if (!forDebugging) {
+                    assertEquals("Wrong formatting result for " + group.nameFields + "," + Arrays.toString(row), expected, actual);
+                    continue;
+                }
+                System.out.println("    " + locale + "," + length + "," + usage + "," + formality + "," + options + " => " + actual);
+            }
+        }
+    }
+
+    /**
+     * Converts a comma-separated list of Options constant names (possibly empty) into a set of Options.
+     */
+    private static Set<PersonNameFormatter.Options> makeOptionsSet(String optionsStr) {
+        Set<PersonNameFormatter.Options> options = new HashSet<>();
+        for (StringTokenizer names = new StringTokenizer(optionsStr, ","); names.hasMoreTokens(); ) {
+            options.add(PersonNameFormatter.Options.valueOf(names.nextToken()));
+        }
+        return options;
+    }
+
+    @Test
+    public void TestEnglishName() {
+        executeTestCases(new NameAndTestCases[]{
+            new NameAndTestCases("locale=en_US,prefix=Mr.,given=Richard,given-informal=Rich,given2=Theodore,surname=Gillam", new String[][] {
+                // test all the different combinations of parameters with the normal name order
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Richard Theodore Gillam" },
+                { "en_US", "LONG",   "REFERRING",  "INFORMAL", "",              "Rich Gillam" },
+                { "en_US", "LONG",   "ADDRESSING", "FORMAL",   "",              "Mr. Gillam" },
+                { "en_US", "LONG",   "ADDRESSING", "INFORMAL", "",              "Rich" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "",              "Richard T. Gillam" },
+                { "en_US", "MEDIUM", "REFERRING",  "INFORMAL", "",              "Rich Gillam" },
+                { "en_US", "MEDIUM", "ADDRESSING", "FORMAL",   "",              "Mr. Gillam" },
+                { "en_US", "MEDIUM", "ADDRESSING", "INFORMAL", "",              "Rich" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "",              "R. T. Gillam" },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "",              "Rich G." },
+                { "en_US", "SHORT",  "ADDRESSING", "FORMAL",   "",              "Mr. Gillam" },
+                { "en_US", "SHORT",  "ADDRESSING", "INFORMAL", "",              "Rich" },
+
+                // test all the different combinations of parameters for "sorting" order
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "SORTING",       "Gillam, Richard Theodore" },
+                { "en_US", "LONG",   "REFERRING",  "INFORMAL", "SORTING",       "Gillam, Rich" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "SORTING",       "Gillam, Richard T." },
+                { "en_US", "MEDIUM", "REFERRING",  "INFORMAL", "SORTING",       "Gillam, Rich" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "SORTING",       "Gillam, R. T." },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "SORTING",       "Gillam, Rich" },
+
+                // we don't really support ADDRESSING in conjunction with SORTING-- it should always
+                // do the same thing as REFERRING
+                { "en_US", "LONG",   "ADDRESSING", "FORMAL",   "SORTING",       "Gillam, Richard Theodore" },
+                { "en_US", "LONG",   "ADDRESSING", "INFORMAL", "SORTING",       "Gillam, Rich" },
+                { "en_US", "MEDIUM", "ADDRESSING", "FORMAL",   "SORTING",       "Gillam, Richard T." },
+                { "en_US", "MEDIUM", "ADDRESSING", "INFORMAL", "SORTING",       "Gillam, Rich" },
+                { "en_US", "SHORT",  "ADDRESSING", "FORMAL",   "SORTING",       "Gillam, R. T." },
+                { "en_US", "SHORT",  "ADDRESSING", "INFORMAL", "SORTING",       "Gillam, Rich" },
+
+                // finally, try the different variations of MONOGRAM
+                { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "",              "RTG" },
+                { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "",              "RG" },
+                { "en_US", "MEDIUM", "MONOGRAM",   "FORMAL",   "",              "G" },
+                { "en_US", "MEDIUM", "MONOGRAM",   "INFORMAL", "",              "R" },
+                { "en_US", "SHORT",  "MONOGRAM",   "FORMAL",   "",              "G" },
+                { "en_US", "SHORT",  "MONOGRAM",   "INFORMAL", "",              "R" },
+
+                // and again, we don't support SORTING for monograms, so it should also do the
+                // same thing as when no option is specified
+                { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "SORTING",       "RTG" },
+                { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "SORTING",       "RG" },
+                { "en_US", "MEDIUM", "MONOGRAM",   "FORMAL",   "SORTING",       "G" },
+                { "en_US", "MEDIUM", "MONOGRAM",   "INFORMAL", "SORTING",       "R" },
+                { "en_US", "SHORT",  "MONOGRAM",   "FORMAL",   "SORTING",       "G" },
+                { "en_US", "SHORT",  "MONOGRAM",   "INFORMAL", "SORTING",       "R" },
+            })
+        }, false);
+    }
+
+    @Test
+    public void TestPrefixCore() {
+        executeTestCases(new NameAndTestCases[]{
+            new NameAndTestCases("locale=en_US,given=Willem,surname-prefix=van der,surname-core=Plas", new String[][] {
+                // for normal formatting, the {surname} field is just "{surname-prefix} {surname-core}"
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Willem van der Plas" },
+                { "en_US", "LONG",   "REFERRING",  "INFORMAL", "",              "Willem van der Plas" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "",              "Willem van der Plas" },
+                { "en_US", "MEDIUM", "REFERRING",  "INFORMAL", "",              "Willem van der Plas" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "",              "W. van der Plas" },
+
+                // for FORMAL SORTING, we sort by "surname-core", with "surname-prefix" at the end
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "SORTING",       "Plas, Willem van der" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "SORTING",       "Plas, Willem van der" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "SORTING",       "Plas, W. van der" },
+
+                // but for INFORMAL SORTING, we keep the surname together and sort by the prefix
+                { "en_US", "LONG",   "REFERRING",  "INFORMAL", "SORTING",       "van der Plas, Willem" },
+                { "en_US", "MEDIUM", "REFERRING",  "INFORMAL", "SORTING",       "van der Plas, Willem" },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "SORTING",       "van der Plas, Willem" },
+
+                // the default (English) logic for initials doesn't do anything special with the surname-prefix--
+                // it gets initials too, which is probably wrong
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "",              "Willem v. d. P." },
+
+                // and (English) monogram generation doesn't do anything special with the prefix either
+                { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "",              "WV" },
+                { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "",              "WV" },
+
+                // but Dutch monogram generation _does_ handle the prefix specially
+                { "nl_NL", "LONG",   "MONOGRAM",   "FORMAL",   "",              "WvP" },
+                { "nl_NL", "LONG",   "MONOGRAM",   "INFORMAL", "",              "WvP" },
+            }),
+            new NameAndTestCases("locale=en_US,given=Willem,surname=van der Plas", new String[][] {
+                // if we just use the "surname" field instead of "surname-prefix" and "surname-core", everything's
+                // the same, except (obviously) for the cases where we were doing something special with the
+                // prefix and core
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Willem van der Plas" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "",              "W. van der Plas" },
+
+                // for example, SORTING works the same way regardless of formality
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "SORTING",       "van der Plas, Willem" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "SORTING",       "van der Plas, Willem" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "SORTING",       "van der Plas, W." },
+                { "en_US", "LONG",   "REFERRING",  "INFORMAL", "SORTING",       "van der Plas, Willem" },
+                { "en_US", "MEDIUM", "REFERRING",  "INFORMAL", "SORTING",       "van der Plas, Willem" },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "SORTING",       "van der Plas, Willem" },
+
+                // and monogram generation works the same in English and Dutch
+                { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "",              "WV" },
+                { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "",              "WV" },
+                { "nl_NL", "LONG",   "MONOGRAM",   "FORMAL",   "",              "WV" },
+                { "nl_NL", "LONG",   "MONOGRAM",   "INFORMAL", "",              "WV" },
+            }),
+            new NameAndTestCases("locale=en_US,given=Willem,surname-prefix=van der,surname-core=Plas,surname-initial=vdP.,surname-monogram=vdP", new String[][] {
+                // we can work around the initial generation by providing a "surname-initial" field in the name object
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "",              "Willem vdP." },
+
+                // we could also (theoretically) work around the monogram-generation problem in English in the same way
+                { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "",              "WVDP" },
+                { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "",              "WVDP" },
+            }),
+        }, false);
+    }
+
+    @Test
+    public void TestInitialGeneration() {
+        executeTestCases(new NameAndTestCases[]{
+            new NameAndTestCases("locale=en_US,given=George,given2=Herbert Walker,surname=Bush", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "George Herbert Walker Bush" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "",              "George H. W. Bush" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "",              "G. H. W. Bush" },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "",              "George B." },
+                { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "",              "GHB" },
+                { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "",              "GB" },
+            }),
+            new NameAndTestCases("locale=en_US,given=Ralph,surname=Vaughan Williams", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Ralph Vaughan Williams" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "",              "Ralph Vaughan Williams" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "",              "R. Vaughan Williams" },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "",              "Ralph V. W." },
+                { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "",              "RV" },
+                { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "",              "RV" },
+            }),
+            new NameAndTestCases("locale=en_US,given=John Paul,given2=Stephen David George,surname=Smith", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "John Paul Stephen David George Smith" },
+                { "en_US", "MEDIUM", "REFERRING",  "FORMAL",   "",              "John Paul S. D. G. Smith" },
+                { "en_US", "SHORT",  "REFERRING",  "FORMAL",   "",              "J. P. S. D. G. Smith" },
+                { "en_US", "SHORT",  "REFERRING",  "INFORMAL", "",              "John Paul S." },
+                { "en_US", "LONG",   "MONOGRAM",   "FORMAL",   "",              "JSS" },
+                { "en_US", "LONG",   "MONOGRAM",   "INFORMAL", "",              "JS" },
+            }),
+        }, false);
+    }
+
+    @Test
+    public void TestLiteralTextElision() {
+        executeTestCases(new NameAndTestCases[]{
+            // literal text elision is difficult to test with the real locale data, although this is a start
+            // perhaps we could add an API for debugging that lets us pass in real pattern strings, but I'd like to stay away from that
+            new NameAndTestCases("locale=en_US,given=John,given2=Paul,surname=Smith,suffix=Jr.", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "John Paul Smith Jr." },
+            }),
+            new NameAndTestCases("locale=en_US,given=John,given2=Paul,surname=Smith", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "John Paul Smith" },
+            }),
+            new NameAndTestCases("locale=en_US,given2=Paul,surname=Smith", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Paul Smith" },
+            }),
+            new NameAndTestCases("locale=en_US,given2=Paul", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Paul" },
+            }),
+            new NameAndTestCases("locale=en_US,given=John", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "John" },
+            }),
+            new NameAndTestCases("locale=en_US,given=John,suffix=Jr.", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "John Jr." },
+            }),
+        }, false);
+    }
+
+    @Test
+    public void TestMultiplePatterns() {
+        executeTestCases(new NameAndTestCases[]{
+            // the Spanish rules have two name patterns for many of the sorting cases: one to use if the surname2
+            // field is populated and one to use if not-- these allow the comma between the fields to be displayed
+            // in the right place.  This test checks to make sure we're using the right pattern based on which
+            // fields are present in the actual name
+            new NameAndTestCases("locale=es_ES,given=Andrés,given2=Manuel,surname=López,surname2=Obrador", new String[][] {
+                    { "es_ES", "LONG",   "REFERRING",  "FORMAL",   "",              "Andrés Manuel López Obrador" },
+                    { "es_ES", "LONG",   "REFERRING",  "FORMAL",   "SORTING"    ,   "López Obrador, Andrés Manuel" },
+            }),
+            new NameAndTestCases("locale=es_ES,given=Andrés,given2=Manuel,surname=López", new String[][] {
+                    { "es_ES", "LONG",   "REFERRING",  "FORMAL",   "",              "Andrés Manuel López" },
+                    { "es_ES", "LONG",   "REFERRING",  "FORMAL",   "SORTING"    ,   "López, Andrés Manuel" },
+            }),
+        }, false);
+    }
+
+    @Test
+    public void TestNameOrder() {
+        executeTestCases(new NameAndTestCases[]{
+            // the name's locale is used to determine the field order.  For the English name formatter, if the
+            // name is English, the order is GN first.  If it's Japanese, it's SN first.  This is true whether the
+            // Japanese name is written in Latin letters or Han characters
+            new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Shinzo Abe" },
+            }),
+            new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Abe Shinzo" },
+            }),
+            new NameAndTestCases("locale=ja_JP,given=晋三,surname=安倍", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "安倍 晋三" },
+            }),
+
+            // the name can also declare its order directly, with the optional "preferredOrder" field.  If it does this,
+            // the value of that field holds for all formatter locales and overrides determining the order
+            // by looking at the name's locale
+            new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe,preferredOrder=surnameFirst", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Abe Shinzo" },
+            }),
+            new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe,preferredOrder=givenFirst", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",              "Shinzo Abe" },
+            }),
+        }, false);
+    }
+
+    @Test
+    public void TestCapitalizedSurname() {
+        executeTestCases(new NameAndTestCases[]{
+            // the SURNAME_ALLCAPS option does just what it says: it causes the surname field
+            // to be displayed in all caps
+            new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",                "Shinzo Abe" },
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "SURNAME_ALLCAPS", "Shinzo ABE" },
+            }),
+            new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",                "Abe Shinzo" },
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "SURNAME_ALLCAPS", "ABE Shinzo" },
+            }),
+        }, false);
+    }
+
+    @Test
+    public void TestNameSpacing() {
+        executeTestCases(new NameAndTestCases[]{
+            // if the formatter locale uses spaces, the result will use its formats (complete with spaces),
+            // regardless of locale
+            new NameAndTestCases("locale=ja_JP,given=Hayao,surname=Miyazaki", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",                "Miyazaki Hayao" },
+            }),
+            new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] {
+                { "en_US", "LONG",   "REFERRING",  "FORMAL",   "",                "宮崎 駿" },
+            }),
+
+            // if the formatter locale doesn't use spaces and the name's locale doesn't either, just use
+            // the native formatter
+            new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] {
+                // (the Japanese name formatter actually inserts a space even for native names)
+                { "ja_JP", "LONG",   "REFERRING",  "FORMAL",   "",                "宮崎 駿" },
+                { "zh_CN", "LONG",   "REFERRING",  "FORMAL",   "",                "宮崎駿" },
+            }),
+
+            // if the formatter locale doesn't use spaces and the name's locale does, use the name locale's formatter,
+            // but if the name is still using the formatter locale's script, use the native formatter's
+            // "foreign space replacement" character instead of spaces
+            new NameAndTestCases("locale=en_US,given=Albert,surname=Einstein", new String[][] {
+                { "ja_JP", "LONG",   "REFERRING",  "FORMAL",   "",                "Albert Einstein" },
+                { "zh_CN", "LONG",   "REFERRING",  "FORMAL",   "",                "Albert Einstein" },
+            }),
+            new NameAndTestCases("locale=en_US,given=アルベルト,surname=アインシュタイン", new String[][] {
+                { "ja_JP", "LONG",   "REFERRING",  "FORMAL",   "",                "アルベルト・アインシュタイン" },
+            }),
+            new NameAndTestCases("locale=en_US,given=阿尔伯特,surname=爱因斯坦", new String[][] {
+                { "zh_CN", "LONG",   "REFERRING",  "FORMAL",   "",                "阿尔伯特·爱因斯坦" },
+            }),
+        }, false);
+    }
+
+    // need tests (and implementation?) for:
+    // - foreign space replacement
+}
author	Rich Gillam <62772518+richgillam@users.noreply.github.com>
	Fri, 12 Aug 2022 23:07:52 +0000 (16:07 -0700)
committer	Rich Gillam <62772518+richgillam@users.noreply.github.com>
	Thu, 1 Sep 2022 20:36:05 +0000 (13:36 -0700)
icu4j/main/classes/core/src/com/ibm/icu/impl/personname/FieldModifierImpl.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java	[new file with mode: 0644]	patch \| blob
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java	[new file with mode: 0644]	patch \| blob