From e13ef30930937c506c68c25e2df2975bcc0c04e3 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sat, 2 Jan 2016 01:12:37 +0000 Subject: [PATCH] ICU-12032 rewrite SimplePatternFormatter: quoting like MessageFormat, shorter, create fewer objects; callers avoid wrapper objects X-SVN-Rev: 38149 --- .../ibm/icu/impl/SimplePatternFormatter.java | 667 +++++++++--------- .../src/com/ibm/icu/text/ListFormatter.java | 68 +- .../src/com/ibm/icu/text/MeasureFormat.java | 69 +- .../com/ibm/icu/text/QuantityFormatter.java | 6 +- .../test/util/SimplePatternFormatterTest.java | 53 +- 5 files changed, 430 insertions(+), 433 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/SimplePatternFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/SimplePatternFormatter.java index 7a54f2f494b..d8f3bdac7db 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/SimplePatternFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/SimplePatternFormatter.java @@ -1,441 +1,416 @@ /* ******************************************************************************* - * Copyright (C) 2014-2015, International Business Machines Corporation and + * Copyright (C) 2014-2016, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ package com.ibm.icu.impl; -import java.util.ArrayList; -import java.util.List; - /** - * Compiled version of a pattern such as "{1} was born in {0}". - *

- * Using SimplePatternFormatter objects is both faster and safer than adhoc replacement - * such as pattern.replace("{0}", "Colorado").replace("{1} "Fred");. - * They are faster because they are precompiled; they are safer because they - * account for curly braces escaped by apostrophe ('). - * - * Placeholders are of the form \{[0-9]+\}. If a curly brace is preceded - * by a single quote, it becomes a curly brace instead of the start of a - * placeholder. Two single quotes resolve to one single quote. - *

- * SimplePatternFormatter objects are immutable and can be safely cached like strings. - *

- * Example: + * Formats simple patterns like "{1} was born in {0}". + * Minimal subset of MessageFormat; fast, simple, minimal dependencies. + * Supports only numbered arguments with no type nor style parameters, + * and formats only string values. + * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior. + * + *

Factory methods throw exceptions for syntax errors + * and for too few or too many arguments/placeholders. + * + *

SimplePatternFormatter objects are immutable and can be safely cached like strings. + * + *

Example: *

- * SimplePatternFormatter fmt = SimplePatternFormatter.compile("{1} '{born} in {0}");
- * 
+ * SimplePatternFormatter fmt = SimplePatternFormatter.compile("{1} '{born}' in {0}");
+ *
  * // Output: "paul {born} in england"
  * System.out.println(fmt.format("england", "paul"));
  * 
+ * + * @see com.ibm.icu.text.MessageFormat + * @see com.ibm.icu.text.MessagePattern.ApostropheMode */ -public class SimplePatternFormatter { - private final String patternWithoutPlaceholders; - private final int placeholderCount; - - // [0] first offset; [1] first placeholderId; [2] second offset; - // [3] second placeholderId etc. - private final int[] placeholderIdsOrderedByOffset; - - private final boolean firstPlaceholderReused; +public final class SimplePatternFormatter { + /** + * Argument numbers must be smaller than this limit. + * Text segment lengths are offset by this much. + * This is currently the only unused char value in compiled patterns, + * except it is the maximum value of the first unit (max arg +1). + */ + private static final int ARG_NUM_LIMIT = 0x100; + /** + * Initial and maximum char/UChar value set for a text segment. + * Segment length char values are from ARG_NUM_LIMIT+1 to this value here. + * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing. + */ + private static final char SEGMENT_LENGTH_PLACEHOLDER_CHAR = (char)0xffff; + /** + * Maximum length of a text segment. Longer segments are split into shorter ones. + */ + private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT; - private SimplePatternFormatter(String pattern, PlaceholdersBuilder builder) { - this.patternWithoutPlaceholders = pattern; - this.placeholderIdsOrderedByOffset = - builder.getPlaceholderIdsOrderedByOffset(); - this.placeholderCount = builder.getPlaceholderCount(); - this.firstPlaceholderReused = builder.getFirstPlaceholderReused(); + /** + * Binary representation of the compiled pattern. + * Index 0: One more than the highest argument number. + * Followed by zero or more arguments or literal-text segments. + * + *

An argument is stored as its number, less than ARG_NUM_LIMIT. + * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT, + * followed by that many chars. + */ + private final String compiledPattern; + + private SimplePatternFormatter(String compiledPattern) { + this.compiledPattern = compiledPattern; } /** - * Compiles a string. - * @param pattern The string. - * @return the new SimplePatternFormatter object. + * Creates a formatter from the pattern string. + * + * @param pattern The pattern string. + * @return The new SimplePatternFormatter object. */ - public static SimplePatternFormatter compile(String pattern) { + public static SimplePatternFormatter compile(CharSequence pattern) { return compileMinMaxPlaceholders(pattern, 0, Integer.MAX_VALUE); } /** - * Compiles a string. - * @param pattern The string. + * Creates a formatter from the pattern string. + * + * @param pattern The pattern string. * @param min The pattern must have at least this many placeholders. * @param max The pattern must have at most this many placeholders. - * @return the new SimplePatternFormatter object. + * @return The new SimplePatternFormatter object. */ - public static SimplePatternFormatter compileMinMaxPlaceholders(String pattern, int min, int max) { - PlaceholdersBuilder placeholdersBuilder = new PlaceholdersBuilder(); - PlaceholderIdBuilder idBuilder = new PlaceholderIdBuilder(); - StringBuilder newPattern = new StringBuilder(); - State state = State.INIT; - for (int i = 0; i < pattern.length(); i++) { - char ch = pattern.charAt(i); - switch (state) { - case INIT: - if (ch == 0x27) { - state = State.APOSTROPHE; - } else if (ch == '{') { - state = State.PLACEHOLDER; - idBuilder.reset(); + public static SimplePatternFormatter compileMinMaxPlaceholders(CharSequence pattern, int min, int max) { + StringBuilder sb = new StringBuilder(); + String compiledPattern = compileToStringMinMaxPlaceholders(pattern, sb, min, max); + return new SimplePatternFormatter(compiledPattern); + } + + /** + * Creates a compiled form of the pattern string, for use with appropriate static methods. + * + * @param pattern The pattern string. + * @param min The pattern must have at least this many placeholders. + * @param max The pattern must have at most this many placeholders. + * @return The compiled-pattern string. + */ + public static String compileToStringMinMaxPlaceholders( + CharSequence pattern, StringBuilder sb, int min, int max) { + // Parse consistent with MessagePattern, but + // - support only simple numbered arguments + // - build a simple binary structure into the result string + int length = pattern.length(); + sb.ensureCapacity(length); + // Reserve the first char for the number of arguments. + sb.setLength(1); + int textLength = 0; + int maxArg = -1; + boolean inQuote = false; + for (int i = 0; i < length;) { + char c = pattern.charAt(i++); + if (c == '\'') { + if (i < length && (c = pattern.charAt(i)) == '\'') { + // double apostrophe, skip the second one + ++i; + } else if (inQuote) { + // skip the quote-ending apostrophe + inQuote = false; + continue; + } else if (c == '{' || c == '}') { + // Skip the quote-starting apostrophe, find the end of the quoted literal text. + ++i; + inQuote = true; } else { - newPattern.append(ch); + // The apostrophe is part of literal text. + c = '\''; } - break; - case APOSTROPHE: - if (ch == 0x27) { - newPattern.append("'"); - } else if (ch == '{') { - newPattern.append("{"); - } else { - newPattern.append("'"); - newPattern.append(ch); + } else if (!inQuote && c == '{') { + if (textLength > 0) { + sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength)); + textLength = 0; } - state = State.INIT; - break; - case PLACEHOLDER: - if (ch >= '0' && ch <= '9') { - idBuilder.add(ch); - } else if (ch == '}' && idBuilder.isValid()) { - placeholdersBuilder.add(idBuilder.getId(), newPattern.length()); - state = State.INIT; + int argNumber; + if ((i + 1) < length && + 0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 && + pattern.charAt(i + 1) == '}') { + i += 2; } else { - newPattern.append('{'); - idBuilder.appendTo(newPattern); - newPattern.append(ch); - state = State.INIT; + // Multi-digit argument number (no leading zero) or syntax error. + // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index) + // around the number, but this class does not. + int argStart = i - 1; + argNumber = -1; + if (i < length && '1' <= (c = pattern.charAt(i++)) && c <= '9') { + argNumber = c - '0'; + while (i < length && '0' <= (c = pattern.charAt(i++)) && c <= '9') { + argNumber = argNumber * 10 + (c - '0'); + if (argNumber >= ARG_NUM_LIMIT) { + break; + } + } + } + if (argNumber < 0 || c != '}') { + throw new IllegalArgumentException( + "Argument syntax error in pattern \"" + pattern + + "\" at index " + argStart + + ": " + pattern.subSequence(argStart, i)); + } + } + if (argNumber > maxArg) { + maxArg = argNumber; } - break; - default: - throw new IllegalStateException(); + sb.append((char)argNumber); + continue; + } // else: c is part of literal text + // Append c and track the literal-text segment length. + if (textLength == 0) { + // Reserve a char for the length of a new text segment, preset the maximum length. + sb.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR); + } + sb.append(c); + if (++textLength == MAX_SEGMENT_LENGTH) { + textLength = 0; } } - switch (state) { - case INIT: - break; - case APOSTROPHE: - newPattern.append("'"); - break; - case PLACEHOLDER: - newPattern.append('{'); - idBuilder.appendTo(newPattern); - break; - default: - throw new IllegalStateException(); + if (textLength > 0) { + sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength)); } - if (placeholdersBuilder.getPlaceholderCount() < min) { + int argCount = maxArg + 1; + if (argCount < min) { throw new IllegalArgumentException( "Fewer than minimum " + min + " placeholders in pattern \"" + pattern + "\""); } - if (placeholdersBuilder.getPlaceholderCount() > max) { + if (argCount > max) { throw new IllegalArgumentException( "More than maximum " + max + " placeholders in pattern \"" + pattern + "\""); } - return new SimplePatternFormatter(newPattern.toString(), placeholdersBuilder); + sb.setCharAt(0, (char)argCount); + return sb.toString(); } - + /** - * Returns the max placeholder ID + 1. + * @return The max argument number/placeholder ID + 1. */ public int getPlaceholderCount() { - return placeholderCount; + return getPlaceholderCount(compiledPattern); + } + + /** + * @param compiledPattern Compiled form of a pattern string. + * @return The max argument number/placeholder ID + 1. + */ + public static int getPlaceholderCount(String compiledPattern) { + return compiledPattern.charAt(0); } - + /** * Formats the given values. */ public String format(CharSequence... values) { - return formatAndAppend(new StringBuilder(), null, values).toString(); + return formatCompiledPattern(compiledPattern, values); } /** * Formats the given values. - * - * @param appendTo the result appended here. - * @param offsets position of first value in appendTo stored in offsets[0]; - * second in offsets[1]; third in offsets[2] etc. An offset of -1 means that the - * corresponding value is not in appendTo. offsets.length and values.length may - * differ. If offsets.length < values.length then only the first offsets are written out; - * If offsets.length > values.length then the extra offsets get -1. - * If caller is not interested in offsets, caller may pass null here. - * @param values the placeholder values. A placeholder value may not be the same object as - * appendTo. + * + * @param compiledPattern Compiled form of a pattern string. + */ + public static String formatCompiledPattern(String compiledPattern, CharSequence... values) { + return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString(); + } + + /** + * Formats the given values, appending to the appendTo builder. + * + * @param appendTo Gets the formatted pattern and values appended. + * @param offsets offsets[i] receives the offset of where + * values[i] replaced pattern argument {i}. + * Can be null, or can be shorter or longer than values. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param values The placeholder values. + * A placeholder value may not be the same object as appendTo. * @return appendTo */ public StringBuilder formatAndAppend( StringBuilder appendTo, int[] offsets, CharSequence... values) { - if (values.length < placeholderCount) { + return formatAndAppend(compiledPattern, appendTo, offsets, values); + } + + /** + * Formats the given values, appending to the appendTo builder. + * + * @param compiledPattern Compiled form of a pattern string. + * @param appendTo Gets the formatted pattern and values appended. + * @param offsets offsets[i] receives the offset of where + * values[i] replaced pattern argument {i}. + * Can be null, or can be shorter or longer than values. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param values The placeholder values. + * A placeholder value may not be the same object as appendTo. + * @return appendTo + */ + public static StringBuilder formatAndAppend( + String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) { + if (values.length < getPlaceholderCount(compiledPattern)) { throw new IllegalArgumentException("Too few values."); } - PlaceholderValues placeholderValues = new PlaceholderValues(values); - if (placeholderValues.isAppendToInAnyIndexExcept(appendTo, -1)) { - throw new IllegalArgumentException("Parameter values cannot be the same as appendTo."); - } - formatReturningOffsetLength(appendTo, offsets, placeholderValues); - return appendTo; + return format(compiledPattern, appendTo, null, true, offsets, values); } - + /** - * Formats the given values. - * - * @param result The result is stored here overwriting any previously stored value. - * @param offsets position of first value in result stored in offsets[0]; - * second in offsets[1]; third in offsets[2] etc. An offset of -1 means that the - * corresponding value is not in result. offsets.length and values.length may - * differ. If offsets.length < values.length then only the first offsets are written out; - * If offsets.length > values.length then the extra offsets get -1. - * If caller is not interested in offsets, caller may pass null here. - * @param values the placeholder values. A placeholder value may be result itself in which case - * The previous value of result is used. + * Formats the given values, replacing the contents of the result builder. + * May optimize by actually appending to the result if it is the same object + * as the initial argument's corresponding value. + * + * @param result Gets the formatted pattern and values appended. + * @param offsets offsets[i] receives the offset of where + * values[i] replaced pattern argument {i}. + * Can be null, or can be shorter or longer than values. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param values The placeholder values. + * A placeholder value may be the same object as result. * @return result */ public StringBuilder formatAndReplace( StringBuilder result, int[] offsets, CharSequence... values) { - if (values.length < placeholderCount) { + return formatAndReplace(compiledPattern, result, offsets, values); + } + + /** + * Formats the given values, replacing the contents of the result builder. + * May optimize by actually appending to the result if it is the same object + * as the initial argument's corresponding value. + * + * @param compiledPattern Compiled form of a pattern string. + * @param result Gets the formatted pattern and values appended. + * @param offsets offsets[i] receives the offset of where + * values[i] replaced pattern argument {i}. + * Can be null, or can be shorter or longer than values. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param values The placeholder values. + * A placeholder value may be the same object as result. + * @return result + */ + public static StringBuilder formatAndReplace( + String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) { + if (values.length < getPlaceholderCount(compiledPattern)) { throw new IllegalArgumentException("Too few values."); } - PlaceholderValues placeholderValues = new PlaceholderValues(values); - int placeholderAtStart = getUniquePlaceholderAtStart(); - - // If patterns starts with a placeholder and the value for that placeholder - // is result, then we can may be able optimize by just appending to result. - if (placeholderAtStart >= 0 && values[placeholderAtStart] == result) { - - // If result is the value for other placeholders, call off optimization. - if (placeholderValues.isAppendToInAnyIndexExcept(result, placeholderAtStart)) { - placeholderValues.snapshotAppendTo(result); - result.setLength(0); - formatReturningOffsetLength(result, offsets, placeholderValues); - return result; - } - - // Otherwise we can optimize - int offsetLength = formatReturningOffsetLength(result, offsets, placeholderValues); - - // We have to make the offset for the placeholderAtStart placeholder be 0. - // Otherwise it would be the length of the previous value of result. - if (offsetLength > placeholderAtStart) { - offsets[placeholderAtStart] = 0; + + // If the pattern starts with an argument whose value is the same object + // as the result, then we keep the result contents and append to it. + // Otherwise we replace its contents. + int firstArg = -1; + // If any non-initial argument value is the same object as the result, + // then we first copy its contents and use that instead while formatting. + String resultCopy = null; + if (getPlaceholderCount(compiledPattern) > 0) { + for (int i = 1; i < compiledPattern.length();) { + int n = compiledPattern.charAt(i++); + if (n < ARG_NUM_LIMIT) { + if (values[n] == result) { + if (i == 2) { + firstArg = n; + } else if (resultCopy == null) { + resultCopy = result.toString(); + } + } + } else { + i += n - ARG_NUM_LIMIT; + } } - return result; } - if (placeholderValues.isAppendToInAnyIndexExcept(result, -1)) { - placeholderValues.snapshotAppendTo(result); + if (firstArg < 0) { + result.setLength(0); } - result.setLength(0); - formatReturningOffsetLength(result, offsets, placeholderValues); - return result; + return format(compiledPattern, result, resultCopy, false, offsets, values); } - + /** - * Formats this object using values {0}, {1} etc. Note that this is - * not the same as the original pattern string used to build this object. + * Returns a string similar to the original pattern, only for debugging. */ @Override public String toString() { - String[] values = new String[this.getPlaceholderCount()]; + String[] values = new String[getPlaceholderCount()]; for (int i = 0; i < values.length; i++) { values[i] = String.format("{%d}", i); } return formatAndAppend(new StringBuilder(), null, values).toString(); } - + /** - * Returns this pattern with none of the placeholders. + * Returns the pattern text with none of the placeholders. + * Like formatting with all-empty string values. */ - public String getPatternWithNoPlaceholders() { - return patternWithoutPlaceholders; + public String getTextWithNoPlaceholders() { + return getTextWithNoPlaceholders(compiledPattern); } - + /** - * Just like format, but uses placeholder values exactly as they are. - * A placeholder value that is the same object as appendTo is treated - * as the empty string. In addition, returns the length of the offsets - * array. Returns 0 if offsets is null. + * Returns the pattern text with none of the placeholders. + * Like formatting with all-empty string values. + * + * @param compiledPattern Compiled form of a pattern string. */ - private int formatReturningOffsetLength( - StringBuilder appendTo, - int[] offsets, - PlaceholderValues values) { - int offsetLen = offsets == null ? 0 : offsets.length; - for (int i = 0; i < offsetLen; i++) { - offsets[i] = -1; - } - if (placeholderIdsOrderedByOffset.length == 0) { - appendTo.append(patternWithoutPlaceholders); - return offsetLen; - } - appendTo.append( - patternWithoutPlaceholders, - 0, - placeholderIdsOrderedByOffset[0]); - setPlaceholderOffset( - placeholderIdsOrderedByOffset[1], - appendTo.length(), - offsets, - offsetLen); - CharSequence placeholderValue = values.get(placeholderIdsOrderedByOffset[1]); - if (placeholderValue != appendTo) { - appendTo.append(placeholderValue); - } - for (int i = 2; i < placeholderIdsOrderedByOffset.length; i += 2) { - appendTo.append( - patternWithoutPlaceholders, - placeholderIdsOrderedByOffset[i - 2], - placeholderIdsOrderedByOffset[i]); - setPlaceholderOffset( - placeholderIdsOrderedByOffset[i + 1], - appendTo.length(), - offsets, - offsetLen); - placeholderValue = values.get(placeholderIdsOrderedByOffset[i + 1]); - if (placeholderValue != appendTo) { - appendTo.append(placeholderValue); + public static String getTextWithNoPlaceholders(String compiledPattern) { + int capacity = compiledPattern.length() - 1 - getPlaceholderCount(compiledPattern); + StringBuilder sb = new StringBuilder(capacity); + for (int i = 1; i < compiledPattern.length();) { + int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT; + if (segmentLength > 0) { + int limit = i + segmentLength; + sb.append(compiledPattern, i, limit); + i = limit; } } - appendTo.append( - patternWithoutPlaceholders, - placeholderIdsOrderedByOffset[placeholderIdsOrderedByOffset.length - 2], - patternWithoutPlaceholders.length()); - return offsetLen; - } - - - /** - * Returns the placeholder at the beginning of this pattern (e.g 3 for placeholder {3}). - * Returns -1 if the beginning of pattern is text or if the placeholder at beginning - * of this pattern is used again elsewhere in pattern. - */ - private int getUniquePlaceholderAtStart() { - if (placeholderIdsOrderedByOffset.length == 0 - || firstPlaceholderReused || placeholderIdsOrderedByOffset[0] != 0) { - return -1; - } - return placeholderIdsOrderedByOffset[1]; + return sb.toString(); } - - private static void setPlaceholderOffset( - int placeholderId, int offset, int[] offsets, int offsetLen) { - if (placeholderId < offsetLen) { - offsets[placeholderId] = offset; - } - } - - private static enum State { - INIT, - APOSTROPHE, - PLACEHOLDER, - } - - private static class PlaceholderIdBuilder { - private int id = 0; - private int idLen = 0; - - public void reset() { - id = 0; - idLen = 0; - } - public int getId() { - return id; - } - - public void appendTo(StringBuilder appendTo) { - if (idLen > 0) { - appendTo.append(id); + private static StringBuilder format( + String compiledPattern, + StringBuilder result, String resultCopy, boolean forbidResultAsValue, + int[] offsets, CharSequence[] values) { + int offsetsLength; + if (offsets == null) { + offsetsLength = 0; + } else { + offsetsLength = offsets.length; + for (int i = 0; i < offsetsLength; i++) { + offsets[i] = -1; } } - - public boolean isValid() { - return idLen > 0; - } - - public void add(char ch) { - id = id * 10 + ch - '0'; - idLen++; - } - } - - private static class PlaceholdersBuilder { - private List placeholderIdsOrderedByOffset = new ArrayList(); - private int placeholderCount = 0; - private boolean firstPlaceholderReused = false; - - public void add(int placeholderId, int offset) { - placeholderIdsOrderedByOffset.add(offset); - placeholderIdsOrderedByOffset.add(placeholderId); - if (placeholderId >= placeholderCount) { - placeholderCount = placeholderId + 1; - } - int len = placeholderIdsOrderedByOffset.size(); - if (len > 2 - && placeholderIdsOrderedByOffset.get(len - 1) - .equals(placeholderIdsOrderedByOffset.get(1))) { - firstPlaceholderReused = true; - } - } - - public int getPlaceholderCount() { - return placeholderCount; - } - - public int[] getPlaceholderIdsOrderedByOffset() { - int[] result = new int[placeholderIdsOrderedByOffset.size()]; - for (int i = 0; i < result.length; i++) { - result[i] = placeholderIdsOrderedByOffset.get(i).intValue(); - } - return result; - } - - public boolean getFirstPlaceholderReused() { - return firstPlaceholderReused; - } - } - - /** - * Represents placeholder values. - */ - private static class PlaceholderValues { - private final CharSequence[] values; - private CharSequence appendTo; - private String appendToCopy; - - public PlaceholderValues(CharSequence ...values) { - this.values = values; - this.appendTo = null; - this.appendToCopy = null; - } - - /** - * Returns true if appendTo value is at any index besides exceptIndex. - */ - public boolean isAppendToInAnyIndexExcept(CharSequence appendTo, int exceptIndex) { - for (int i = 0; i < values.length; ++i) { - if (i != exceptIndex && values[i] == appendTo) { - return true; + for (int i = 1; i < compiledPattern.length();) { + int n = compiledPattern.charAt(i++); + if (n < ARG_NUM_LIMIT) { + CharSequence placeholderValue = values[n]; + if (placeholderValue == result) { + if (forbidResultAsValue) { + throw new IllegalArgumentException("Value must not be same object as result"); + } + if (i == 2) { + // We are appending to result which is also the first value object. + if (n < offsetsLength) { + offsets[n] = 0; + } + } else { + if (n < offsetsLength) { + offsets[n] = result.length(); + } + result.append(resultCopy); + } + } else { + if (n < offsetsLength) { + offsets[n] = result.length(); + } + result.append(placeholderValue); } + } else { + int limit = i + (n - ARG_NUM_LIMIT); + result.append(compiledPattern, i, limit); + i = limit; } - return false; - } - - /** - * For each appendTo value, stores the snapshot of it in its place. - */ - public void snapshotAppendTo(CharSequence appendTo) { - this.appendTo = appendTo; - this.appendToCopy = appendTo.toString(); } - - /** - * Return placeholder at given index. - */ - public CharSequence get(int index) { - if (appendTo == null || appendTo != values[index]) { - return values[index]; - } - return appendToCopy; - } + return result; } - } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java index 86ad07968fc..0dda989128d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 2012-2015, Google, International Business Machines Corporation and + * Copyright (C) 2012-2016, Google, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ @@ -27,10 +27,11 @@ import com.ibm.icu.util.UResourceBundle; * @stable ICU 50 */ final public class ListFormatter { - private final SimplePatternFormatter two; - private final SimplePatternFormatter start; - private final SimplePatternFormatter middle; - private final SimplePatternFormatter end; + // Compiled SimplePatternFormatter patterns. + private final String two; + private final String start; + private final String middle; + private final String end; private final ULocale locale; /** @@ -107,14 +108,14 @@ final public class ListFormatter { @Deprecated public ListFormatter(String two, String start, String middle, String end) { this( - compilePattern(two), - compilePattern(start), - compilePattern(middle), - compilePattern(end), + compilePattern(two, new StringBuilder()), + compilePattern(start, new StringBuilder()), + compilePattern(middle, new StringBuilder()), + compilePattern(end, new StringBuilder()), null); } - private ListFormatter(SimplePatternFormatter two, SimplePatternFormatter start, SimplePatternFormatter middle, SimplePatternFormatter end, ULocale locale) { + private ListFormatter(String two, String start, String middle, String end, ULocale locale) { this.two = two; this.start = start; this.middle = middle; @@ -122,8 +123,8 @@ final public class ListFormatter { this.locale = locale; } - private static SimplePatternFormatter compilePattern(String pattern) { - return SimplePatternFormatter.compileMinMaxPlaceholders(pattern, 2, 2); + private static String compilePattern(String pattern, StringBuilder sb) { + return SimplePatternFormatter.compileToStringMinMaxPlaceholders(pattern, sb, 2, 2); } /** @@ -266,25 +267,22 @@ final public class ListFormatter { // added in relation to the rest of the list. {0} represents the rest of the list; {1} // represents the new object in pattern. next is the object to be added. If recordOffset // is true, records the offset of next in the formatted string. - public FormattedListBuilder append(SimplePatternFormatter pattern, Object next, boolean recordOffset) { - if (pattern.getPlaceholderCount() != 2) { - throw new IllegalArgumentException("Need {0} and {1} only in pattern " + pattern); + public FormattedListBuilder append(String pattern, Object next, boolean recordOffset) { + int[] offsets = (recordOffset || offsetRecorded()) ? new int[2] : null; + SimplePatternFormatter.formatAndReplace( + pattern, current, offsets, current, next.toString()); + if (offsets != null) { + if (offsets[0] == -1 || offsets[1] == -1) { + throw new IllegalArgumentException( + "{0} or {1} missing from pattern " + pattern); + } + if (recordOffset) { + offset = offsets[1]; + } else { + offset += offsets[0]; + } } - int[] offsets = (recordOffset || offsetRecorded()) ? new int[2] : null; - pattern.formatAndReplace( - current, offsets, current, next.toString()); - if (offsets != null) { - if (offsets[0] == -1 || offsets[1] == -1) { - throw new IllegalArgumentException( - "{0} or {1} missing from pattern " + pattern); - } - if (recordOffset) { - offset = offsets[1]; - } else { - offset += offsets[0]; - } - } - return this; + return this; } @Override @@ -319,12 +317,12 @@ final public class ListFormatter { private static ListFormatter load(ULocale ulocale, String style) { ICUResourceBundle r = (ICUResourceBundle)UResourceBundle. getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, ulocale); - + StringBuilder sb = new StringBuilder(); return new ListFormatter( - compilePattern(r.getWithFallback("listPattern/" + style + "/2").getString()), - compilePattern(r.getWithFallback("listPattern/" + style + "/start").getString()), - compilePattern(r.getWithFallback("listPattern/" + style + "/middle").getString()), - compilePattern(r.getWithFallback("listPattern/" + style + "/end").getString()), + compilePattern(r.getWithFallback("listPattern/" + style + "/2").getString(), sb), + compilePattern(r.getWithFallback("listPattern/" + style + "/start").getString(), sb), + compilePattern(r.getWithFallback("listPattern/" + style + "/middle").getString(), sb), + compilePattern(r.getWithFallback("listPattern/" + style + "/end").getString(), sb), ulocale); } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java index 9b64ad3314e..2ddc92dbee6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (c) 2004-2015, International Business Machines + * Copyright (c) 2004-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -449,9 +449,9 @@ public class MeasureFormat extends UFormat { // FieldPosition pos2 = new FieldPosition(0); // currencyFormat.format(currencyHigh, buffer2, pos2); } else { - SimplePatternFormatter formatter = + String formatter = getPluralFormatter(lowValue.getUnit(), formatWidth, resolvedPlural.ordinal()); - return formatter.format(formattedNumber); + return SimplePatternFormatter.formatCompiledPattern(formatter, formattedNumber); } } @@ -747,27 +747,27 @@ public class MeasureFormat extends UFormat { * unitsShort/duration/hour contains other{"{0} hrs"}. */ class UnitPatternSink extends UResource.TableSink { - SimplePatternFormatter[] patterns; + String[] patterns; void setFormatterIfAbsent(int index, UResource.Value value, int minPlaceholders) { if (patterns == null) { - EnumMap styleToPatterns = + EnumMap styleToPatterns = cacheData.unitToStyleToPatterns.get(unit); if (styleToPatterns == null) { styleToPatterns = - new EnumMap(FormatWidth.class); + new EnumMap(FormatWidth.class); cacheData.unitToStyleToPatterns.put(unit, styleToPatterns); } else { patterns = styleToPatterns.get(width); } if (patterns == null) { - patterns = new SimplePatternFormatter[MeasureFormatData.PATTERN_COUNT]; + patterns = new String[MeasureFormatData.PATTERN_COUNT]; styleToPatterns.put(width, patterns); } } if (patterns[index] == null) { - patterns[index] = SimplePatternFormatter. - compileMinMaxPlaceholders(value.getString(), minPlaceholders, 1); + patterns[index] = SimplePatternFormatter.compileToStringMinMaxPlaceholders( + value.getString(), sb, minPlaceholders, 1); } } @@ -813,7 +813,8 @@ public class MeasureFormat extends UFormat { public void put(UResource.Key key, UResource.Value value) { if (key.contentEquals("per")) { cacheData.styleToPerPattern.put(width, - SimplePatternFormatter.compileMinMaxPlaceholders(value.getString(), 2, 2)); + SimplePatternFormatter.compileToStringMinMaxPlaceholders( + value.getString(), sb, 2, 2)); } } } @@ -912,6 +913,9 @@ public class MeasureFormat extends UFormat { FormatWidth width; String type; MeasureUnit unit; + + // Temporary + StringBuilder sb = new StringBuilder(); } /** @@ -933,11 +937,10 @@ public class MeasureFormat extends UFormat { return width; } - private SimplePatternFormatter getFormatterOrNull(MeasureUnit unit, FormatWidth width, int index) { + private String getFormatterOrNull(MeasureUnit unit, FormatWidth width, int index) { width = getRegularWidth(width); - Map styleToPatterns = - cache.unitToStyleToPatterns.get(unit); - SimplePatternFormatter[] patterns = styleToPatterns.get(width); + Map styleToPatterns = cache.unitToStyleToPatterns.get(unit); + String[] patterns = styleToPatterns.get(width); if (patterns != null && patterns[index] != null) { return patterns[index]; } @@ -951,8 +954,8 @@ public class MeasureFormat extends UFormat { return null; } - private SimplePatternFormatter getFormatter(MeasureUnit unit, FormatWidth width, int index) { - SimplePatternFormatter pattern = getFormatterOrNull(unit, width, index); + private String getFormatter(MeasureUnit unit, FormatWidth width, int index) { + String pattern = getFormatterOrNull(unit, width, index); if (pattern == null) { throw new MissingResourceException( "no formatting pattern for " + unit + ", width " + width + ", index " + index, @@ -961,9 +964,9 @@ public class MeasureFormat extends UFormat { return pattern; } - private SimplePatternFormatter getPluralFormatter(MeasureUnit unit, FormatWidth width, int index) { + private String getPluralFormatter(MeasureUnit unit, FormatWidth width, int index) { if (index != StandardPlural.OTHER_INDEX) { - SimplePatternFormatter pattern = getFormatterOrNull(unit, width, index); + String pattern = getFormatterOrNull(unit, width, index); if (pattern != null) { return pattern; } @@ -971,9 +974,9 @@ public class MeasureFormat extends UFormat { return getFormatter(unit, width, StandardPlural.OTHER_INDEX); } - private SimplePatternFormatter getPerFormatter(FormatWidth width) { + private String getPerFormatter(FormatWidth width) { width = getRegularWidth(width); - SimplePatternFormatter perPattern = cache.styleToPerPattern.get(width); + String perPattern = cache.styleToPerPattern.get(width); if (perPattern != null) { return perPattern; } @@ -990,17 +993,17 @@ public class MeasureFormat extends UFormat { private int withPerUnitAndAppend( CharSequence formatted, MeasureUnit perUnit, StringBuilder appendTo) { int[] offsets = new int[1]; - SimplePatternFormatter perUnitPattern = + String perUnitPattern = getFormatterOrNull(perUnit, formatWidth, MeasureFormatData.PER_UNIT_INDEX); if (perUnitPattern != null) { - perUnitPattern.formatAndAppend(appendTo, offsets, formatted); + SimplePatternFormatter.formatAndAppend(perUnitPattern, appendTo, offsets, formatted); return offsets[0]; } - SimplePatternFormatter perPattern = getPerFormatter(formatWidth); - SimplePatternFormatter pattern = - getPluralFormatter(perUnit, formatWidth, StandardPlural.ONE.ordinal()); - String perUnitString = pattern.getPatternWithNoPlaceholders().trim(); - perPattern.formatAndAppend(appendTo, offsets, formatted, perUnitString); + String perPattern = getPerFormatter(formatWidth); + String pattern = getPluralFormatter(perUnit, formatWidth, StandardPlural.ONE.ordinal()); + String perUnitString = SimplePatternFormatter.getTextWithNoPlaceholders(pattern).trim(); + SimplePatternFormatter.formatAndAppend( + perPattern, appendTo, offsets, formatted, perUnitString); return offsets[0]; } @@ -1028,7 +1031,7 @@ public class MeasureFormat extends UFormat { StringBuffer formattedNumber = new StringBuffer(); StandardPlural pluralForm = QuantityFormatter.selectPlural( n, nf.nf, rules, formattedNumber, fieldPosition); - SimplePatternFormatter formatter = getPluralFormatter(unit, formatWidth, pluralForm.ordinal()); + String formatter = getPluralFormatter(unit, formatWidth, pluralForm.ordinal()); return QuantityFormatter.format(formatter, formattedNumber, appendTo, fieldPosition); } @@ -1040,8 +1043,6 @@ public class MeasureFormat extends UFormat { * an array[WIDTH_INDEX_COUNT] or EnumMap of * complete sets of unit & per patterns, * to correspond to the resource data and its aliases. - * - * TODO: Maybe store more sparsely in general, with pointers rather than potentially-empty objects. */ private static final class MeasureFormatData { static final int PER_UNIT_INDEX = StandardPlural.COUNT; @@ -1058,10 +1059,10 @@ public class MeasureFormat extends UFormat { */ final FormatWidth widthFallback[] = new FormatWidth[FormatWidth.INDEX_COUNT]; /** Measure unit -> format width -> array of patterns ("{0} meters") (plurals + PER_UNIT_INDEX) */ - final Map> unitToStyleToPatterns = - new HashMap>(); - final EnumMap styleToPerPattern = - new EnumMap(FormatWidth.class);; + final Map> unitToStyleToPatterns = + new HashMap>(); + final EnumMap styleToPerPattern = + new EnumMap(FormatWidth.class);; } // Wrapper around NumberFormat that provides immutability and thread-safety. diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/QuantityFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/QuantityFormatter.java index 99fd38480b0..bf3b3ed2a6a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/QuantityFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/QuantityFormatter.java @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 2013-2015, International Business Machines Corporation and + * Copyright (C) 2013-2016, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ @@ -120,10 +120,10 @@ class QuantityFormatter { /** * Formats the pattern with the value and adjusts the FieldPosition. */ - public static StringBuilder format(SimplePatternFormatter pattern, CharSequence value, + public static StringBuilder format(String compiledPattern, CharSequence value, StringBuilder appendTo, FieldPosition pos) { int[] offsets = new int[1]; - pattern.formatAndAppend(appendTo, offsets, value); + SimplePatternFormatter.formatAndAppend(compiledPattern, appendTo, offsets, value); if (pos.getBeginIndex() != 0 || pos.getEndIndex() != 0) { if (offsets[0] >= 0) { pos.setBeginIndex(pos.getBeginIndex() + offsets[0]); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/SimplePatternFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/SimplePatternFormatterTest.java index ffcb26c832c..f6ecc6e3fc0 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/SimplePatternFormatterTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/SimplePatternFormatterTest.java @@ -1,13 +1,15 @@ /* ******************************************************************************* - * Copyright (C) 2014, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2014-2016, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ package com.ibm.icu.dev.test.util; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.impl.SimplePatternFormatter; +import com.ibm.icu.text.MessageFormat; +import com.ibm.icu.util.ULocale; public class SimplePatternFormatterTest extends TestFmwk { @@ -53,29 +55,41 @@ public class SimplePatternFormatterTest extends TestFmwk { "offsets[0]", -1, offsets[0]); - fmt = SimplePatternFormatter.compile("Some {} messed {12d up stuff."); - assertEquals( - "getPlaceholderCount", - 0, - fmt.getPlaceholderCount()); - assertEquals( - "format", - "Some {} messed {12d up stuff.", - fmt.format("to")); } - + + public void TestSyntaxErrors() { + try { + SimplePatternFormatter.compile("{}"); + fail("Syntax error did not yield an exception."); + } catch (IllegalArgumentException expected) { + } + try { + SimplePatternFormatter.compile("{12d"); + fail("Syntax error did not yield an exception."); + } catch (IllegalArgumentException expected) { + } + } + public void TestOnePlaceholder() { assertEquals("TestOnePlaceholder", "1 meter", SimplePatternFormatter.compile("{0} meter").format("1")); } - + + public void TestBigPlaceholder() { + SimplePatternFormatter fmt = SimplePatternFormatter.compile("a{20}c"); + assertEquals("{20} count", 21, fmt.getPlaceholderCount()); + CharSequence[] values = new CharSequence[21]; + values[20] = "b"; + assertEquals("{20}=b", "abc", fmt.format(values)); + } + public void TestGetPatternWithNoPlaceholders() { assertEquals( "", "Templates and are here.", SimplePatternFormatter.compile( - "Templates {1}{2} and {3} are here.").getPatternWithNoPlaceholders()); + "Templates {1}{2} and {3} are here.").getTextWithNoPlaceholders()); } public void TestTooFewPlaceholderValues() { @@ -226,7 +240,16 @@ public class SimplePatternFormatterTest extends TestFmwk { int[] expectedOffsets = {10, 18, 30, 27}; verifyOffsets(expectedOffsets, offsets); } - + + public void TestQuotingLikeMessageFormat() { + String pattern = "{0} don't can''t '{5}''}{a' again '}'{1} to the '{end"; + SimplePatternFormatter spf = SimplePatternFormatter.compile(pattern); + MessageFormat mf = new MessageFormat(pattern, ULocale.ROOT); + String expected = "X don't can't {5}'}{a again }Y to the {end"; + assertEquals("MessageFormat", expected, mf.format(new Object[] { "X", "Y" })); + assertEquals("SimplePatternFormatter", expected, spf.format("X", "Y")); + } + void verifyOffsets(int[] expected, int[] actual) { for (int i = 0; i < expected.length; ++i) { if (expected[i] != actual[i]) { -- 2.40.0