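Adds the measure unit field (UNUM_MEASURE_UNIT_FIELD in C++, NumberFormat.Field.MEASURE_UNIT in Java) and adds plumbing to trim ignorable characters (whitespace, bidi control marks) from the edges of field positions near the FormattedNumber API boundary.
That plumbing uses static_unicode_sets, which requires a change to dependencies.txt.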
UnicodeString simpleFormats[ARRAY_LENGTH];
getMeasureData(loc, unit, width, simpleFormats, status);
if (U_FAILURE(status)) { return result; }
- // TODO: What field to use for units?
- result->simpleFormatsToModifiers(simpleFormats, UNUM_FIELD_COUNT, status);
+ result->simpleFormatsToModifiers(simpleFormats, UNUM_MEASURE_UNIT_FIELD, status);
return result;
}
compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status);
if (U_FAILURE(status)) { return result; }
}
- // TODO: What field to use for units?
- result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_FIELD_COUNT, status);
+ result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_MEASURE_UNIT_FIELD, status);
return result;
}
#if !UCONFIG_NO_FORMATTING
#include "number_stringbuilder.h"
+#include "static_unicode_sets.h"
#include "unicode/utf16.h"
using namespace icu;
bool seenStart = false;
int32_t fractionStart = -1;
int32_t startIndex = fp.getEndIndex();
- for (int i = fZero + startIndex; i <= fZero + fLength; i++) {
+ for (int32_t i = fZero + startIndex; i <= fZero + fLength; i++) {
Field _field = UNUM_FIELD_COUNT;
if (i < fZero + fLength) {
_field = getFieldPtr()[i];
continue;
}
fp.setEndIndex(i - fZero);
- break;
+ // Trim ignorables (whitespace, etc.) from the edge of the field.
+ UFieldPosition ufp = {0, fp.getBeginIndex(), fp.getEndIndex()};
+ if (trimFieldPosition(ufp)) {
+ fp.setBeginIndex(ufp.beginIndex);
+ fp.setEndIndex(ufp.endIndex);
+ break;
+ }
+ // This position was all ignorables; continue to the next position.
+ fp.setEndIndex(fp.getBeginIndex());
+ seenStart = false;
} else if (!seenStart && field == _field) {
fp.setBeginIndex(i - fZero);
seenStart = true;
Field field = fieldAt(i);
if (current == UNUM_INTEGER_FIELD && field == UNUM_GROUPING_SEPARATOR_FIELD) {
// Special case: GROUPING_SEPARATOR counts as an INTEGER.
+ // TODO(ICU-13064): Grouping separator can be more than 1 code unit.
fpih.addAttribute(UNUM_GROUPING_SEPARATOR_FIELD, i, i + 1);
} else if (current != field) {
if (current != UNUM_FIELD_COUNT) {
- fpih.addAttribute(current, currentStart, i);
+ UFieldPosition fp = {0, currentStart, i};
+ if (trimFieldPosition(fp)) {
+ fpih.addAttribute(current, fp.beginIndex, fp.endIndex);
+ }
}
current = field;
currentStart = i;
}
}
if (current != UNUM_FIELD_COUNT) {
- fpih.addAttribute(current, currentStart, fLength);
+ UFieldPosition fp = {0, currentStart, fLength};
+ if (trimFieldPosition(fp)) {
+ fpih.addAttribute(current, fp.beginIndex, fp.endIndex);
+ }
}
}
return false;
}
+bool NumberStringBuilder::trimFieldPosition(UFieldPosition& fp) const {
+ /* Narrows fp in place so that it excludes leading and trailing characters
+ * that belong to unisets::DEFAULT_IGNORABLES. fp.beginIndex and fp.endIndex
+ * are read as offsets from fZero into the buffer returned by getCharPtr().
+ * Returns false, leaving fp unmodified, when nothing remains after trimming
+ * the back; otherwise updates both indices and returns true.
+ *
+ * Step 1: trim ignorables from the back. The span functions are handed a
+ * pointer to the start of the segment, so their results are positions
+ * relative to fp.beginIndex. */
+ int32_t endIgnorablesRelPos = unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
+ getCharPtr() + fZero + fp.beginIndex,
+ fp.endIndex - fp.beginIndex,
+ USET_SPAN_CONTAINED);
+
+ // Check if the entire segment is ignorables (a relative position of 0 means the trailing span reaches beginIndex)
+ if (endIgnorablesRelPos == 0) {
+ return false;
+ }
+ fp.endIndex = fp.beginIndex + endIgnorablesRelPos;
+
+ // Trim ignorables from the front; the length passed below uses the endIndex already shortened above
+ int32_t startIgnorablesRelPos = unisets::get(unisets::DEFAULT_IGNORABLES)->span(
+ getCharPtr() + fZero + fp.beginIndex,
+ fp.endIndex - fp.beginIndex,
+ USET_SPAN_CONTAINED);
+ fp.beginIndex = fp.beginIndex + startIgnorablesRelPos;
+ return true;
+}
+
#endif /* #if !UCONFIG_NO_FORMATTING */
int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
int32_t remove(int32_t index, int32_t count);
+
+ bool trimFieldPosition(UFieldPosition& fpos) const;
};
} // namespace impl
kPermillField = UNUM_PERMILL_FIELD,
/** @stable ICU 2.0 */
kSignField = UNUM_SIGN_FIELD,
+ /** @draft ICU 64 */
+ kMeasureUnitField = UNUM_MEASURE_UNIT_FIELD,
/**
* These constants are provided for backwards compatibility only.
UNUM_PERMILL_FIELD,
/** @stable ICU 49 */
UNUM_SIGN_FIELD,
+ /** @draft ICU 64 */
+ UNUM_MEASURE_UNIT_FIELD,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UNumberFormatFields value.
{UNUM_GROUPING_SEPARATOR_FIELD, 10, 11},
{UNUM_INTEGER_FIELD, 1, 14},
{UNUM_DECIMAL_SEPARATOR_FIELD, 14, 15},
- {UNUM_FRACTION_FIELD, 15, 17}
+ {UNUM_FRACTION_FIELD, 15, 17},
+ {UNUM_MEASURE_UNIT_FIELD, 18, 19}
};
UFieldPosition actual;
for (int32_t i = 0; i < sizeof(expectedFields) / sizeof(*expectedFields); i++) {
number_decimalquantity.o number_stringbuilder.o numparse_stringsegment.o number_utils.o
deps
decnumber double_conversion
+ # for trimming whitespace around fields
+ static_unicode_sets
# for data loading; that could be split off
resourcebundle
int_functions
void locale();
void formatTypes();
void fieldPositionLogic();
+ void fieldPositionCoverage();
void toFormat();
void errors();
void validRanges();
TESTCASE_AUTO(locale);
TESTCASE_AUTO(formatTypes);
TESTCASE_AUTO(fieldPositionLogic);
+ TESTCASE_AUTO(fieldPositionCoverage);
TESTCASE_AUTO(toFormat);
TESTCASE_AUTO(errors);
TESTCASE_AUTO(validRanges);
assertFalse(u"No fraction part in an integer", fmtd.nextFieldPosition(actual, status));
}
+void NumberFormatterApiTest::fieldPositionCoverage() {
+ IcuTestErrorCode status(*this, "fieldPositionCoverage");
+
+ {
+ const char16_t* message = u"Measure unit field position basic";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ u"measure-unit/temperature-fahrenheit",
+ NumberFormatter::with().unit(FAHRENHEIT),
+ Locale::getEnglish(),
+ 68,
+ u"68°F");
+ static const UFieldPosition expectedFieldPositions[] = {
+ // field, begin index, end index
+ {UNUM_INTEGER_FIELD, 0, 2},
+ {UNUM_MEASURE_UNIT_FIELD, 2, 4}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions,
+ sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
+ }
+
+ {
+ const char16_t* message = u"Measure unit field position with compound unit";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ u"measure-unit/temperature-fahrenheit per-measure-unit/duration-day",
+ NumberFormatter::with().unit(FAHRENHEIT).perUnit(DAY),
+ Locale::getEnglish(),
+ 68,
+ u"68°F/d");
+ static const UFieldPosition expectedFieldPositions[] = {
+ // field, begin index, end index
+ {UNUM_INTEGER_FIELD, 0, 2},
+ // coverage for old enum:
+ {DecimalFormat::kMeasureUnitField, 2, 6}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions,
+ sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
+ }
+
+ {
+ const char16_t* message = u"Measure unit field position with spaces";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ u"measure-unit/length-meter unit-width-full-name",
+ NumberFormatter::with().unit(METER).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME),
+ Locale::getEnglish(),
+ 68,
+ u"68 meters");
+ static const UFieldPosition expectedFieldPositions[] = {
+ // field, begin index, end index
+ {UNUM_INTEGER_FIELD, 0, 2},
+ // note: field starts after the space
+ {UNUM_MEASURE_UNIT_FIELD, 3, 9}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions,
+ sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
+ }
+
+ {
+ const char16_t* message = u"Measure unit field position with prefix and suffix";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ u"measure-unit/length-meter per-measure-unit/duration-second unit-width-full-name",
+ NumberFormatter::with().unit(METER).perUnit(SECOND).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME),
+ "ky", // locale with the interesting data
+ 68,
+ u"секундасына 68 метр");
+ static const UFieldPosition expectedFieldPositions[] = {
+ // field, begin index, end index
+ {UNUM_MEASURE_UNIT_FIELD, 0, 11},
+ {UNUM_INTEGER_FIELD, 12, 14},
+ {UNUM_MEASURE_UNIT_FIELD, 15, 19}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions,
+ sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
+ }
+
+ {
+ const char16_t* message = u"Measure unit field position with inner spaces";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ u"measure-unit/temperature-fahrenheit unit-width-full-name",
+ NumberFormatter::with().unit(FAHRENHEIT).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME),
+ "vi", // locale with the interesting data
+ 68,
+ u"68 độ F");
+ static const UFieldPosition expectedFieldPositions[] = {
+ // field, begin index, end index
+ {UNUM_INTEGER_FIELD, 0, 2},
+ // Should trim leading/trailing spaces, but not inner spaces:
+ {UNUM_MEASURE_UNIT_FIELD, 3, 7}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions,
+ sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
+ }
+
+ {
+ // Data: other{"{0} K"} == "\u200E{0} K"
+ // If that data changes, try to find another example of a non-empty unit prefix/suffix
+ // that is also all ignorables (whitespace and bidi control marks).
+ const char16_t* message = u"Measure unit field position with fully ignorable prefix";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ u"measure-unit/temperature-kelvin",
+ NumberFormatter::with().unit(KELVIN),
+ "fa", // locale with the interesting data
+ 68,
+ u"۶۸ K");
+ static const UFieldPosition expectedFieldPositions[] = {
+ // field, begin index, end index
+ {UNUM_INTEGER_FIELD, 1, 3},
+ {UNUM_MEASURE_UNIT_FIELD, 4, 5}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions,
+ sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
+ }
+}
+
void NumberFormatterApiTest::toFormat() {
IcuTestErrorCode status(*this, "icuFormat");
LocalizedNumberFormatter lnf = NumberFormatter::withLocale("fr")
String[] simpleFormats = new String[ARRAY_LENGTH];
getMeasureData(locale, unit, width, simpleFormats);
- // TODO: What field to use for units?
// TODO(ICU4J): Reduce the number of object creations here?
Map<StandardPlural, SimpleModifier> modifiers = new EnumMap<>(
StandardPlural.class);
LongNameHandler result = new LongNameHandler(modifiers, rules, parent);
- result.simpleFormatsToModifiers(simpleFormats, null);
+ result.simpleFormatsToModifiers(simpleFormats, NumberFormat.Field.MEASURE_UNIT);
return result;
}
.trim();
perUnitFormat = SimpleFormatterImpl.formatCompiledPattern(compiled, "{0}", secondaryString);
}
- // TODO: What field to use for units?
Map<StandardPlural, SimpleModifier> modifiers = new EnumMap<>(
StandardPlural.class);
LongNameHandler result = new LongNameHandler(modifiers, rules, parent);
- result.multiSimpleFormatsToModifiers(primaryData, perUnitFormat, null);
+ result.multiSimpleFormatsToModifiers(primaryData, perUnitFormat, NumberFormat.Field.MEASURE_UNIT);
return result;
}
import java.util.HashMap;
import java.util.Map;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.NumberFormat.Field;
+import com.ibm.icu.text.UnicodeSet;
/**
* A StringBuilder optimized for number formatting. It implements the following key features beyond a
return new String(chars, zero, length);
}
- private static final Map<Field, Character> fieldToDebugChar = new HashMap<Field, Character>();
+ private static final Map<Field, Character> fieldToDebugChar = new HashMap<>();
static {
fieldToDebugChar.put(NumberFormat.Field.SIGN, '-');
continue;
}
fp.setEndIndex(i - zero);
- break;
+ // Trim ignorables (whitespace, etc.) from the edge of the field.
+ if (trimFieldPosition(fp)) {
+ break;
+ }
+ // This position was all ignorables; continue to the next position.
+ seenStart = false;
} else if (!seenStart && field == _field) {
fp.setBeginIndex(i - zero);
seenStart = true;
if (current == NumberFormat.Field.INTEGER
&& field == NumberFormat.Field.GROUPING_SEPARATOR) {
// Special case: GROUPING_SEPARATOR counts as an INTEGER.
+ // TODO(ICU-13064): Grouping separator can be more than 1 code unit.
as.addAttribute(NumberFormat.Field.GROUPING_SEPARATOR,
NumberFormat.Field.GROUPING_SEPARATOR,
i,
i + 1);
} else if (current != field) {
if (current != null) {
- as.addAttribute(current, current, currentStart, i);
+ FieldPosition fp = new FieldPosition(null);
+ fp.setBeginIndex(currentStart);
+ fp.setEndIndex(i);
+ if (trimFieldPosition(fp)) {
+ as.addAttribute(current, current, fp.getBeginIndex(), fp.getEndIndex());
+ }
}
current = field;
currentStart = i;
}
}
if (current != null) {
- as.addAttribute(current, current, currentStart, length);
+ FieldPosition fp = new FieldPosition(null);
+ fp.setBeginIndex(currentStart);
+ fp.setEndIndex(length);
+ if (trimFieldPosition(fp)) {
+ as.addAttribute(current, current, fp.getBeginIndex(), fp.getEndIndex());
+ }
}
return as.getIterator();
}
+
+ private boolean trimFieldPosition(FieldPosition fp) {
+ /* Narrows fp in place so that it excludes leading and trailing characters
+ * that belong to the DEFAULT_IGNORABLES set. Returns false, leaving fp
+ * unmodified, when nothing remains after trimming the back; otherwise
+ * updates both the begin and end index and returns true.
+ *
+ * Step 1: trim ignorables from the back. */
+ int endIgnorablesIndex = StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES)
+ .spanBack(this, fp.getEndIndex(), UnicodeSet.SpanCondition.CONTAINED);
+
+ // Check if the entire segment is ignorables. The spanBack call above is given no lower
+ // bound, so its result may presumably fall before the begin index; hence "<=" rather than "==".
+ if (endIgnorablesIndex <= fp.getBeginIndex()) {
+ return false;
+ }
+ fp.setEndIndex(endIgnorablesIndex);
+
+ // Trim ignorables from the front, starting at the (still untrimmed) begin index
+ int startIgnorablesIndex = StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES)
+ .span(this, fp.getBeginIndex(), UnicodeSet.SpanCondition.CONTAINED);
+ fp.setBeginIndex(startIgnorablesIndex);
+ return true;
+ }
}
*/
public static final Field CURRENCY = new Field("currency");
+ /**
+ * Field for the measure unit portion of a formatted number, for example the
+ * "°F" in "68°F" or the "meters" in "68 meters".
+ *
+ * @draft ICU 64
+ */
+ public static final Field MEASURE_UNIT = new Field("measure unit");
+
/**
* Constructs a new instance of NumberFormat.Field with the given field
* name.
assertFalse("No fraction part in an integer", fmtd.nextFieldPosition(actual));
}
+ @Test
+ public void fieldPositionCoverage() {
+ {
+ String message = "Measure unit field position basic";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ "measure-unit/temperature-fahrenheit",
+ NumberFormatter.with().unit(MeasureUnit.FAHRENHEIT),
+ ULocale.ENGLISH,
+ 68,
+ "68°F");
+ Object[][] expectedFieldPositions = new Object[][] {
+ // field, begin index, end index
+ {NumberFormat.Field.INTEGER, 0, 2},
+ {NumberFormat.Field.MEASURE_UNIT, 2, 4}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions);
+ }
+
+ {
+ String message = "Measure unit field position with compound unit";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ "measure-unit/temperature-fahrenheit per-measure-unit/duration-day",
+ NumberFormatter.with().unit(MeasureUnit.FAHRENHEIT).perUnit(MeasureUnit.DAY),
+ ULocale.ENGLISH,
+ 68,
+ "68°F/d");
+ Object[][] expectedFieldPositions = new Object[][] {
+ // field, begin index, end index
+ {NumberFormat.Field.INTEGER, 0, 2},
+ {NumberFormat.Field.MEASURE_UNIT, 2, 6}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions);
+ }
+
+ {
+ String message = "Measure unit field position with spaces";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ "measure-unit/length-meter unit-width-full-name",
+ NumberFormatter.with().unit(MeasureUnit.METER).unitWidth(UnitWidth.FULL_NAME),
+ ULocale.ENGLISH,
+ 68,
+ "68 meters");
+ Object[][] expectedFieldPositions = new Object[][] {
+ // field, begin index, end index
+ {NumberFormat.Field.INTEGER, 0, 2},
+ // note: field starts after the space
+ {NumberFormat.Field.MEASURE_UNIT, 3, 9}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions);
+ }
+
+ {
+ String message = "Measure unit field position with prefix and suffix";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ "measure-unit/length-meter per-measure-unit/duration-second unit-width-full-name",
+ NumberFormatter.with().unit(MeasureUnit.METER).perUnit(MeasureUnit.SECOND).unitWidth(UnitWidth.FULL_NAME),
+ new ULocale("ky"), // locale with the interesting data
+ 68,
+ "секундасына 68 метр");
+ Object[][] expectedFieldPositions = new Object[][] {
+ // field, begin index, end index
+ {NumberFormat.Field.MEASURE_UNIT, 0, 11},
+ {NumberFormat.Field.INTEGER, 12, 14},
+ {NumberFormat.Field.MEASURE_UNIT, 15, 19}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions);
+ }
+
+ {
+ String message = "Measure unit field position with inner spaces";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ "measure-unit/temperature-fahrenheit unit-width-full-name",
+ NumberFormatter.with().unit(MeasureUnit.FAHRENHEIT).unitWidth(UnitWidth.FULL_NAME),
+ new ULocale("vi"), // locale with the interesting data
+ 68,
+ "68 độ F");
+ Object[][] expectedFieldPositions = new Object[][] {
+ // field, begin index, end index
+ {NumberFormat.Field.INTEGER, 0, 2},
+ // Should trim leading/trailing spaces, but not inner spaces:
+ {NumberFormat.Field.MEASURE_UNIT, 3, 7}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions);
+ }
+
+ {
+ // Data: other{"{0} K"} == "\u200E{0} K"
+ // If that data changes, try to find another example of a non-empty unit prefix/suffix
+ // that is also all ignorables (whitespace and bidi control marks).
+ String message = "Measure unit field position with fully ignorable prefix";
+ FormattedNumber result = assertFormatSingle(
+ message,
+ "measure-unit/temperature-kelvin",
+ NumberFormatter.with().unit(MeasureUnit.KELVIN),
+ new ULocale("fa"), // locale with the interesting data
+ 68,
+ "۶۸ K");
+ Object[][] expectedFieldPositions = new Object[][] {
+ // field, begin index, end index
+ {NumberFormat.Field.INTEGER, 1, 3},
+ {NumberFormat.Field.MEASURE_UNIT, 4, 5}};
+ assertFieldPositions(
+ message,
+ result,
+ expectedFieldPositions);
+ }
+ }
+
/** Handler for serialization compatibility test suite. */
public static class FormatHandler implements SerializableTestUtility.Handler {
@Override