]> granicus.if.org Git - icu/commitdiff
ICU-21551 Fixing unit prefix normalization order to match LDML spec
authorYounies Mahmoud <younies.mahmoud@gmail.com>
Tue, 31 Aug 2021 16:40:17 +0000 (16:40 +0000)
committerYounies Mahmoud <younies@chromium.org>
Thu, 2 Sep 2021 10:23:33 +0000 (12:23 +0200)
See #1754

icu4c/source/i18n/measunit_impl.h
icu4c/source/test/intltest/measfmttest.cpp
icu4c/source/test/intltest/numbertest_api.cpp
icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/MeasureUnitTest.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java

index c86176348c97426f957a64203794e96520b48120..888dc04496b6bf45a328a86e8c614ff44d92a9af 100644 (file)
@@ -96,6 +96,7 @@ struct U_I18N_API SingleUnitImpl : public UMemory {
         if (dimensionality > 0 && other.dimensionality < 0) {
             return -1;
         }
+
         // Sort by official quantity order
         int32_t thisQuantity = this->getUnitCategoryIndex();
         int32_t otherQuantity = other.getUnitCategoryIndex();
@@ -105,6 +106,7 @@ struct U_I18N_API SingleUnitImpl : public UMemory {
         if (thisQuantity > otherQuantity) {
             return 1;
         }
+
         // If quantity order didn't help, then we go by index.
         if (index < other.index) {
             return -1;
@@ -112,15 +114,39 @@ struct U_I18N_API SingleUnitImpl : public UMemory {
         if (index > other.index) {
             return 1;
         }
-        // TODO: revisit if the spec dictates prefix sort order - it doesn't
-        // currently. For now we're sorting binary prefixes before SI prefixes,
-        // as per enum values order.
-        if (unitPrefix < other.unitPrefix) {
+
+        // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can
+        // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can
+        // compare the bases.
+        // NOTE: this methodology will fail if the binary prefix more than or equal 98.
+        int32_t unitBase = umeas_getPrefixBase(unitPrefix);
+        int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix);
+
+        // Values for comparison purposes only.
+        int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3
+                                                                 : umeas_getPrefixPower(unitPrefix);
+        int32_t otherUnitPower =
+            otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3
+                                                      : umeas_getPrefixPower(other.unitPrefix);
+
+        // NOTE: if the unitPower is less than the other,
+        // we return 1 not -1. Thus because we want th sorting order
+        // for the bigger prefix to be before the smaller.
+        // Example: megabyte should come before kilobyte.
+        if (unitPower < otherUnitPower) {
+            return 1;
+        }
+        if (unitPower > otherUnitPower) {
             return -1;
         }
-        if (unitPrefix > other.unitPrefix) {
+
+        if (unitBase < otherUnitBase) {
             return 1;
         }
+        if (unitBase > otherUnitBase) {
+            return -1;
+        }
+
         return 0;
     }
 
index 3ea8facca3ccff1a05d1e1aedabd4e05626ac0f4..4fcb9258fa5a00e6d381eedb53c8628b1205bd26 100644 (file)
@@ -4480,10 +4480,14 @@ void MeasureFormatTest::TestIdentifiers() {
         {"dot", "dot"},
 
         // Testing sort order of prefixes.
-        //
-        // TODO(icu-units#70): revisit when fixing normalization. For now we're
-        // just checking some consistency between C&J.
-        {"megafoot-mebifoot-kibifoot-kilofoot", "kibifoot-mebifoot-kilofoot-megafoot"},
+        {"megafoot-mebifoot-kibifoot-kilofoot", "mebifoot-megafoot-kibifoot-kilofoot"},
+        {"per-megafoot-mebifoot-kibifoot-kilofoot", "per-mebifoot-megafoot-kibifoot-kilofoot"},
+        {"megafoot-mebifoot-kibifoot-kilofoot-per-megafoot-mebifoot-kibifoot-kilofoot",
+         "mebifoot-megafoot-kibifoot-kilofoot-per-mebifoot-megafoot-kibifoot-kilofoot"},
+        {"microfoot-millifoot-megafoot-mebifoot-kibifoot-kilofoot",
+         "mebifoot-megafoot-kibifoot-kilofoot-millifoot-microfoot"},
+        {"per-microfoot-millifoot-megafoot-mebifoot-kibifoot-kilofoot",
+         "per-mebifoot-megafoot-kibifoot-kilofoot-millifoot-microfoot"},
     };
     for (const auto &cas : cases) {
         status.setScope(cas.id);
@@ -5045,7 +5049,7 @@ void MeasureFormatTest::TestInternalMeasureUnitImpl() {
         assertEquals("append meter & centimeter: units[1]", "meter",
                      mcm.singleUnits[1]->getSimpleUnitID());
     }
-    assertEquals("append meter & centimeter: identifier", "centimeter-meter",
+    assertEquals("append meter & centimeter: identifier", "meter-centimeter",
                  std::move(mcm).build(status).getIdentifier());
 
     MeasureUnitImpl m2m = MeasureUnitImpl::forIdentifier("meter-square-meter", status);
index cc07d8579510ae8a0a9a95ac8e596fb4996ddbb1..d5c539372c58ca70f7c168c4f3c3fc6228371b3c 100644 (file)
@@ -2283,13 +2283,13 @@ void NumberFormatterApiTest::unitInflections() {
             // tests can't test value0 of the following two rules:
             //   <deriveComponent feature="plural" structure="power" value0="one"  value1="compound"/>
             //   <deriveComponent feature="case" structure="power" value0="nominative"  value1="compound"/>
-            {"square-decimeter-dekameter", "de", nullptr, 1, u"1 Quadratdezimeter⋅Dekameter"},
-            {"square-decimeter-dekameter", "de", "genitive", 1, u"1 Quadratdezimeter⋅Dekameters"},
-            {"square-decimeter-dekameter", "de", nullptr, 2, u"2 Quadratdezimeter⋅Dekameter"},
-            {"square-decimeter-dekameter", "de", "dative", 2, u"2 Quadratdezimeter⋅Dekametern"},
+            {"square-decimeter-dekameter", "de", nullptr, 1, u"1 Dekameter⋅Quadratdezimeter"},
+            {"square-decimeter-dekameter", "de", "genitive", 1, u"1 Dekameter⋅Quadratdezimeter"},
+            {"square-decimeter-dekameter", "de", nullptr, 2, u"2 Dekameter⋅Quadratdezimeter"},
+            {"square-decimeter-dekameter", "de", "dative", 2, u"2 Dekameter⋅Quadratdezimeter"},
             // Feminine "Meile" better demonstrates singular-vs-plural form:
-            {"cubic-mile-dekamile", "de", nullptr, 1, u"1 Kubikmeile⋅Dekameile"},
-            {"cubic-mile-dekamile", "de", nullptr, 2, u"2 Kubikmeile⋅Dekameilen"},
+            {"cubic-mile-dekamile", "de", nullptr, 1, u"1 Dekameile⋅Kubikmeile"},
+            {"cubic-mile-dekamile", "de", nullptr, 2, u"2 Dekameile⋅Kubikmeilen"},
 
             // French handles plural "times" and "power" structures differently:
             // plural form impacts all "numerator" units (denominator remains
index dd976cb55530fc75acef5718cfe58114bf1d7451..64afe4621fddc0428f67202f416dd1dfee3a7a47 100644 (file)
@@ -122,21 +122,35 @@ public class SingleUnitImpl {
         if (index > other.index) {
             return 1;
         }
-        // TODO: revisit if the spec dictates prefix sort order - it doesn't
-        // currently. For now we're sorting binary prefixes before SI prefixes,
-        // as per ICU4C's enum values order.
-        if (this.getPrefix().getBase() < other.getPrefix().getBase()) {
+
+        // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can
+        // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can
+        // compare the bases.
+        // NOTE: this methodology will fail if the binary prefix more than or equal 98.
+        int unitBase = this.unitPrefix.getBase();
+        int otherUnitBase = other.unitPrefix.getBase();
+        // Values for comparison purposes only.
+        int unitPowerComp =
+                unitBase == 1024 /* Binary Prefix */ ? this.unitPrefix.getPower() * 3
+                        : this.unitPrefix.getPower();
+        int otherUnitPowerComp =
+                otherUnitBase == 1024 /* Binary Prefix */ ? other.unitPrefix.getPower() * 3
+                        : other.unitPrefix.getPower();
+
+        if (unitPowerComp < otherUnitPowerComp) {
             return 1;
         }
-        if (this.getPrefix().getBase() > other.getPrefix().getBase()) {
-            return -1;
-        }
-        if (this.getPrefix().getPower() < other.getPrefix().getPower()) {
+        if (unitPowerComp > otherUnitPowerComp) {
             return -1;
         }
-        if (this.getPrefix().getPower() > other.getPrefix().getPower()) {
+
+        if (unitBase < otherUnitBase) {
             return 1;
         }
+        if (unitBase > otherUnitBase) {
+            return -1;
+        }
+
         return 0;
     }
 
index 2916a955d5218cb0d5eacb43097153a60a49aa02..5a6aef116373f6fa05f110bde6213c2b5f09cc74 100644 (file)
@@ -3923,6 +3923,7 @@ public class MeasureUnitTest extends TestFmwk {
             new TestCase("exbibyte", "exbibyte"),
             new TestCase("zebibyte", "zebibyte"),
             new TestCase("yobibyte", "yobibyte"),
+
             // Testing aliases
             new TestCase("foodcalorie", "foodcalorie"),
             new TestCase("dot-per-centimeter", "dot-per-centimeter"),
@@ -3930,10 +3931,11 @@ public class MeasureUnitTest extends TestFmwk {
             new TestCase("dot", "dot"),
 
             // Testing sort order of prefixes.
-            //
-            // TODO(icu-units#70): revisit when fixing normalization. For now we're
-            // just checking some consistency between C&J.
-            new TestCase("megafoot-mebifoot-kibifoot-kilofoot", "kibifoot-mebifoot-kilofoot-megafoot"),
+            new TestCase("megafoot-mebifoot-kibifoot-kilofoot", "mebifoot-megafoot-kibifoot-kilofoot"),
+            new TestCase("per-megafoot-mebifoot-kibifoot-kilofoot", "per-mebifoot-megafoot-kibifoot-kilofoot"),
+            new TestCase("megafoot-mebifoot-kibifoot-kilofoot-per-megafoot-mebifoot-kibifoot-kilofoot", "mebifoot-megafoot-kibifoot-kilofoot-per-mebifoot-megafoot-kibifoot-kilofoot"),
+            new TestCase("microfoot-millifoot-megafoot-mebifoot-kibifoot-kilofoot", "mebifoot-megafoot-kibifoot-kilofoot-millifoot-microfoot"),
+            new TestCase("per-microfoot-millifoot-megafoot-mebifoot-kibifoot-kilofoot", "per-mebifoot-megafoot-kibifoot-kilofoot-millifoot-microfoot"),
         };
 
         for (TestCase testCase : cases) {
@@ -4409,7 +4411,7 @@ public class MeasureUnitTest extends TestFmwk {
         assertEquals("append meter & centimeter: units length", 2, mcm.getSingleUnits().size());
         assertEquals("append meter & centimeter: units[0]", "meter", mcm.getSingleUnits().get(0).getSimpleUnitID());
         assertEquals("append meter & centimeter: units[1]", "meter", mcm.getSingleUnits().get(1).getSimpleUnitID());
-        assertEquals("append meter & centimeter: identifier", "centimeter-meter", mcm.build().getIdentifier());
+        assertEquals("append meter & centimeter: identifier", "meter-centimeter", mcm.build().getIdentifier());
 
         MeasureUnitImpl m2m = MeasureUnitImpl.forIdentifier("meter-square-meter");
         assertEquals("meter-square-meter: complexity", MeasureUnit.Complexity.SINGLE, m2m.getComplexity());
index 9cf6e61498298c576df917aefb3006f192044bdb..edc2d257eee2334b16395f802a41bd2ffcfdd4b5 100644 (file)
@@ -2255,18 +2255,18 @@ public class NumberFormatterApiTest extends TestFmwk {
                 //   <deriveComponent feature="case" structure="power" value0="nominative" value1="compound"/>
 
                 new UnitInflectionTestCase("square-decimeter-dekameter", "de", null, 1,
-                                           "1 Quadratdezimeter⋅Dekameter"),
+                                           "1 Dekameter⋅Quadratdezimeter"),
                 new UnitInflectionTestCase("square-decimeter-dekameter", "de", "genitive", 1,
-                                           "1 Quadratdezimeter⋅Dekameters"),
+                                           "1 Dekameter⋅Quadratdezimeter"),
                 new UnitInflectionTestCase("square-decimeter-dekameter", "de", null, 2,
-                                           "2 Quadratdezimeter⋅Dekameter"),
+                                           "2 Dekameter⋅Quadratdezimeter"),
                 new UnitInflectionTestCase("square-decimeter-dekameter", "de", "dative", 2,
-                                           "2 Quadratdezimeter⋅Dekametern"),
+                                           "2 Dekameter⋅Quadratdezimeter"),
                 // Feminine "Meile" better demonstrates singular-vs-plural form:
                 new UnitInflectionTestCase("cubic-mile-dekamile", "de", null, 1,
-                                           "1 Kubikmeile⋅Dekameile"),
+                                           "1 Dekameile⋅Kubikmeile"),
                 new UnitInflectionTestCase("cubic-mile-dekamile", "de", null, 2,
-                                           "2 Kubikmeile⋅Dekameilen"),
+                                           "2 Dekameile⋅Kubikmeilen"),
 
                 // French handles plural "times" and "power" structures differently:
                 // plural form impacts all "numerator" units (denominator remains