granicus.if.org Git - icu/commitdiff
ICU-21368 unit-test & fix BytesTrie jump delta encoding
author Markus Scherer <markus.icu@gmail.com>
Wed, 3 Mar 2021 02:26:42 +0000 (18:26 -0800)
committer Markus Scherer <markus.icu@gmail.com>
Tue, 9 Mar 2021 01:18:14 +0000 (17:18 -0800)
icu4c/source/common/bytestriebuilder.cpp
icu4c/source/common/unicode/bytestrie.h
icu4c/source/common/unicode/bytestriebuilder.h
icu4c/source/test/intltest/bytestrietest.cpp
icu4j/main/classes/core/src/com/ibm/icu/util/BytesTrie.java
icu4j/main/classes/core/src/com/ibm/icu/util/BytesTrieBuilder.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/BytesTrieTest.java

index ec1ab7d8f5080e5af15799fa8636d3ab98b0ec73..28256f272a74a326bb28e9cb48a607e66a8246ae 100644 (file)
@@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
     U_ASSERT(i>=0);
     if(i<=BytesTrie::kMaxOneByteDelta) {
         return write(i);
+    } else {
+        char intBytes[5];
+        return write(intBytes, internalEncodeDelta(i, intBytes));
     }
-    char intBytes[5];
-    int32_t length;
+}
+
+int32_t
+BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {  // Encodes the non-negative delta i into intBytes; returns the byte count (1..5).
+    U_ASSERT(i>=0);
+    if(i<=BytesTrie::kMaxOneByteDelta) {
+        intBytes[0]=(char)i;  // small delta: the single byte is the delta value itself
+        return 1;
+    }
+    int32_t length=1;  // intBytes[0] receives the lead byte below; length is the index of the next byte to write
     if(i<=BytesTrie::kMaxTwoByteDelta) {
         intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
-        length=1;
     } else {
         if(i<=BytesTrie::kMaxThreeByteDelta) {
             intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
-            length=2;
         } else {
             if(i<=0xffffff) {
                 intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
-                length=3;
             } else {
                 intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
                 intBytes[1]=(char)(i>>24);
-                length=4;
+                length=2;  // the lead byte and bits 31..24 already occupy intBytes[0] and intBytes[1]
             }
-            intBytes[1]=(char)(i>>16);
+            intBytes[length++]=(char)(i>>16);  // bits 23..16, appended after whatever was written so far
         }
-        intBytes[1]=(char)(i>>8);
+        intBytes[length++]=(char)(i>>8);  // bits 15..8
     }
     intBytes[length++]=(char)i;
-    return write(intBytes, length);
+    return length;  // total number of bytes stored in intBytes
 }
 
 U_NAMESPACE_END
index 85f802df420262087ed55b9f81d0012b024131e9..271a81d1b4d7d492ae5d3aa595020fab525e79b2 100644 (file)
@@ -30,6 +30,8 @@
 #include "unicode/uobject.h"
 #include "unicode/ustringtrie.h"
 
+class BytesTrieTest;
+
 U_NAMESPACE_BEGIN
 
 class ByteSink;
@@ -378,6 +380,7 @@ public:
 
 private:
     friend class BytesTrieBuilder;
+    friend class ::BytesTrieTest;
 
     /**
      * Constructs a BytesTrie reader instance.
index cae16e48b45b5bc8730710161b97f14b3b58c020..3cff89e443de5e26ceca429340037922ba9ae8ce 100644 (file)
@@ -30,6 +30,8 @@
 #include "unicode/stringpiece.h"
 #include "unicode/stringtriebuilder.h"
 
+class BytesTrieTest;
+
 U_NAMESPACE_BEGIN
 
 class BytesTrieElement;
@@ -125,6 +127,8 @@ public:
     BytesTrieBuilder &clear();
 
 private:
+    friend class ::BytesTrieTest;
+
     BytesTrieBuilder(const BytesTrieBuilder &other);  // no copy constructor
     BytesTrieBuilder &operator=(const BytesTrieBuilder &other);  // no assignment operator
 
@@ -168,6 +172,7 @@ private:
     virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
     virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
     virtual int32_t writeDeltaTo(int32_t jumpTarget);
+    static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
 
     CharString *strings;  // Pointer not object so we need not #include internal charstr.h.
     BytesTrieElement *elements;
index bdf0b9003bb7976e856dbe3417cb01ae1d4a7a0b..3aaa5c9e4fa20b2d612efa5b08eb27fab351ac49 100644 (file)
@@ -56,6 +56,7 @@ public:
     void TestTruncatingIteratorFromLinearMatchLong();
     void TestIteratorFromBytes();
     void TestFailedIterator();
+    void TestDelta();
 
     void checkData(const StringAndValue data[], int32_t dataLength);
     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
@@ -110,6 +111,7 @@ void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
     TESTCASE_AUTO(TestIteratorFromBytes);
     TESTCASE_AUTO(TestFailedIterator);
+    TESTCASE_AUTO(TestDelta);
     TESTCASE_AUTO_END;
 }
 
@@ -599,6 +601,45 @@ void BytesTrieTest::TestFailedIterator() {
     }
 }
 
+void BytesTrieTest::TestDelta() {  // Checks encoded length, fully written bytes, and decode round trip for sample deltas.
+    char intBytes0[5];
+    char intBytes1[5];
+    static constexpr int32_t sampleDeltas[] = {  // a negative entry -n is a marker: the deltas after it must encode to n bytes
+        -1, 0, 1, 2, 3, 0xa5, 0xbe, 0xbf,
+        -2, 0xc0, 0xc1, 0xeee, 0x1234, 0x2ffe, 0x2fff,
+        -3, 0x3000, 0x3001, 0x3003, 0x50005, 0xdfffe, 0xdffff,
+        -4, 0xe0000, 0xe0001, 0xef0123, 0xfffffe, 0xffffff,
+        -5, 0x1000000, 0x1000001, 0x7fffffff
+    };
+    int32_t expectedLength = 0;
+    for (int32_t delta : sampleDeltas) {
+        if (delta < 0) {
+            expectedLength = -delta;  // marker, not a delta: remember the expected length and move on
+            continue;
+        }
+        // Encode the same delta into a zero-filled and a one-filled buffer:
+        // a byte inside the returned length that the encoder skipped will then differ.
+        memset(intBytes0, 0, sizeof(intBytes0));
+        memset(intBytes1, 1, sizeof(intBytes1));
+        int32_t length0 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes0);
+        int32_t length1 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes1);
+        assertTrue(UnicodeString(u"non-zero length to encode delta ") + delta, length0 > 0);
+        assertEquals(UnicodeString(u"consistent length to encode delta ") + delta, length0, length1);
+        assertEquals(UnicodeString(u"expected length to encode delta ") + delta,
+                     expectedLength, length0);
+        for (int32_t i = 0; i < length0; ++i) {  // compare only the bytes the encoder reported as written
+            uint8_t b0 = intBytes0[i];
+            uint8_t b1 = intBytes1[i];
+            assertEquals(UnicodeString(u"differently encoded delta ") + delta +
+                            u" at byte index " + i, b0, b1);
+        }
+        const uint8_t *start = (const uint8_t *)intBytes0;
+        const uint8_t *pos = BytesTrie::jumpByDelta(start);  // decode: pos must land delta bytes past the end of the encoded delta
+        assertEquals(UnicodeString(u"roundtrip for delta ") + delta,
+                     delta, (int32_t)(pos - start) - length0);
+    }
+}
+
 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
     logln("checkData(dataLength=%d, fast)", (int)dataLength);
     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
index 8bc778eece21e567a40064df712b09af6897dee7..5854b2a98fb42398c458dc56e47faa32e1e35ad1 100644 (file)
@@ -794,8 +794,13 @@ public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
         return skipValue(pos, leadByte);
     }
 
-    // Reads a jump delta and jumps.
-    private static int jumpByDelta(byte[] bytes, int pos) {
+    /**
+     * Reads a jump delta and jumps.
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public static int jumpByDelta(byte[] bytes, int pos) {
         int delta=bytes[pos++]&0xff;
         if(delta<kMinTwoByteDeltaLead) {
             // nothing to do
index 5e3d47371b0b165dc7d90c574c97eb23e092654f..4ce986e092a5cf91a131df4fab5db0feb767685f 100644 (file)
@@ -35,8 +35,11 @@ public final class BytesTrieBuilder extends StringTrieBuilder {
             s=sequence;
             len=length;
         }
+        @Override
         public char charAt(int i) { return (char)(s[i]&0xff); }
+        @Override
         public int length() { return len; }
+        @Override
         public CharSequence subSequence(int start, int end) { return null; }
 
         private byte[] s;
@@ -278,30 +281,41 @@ public final class BytesTrieBuilder extends StringTrieBuilder {
         assert(i>=0);
         if(i<=BytesTrie.kMaxOneByteDelta) {
             return write(i);
+        } else {
+            return write(intBytes, internalEncodeDelta(i, intBytes));
         }
-        int length;
+    }
+    /** Encodes the non-negative jump delta i into intBytes and returns the number of bytes written (1..5).
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public static final int internalEncodeDelta(int i, byte[] intBytes) {
+        assert(i>=0);
+        if(i<=BytesTrie.kMaxOneByteDelta) {
+            intBytes[0]=(byte)i;  // small delta: the single byte is the delta value itself
+            return 1;
+        }
+        int length=1;  // intBytes[0] receives the lead byte below; length is the index of the next byte to write
         if(i<=BytesTrie.kMaxTwoByteDelta) {
             intBytes[0]=(byte)(BytesTrie.kMinTwoByteDeltaLead+(i>>8));
-            length=1;
         } else {
             if(i<=BytesTrie.kMaxThreeByteDelta) {
                 intBytes[0]=(byte)(BytesTrie.kMinThreeByteDeltaLead+(i>>16));
-                length=2;
             } else {
                 if(i<=0xffffff) {
                     intBytes[0]=(byte)BytesTrie.kFourByteDeltaLead;
-                    length=3;
                 } else {
                     intBytes[0]=(byte)BytesTrie.kFiveByteDeltaLead;
                     intBytes[1]=(byte)(i>>24);
-                    length=4;
+                    length=2;  // the lead byte and bits 31..24 already occupy intBytes[0] and intBytes[1]
                 }
-                intBytes[1]=(byte)(i>>16);
+                intBytes[length++]=(byte)(i>>16);  // bits 23..16, appended after whatever was written so far
             }
-            intBytes[1]=(byte)(i>>8);
+            intBytes[length++]=(byte)(i>>8);  // bits 15..8
         }
         intBytes[length++]=(byte)i;
-        return write(intBytes, length);
+        return length;  // total number of bytes stored in intBytes
     }
 
     // Byte serialization of the trie.
index da16f83f60784c653a31ff5ded97b45736d55b90..abe461ed46964ff85bf23ef95e6984269656dcfd 100644 (file)
@@ -13,6 +13,7 @@
 package com.ibm.icu.dev.test.util;
 
 import java.nio.ByteBuffer;
+import java.util.Arrays;
 import java.util.NoSuchElementException;
 
 import org.junit.Test;
@@ -531,6 +532,42 @@ public class BytesTrieTest extends TestFmwk {
         assertEquals("abc value", 300, copy.getValue());
     }
 
+    @Test
+    public void TestDelta() {  // Checks encoded length, fully written bytes, and decode round trip for sample deltas.
+        byte[] intBytes0 = new byte[5];
+        byte[] intBytes1 = new byte[5];
+        int[] sampleDeltas = new int[] {  // a negative entry -n is a marker: the deltas after it must encode to n bytes
+            -1, 0, 1, 2, 3, 0xa5, 0xbe, 0xbf,
+            -2, 0xc0, 0xc1, 0xeee, 0x1234, 0x2ffe, 0x2fff,
+            -3, 0x3000, 0x3001, 0x3003, 0x50005, 0xdfffe, 0xdffff,
+            -4, 0xe0000, 0xe0001, 0xef0123, 0xfffffe, 0xffffff,
+            -5, 0x1000000, 0x1000001, 0x7fffffff
+        };
+        int expectedLength = 0;
+        for (int delta : sampleDeltas) {
+            if (delta < 0) {
+                expectedLength = -delta;  // marker, not a delta: remember the expected length and move on
+                continue;
+            }
+            // Encode the same delta into a zero-filled and a one-filled buffer:
+            // a byte inside the returned length that the encoder skipped will then differ.
+            Arrays.fill(intBytes0, (byte)0);
+            Arrays.fill(intBytes1, (byte)1);
+            int length0 = BytesTrieBuilder.internalEncodeDelta(delta, intBytes0);
+            int length1 = BytesTrieBuilder.internalEncodeDelta(delta, intBytes1);
+            assertTrue("non-zero length to encode delta " + delta, length0 > 0);
+            assertEquals("consistent length to encode delta " + delta, length0, length1);
+            assertEquals("expected length to encode delta " + delta, expectedLength, length0);
+            for (int i = 0; i < length0; ++i) {  // compare only the bytes the encoder reported as written
+                byte b0 = intBytes0[i];
+                byte b1 = intBytes1[i];
+                assertEquals("differently encoded delta " + delta + " at byte index " + i, b0, b1);
+            }
+            int pos = BytesTrie.jumpByDelta(intBytes0, 0);  // decode: pos must land delta bytes past the end of the encoded delta
+            assertEquals("roundtrip for delta " + delta, delta, pos - length0);
+        }
+    }
+
     private void checkData(StringAndValue data[]) {
         checkData(data, data.length);
     }