]> granicus.if.org Git - icu/commitdiff
ICU-10793 Add optimization to ListFormatter.
authorTravis Keep <keep94@gmail.com>
Wed, 2 Apr 2014 21:43:11 +0000 (21:43 +0000)
committerTravis Keep <keep94@gmail.com>
Wed, 2 Apr 2014 21:43:11 +0000 (21:43 +0000)
X-SVN-Rev: 35579

icu4c/source/common/listformatter.cpp
icu4c/source/common/simplepatternformatter.cpp
icu4c/source/common/simplepatternformatter.h
icu4c/source/test/intltest/simplepatternformattertest.cpp

index 17bc0666d8bd6a030742b35e6ab731a8e3947d5d..bc8aef92e719ffc3e315ea9870d12300594630eb 100644 (file)
@@ -355,9 +355,14 @@ UnicodeString& ListFormatter::format(
             errorCode);
     int32_t i;
     int32_t pos = 0;
-    int32_t npos = 1;
+    int32_t npos = 0;
+    UBool startsWithZeroPlaceholder =
+            data->middlePattern.startsWithPlaceholder(0);
     for (i = 2; i < nItems - 1; ++i) {
-         temp[npos].remove();
+         if (!startsWithZeroPlaceholder) {
+             npos = (pos + 1) & 1;
+             temp[npos].remove();
+         }
          joinStrings(
                  data->middlePattern,
                  temp[pos],
@@ -367,9 +372,11 @@ UnicodeString& ListFormatter::format(
                  offset,
                  errorCode);
          pos = npos;
-         npos = (pos + 1) & 1;
     }
-    temp[npos].remove();
+    if (!data->endPattern.startsWithPlaceholder(0)) {
+        npos = (pos + 1) & 1;
+        temp[npos].remove();
+    }
     joinStrings(
             data->endPattern,
             temp[pos],
index 4c2cdac28d21554fa81c6def822868f5db2bfb14..dd478c2c187759b5e067ccd8f8b0101ef64f4b79 100644 (file)
@@ -56,17 +56,15 @@ void SimplePatternFormatterIdBuilder::add(UChar ch) {
 
 SimplePatternFormatter::SimplePatternFormatter() :
         noPlaceholders(),
-        placeholdersByOffset(placeholderBuffer),
+        placeholders(),
         placeholderSize(0),
-        placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
         placeholderCount(0) {
 }
 
 SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
         noPlaceholders(),
-        placeholdersByOffset(placeholderBuffer),
+        placeholders(),
         placeholderSize(0),
-        placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
         placeholderCount(0) {
     UErrorCode status = U_ZERO_ERROR;
     compile(pattern, status);
@@ -75,15 +73,14 @@ SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
 SimplePatternFormatter::SimplePatternFormatter(
         const SimplePatternFormatter &other) :
         noPlaceholders(other.noPlaceholders),
-        placeholdersByOffset(placeholderBuffer),
+        placeholders(),
         placeholderSize(0),
-        placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
         placeholderCount(other.placeholderCount) {
     placeholderSize = ensureCapacity(other.placeholderSize);
     uprv_memcpy(
-            placeholdersByOffset,
-            other.placeholdersByOffset,
-            placeholderSize * 2 * sizeof(int32_t));
+            placeholders.getAlias(),
+            other.placeholders.getAlias(),
+            placeholderSize * sizeof(PlaceholderInfo));
 }
 
 SimplePatternFormatter &SimplePatternFormatter::operator=(
@@ -92,19 +89,16 @@ SimplePatternFormatter &SimplePatternFormatter::operator=(
         return *this;
     }
     noPlaceholders = other.noPlaceholders;
-    placeholderCount = other.placeholderCount;
     placeholderSize = ensureCapacity(other.placeholderSize);
+    placeholderCount = other.placeholderCount;
     uprv_memcpy(
-            placeholdersByOffset,
-            other.placeholdersByOffset,
-            placeholderSize * 2 * sizeof(int32_t));
+            placeholders.getAlias(),
+            other.placeholders.getAlias(),
+            placeholderSize * sizeof(PlaceholderInfo));
     return *this;
 }
 
 SimplePatternFormatter::~SimplePatternFormatter() {
-    if (placeholdersByOffset != placeholderBuffer) {
-        uprv_free(placeholdersByOffset);
-    }
 }
 
 UBool SimplePatternFormatter::compile(
@@ -183,6 +177,13 @@ UBool SimplePatternFormatter::compile(
     return TRUE;
 }
 
+UBool SimplePatternFormatter::startsWithPlaceholder(int32_t id) const {
+    if (placeholderSize == 0) {
+        return FALSE;
+    }
+    return (placeholders[0].offset == 0 && placeholders[0].id == id);
+}
+
 UnicodeString& SimplePatternFormatter::format(
         const UnicodeString &arg0,
         UnicodeString &appendTo,
@@ -267,72 +268,70 @@ UnicodeString& SimplePatternFormatter::format(
         appendTo.append(noPlaceholders);
         return appendTo;
     }
-    appendRange(
-            noPlaceholders,
-            0,
-            placeholdersByOffset[0],
-            appendTo);
-    updatePlaceholderOffset(
-            placeholdersByOffset[1],
-            appendTo.length(),
-            offsetArray,
-            offsetArrayLength);
-    appendTo.append(*placeholderValues[placeholdersByOffset[1]]);
+    if (placeholders[0].offset > 0 ||
+            placeholderValues[placeholders[0].id] != &appendTo) {
+        appendRange(
+                noPlaceholders,
+                0,
+                placeholders[0].offset,
+                appendTo);
+        updatePlaceholderOffset(
+                placeholders[0].id,
+                appendTo.length(),
+                offsetArray,
+                offsetArrayLength);
+        appendTo.append(*placeholderValues[placeholders[0].id]);
+    } else {
+        updatePlaceholderOffset(
+                placeholders[0].id,
+                0,
+                offsetArray,
+                offsetArrayLength);
+    }
     for (int32_t i = 1; i < placeholderSize; ++i) {
         appendRange(
                 noPlaceholders,
-                placeholdersByOffset[2 * i - 2],
-                placeholdersByOffset[2 * i],
+                placeholders[i - 1].offset,
+                placeholders[i].offset,
                 appendTo);
         updatePlaceholderOffset(
-                placeholdersByOffset[2 * i + 1],
+                placeholders[i].id,
                 appendTo.length(),
                 offsetArray,
                 offsetArrayLength);
-        appendTo.append(*placeholderValues[placeholdersByOffset[2 * i + 1]]);
+        appendTo.append(*placeholderValues[placeholders[i].id]);
     }
     appendRange(
             noPlaceholders,
-            placeholdersByOffset[2 * placeholderSize - 2],
+            placeholders[placeholderSize - 1].offset,
             noPlaceholders.length(),
             appendTo);
     return appendTo;
 }
 
-int32_t SimplePatternFormatter::ensureCapacity(int32_t atLeast) {
-    if (atLeast <= placeholderCapacity) {
-        return atLeast;
+int32_t SimplePatternFormatter::ensureCapacity(
+        int32_t desiredCapacity, int32_t allocationSize) {
+    if (allocationSize < desiredCapacity) {
+        allocationSize = desiredCapacity;
     }
-    // aim to double capacity each time
-    int32_t newCapacity = 2*atLeast - 2;
-
-    // allocate new buffer
-    int32_t *newBuffer = (int32_t *) uprv_malloc(2 * newCapacity * sizeof(int32_t));
-    if (newBuffer == NULL) {
-        return placeholderCapacity;
+    if (desiredCapacity <= placeholders.getCapacity()) {
+        return desiredCapacity;
     }
-
-    // Copy contents of old buffer to new buffer
-    uprv_memcpy(newBuffer, placeholdersByOffset, 2 * placeholderSize * sizeof(int32_t));
-
-    // free old buffer
-    if (placeholdersByOffset != placeholderBuffer) {
-        uprv_free(placeholdersByOffset);
+    // allocate new buffer
+    if (placeholders.resize(allocationSize, placeholderSize) == NULL) {
+        return placeholders.getCapacity();
     }
-
-    // Use new buffer
-    placeholdersByOffset = newBuffer;
-    placeholderCapacity = newCapacity;
-    return atLeast;
+    return desiredCapacity;
 }
 
 UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
-    if (ensureCapacity(placeholderSize + 1) < placeholderSize + 1) {
+    if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) {
         return FALSE;
     }
     ++placeholderSize;
-    placeholdersByOffset[2 * placeholderSize - 2] = offset;
-    placeholdersByOffset[2 * placeholderSize - 1] = id;
+    PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1];
+    placeholderEnd->offset = offset;
+    placeholderEnd->id = id;
     if (id >= placeholderCount) {
         placeholderCount = id + 1;
     }
index 528122adc7b6757555741f6a6ca552688c1f9f22..4a43c37d9d65d76c738a55c979b3780b74b1a4cc 100644 (file)
 
 #define EXPECTED_PLACEHOLDER_COUNT 3
 
+#include "cmemory.h"
 #include "unicode/utypes.h"
 #include "unicode/unistr.h"
 
 U_NAMESPACE_BEGIN
 
+struct PlaceholderInfo {
+  int32_t id;
+  int32_t offset;
+};
+
 /**
  * Compiled version of a pattern string such as "{1} was born in {0}".
  * <p>
@@ -84,6 +90,12 @@ public:
         return placeholderCount;
     }
 
+    /**
+     * Returns true if the pattern this object represents starts with
+     * placeholder id; otherwise, returns false.
+     */
+    UBool startsWithPlaceholder(int32_t id) const;
+
     /**
      * Formats given value.
      */
@@ -120,7 +132,12 @@ public:
      * @param placeholderValueCount the number of placeholder values
      *  must be at least large enough to provide values for all placeholders
      *  in this object. Otherwise status set to U_ILLEGAL_ARGUMENT_ERROR.
-     * @param appendTo resulting string appended here.
+     * @param appendTo resulting string appended here. Optimization: If
+     *   the pattern this object represents starts with a placeholder AND
+     *   appendTo references the value of that same placeholder, then that
+     *   placeholder value is not copied to appendTo (Its already there).
+     *   If the value of the starting placeholder is a very large string,
+     *   this optimization can offer huge savings.
      * @param offsetArray The offset of each placeholder value in appendTo
      *  stored here. The first value gets the offset of the value for {0};
      *  the 2nd for {1}; the 3rd for {2} etc. -1 means that the corresponding
@@ -139,12 +156,32 @@ public:
             UErrorCode &status) const;
 private:
     UnicodeString noPlaceholders;
-    int32_t placeholderBuffer[EXPECTED_PLACEHOLDER_COUNT * 2];
-    int32_t *placeholdersByOffset;
+    MaybeStackArray<PlaceholderInfo, 3> placeholders;
     int32_t placeholderSize;
-    int32_t placeholderCapacity;
     int32_t placeholderCount;
-    int32_t ensureCapacity(int32_t size);
+    
+    // ensureCapacity ensures that the capacity of the placeholders array
+    // is desiredCapacity. If ensureCapacity must resize the placeholders
+    // array, the first placeholderSize elements stay in the array. Note
+    // that ensureCapcity NEVER changes the value of placeholderSize only
+    // the capacity of the placeholders array.
+    // If there is no memory allocation error when resizing, this
+    // function returns desiredCapacity. If there is a memory allocation
+    // error, this function leaves the placeholders array unchanged and
+    // returns the smaller, old capacity. ensureCapacity resizes only if
+    // the current capacity of placeholders array is less than desiredCapacity.
+    // Otherwise, it leaves the placeholders array unchanged. If caller
+    // specifies an allocation size, then it must be at least as large as
+    // desiredCapacity. In that case, if ensureCapacity resizes, it will
+    // allocate allocationSize spots instead of desiredCapacity spots in
+    // the array. If caller is calling ensureCapacity in a loop while adding
+    // elements, it is recommended that it use an allocationSize of
+    // approximately twice desiredCapacity to avoid memory allocation with
+    // every call to ensureCapacity.
+    int32_t ensureCapacity(int32_t desiredCapacity, int32_t allocationSize=0);
+
+    // Records the offset of an individual placeholder in the noPlaceholders
+    // string.
     UBool addPlaceholder(int32_t id, int32_t offset);
 };
 
index 2879d52130c62b509b66ffed90ea2148b483886e..d86cb60d05fa30b2ecf86f34e573ef4a28a38cf1 100644 (file)
@@ -21,6 +21,7 @@ public:
     void TestNoPlaceholders();
     void TestOnePlaceholder();
     void TestManyPlaceholders();
+    void TestOptimization();
     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
 private:
 };
@@ -30,6 +31,7 @@ void SimplePatternFormatterTest::runIndexedTest(int32_t index, UBool exec, const
   TESTCASE_AUTO(TestNoPlaceholders);
   TESTCASE_AUTO(TestOnePlaceholder);
   TESTCASE_AUTO(TestManyPlaceholders);
+  TESTCASE_AUTO(TestOptimization);
   TESTCASE_AUTO_END;
 }
 
@@ -39,14 +41,14 @@ void SimplePatternFormatterTest::TestNoPlaceholders() {
     assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount());
     UnicodeString appendTo;
     assertEquals(
-            "Evaluate",
+            "format",
             "This doesn't have templates {0}", 
             fmt.format("unused", appendTo, status));
     fmt.compile("This has {} bad {012d placeholders", status);
     assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount());
     appendTo.remove();
     assertEquals(
-            "Evaluate",
+            "format",
             "This has {} bad {012d placeholders", 
             fmt.format("unused", appendTo, status));
     assertSuccess("Status", status);
@@ -59,7 +61,7 @@ void SimplePatternFormatterTest::TestOnePlaceholder() {
     assertEquals("PlaceholderCount", 1, fmt.getPlaceholderCount());
     UnicodeString appendTo;
     assertEquals(
-            "Evaluate",
+            "format",
             "1 meter",
             fmt.format("1", appendTo, status));
     assertSuccess("Status", status);
@@ -88,6 +90,8 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
     SimplePatternFormatter fmt;
     fmt.compile(
             "Templates {2}{1}{5} and {4} are out of order.", status);
+    assertSuccess("Status", status);
+    assertFalse("startsWithPlaceholder", fmt.startsWithPlaceholder(2));
     assertEquals("PlaceholderCount", 6, fmt.getPlaceholderCount());
     UnicodeString values[] = {
             "freddy", "tommy", "frog", "billy", "leg", "{0}"};
@@ -97,7 +101,7 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
     int32_t expectedOffsets[6] = {-1, 22, 18, -1, 35, 27};
     UnicodeString appendTo("Prefix: ");
     assertEquals(
-            "Evaluate",
+            "format",
             "Prefix: Templates frogtommy{0} and leg are out of order.",
             fmt.format(
                     params,
@@ -187,6 +191,40 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
     assertSuccess("Status", status);
 }
 
+void SimplePatternFormatterTest::TestOptimization() {
+    UErrorCode status = U_ZERO_ERROR;
+    SimplePatternFormatter fmt;
+    fmt.compile("{2}, {0}, {1} and {3}", status);
+    assertSuccess("Status", status);
+    assertTrue("startsWithPlaceholder", fmt.startsWithPlaceholder(2));
+    assertFalse("startsWithPlaceholder", fmt.startsWithPlaceholder(0));
+    UnicodeString values[] = {
+            "freddy", "frog", "leg", "by"};
+    UnicodeString *params[] = {
+           &values[0], &values[1], &values[2], &values[3]}; 
+    int32_t offsets[4];
+    int32_t expectedOffsets[4] = {5, 13, 0, 22};
+
+    // The pattern starts with {2}, so format should append the result of
+    // the rest of the pattern to values[2], the value for {2}.
+    assertEquals(
+            "format",
+            "leg, freddy, frog and by",
+            fmt.format(
+                    params,
+                    LENGTHOF(params),
+                    values[2],
+                    offsets,
+                    LENGTHOF(offsets),
+                    status));
+    assertSuccess("Status", status);
+    for (int32_t i = 0; i < LENGTHOF(expectedOffsets); ++i) {
+        if (expectedOffsets[i] != offsets[i]) {
+            errln("Expected %d, got %d", expectedOffsets[i], offsets[i]);
+        }
+    }
+}
+
 extern IntlTest *createSimplePatternFormatterTest() {
     return new SimplePatternFormatterTest();
 }