granicus.if.org Git - icu/commitdiff
ICU-20991 Trace BreakIterator/BreakEngine creation
author Frank Tang <ftang@chromium.org>
Fri, 6 Mar 2020 19:17:02 +0000 (19:17 +0000)
committer Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>
Fri, 6 Mar 2020 22:18:43 +0000 (14:18 -0800)
See #1014

icu4c/source/common/brkiter.cpp
icu4c/source/common/dictbe.cpp
icu4c/source/common/rbbi.cpp
icu4c/source/common/rbbidata.h
icu4c/source/common/unicode/utrace.h
icu4c/source/test/intltest/rbbitst.cpp
icu4c/source/test/intltest/rbbitst.h

index 3d1366a6688009ac16d9f3b92c661cf9fcfa95a1..b9b6ca65cd4baed27e6c8e83a5386d8940c9dd80 100644 (file)
@@ -38,6 +38,7 @@
 #include "uresimp.h"
 #include "uassert.h"
 #include "ubrkimpl.h"
+#include "utracimp.h"
 #include "charstr.h"
 
 // *****************************************************************************
@@ -412,14 +413,23 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
     BreakIterator *result = NULL;
     switch (kind) {
     case UBRK_CHARACTER:
-        result = BreakIterator::buildInstance(loc, "grapheme", status);
+        {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
+            result = BreakIterator::buildInstance(loc, "grapheme", status);
+            UTRACE_EXIT_STATUS(status);
+        }
         break;
     case UBRK_WORD:
-        result = BreakIterator::buildInstance(loc, "word", status);
+        {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
+            result = BreakIterator::buildInstance(loc, "word", status);
+            UTRACE_EXIT_STATUS(status);
+        }
         break;
     case UBRK_LINE:
-        uprv_strcpy(lbType, "line");
         {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
+            uprv_strcpy(lbType, "line");
             char lbKeyValue[kKeyValueLenMax] = {0};
             UErrorCode kvStatus = U_ZERO_ERROR;
             int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
@@ -427,13 +437,17 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
                 uprv_strcat(lbType, "_");
                 uprv_strcat(lbType, lbKeyValue);
             }
+            result = BreakIterator::buildInstance(loc, lbType, status);
+
+            UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
+            UTRACE_EXIT_STATUS(status);
         }
-        result = BreakIterator::buildInstance(loc, lbType, status);
         break;
     case UBRK_SENTENCE:
-        result = BreakIterator::buildInstance(loc, "sentence", status);
-#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
         {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
+            result = BreakIterator::buildInstance(loc, "sentence", status);
+#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
             char ssKeyValue[kKeyValueLenMax] = {0};
             UErrorCode kvStatus = U_ZERO_ERROR;
             int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
@@ -444,11 +458,16 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
                     delete fbiBuilder;
                 }
             }
-        }
 #endif
+            UTRACE_EXIT_STATUS(status);
+        }
         break;
     case UBRK_TITLE:
-        result = BreakIterator::buildInstance(loc, "title", status);
+        {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
+            result = BreakIterator::buildInstance(loc, "title", status);
+            UTRACE_EXIT_STATUS(status);
+        }
         break;
     default:
         status = U_ILLEGAL_ARGUMENT_ERROR;
index c769138ae4b3919838073265f27c0e05447416ad..b42cdf03fae934d08917602fb1df3f69f75d998d 100644 (file)
@@ -18,6 +18,7 @@
 #include "unicode/uniset.h"
 #include "unicode/chariter.h"
 #include "unicode/ubrk.h"
+#include "utracimp.h"
 #include "uvectr32.h"
 #include "uvector.h"
 #include "uassert.h"
@@ -194,6 +195,8 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
     : DictionaryBreakEngine(),
       fDictionary(adoptDictionary)
 {
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
     fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
     if (U_SUCCESS(status)) {
         setCharacters(fThaiWordSet);
@@ -213,6 +216,7 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
     fEndWordSet.compact();
     fBeginWordSet.compact();
     fSuffixSet.compact();
+    UTRACE_EXIT_STATUS(status);
 }
 
 ThaiBreakEngine::~ThaiBreakEngine() {
@@ -436,6 +440,8 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
     : DictionaryBreakEngine(),
       fDictionary(adoptDictionary)
 {
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
     fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
     if (U_SUCCESS(status)) {
         setCharacters(fLaoWordSet);
@@ -452,6 +458,7 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
     fMarkSet.compact();
     fEndWordSet.compact();
     fBeginWordSet.compact();
+    UTRACE_EXIT_STATUS(status);
 }
 
 LaoBreakEngine::~LaoBreakEngine() {
@@ -632,6 +639,8 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
     : DictionaryBreakEngine(),
       fDictionary(adoptDictionary)
 {
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
     fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
     if (U_SUCCESS(status)) {
         setCharacters(fBurmeseWordSet);
@@ -645,6 +654,7 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
     fMarkSet.compact();
     fEndWordSet.compact();
     fBeginWordSet.compact();
+    UTRACE_EXIT_STATUS(status);
 }
 
 BurmeseBreakEngine::~BurmeseBreakEngine() {
@@ -825,6 +835,8 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
     : DictionaryBreakEngine(),
       fDictionary(adoptDictionary)
 {
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
     fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
     if (U_SUCCESS(status)) {
         setCharacters(fKhmerWordSet);
@@ -850,6 +862,7 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
     fEndWordSet.compact();
     fBeginWordSet.compact();
 //    fSuffixSet.compact();
+    UTRACE_EXIT_STATUS(status);
 }
 
 KhmerBreakEngine::~KhmerBreakEngine() {
@@ -1045,6 +1058,8 @@ foundBest:
 static const uint32_t kuint32max = 0xFFFFFFFF;
 CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
 : DictionaryBreakEngine(), fDictionary(adoptDictionary) {
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
     // Korean dictionary only includes Hangul syllables
     fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
     fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
@@ -1066,6 +1081,7 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType
             setCharacters(cjSet);
         }
     }
+    UTRACE_EXIT_STATUS(status);
 }
 
 CjkBreakEngine::~CjkBreakEngine(){
index f80c3e0c6248120992228f00e7e7bf4779e0742c..43ba58ba9e657723cefbc758eecc1264bfd6274a 100644 (file)
@@ -1117,7 +1117,7 @@ static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER;
  * Release all static memory held by breakiterator.
  */
 U_CDECL_BEGIN
-static UBool U_CALLCONV rbbi_cleanup(void) {
+UBool U_CALLCONV rbbi_cleanup(void) {
     delete gLanguageBreakFactories;
     gLanguageBreakFactories = nullptr;
     delete gEmptyString;
index b7de6ce073cbc50d56afdef54d257d2143655a92..7b9b8d825269a6d9a246efbec95dc1bb12c1d0d4 100644 (file)
@@ -192,6 +192,8 @@ private:
 
 U_NAMESPACE_END
 
+// Releases the static memory held by the break iterator implementation.
+// Declared here so callers outside the defining file (the trace test calls
+// it) can reset that state. The calling convention matches the definition,
+// which is written as `UBool U_CALLCONV rbbi_cleanup(void)`.
+U_CFUNC UBool U_CALLCONV rbbi_cleanup(void);
+
 #endif /* C++ */
 
 #endif
index 5afcd9f4490828cca7be05292cd5e955317a9a85..5b4a0497bf15385e50be4bb6248bac48dca1d966 100644 (file)
@@ -177,6 +177,71 @@ typedef enum UTraceFunctionNumber {
     UTRACE_RES_DATA_LIMIT,
 #endif  // U_HIDE_INTERNAL_API
 
+#ifndef U_HIDE_DRAFT_API
+    /**
+     * The lowest break iterator location.
+     * @draft ICU 67
+     */
+    UTRACE_UBRK_START=0x4000,
+
+    /**
+     * Indicates that a character instance of break iterator was created.
+     *
+     * @draft ICU 67
+     */
+    UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
+
+    /**
+     * Indicates that a word instance of break iterator was created.
+     *
+     * @draft ICU 67
+     */
+    UTRACE_UBRK_CREATE_WORD,
+
+    /**
+     * Indicates that a line instance of break iterator was created.
+     *
+     * Provides one C-style string to UTraceData: the lb value ("",
+     * "loose", "strict", or "normal").
+     *
+     * @draft ICU 67
+     */
+    UTRACE_UBRK_CREATE_LINE,
+
+    /**
+     * Indicates that a sentence instance of break iterator was created.
+     *
+     * @draft ICU 67
+     */
+    UTRACE_UBRK_CREATE_SENTENCE,
+
+    /**
+     * Indicates that a title instance of break iterator was created.
+     *
+     * @draft ICU 67
+     */
+    UTRACE_UBRK_CREATE_TITLE,
+
+    /**
+     * Indicates that an internal dictionary break engine was created.
+     *
+     * Provides one C-style string to UTraceData: the script code that
+     * the break engine covers ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
+     *
+     * @draft ICU 67
+     */
+    UTRACE_UBRK_CREATE_BREAK_ENGINE,
+
+#endif  // U_HIDE_DRAFT_API
+
+#ifndef U_HIDE_INTERNAL_API
+    /**
+     * One more than the highest normal break iterator trace location.
+     * @internal The numeric value may change over time, see ICU ticket #12420.
+     */
+    UTRACE_UBRK_LIMIT,
+#endif  // U_HIDE_INTERNAL_API
+
 } UTraceFunctionNumber;
 
 /**
index a5b8f13e4e83454df50d720864f4bb3ed5a0a5af..0ca89f6d64f6e4766c76282f4098fcdb76457c73 100644 (file)
@@ -14,6 +14,7 @@
 #include "unicode/utypes.h"
 #if !UCONFIG_NO_BREAK_ITERATION
 
+#include <sstream>
+#include <string>
+#include <vector>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -35,6 +36,7 @@
 #include "unicode/uscript.h"
 #include "unicode/ustring.h"
 #include "unicode/utext.h"
+#include "unicode/utrace.h"
 
 #include "charstr.h"
 #include "cmemory.h"
@@ -126,6 +128,19 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
     TESTCASE_AUTO(TestReverse);
     TESTCASE_AUTO(TestBug13692);
     TESTCASE_AUTO(TestDebugRules);
+
+#if U_ENABLE_TRACING
+    TESTCASE_AUTO(TestTraceCreateCharacter);
+    TESTCASE_AUTO(TestTraceCreateWord);
+    TESTCASE_AUTO(TestTraceCreateSentence);
+    TESTCASE_AUTO(TestTraceCreateTitle);
+    TESTCASE_AUTO(TestTraceCreateLine);
+    TESTCASE_AUTO(TestTraceCreateLineNormal);
+    TESTCASE_AUTO(TestTraceCreateLineLoose);
+    TESTCASE_AUTO(TestTraceCreateLineStrict);
+    TESTCASE_AUTO(TestTraceCreateBreakEngine);
+#endif
+
     TESTCASE_AUTO_END;
 }
 
@@ -4865,6 +4880,182 @@ void RBBITest::TestDebugRules() {
 #endif
 }
 
+#if U_ENABLE_TRACING
+static std::vector<std::string> gData;
+static std::vector<int32_t> gEntryFn;
+static std::vector<int32_t> gExitFn;
+static std::vector<int32_t> gDataFn;
+
+static void U_CALLCONV traceData(
+        const void*,
+        int32_t fnNumber,
+        int32_t,
+        const char *,
+        va_list args) {
+    if (UTRACE_UBRK_START <= fnNumber && fnNumber <= UTRACE_UBRK_LIMIT) {
+        const char* data = va_arg(args, const char*);
+        gDataFn.push_back(fnNumber);
+        gData.push_back(data);
+    }
+}
+
+// Trace-entry hook: records the function number of every break-iterator
+// trace entry. Uses U_CALLCONV like the traceData hook installed next to it.
+static void U_CALLCONV traceEntry(const void *, int32_t fnNumber) {
+    // UTRACE_UBRK_LIMIT is one more than the highest break-iterator trace
+    // location, so the upper bound is exclusive.
+    if (UTRACE_UBRK_START <= fnNumber && fnNumber < UTRACE_UBRK_LIMIT) {
+        gEntryFn.push_back(fnNumber);
+    }
+}
+
+// Trace-exit hook: records the function number of every break-iterator
+// trace exit; the format string and return-value arguments are ignored.
+// Uses U_CALLCONV like the traceData hook installed next to it.
+static void U_CALLCONV traceExit(const void *, int32_t fnNumber, const char *, va_list) {
+    // UTRACE_UBRK_LIMIT is one more than the highest break-iterator trace
+    // location, so the upper bound is exclusive.
+    if (UTRACE_UBRK_START <= fnNumber && fnNumber < UTRACE_UBRK_LIMIT) {
+        gExitFn.push_back(fnNumber);
+    }
+}
+
+
+void RBBITest::assertTestTraceResult(int32_t fnNumber, const char* expectedData) {
+    assertEquals("utrace_entry should be called ", 1, gEntryFn.size());
+    assertEquals("utrace_entry should be called with ", fnNumber, gEntryFn[0]);
+    assertEquals("utrace_exit should be called ", 1, gExitFn.size());
+    assertEquals("utrace_exit should be called with ", fnNumber, gExitFn[0]);
+
+    if (expectedData == nullptr) {
+      assertEquals("utrace_data should not be called ", 0, gDataFn.size());
+      assertEquals("utrace_data should not be called ", 0, gData.size());
+    } else {
+      assertEquals("utrace_data should be called ", 1, gDataFn.size());
+      assertEquals("utrace_data should be called with ", fnNumber, gDataFn[0]);
+      assertEquals("utrace_data should be called ", 1, gData.size());
+      assertEquals("utrace_data should pass in ", expectedData, gData[0].c_str());
+    }
+}
+
+// Clears every recorder, installs the recording hooks, and enables tracing
+// at UTRACE_INFO. File-local, like the hooks and recorders it works with.
+static void SetupTestTrace() {
+    gEntryFn.clear();
+    gExitFn.clear();
+    gDataFn.clear();
+    gData.clear();
+
+    // The hooks ignore their context argument, so none is supplied.
+    const void* context = nullptr;
+    utrace_setFunctions(context, traceEntry, traceExit, traceData);
+    utrace_setLevel(UTRACE_INFO);
+}
 
+void RBBITest::TestTraceCreateCharacter(void) {  // character-iterator creation must be traced
+    SetupTestTrace();  // clear the recorders and install the recording hooks
+    IcuTestErrorCode status(*this, "TestTraceCreateCharacter");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createCharacterInstance("zh-CN", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_CHARACTER, nullptr);  // nullptr: no utrace_data event expected
+}
+
+void RBBITest::TestTraceCreateTitle(void) {  // title-iterator creation must be traced
+    SetupTestTrace();  // clear the recorders and install the recording hooks
+    IcuTestErrorCode status(*this, "TestTraceCreateTitle");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createTitleInstance("zh-CN", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_TITLE, nullptr);  // nullptr: no utrace_data event expected
+}
+
+void RBBITest::TestTraceCreateSentence(void) {  // sentence-iterator creation must be traced
+    SetupTestTrace();  // clear the recorders and install the recording hooks
+    IcuTestErrorCode status(*this, "TestTraceCreateSentence");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createSentenceInstance("zh-CN", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_SENTENCE, nullptr);  // nullptr: no utrace_data event expected
+}
+
+void RBBITest::TestTraceCreateWord(void) {  // word-iterator creation must be traced
+    SetupTestTrace();  // clear the recorders and install the recording hooks
+    IcuTestErrorCode status(*this, "TestTraceCreateWord");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createWordInstance("zh-CN", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_WORD, nullptr);  // nullptr: no utrace_data event expected
+}
+
+void RBBITest::TestTraceCreateLine(void) {  // line-iterator creation without an lb keyword
+    SetupTestTrace();  // clear the recorders and install the recording hooks
+    IcuTestErrorCode status(*this, "TestTraceCreateLine");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createLineInstance("zh-CN", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "");  // locale has no lb keyword: traced value is empty
+}
+
+void RBBITest::TestTraceCreateLineStrict(void) {  // line-iterator creation with lb=strict
+    SetupTestTrace();  // clear the recorders and install the recording hooks
+    IcuTestErrorCode status(*this, "TestTraceCreateLineStrict");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createLineInstance("zh-CN-u-lb-strict", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "strict");  // traced value is the locale's lb keyword value
+}
+
+void RBBITest::TestTraceCreateLineNormal(void) {  // line-iterator creation with lb=normal
+    SetupTestTrace();  // clear the recorders and install the recording hooks
+    IcuTestErrorCode status(*this, "TestTraceCreateLineNormal");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createLineInstance("zh-CN-u-lb-normal", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "normal");  // traced value is the locale's lb keyword value
+}
+
+void RBBITest::TestTraceCreateLineLoose(void) {  // line-iterator creation with lb=loose
+    SetupTestTrace();  // clear the recorders and install the recording hooks
+    IcuTestErrorCode status(*this, "TestTraceCreateLineLoose");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createLineInstance("zh-CN-u-lb-loose", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "loose");  // traced value is the locale's lb keyword value
+}
+
+void RBBITest::TestTraceCreateBreakEngine(void) {
+    rbbi_cleanup();
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateBreakEngine");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createWordInstance("zh-CN", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_WORD, nullptr);
+
+    // To word break the following text, BreakIterator will create 5 dictionary
+    // break engine internally.
+    brkitr->setText(
+        u"test "
+        u"測試 " // Hani
+        u"សាកល្បង " // Khmr
+        u"ທົດສອບ " // Laoo
+        u"စမ်းသပ်မှု " // Mymr
+        u"ทดสอบ " // Thai
+        u"test "
+    );
+
+    // Loop through all the text.
+    while (brkitr->next() > 0) ;
+
+    assertEquals("utrace_entry should be called ", 6, gEntryFn.size());
+    assertEquals("utrace_exit should be called ", 6, gExitFn.size());
+    assertEquals("utrace_data should be called ", 5, gDataFn.size());
+
+    for (std::vector<int>::size_type i = 0; i < gDataFn.size(); i++) {
+        assertEquals("utrace_entry should be called ",
+                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gEntryFn[i+1]);
+        assertEquals("utrace_exit should be called ",
+                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gExitFn[i+1]);
+        assertEquals("utrace_data should be called ",
+                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gDataFn[i]);
+    }
+
+    assertEquals("utrace_data should pass ", "Hani", gData[0].c_str());
+    assertEquals("utrace_data should pass ", "Khmr", gData[1].c_str());
+    assertEquals("utrace_data should pass ", "Laoo", gData[2].c_str());
+    assertEquals("utrace_data should pass ", "Mymr", gData[3].c_str());
+    assertEquals("utrace_data should pass ", "Thai", gData[4].c_str());
+
+}
+#endif
 
 #endif // #if !UCONFIG_NO_BREAK_ITERATION
index 96c2882c5af16a8acf15d53cfcb51cd07291beea..8f667e5e74dfc4aeddd735a395b1c74ab751dd72 100644 (file)
@@ -87,6 +87,18 @@ public:
     void TestDebug();
     void TestProperties();
 
+#if U_ENABLE_TRACING
+    void TestTraceCreateCharacter();
+    void TestTraceCreateWord();
+    void TestTraceCreateSentence();
+    void TestTraceCreateTitle();
+    void TestTraceCreateLine();
+    void TestTraceCreateLineNormal();
+    void TestTraceCreateLineStrict();
+    void TestTraceCreateLineLoose();
+    void TestTraceCreateBreakEngine();
+#endif
+
 /***********************/
 private:
     /**
@@ -120,6 +132,11 @@ private:
 
     // Test parameters, from the test framework and test invocation.
     const char* fTestParams;
+
+#if U_ENABLE_TRACING
+    void assertTestTraceResult(int32_t fnNumber, const char* expectedData);
+#endif
+
 };
 
 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */