#include "uresimp.h"
#include "uassert.h"
#include "ubrkimpl.h"
+#include "utracimp.h"
#include "charstr.h"
// *****************************************************************************
BreakIterator *result = NULL;
switch (kind) {
case UBRK_CHARACTER:
- result = BreakIterator::buildInstance(loc, "grapheme", status);
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
+ result = BreakIterator::buildInstance(loc, "grapheme", status);
+ UTRACE_EXIT_STATUS(status);
+ }
break;
case UBRK_WORD:
- result = BreakIterator::buildInstance(loc, "word", status);
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
+ result = BreakIterator::buildInstance(loc, "word", status);
+ UTRACE_EXIT_STATUS(status);
+ }
break;
case UBRK_LINE:
- uprv_strcpy(lbType, "line");
{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
+ uprv_strcpy(lbType, "line");
char lbKeyValue[kKeyValueLenMax] = {0};
UErrorCode kvStatus = U_ZERO_ERROR;
int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
uprv_strcat(lbType, "_");
uprv_strcat(lbType, lbKeyValue);
}
+ result = BreakIterator::buildInstance(loc, lbType, status);
+
+ UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
+ UTRACE_EXIT_STATUS(status);
}
- result = BreakIterator::buildInstance(loc, lbType, status);
break;
case UBRK_SENTENCE:
- result = BreakIterator::buildInstance(loc, "sentence", status);
-#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
+ result = BreakIterator::buildInstance(loc, "sentence", status);
+#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
char ssKeyValue[kKeyValueLenMax] = {0};
UErrorCode kvStatus = U_ZERO_ERROR;
int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
delete fbiBuilder;
}
}
- }
#endif
+ UTRACE_EXIT_STATUS(status);
+ }
break;
case UBRK_TITLE:
- result = BreakIterator::buildInstance(loc, "title", status);
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
+ result = BreakIterator::buildInstance(loc, "title", status);
+ UTRACE_EXIT_STATUS(status);
+ }
break;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/ubrk.h"
+#include "utracimp.h"
#include "uvectr32.h"
#include "uvector.h"
#include "uassert.h"
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fThaiWordSet);
fEndWordSet.compact();
fBeginWordSet.compact();
fSuffixSet.compact();
+ UTRACE_EXIT_STATUS(status);
}
ThaiBreakEngine::~ThaiBreakEngine() {
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fLaoWordSet);
fMarkSet.compact();
fEndWordSet.compact();
fBeginWordSet.compact();
+ UTRACE_EXIT_STATUS(status);
}
LaoBreakEngine::~LaoBreakEngine() {
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fBurmeseWordSet);
fMarkSet.compact();
fEndWordSet.compact();
fBeginWordSet.compact();
+ UTRACE_EXIT_STATUS(status);
}
BurmeseBreakEngine::~BurmeseBreakEngine() {
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fKhmerWordSet);
fEndWordSet.compact();
fBeginWordSet.compact();
// fSuffixSet.compact();
+ UTRACE_EXIT_STATUS(status);
}
KhmerBreakEngine::~KhmerBreakEngine() {
static const uint32_t kuint32max = 0xFFFFFFFF;
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
// Korean dictionary only includes Hangul syllables
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
setCharacters(cjSet);
}
}
+ UTRACE_EXIT_STATUS(status);
}
CjkBreakEngine::~CjkBreakEngine(){
UTRACE_RES_DATA_LIMIT,
#endif // U_HIDE_INTERNAL_API
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * The lowest break iterator location.
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_START=0x4000,
+
+ /**
+ * Indicates that a character instance of break iterator was created.
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
+
+ /**
+ * Indicates that a word instance of break iterator was created.
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_WORD,
+
+ /**
+ * Indicates that a line instance of break iterator was created.
+ *
+ * Provides one C-style string to UTraceData: the lb value ("",
+ * "loose", "strict", or "normal").
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_LINE,
+
+ /**
+ * Indicates that a sentence instance of break iterator was created.
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_SENTENCE,
+
+ /**
+ * Indicates that a title instance of break iterator was created.
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_TITLE,
+
+ /**
+ * Indicates that an internal dictionary break engine was created.
+ *
+ * Provides one C-style string to UTraceData: the script code that
+ * the break engine covers ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_BREAK_ENGINE,
+
+#endif // U_HIDE_DRAFT_API
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * One more than the highest normal break iterator trace location.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_UBRK_LIMIT,
+#endif // U_HIDE_INTERNAL_API
+
} UTraceFunctionNumber;
/**
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
+#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "unicode/uscript.h"
#include "unicode/ustring.h"
#include "unicode/utext.h"
+#include "unicode/utrace.h"
#include "charstr.h"
#include "cmemory.h"
TESTCASE_AUTO(TestReverse);
TESTCASE_AUTO(TestBug13692);
TESTCASE_AUTO(TestDebugRules);
+
+#if U_ENABLE_TRACING
+ TESTCASE_AUTO(TestTraceCreateCharacter);
+ TESTCASE_AUTO(TestTraceCreateWord);
+ TESTCASE_AUTO(TestTraceCreateSentence);
+ TESTCASE_AUTO(TestTraceCreateTitle);
+ TESTCASE_AUTO(TestTraceCreateLine);
+ TESTCASE_AUTO(TestTraceCreateLineNormal);
+ TESTCASE_AUTO(TestTraceCreateLineLoose);
+ TESTCASE_AUTO(TestTraceCreateLineStrict);
+ TESTCASE_AUTO(TestTraceCreateBreakEngine);
+#endif
+
TESTCASE_AUTO_END;
}
#endif
}
+#if U_ENABLE_TRACING
+static std::vector<std::string> gData;
+static std::vector<int32_t> gEntryFn;
+static std::vector<int32_t> gExitFn;
+static std::vector<int32_t> gDataFn;
+
+/**
+ * Trace data callback used by the tests below. For every call whose fnNumber
+ * is a break iterator trace location, records the function number in gDataFn
+ * and the first variadic argument (read as a C string) in gData.
+ */
+static void U_CALLCONV traceData(
+        const void*,
+        int32_t fnNumber,
+        int32_t,
+        const char *,
+        va_list args) {
+    // UTRACE_UBRK_LIMIT is one more than the highest break iterator trace
+    // location, so the accepted range is half-open.
+    if (UTRACE_UBRK_START <= fnNumber && fnNumber < UTRACE_UBRK_LIMIT) {
+        const char* data = va_arg(args, const char*);
+        gDataFn.push_back(fnNumber);
+        // Building a std::string from a null pointer is undefined behavior;
+        // record an empty string instead so gData stays in step with gDataFn.
+        gData.push_back(data != nullptr ? data : "");
+    }
+}
+
+/**
+ * Trace entry callback used by the tests below: records fnNumber in gEntryFn
+ * when it is a break iterator trace location.
+ */
+static void U_CALLCONV traceEntry(const void *, int32_t fnNumber) {
+    // UTRACE_UBRK_LIMIT is one more than the highest break iterator trace
+    // location, so the accepted range is half-open.
+    if (UTRACE_UBRK_START <= fnNumber && fnNumber < UTRACE_UBRK_LIMIT) {
+        gEntryFn.push_back(fnNumber);
+    }
+}
+
+/**
+ * Trace exit callback used by the tests below: records fnNumber in gExitFn
+ * when it is a break iterator trace location. The format string and its
+ * arguments are ignored.
+ */
+static void U_CALLCONV traceExit(const void *, int32_t fnNumber, const char *, va_list) {
+    // UTRACE_UBRK_LIMIT is one more than the highest break iterator trace
+    // location, so the accepted range is half-open.
+    if (UTRACE_UBRK_START <= fnNumber && fnNumber < UTRACE_UBRK_LIMIT) {
+        gExitFn.push_back(fnNumber);
+    }
+}
+
+
+/**
+ * Checks the records collected by the trace callbacks: exactly one entry and
+ * one exit, both for fnNumber. When expectedData is null no data record may
+ * have been collected; otherwise exactly one data record for fnNumber whose
+ * string equals expectedData is required.
+ *
+ * @param fnNumber     the trace location expected in every record
+ * @param expectedData the expected data string, or nullptr when no data
+ *                     record is expected
+ */
+void RBBITest::assertTestTraceResult(int32_t fnNumber, const char* expectedData) {
+    // Each vector is indexed only when it is non-empty: a wrong count should
+    // show up as a reported assertion failure, not as an out-of-range read.
+    assertEquals("utrace_entry should be called ", 1, static_cast<int32_t>(gEntryFn.size()));
+    if (!gEntryFn.empty()) {
+        assertEquals("utrace_entry should be called with ", fnNumber, gEntryFn[0]);
+    }
+    assertEquals("utrace_exit should be called ", 1, static_cast<int32_t>(gExitFn.size()));
+    if (!gExitFn.empty()) {
+        assertEquals("utrace_exit should be called with ", fnNumber, gExitFn[0]);
+    }
+
+    if (expectedData == nullptr) {
+        assertEquals("utrace_data should not be called ", 0, static_cast<int32_t>(gDataFn.size()));
+        assertEquals("utrace_data should not be called ", 0, static_cast<int32_t>(gData.size()));
+    } else {
+        assertEquals("utrace_data should be called ", 1, static_cast<int32_t>(gDataFn.size()));
+        if (!gDataFn.empty()) {
+            assertEquals("utrace_data should be called with ", fnNumber, gDataFn[0]);
+        }
+        assertEquals("utrace_data should be called ", 1, static_cast<int32_t>(gData.size()));
+        if (!gData.empty()) {
+            assertEquals("utrace_data should pass in ", expectedData, gData[0].c_str());
+        }
+    }
+}
+
+/**
+ * Prepares a trace test: empties all four record vectors, installs the
+ * recording callbacks with a null context, and sets the trace level to
+ * UTRACE_INFO.
+ */
+void SetupTestTrace() {
+    // Forget whatever earlier calls recorded.
+    gData.clear();
+    gDataFn.clear();
+    gExitFn.clear();
+    gEntryFn.clear();
+
+    utrace_setFunctions(/* context= */ nullptr, traceEntry, traceExit, traceData);
+    utrace_setLevel(UTRACE_INFO);
+}
+/**
+ * Creating a character break iterator must be traced as one entry/exit pair
+ * for UTRACE_UBRK_CREATE_CHARACTER with no data record.
+ */
+void RBBITest::TestTraceCreateCharacter(void) {
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateCharacter");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createCharacterInstance("zh-CN", status));
+    // Stop tracing once the call under test is done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_CHARACTER, nullptr);
+}
+
+/**
+ * Creating a title break iterator must be traced as one entry/exit pair for
+ * UTRACE_UBRK_CREATE_TITLE with no data record.
+ */
+void RBBITest::TestTraceCreateTitle(void) {
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateTitle");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createTitleInstance("zh-CN", status));
+    // Stop tracing once the call under test is done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_TITLE, nullptr);
+}
+
+/**
+ * Creating a sentence break iterator must be traced as one entry/exit pair
+ * for UTRACE_UBRK_CREATE_SENTENCE with no data record.
+ */
+void RBBITest::TestTraceCreateSentence(void) {
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateSentence");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createSentenceInstance("zh-CN", status));
+    // Stop tracing once the call under test is done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_SENTENCE, nullptr);
+}
+
+/**
+ * Creating a word break iterator must be traced as one entry/exit pair for
+ * UTRACE_UBRK_CREATE_WORD with no data record.
+ */
+void RBBITest::TestTraceCreateWord(void) {
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateWord");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createWordInstance("zh-CN", status));
+    // Stop tracing once the call under test is done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_WORD, nullptr);
+}
+
+/**
+ * Creating a line break iterator for a locale without an lb keyword must be
+ * traced as one entry/exit pair for UTRACE_UBRK_CREATE_LINE plus one data
+ * record holding an empty string.
+ */
+void RBBITest::TestTraceCreateLine(void) {
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateLine");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createLineInstance("zh-CN", status));
+    // Stop tracing once the call under test is done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "");
+}
+
+/**
+ * Creating a line break iterator for a locale with lb=strict must be traced
+ * as one entry/exit pair for UTRACE_UBRK_CREATE_LINE plus one data record
+ * holding "strict".
+ */
+void RBBITest::TestTraceCreateLineStrict(void) {
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateLineStrict");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createLineInstance("zh-CN-u-lb-strict", status));
+    // Stop tracing once the call under test is done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "strict");
+}
+
+/**
+ * Creating a line break iterator for a locale with lb=normal must be traced
+ * as one entry/exit pair for UTRACE_UBRK_CREATE_LINE plus one data record
+ * holding "normal".
+ */
+void RBBITest::TestTraceCreateLineNormal(void) {
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateLineNormal");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createLineInstance("zh-CN-u-lb-normal", status));
+    // Stop tracing once the call under test is done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "normal");
+}
+
+/**
+ * Creating a line break iterator for a locale with lb=loose must be traced
+ * as one entry/exit pair for UTRACE_UBRK_CREATE_LINE plus one data record
+ * holding "loose".
+ */
+void RBBITest::TestTraceCreateLineLoose(void) {
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateLineLoose");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createLineInstance("zh-CN-u-lb-loose", status));
+    // Stop tracing once the call under test is done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "loose");
+}
+
+/**
+ * Iterating a word break iterator over text in five dictionary-based scripts
+ * must trace five UTRACE_UBRK_CREATE_BREAK_ENGINE entry/exit pairs, each with
+ * one data record naming the script, in addition to the entry/exit pair for
+ * creating the word iterator itself.
+ */
+void RBBITest::TestTraceCreateBreakEngine(void) {
+    // NOTE(review): presumably discards cached break engines so that they are
+    // created again while tracing is on -- confirm against rbbi_cleanup().
+    rbbi_cleanup();
+    SetupTestTrace();
+    IcuTestErrorCode status(*this, "TestTraceCreateBreakEngine");
+    LocalPointer<BreakIterator> brkitr(
+        BreakIterator::createWordInstance("zh-CN", status));
+    status.errIfFailureAndReset();
+    assertTestTraceResult(UTRACE_UBRK_CREATE_WORD, nullptr);
+
+    if (brkitr.isNull()) {
+        // Creation failed (already reported above); nothing left to iterate.
+        utrace_setLevel(UTRACE_OFF);
+        return;
+    }
+
+    // To word break the following text, BreakIterator will create 5 dictionary
+    // break engines internally.
+    brkitr->setText(
+        u"test "
+        u"測試 " // Hani
+        u"សាកល្បង " // Khmr
+        u"ທົດສອບ " // Laoo
+        u"စမ်းသပ်မှု " // Mymr
+        u"ทดสอบ " // Thai
+        u"test "
+    );
+
+    // Loop through all the text.
+    while (brkitr->next() > 0) ;
+
+    // Stop tracing once the calls under test are done, so the callbacks do not
+    // keep recording into the globals while later tests run.
+    utrace_setLevel(UTRACE_OFF);
+
+    assertEquals("utrace_entry should be called ", 6, static_cast<int32_t>(gEntryFn.size()));
+    assertEquals("utrace_exit should be called ", 6, static_cast<int32_t>(gExitFn.size()));
+    assertEquals("utrace_data should be called ", 5, static_cast<int32_t>(gDataFn.size()));
+
+    // Record 0 of the entry/exit vectors belongs to the word iterator creation
+    // checked above; every later record should be a break engine creation.
+    // Each loop is bounded by the vector it reads, so a wrong count is reported
+    // by the size assertions above instead of causing an out-of-range read.
+    for (std::vector<int32_t>::size_type i = 1; i < gEntryFn.size(); i++) {
+        assertEquals("utrace_entry should be called ",
+                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gEntryFn[i]);
+    }
+    for (std::vector<int32_t>::size_type i = 1; i < gExitFn.size(); i++) {
+        assertEquals("utrace_exit should be called ",
+                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gExitFn[i]);
+    }
+    for (std::vector<int32_t>::size_type i = 0; i < gDataFn.size(); i++) {
+        assertEquals("utrace_data should be called ",
+                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gDataFn[i]);
+    }
+
+    // Expected script codes, in the order the scripts appear in the text.
+    static const char* const kExpectedScripts[] = {"Hani", "Khmr", "Laoo", "Mymr", "Thai"};
+    const std::vector<std::string>::size_type kExpectedCount =
+        sizeof(kExpectedScripts) / sizeof(kExpectedScripts[0]);
+    for (std::vector<std::string>::size_type i = 0; i < kExpectedCount && i < gData.size(); i++) {
+        assertEquals("utrace_data should pass ", kExpectedScripts[i], gData[i].c_str());
+    }
+}
+#endif
#endif // #if !UCONFIG_NO_BREAK_ITERATION