virtual void setProperty(const char* propline);
virtual const char* getProperty(const char* prop);
-protected:
/* JUnit-like assertions. Each returns TRUE if it succeeds. */
UBool assertTrue(const char* message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE, const char *file=NULL, int line=0);
UBool assertFalse(const char* message, UBool condition, UBool quiet=FALSE);
#include <string>
#include "unicode/bytestream.h"
+#include "unicode/edits.h"
#include "unicode/uchar.h"
#include "unicode/normalizer2.h"
#include "unicode/normlzr.h"
std::string exp8;
exp.toUTF8String(exp8);
std::string out8;
+ Edits edits;
+ Edits *editsPtr = (mode == UNORM_NFC || mode == UNORM_NFKC) ? &edits : nullptr;
StringByteSink<std::string> sink(&out8);
- norm2->normalizeUTF8(0, s8, sink, nullptr, errorCode);
+ norm2->normalizeUTF8(0, s8, sink, editsPtr, errorCode);
if (U_FAILURE(errorCode)) {
errln("Normalizer2.%s.normalizeUTF8(%s) failed: %s",
modeString, s8.c_str(), u_errorName(errorCode));
modeString, s8.c_str(), out8.c_str(), exp8.c_str());
return FALSE;
}
- return TRUE;
+ if (editsPtr == nullptr) {
+ return TRUE;
+ }
+
+ // Do the Edits cover the entire input & output?
+ UBool pass = TRUE;
+ pass &= assertEquals("edits.hasChanges()", (UBool)(s8 != out8), edits.hasChanges());
+ pass &= assertEquals("edits.lengthDelta()",
+ (int32_t)(out8.length() - s8.length()), edits.lengthDelta());
+ Edits::Iterator iter = edits.getCoarseIterator();
+ while (iter.next(errorCode)) {}
+ pass &= assertEquals("edits source length", s8.length(), iter.sourceIndex());
+ pass &= assertEquals("edits destination length", out8.length(), iter.destinationIndex());
+ return pass;
}
/**
#include "ustrtest.h"
#include "unicode/tstdtmod.h"
#include "cmemory.h"
-
-struct EditChange {
- UBool change;
- int32_t oldLength, newLength;
-};
+#include "testutil.h"
class StringCaseTest: public IntlTest {
public:
private:
void assertGreekUpper(const char16_t *s, const char16_t *expected);
- void checkEditsIter(
- const UnicodeString &name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
- const EditChange expected[], int32_t expLength, UBool withUnchanged,
- UErrorCode &errorCode);
Locale GREEK_LOCALE_;
};
errorCode.reset();
}
-void StringCaseTest::checkEditsIter(
- const UnicodeString &name,
- Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
- const EditChange expected[], int32_t expLength, UBool withUnchanged,
- UErrorCode &errorCode) {
- assertFalse(name, ei2.findSourceIndex(-1, errorCode));
-
- int32_t expSrcIndex = 0;
- int32_t expDestIndex = 0;
- int32_t expReplIndex = 0;
- for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
- const EditChange &expect = expected[expIndex];
- UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
- if (withUnchanged || expect.change) {
- assertTrue(msg, ei1.next(errorCode));
- assertEquals(msg, expect.change, ei1.hasChange());
- assertEquals(msg, expect.oldLength, ei1.oldLength());
- assertEquals(msg, expect.newLength, ei1.newLength());
- assertEquals(msg, expSrcIndex, ei1.sourceIndex());
- assertEquals(msg, expDestIndex, ei1.destinationIndex());
- assertEquals(msg, expReplIndex, ei1.replacementIndex());
- }
-
- if (expect.oldLength > 0) {
- assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
- assertEquals(msg, expect.change, ei2.hasChange());
- assertEquals(msg, expect.oldLength, ei2.oldLength());
- assertEquals(msg, expect.newLength, ei2.newLength());
- assertEquals(msg, expSrcIndex, ei2.sourceIndex());
- assertEquals(msg, expDestIndex, ei2.destinationIndex());
- assertEquals(msg, expReplIndex, ei2.replacementIndex());
- if (!withUnchanged) {
- // For some iterators, move past the current range
- // so that findSourceIndex() has to look before the current index.
- ei2.next(errorCode);
- ei2.next(errorCode);
- }
- }
-
- expSrcIndex += expect.oldLength;
- expDestIndex += expect.newLength;
- if (expect.change) {
- expReplIndex += expect.newLength;
- }
- }
- // TODO: remove casts from u"" when merging into trunk
- UnicodeString msg = UnicodeString(name).append(u" end");
- assertFalse(msg, ei1.next(errorCode));
- assertFalse(msg, ei1.hasChange());
- assertEquals(msg, 0, ei1.oldLength());
- assertEquals(msg, 0, ei1.newLength());
- assertEquals(msg, expSrcIndex, ei1.sourceIndex());
- assertEquals(msg, expDestIndex, ei1.destinationIndex());
- assertEquals(msg, expReplIndex, ei1.replacementIndex());
-
- assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));
-}
-
void StringCaseTest::TestEdits() {
IcuTestErrorCode errorCode(*this, "TestEdits");
Edits edits;
{ FALSE, 10003, 10003 },
{ TRUE, 103103, 104013 }
};
- checkEditsIter(u"coarse",
+ TestUtility::checkEditsIter(*this, u"coarse",
edits.getCoarseIterator(), edits.getCoarseIterator(),
coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
- checkEditsIter(u"coarse changes",
+ TestUtility::checkEditsIter(*this, u"coarse changes",
edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
{ TRUE, 3000, 4000 },
{ TRUE, 100000, 100000 }
};
- checkEditsIter(u"fine",
+ TestUtility::checkEditsIter(*this, u"fine",
edits.getFineIterator(), edits.getFineIterator(),
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
- checkEditsIter(u"fine changes",
+ TestUtility::checkEditsIter(*this, u"fine changes",
edits.getFineChangesIterator(), edits.getFineChangesIterator(),
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
{ TRUE, 1, 1 },
{ FALSE, 2, 2 }
};
- checkEditsIter(u"toLower(IstanBul)",
+ TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
edits.getFineIterator(), edits.getFineIterator(),
lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
TRUE, errorCode);
{ TRUE, 1, 1 },
{ TRUE, 1, 1 }
};
- checkEditsIter(u"toUpper(Πατάτα)",
+ TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
edits.getFineIterator(), edits.getFineIterator(),
upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
TRUE, errorCode);
{ TRUE, 1, 1 },
{ FALSE, 10, 10 }
};
- checkEditsIter(u"toTitle(IjssEL IglOo)",
+ TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
edits.getFineIterator(), edits.getFineIterator(),
titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
TRUE, errorCode);
{ TRUE, 1, 1 },
{ FALSE, 2, 2 }
};
- checkEditsIter(u"foldCase(IßtanBul)",
+ TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
edits.getFineIterator(), edits.getFineIterator(),
foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
TRUE, errorCode);
}
void StringCaseTest::TestCaseMapUTF8WithEdits() {
- IcuTestErrorCode errorCode(*this, "TestEdits");
+ IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits");
char dest[50];
Edits edits;
{ TRUE, 1, 1 },
{ FALSE, 2, 2 }
};
- checkEditsIter(u"toLower(IstanBul)",
+ TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
edits.getFineIterator(), edits.getFineIterator(),
lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
TRUE, errorCode);
{ TRUE, 2, 2 },
{ TRUE, 2, 2 }
};
- checkEditsIter(u"toUpper(Πατάτα)",
+ TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
edits.getFineIterator(), edits.getFineIterator(),
upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
TRUE, errorCode);
{ TRUE, 1, 1 },
{ FALSE, 10, 10 }
};
- checkEditsIter(u"toTitle(IjssEL IglOo)",
+ TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
edits.getFineIterator(), edits.getFineIterator(),
titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
TRUE, errorCode);
{ TRUE, 1, 1 },
{ FALSE, 2, 2 }
};
- checkEditsIter(u"foldCase(IßtanBul)",
+ TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
edits.getFineIterator(), edits.getFineIterator(),
foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
TRUE, errorCode);
**********************************************************************
*/
+#include "unicode/utypes.h"
+#include "unicode/edits.h"
#include "unicode/unistr.h"
#include "testutil.h"
+#include "intltest.h"
-static const UChar HEX[16]={48,49,50,51,52,53,54,55,56,57,65,66,67,68,69,70};
+static const UChar HEX[] = u"0123456789ABCDEF";  // UTF-16 hex digit table; as a string literal it also carries a trailing NUL element after the 16 digits
UnicodeString &TestUtility::appendHex(UnicodeString &buf, UChar32 ch) {
if (ch >= 0x10000) {
}
UnicodeString TestUtility::hex(const UnicodeString& s) {
- return hex(s, 44 /*,*/);
+ return hex(s, u',');  // forwards to the (s, sep) overload with a comma separator; u',' is the same code unit as the old literal 44
}
UnicodeString TestUtility::hex(const UnicodeString& s, UChar sep) {
}
UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) {
- UnicodeString buf;
- for (int32_t i = 0; i < len; ++i) {
- buf.append(HEX[0x0F & (bytes[i] >> 4)]);
- buf.append(HEX[0x0F & bytes[i]]);
- }
- return buf;
+ UnicodeString buf;  // result: two hex digits appended per input byte, nothing in between
+ for (int32_t i = 0; i < len; ++i) {
+ buf.append(HEX[0x0F & (bytes[i] >> 4)]);  // high nibble first
+ buf.append(HEX[0x0F & bytes[i]]);  // then the low nibble
+ }
+ return buf;
+}
+
+void TestUtility::checkEditsIter(  // Checks two Edits iterators against a table of expected spans; failures are reported through test.
+ IntlTest &test,  // receives every assertTrue/assertFalse/assertEquals call made here
+ const UnicodeString &name,  // prefix of each assertion message
+ Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators: ei1 is advanced with next(), ei2 is queried with findSourceIndex()
+ const EditChange expected[], int32_t expLength, UBool withUnchanged,  // expected spans and their count; withUnchanged: ei1 is expected to report unchanged spans too
+ UErrorCode &errorCode) {  // handed to every iterator call; never inspected in this function
+ test.assertFalse(name, ei2.findSourceIndex(-1, errorCode));  // a negative source index must not be found
+
+ int32_t expSrcIndex = 0;  // running sum of oldLength: expected sourceIndex()
+ int32_t expDestIndex = 0;  // running sum of newLength: expected destinationIndex()
+ int32_t expReplIndex = 0;  // running sum of newLength over changed spans only: expected replacementIndex()
+ for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
+ const EditChange &expect = expected[expIndex];
+ UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;  // message = name, a space, then the index into expected[]
+ if (withUnchanged || expect.change) {  // ei1 is stepped only for spans it is expected to report
+ test.assertTrue(msg, ei1.next(errorCode));
+ test.assertEquals(msg, expect.change, ei1.hasChange());
+ test.assertEquals(msg, expect.oldLength, ei1.oldLength());
+ test.assertEquals(msg, expect.newLength, ei1.newLength());
+ test.assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ test.assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ test.assertEquals(msg, expReplIndex, ei1.replacementIndex());
+ }
+
+ if (expect.oldLength > 0) {  // spans with an empty source side are not looked up by source index
+ test.assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
+ test.assertEquals(msg, expect.change, ei2.hasChange());
+ test.assertEquals(msg, expect.oldLength, ei2.oldLength());
+ test.assertEquals(msg, expect.newLength, ei2.newLength());
+ test.assertEquals(msg, expSrcIndex, ei2.sourceIndex());
+ test.assertEquals(msg, expDestIndex, ei2.destinationIndex());
+ test.assertEquals(msg, expReplIndex, ei2.replacementIndex());
+ if (!withUnchanged) {
+ // For some iterators, move past the current range
+ // so that findSourceIndex() has to look before the current index.
+ ei2.next(errorCode);
+ ei2.next(errorCode);
+ }
+ }
+
+ expSrcIndex += expect.oldLength;
+ expDestIndex += expect.newLength;
+ if (expect.change) {  // unchanged spans do not advance the expected replacement index
+ expReplIndex += expect.newLength;
+ }
+ }
+ UnicodeString msg = UnicodeString(name).append(u" end");  // checks after the last expected span
+ test.assertFalse(msg, ei1.next(errorCode));  // nothing left to iterate
+ test.assertFalse(msg, ei1.hasChange());
+ test.assertEquals(msg, 0, ei1.oldLength());
+ test.assertEquals(msg, 0, ei1.newLength());
+ test.assertEquals(msg, expSrcIndex, ei1.sourceIndex());  // indexes are expected to rest at the accumulated totals
+ test.assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ test.assertEquals(msg, expReplIndex, ei1.replacementIndex());
+
+ test.assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));  // the index just past the last expected span must not be found
}
#ifndef TESTUTIL_H
#define TESTUTIL_H
+#include "unicode/utypes.h"
+#include "unicode/edits.h"
+#include "unicode/unistr.h"
#include "intltest.h"
+struct EditChange {  // one expected span, as consumed by TestUtility::checkEditsIter()
+ UBool change;  // value expected from Edits::Iterator::hasChange()
+ int32_t oldLength, newLength;  // values expected from Edits::Iterator::oldLength() and newLength()
+};
+
/**
- * Utility methods. Everything in this class is static -- do not
- * attempt to instantiate.
+ * Utility methods. Everything in this class is static.
*/
class TestUtility {
-
public:
static UnicodeString &appendHex(UnicodeString &buf, UChar32 ch);
static UnicodeString hex(const UnicodeString& s, UChar sep);
- static UnicodeString hex(const uint8_t* bytes, int32_t len);
+ static UnicodeString hex(const uint8_t* bytes, int32_t len);
-private:
+ static void checkEditsIter(
+ IntlTest &test, const UnicodeString &name,
+ Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
+ const EditChange expected[], int32_t expLength, UBool withUnchanged,
+ UErrorCode &errorCode);
- TestUtility() {} // Prevent instantiation
+private:
+ TestUtility() = delete; // Prevent instantiation
};
#endif
#include "unicode/uchar.h"
#include "unicode/errorcode.h"
#include "unicode/normlzr.h"
+#include "unicode/ucasemap.h" // UCASEMAP_OMIT_UNCHANGED_TEXT
#include "unicode/uniset.h"
#include "unicode/usetiter.h"
#include "unicode/schriter.h"
#include "cmemory.h"
#include "cstring.h"
#include "normalizer2impl.h"
+#include "testutil.h"
#include "tstnorm.h"
#define ARRAY_LENGTH(array) UPRV_LENGTHOF(array)
-#define CASE(id,test) case id: \
- name = #test; \
- if (exec) { \
- logln(#test "---"); \
- logln((UnicodeString)""); \
- test(); \
- } \
- break
-
-static UErrorCode status = U_ZERO_ERROR;
-
void BasicNormalizerTest::runIndexedTest(int32_t index, UBool exec,
const char* &name, char* /*par*/) {
- switch (index) {
- CASE(0,TestDecomp);
- CASE(1,TestCompatDecomp);
- CASE(2,TestCanonCompose);
- CASE(3,TestCompatCompose);
- CASE(4,TestPrevious);
- CASE(5,TestHangulDecomp);
- CASE(6,TestHangulCompose);
- CASE(7,TestTibetan);
- CASE(8,TestCompositionExclusion);
- CASE(9,TestZeroIndex);
- CASE(10,TestVerisign);
- CASE(11,TestPreviousNext);
- CASE(12,TestNormalizerAPI);
- CASE(13,TestConcatenate);
- CASE(14,FindFoldFCDExceptions);
- CASE(15,TestCompare);
- CASE(16,TestSkippable);
+ if(exec) {
+ logln("TestSuite BasicNormalizerTest: ");  // suite banner, logged only when exec is set
+ }
+ TESTCASE_AUTO_BEGIN;  // the TESTCASE_AUTO list below replaces the hand-numbered CASE(n, test) switch removed above
+ TESTCASE_AUTO(TestDecomp);
+ TESTCASE_AUTO(TestCompatDecomp);
+ TESTCASE_AUTO(TestCanonCompose);
+ TESTCASE_AUTO(TestCompatCompose);
+ TESTCASE_AUTO(TestPrevious);
+ TESTCASE_AUTO(TestHangulDecomp);
+ TESTCASE_AUTO(TestHangulCompose);
+ TESTCASE_AUTO(TestTibetan);
+ TESTCASE_AUTO(TestCompositionExclusion);
+ TESTCASE_AUTO(TestZeroIndex);
+ TESTCASE_AUTO(TestVerisign);
+ TESTCASE_AUTO(TestPreviousNext);
+ TESTCASE_AUTO(TestNormalizerAPI);
+ TESTCASE_AUTO(TestConcatenate);
+ TESTCASE_AUTO(FindFoldFCDExceptions);
+ TESTCASE_AUTO(TestCompare);
+ TESTCASE_AUTO(TestSkippable);
#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
- CASE(17,TestCustomComp);
- CASE(18,TestCustomFCC);
+ TESTCASE_AUTO(TestCustomComp);
+ TESTCASE_AUTO(TestCustomFCC);
#endif
- CASE(19,TestFilteredNormalizer2Coverage);
- default: name = ""; break;
- }
+ TESTCASE_AUTO(TestFilteredNormalizer2Coverage);
+ TESTCASE_AUTO(TestNormalizeUTF8WithEdits);  // test case added by this change
+ TESTCASE_AUTO_END;
}
/**
"\\uFB3B\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
"\\uFB47\\uFB48\\uFB49\\uFB4A\\uFB4B\\uFB4C\\uFB4D\\uFB4E"
);
+ UErrorCode status = U_ZERO_ERROR;
for (int32_t i=0; i<EXCLUDED.length(); ++i) {
UnicodeString a(EXCLUDED.charAt(i));
UnicodeString b;
void BasicNormalizerTest::backAndForth(Normalizer* iter, const UnicodeString& input)
{
UChar32 ch;
+ UErrorCode status = U_ZERO_ERROR;
iter->setText(input, status);
// Run through the iterator forwards and stick it into a StringBuffer
UnicodeString tests[][3], int length,
int outCol)
{
+ UErrorCode status = U_ZERO_ERROR;
for (int i = 0; i < length; i++)
{
UnicodeString& input = tests[i][0];
UnicodeString tests[][3], int length,
int outCol)
{
+ UErrorCode status = U_ZERO_ERROR;
for (int i = 0; i < length; i++)
{
UnicodeString& input = tests[i][0];
UErrorCode errorCode = U_ZERO_ERROR;
const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
if (U_FAILURE(errorCode)) {
- dataerrln("Normalizer2::getNFCInstance() call failed - %s", u_errorName(status));
+ dataerrln("Normalizer2::getNFCInstance() call failed - %s", u_errorName(errorCode));
return;
}
UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]"), errorCode);
}
}
+void
+BasicNormalizerTest::TestNormalizeUTF8WithEdits() {  // Checks normalizeUTF8() output and the Edits it records, first with options 0 and then with unchanged text omitted.
+ IcuTestErrorCode errorCode(*this, "TestNormalizeUTF8WithEdits");
+ const Normalizer2 *nfkc_cf=Normalizer2::getNFKCCasefoldInstance(errorCode);
+ if(errorCode.logDataIfFailureAndReset("Normalizer2::getNFKCCasefoldInstance() call failed")) {
+ return;  // no normalizer instance available, so there is nothing to test
+ }
+ static const char *const src =
+ u8" AÄA\u0308A\u0308\u0323Ä\u0323,\u1100\u1161가\u11A8가\u3133 ";
+ std::string expected = u8" aääạ\u0308ạ\u0308,가각갃 ";
+ std::string result;
+ StringByteSink<std::string> sink(&result);  // output lands in result; the same sink is reused for the second pass
+ Edits edits;
+ nfkc_cf->normalizeUTF8(0, src, sink, &edits, errorCode);  // first pass: options 0
+ assertSuccess("normalizeUTF8 with Edits", errorCode.get());
+ assertEquals("normalizeUTF8 with Edits", expected.c_str(), result.c_str());
+ static const EditChange expectedChanges[] = {  // {change, oldLength, newLength}; lengths are UTF-8 byte counts (e.g. A\u0308 is 1+2 bytes)
+ { FALSE, 2, 2 }, // 2 spaces
+ { TRUE, 1, 1 }, // A→a
+ { TRUE, 2, 2 }, // Ä→ä
+ { TRUE, 3, 2 }, // A\u0308→ä
+ { TRUE, 5, 5 }, // A\u0308\u0323→ạ\u0308
+ { TRUE, 4, 5 }, // Ä\u0323→ ạ\u0308
+ { FALSE, 1, 1 }, // comma
+ { TRUE, 6, 3 }, // \u1100\u1161→ 가
+ { TRUE, 6, 3 }, // 가\u11A8→ 각
+ { TRUE, 6, 3 }, // 가\u3133→ 갃
+ { FALSE, 2, 2 } // 2 spaces
+ };
+ TestUtility::checkEditsIter(*this, u"normalizeUTF8 with Edits",
+ edits.getFineIterator(), edits.getFineIterator(),
+ expectedChanges, UPRV_LENGTHOF(expectedChanges),
+ TRUE, errorCode);
+
+ // Omit unchanged text.
+ expected = u8"aääạ\u0308ạ\u0308가각갃";
+ result.clear();  // start the second pass from an empty output string
+ edits.reset();  // and from an empty Edits object
+ nfkc_cf->normalizeUTF8(UCASEMAP_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);  // second pass: expected output above lacks the surrounding spaces and the comma
+ assertSuccess("normalizeUTF8 omit unchanged", errorCode.get());
+ assertEquals("normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
+ TestUtility::checkEditsIter(*this, u"normalizeUTF8 omit unchanged",
+ edits.getFineIterator(), edits.getFineIterator(),
+ expectedChanges, UPRV_LENGTHOF(expectedChanges),  // the same expected-span table is checked again for this pass
+ TRUE, errorCode);
+}
+
#endif /* #if !UCONFIG_NO_NORMALIZATION */
void TestCustomComp();
void TestCustomFCC();
void TestFilteredNormalizer2Coverage();
+ void TestNormalizeUTF8WithEdits();
private:
UnicodeString canonTests[24][3];