*/
#include "unicode/std_string.h"
+#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/uchar.h"
#include "unicode/ures.h"
void TestMalformedUTF8();
void TestBufferOverflow();
void TestEdits();
+ void TestCaseMapWithEdits();
+ void TestLongUnicodeString();
private:
void assertGreekUpper(const char *s, const char *expected);
void checkEditsIter(
- const char *name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
- EditChange expected[], int32_t expLength, UBool withUnchanged,
+ const UnicodeString &name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
+ const EditChange expected[], int32_t expLength, UBool withUnchanged,
UErrorCode &errorCode);
Locale GREEK_LOCALE_;
TESTCASE_AUTO(TestMalformedUTF8);
TESTCASE_AUTO(TestBufferOverflow);
TESTCASE_AUTO(TestEdits);
+ TESTCASE_AUTO(TestCaseMapWithEdits);
+ TESTCASE_AUTO(TestLongUnicodeString);
TESTCASE_AUTO_END;
}
}
void StringCaseTest::checkEditsIter(
- const char *name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
- EditChange expected[], int32_t expLength, UBool withUnchanged,
+ const UnicodeString &name,
+ Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
+ const EditChange expected[], int32_t expLength, UBool withUnchanged,
UErrorCode &errorCode) {
assertFalse(name, ei2.findSourceIndex(-1, errorCode));
- char msg[100];
int32_t expSrcIndex = 0;
int32_t expDestIndex = 0;
int32_t expReplIndex = 0;
for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
- sprintf(msg, "%s %d", name, (int)expIndex);
- if (withUnchanged || expected[expIndex].change) {
+ const EditChange &expect = expected[expIndex];
+ UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
+ if (withUnchanged || expect.change) {
assertTrue(msg, ei1.next(errorCode));
- assertEquals(msg, expected[expIndex].change, ei1.hasChange());
- assertEquals(msg, expected[expIndex].oldLength, ei1.oldLength());
- assertEquals(msg, expected[expIndex].newLength, ei1.newLength());
+ assertEquals(msg, expect.change, ei1.hasChange());
+ assertEquals(msg, expect.oldLength, ei1.oldLength());
+ assertEquals(msg, expect.newLength, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
}
- if (expected[expIndex].oldLength > 0) {
+ if (expect.oldLength > 0) {
assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
- assertEquals(msg, expected[expIndex].change, ei2.hasChange());
- assertEquals(msg, expected[expIndex].oldLength, ei2.oldLength());
- assertEquals(msg, expected[expIndex].newLength, ei2.newLength());
+ assertEquals(msg, expect.change, ei2.hasChange());
+ assertEquals(msg, expect.oldLength, ei2.oldLength());
+ assertEquals(msg, expect.newLength, ei2.newLength());
assertEquals(msg, expSrcIndex, ei2.sourceIndex());
assertEquals(msg, expDestIndex, ei2.destinationIndex());
assertEquals(msg, expReplIndex, ei2.replacementIndex());
}
}
- expSrcIndex += expected[expIndex].oldLength;
- expDestIndex += expected[expIndex].newLength;
- if (expected[expIndex].change) {
- expReplIndex += expected[expIndex].newLength;
+ expSrcIndex += expect.oldLength;
+ expDestIndex += expect.newLength;
+ if (expect.change) {
+ expReplIndex += expect.newLength;
}
}
- sprintf(msg, "%s end", name);
+ // TODO: remove casts from u"" when merging into trunk
+ UnicodeString msg = UnicodeString(name).append((const UChar *)u" end");
assertFalse(msg, ei1.next(errorCode));
assertFalse(msg, ei1.hasChange());
assertEquals(msg, 0, ei1.oldLength());
UErrorCode outErrorCode = U_ZERO_ERROR;
assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
- EditChange coarseExpectedChanges[] = {
+ static const EditChange coarseExpectedChanges[] = {
{ FALSE, 10003, 10003 },
{ TRUE, 103103, 104013 }
};
- checkEditsIter("coarse",
+ checkEditsIter((const UChar *)u"coarse",
edits.getCoarseIterator(), edits.getCoarseIterator(),
coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
- checkEditsIter("coarse changes",
+ checkEditsIter((const UChar *)u"coarse changes",
edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
- EditChange fineExpectedChanges[] = {
+ static const EditChange fineExpectedChanges[] = {
{ FALSE, 10003, 10003 },
{ TRUE, 1, 1 },
{ TRUE, 1, 1 },
{ TRUE, 3000, 4000 },
{ TRUE, 100000, 100000 }
};
- checkEditsIter("fine",
+ checkEditsIter((const UChar *)u"fine",
edits.getFineIterator(), edits.getFineIterator(),
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
- checkEditsIter("fine changes",
+ checkEditsIter((const UChar *)u"fine changes",
edits.getFineChangesIterator(), edits.getFineChangesIterator(),
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
Edits::Iterator ei = edits.getCoarseChangesIterator();
assertFalse("reset then iterator", ei.next(errorCode));
}
+
+/**
+ * Exercises CaseMap::toLower(), toUpper(), toTitle() and foldCase() with
+ * UCASEMAP_OMIT_UNCHANGED_TEXT and an Edits object.
+ *
+ * For each mapping the test compares the destination buffer against the
+ * expected replacement text, then hands two fine-grained iterators over the
+ * recorded edits to checkEditsIter() together with the expected
+ * { change, oldLength, newLength } triples.
+ */
+void StringCaseTest::TestCaseMapWithEdits() {
+    // Use this test's own name so that failures are attributed correctly
+    // (the name was previously copy-pasted from TestEdits).
+    IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
+    UChar dest[20];
+    Edits edits;
+
+    // Lowercasing with the "tr" locale.
+    int32_t length = CaseMap::toLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT,
+                                      (const UChar *)u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals((const UChar *)u"toLower(Istanbul)", UnicodeString((const UChar *)u"ıb"), UnicodeString(TRUE, dest, length));
+    static const EditChange lowerExpectedChanges[] = {
+        { TRUE, 1, 1 },
+        { FALSE, 4, 4 },
+        { TRUE, 1, 1 },
+        { FALSE, 2, 2 }
+    };
+    checkEditsIter((const UChar *)u"toLower(Istanbul)",
+                   edits.getFineIterator(), edits.getFineIterator(),
+                   lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
+                   TRUE, errorCode);
+
+    // Uppercasing with the "el" locale; the Edits object is reused after reset().
+    edits.reset();
+    length = CaseMap::toUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT,
+                              (const UChar *)u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals((const UChar *)u"toUpper(Πατάτα)", UnicodeString((const UChar *)u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
+    static const EditChange upperExpectedChanges[] = {
+        { FALSE, 1, 1 },
+        { TRUE, 1, 1 },
+        { TRUE, 1, 1 },
+        { TRUE, 1, 1 },
+        { TRUE, 1, 1 },
+        { TRUE, 1, 1 }
+    };
+    checkEditsIter((const UChar *)u"toUpper(Πατάτα)",
+                   edits.getFineIterator(), edits.getFineIterator(),
+                   upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
+                   TRUE, errorCode);
+
+    // Titlecasing with the "nl" locale, no break adjustment and no lowercasing;
+    // a NULL break iterator is passed.
+    edits.reset();
+    length = CaseMap::toTitle("nl",
+                              UCASEMAP_OMIT_UNCHANGED_TEXT |
+                              U_TITLECASE_NO_BREAK_ADJUSTMENT |
+                              U_TITLECASE_NO_LOWERCASE,
+                              NULL, (const UChar *)u"IjssEL IglOo", 12,
+                              dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals((const UChar *)u"toTitle(IjssEL IglOo)", UnicodeString((const UChar *)u"J"), UnicodeString(TRUE, dest, length));
+    static const EditChange titleExpectedChanges[] = {
+        { FALSE, 1, 1 },
+        { TRUE, 1, 1 },
+        { FALSE, 10, 10 }
+    };
+    checkEditsIter((const UChar *)u"toTitle(IjssEL IglOo)",
+                   edits.getFineIterator(), edits.getFineIterator(),
+                   titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
+                   TRUE, errorCode);
+
+    // Case folding with U_FOLD_CASE_EXCLUDE_SPECIAL_I; one edit grows from 1 to 2 units.
+    edits.reset();
+    length = CaseMap::foldCase(UCASEMAP_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
+                               (const UChar *)u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals((const UChar *)u"foldCase(IßtanBul)", UnicodeString((const UChar *)u"ıssb"), UnicodeString(TRUE, dest, length));
+    static const EditChange foldExpectedChanges[] = {
+        { TRUE, 1, 1 },
+        { TRUE, 1, 2 },
+        { FALSE, 3, 3 },
+        { TRUE, 1, 1 },
+        { FALSE, 2, 2 }
+    };
+    checkEditsIter((const UChar *)u"foldCase(IßtanBul)",
+                   edits.getFineIterator(), edits.getFineIterator(),
+                   foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
+                   TRUE, errorCode);
+}
+
+/**
+ * Code coverage for the UnicodeString case mapping code that handles
+ * long strings or many changes in a string: uppercases a 306-unit string
+ * with the root locale and compares it with the expected text.
+ */
+void StringCaseTest::TestLongUnicodeString() {
+    // Six rows of 51 code units each.
+    const int32_t textLength = 6 * 51;
+    const UChar *lowerText = (const UChar *)
+        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
+        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
+        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
+        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
+        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
+        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF";
+    const UChar *upperText = (const UChar *)
+        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
+        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
+        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
+        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
+        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
+        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF";
+
+    UnicodeString actual(TRUE, lowerText, textLength);
+    UnicodeString wanted(TRUE, upperText, textLength);
+
+    actual.toUpper(Locale::getRoot());
+    assertEquals("string length 306", wanted, actual);
+}
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.CaseMap;
import com.ibm.icu.text.Edits;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UTF16;
int expDestIndex = 0;
int expReplIndex = 0;
for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
+ EditChange expect = expected[expIndex];
String msg = name + ' ' + expIndex;
- if (withUnchanged || expected[expIndex].change) {
+ if (withUnchanged || expect.change) {
assertTrue(msg, ei1.next());
- assertEquals(msg, expected[expIndex].change, ei1.hasChange());
- assertEquals(msg, expected[expIndex].oldLength, ei1.oldLength());
- assertEquals(msg, expected[expIndex].newLength, ei1.newLength());
+ assertEquals(msg, expect.change, ei1.hasChange());
+ assertEquals(msg, expect.oldLength, ei1.oldLength());
+ assertEquals(msg, expect.newLength, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
}
- if (expected[expIndex].oldLength > 0) {
+ if (expect.oldLength > 0) {
assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
- assertEquals(msg, expected[expIndex].change, ei2.hasChange());
- assertEquals(msg, expected[expIndex].oldLength, ei2.oldLength());
- assertEquals(msg, expected[expIndex].newLength, ei2.newLength());
+ assertEquals(msg, expect.change, ei2.hasChange());
+ assertEquals(msg, expect.oldLength, ei2.oldLength());
+ assertEquals(msg, expect.newLength, ei2.newLength());
assertEquals(msg, expSrcIndex, ei2.sourceIndex());
assertEquals(msg, expDestIndex, ei2.destinationIndex());
assertEquals(msg, expReplIndex, ei2.replacementIndex());
}
}
- expSrcIndex += expected[expIndex].oldLength;
- expDestIndex += expected[expIndex].newLength;
- if (expected[expIndex].change) {
- expReplIndex += expected[expIndex].newLength;
+ expSrcIndex += expect.oldLength;
+ expDestIndex += expect.newLength;
+ if (expect.change) {
+ expReplIndex += expect.newLength;
}
}
String msg = name + " end";
assertFalse("reset then iterator", ei.next());
}
+    /**
+     * Runs CaseMap lowercasing, uppercasing, titlecasing and case folding with
+     * omitUnchangedText() and an Edits object. Each step checks the text that
+     * was appended to the builder and then passes two fine iterators over the
+     * recorded edits to checkEditsIter().
+     */
+    @Test
+    public void TestCaseMapWithEdits() {
+        // The builder and the Edits object are reused; both are cleared between steps.
+        StringBuilder out = new StringBuilder();
+        Edits changes = new Edits();
+
+        // toLower with the Turkish locale.
+        out = CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul", out, changes);
+        assertEquals("toLower(Istanbul)", "ıb", out.toString());
+        EditChange[] lowerExpected = {
+            new EditChange(true, 1, 1),
+            new EditChange(false, 4, 4),
+            new EditChange(true, 1, 1),
+            new EditChange(false, 2, 2)
+        };
+        checkEditsIter("toLower(Istanbul)",
+                changes.getFineIterator(), changes.getFineIterator(),
+                lowerExpected, true);
+
+        // toUpper with the Greek locale.
+        out.setLength(0);
+        changes.reset();
+        out = CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα", out, changes);
+        assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", out.toString());
+        EditChange[] upperExpected = {
+            new EditChange(false, 1, 1),
+            new EditChange(true, 1, 1),
+            new EditChange(true, 1, 1),
+            new EditChange(true, 1, 1),
+            new EditChange(true, 1, 1),
+            new EditChange(true, 1, 1)
+        };
+        checkEditsIter("toUpper(Πατάτα)",
+                changes.getFineIterator(), changes.getFineIterator(),
+                upperExpected, true);
+
+        // toTitle with the "nl" locale, no break adjustment, no lowercasing, null break iterator.
+        out.setLength(0);
+        changes.reset();
+        out = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
+                new Locale("nl"), null, "IjssEL IglOo", out, changes);
+        assertEquals("toTitle(IjssEL IglOo)", "J", out.toString());
+        EditChange[] titleExpected = {
+            new EditChange(false, 1, 1),
+            new EditChange(true, 1, 1),
+            new EditChange(false, 10, 10)
+        };
+        checkEditsIter("toTitle(IjssEL IglOo)",
+                changes.getFineIterator(), changes.getFineIterator(),
+                titleExpected, true);
+
+        // fold with the turkic() option.
+        out.setLength(0);
+        changes.reset();
+        out = CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul", out, changes);
+        assertEquals("fold(IßtanBul)", "ıssb", out.toString());
+        EditChange[] foldExpected = {
+            new EditChange(true, 1, 1),
+            new EditChange(true, 1, 2),
+            new EditChange(false, 3, 3),
+            new EditChange(true, 1, 1),
+            new EditChange(false, 2, 2)
+        };
+        checkEditsIter("fold(IßtanBul)",
+                changes.getFineIterator(), changes.getFineIterator(),
+                foldExpected, true);
+    }
+
// private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");