/*
*****************************************************************************
*
- * Copyright (C) 1998-2007, International Business Machines
+ * Copyright (C) 1998-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*****************************************************************************
#define UCNV_PRV_ESCAPE_CSS2 'S'
#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
+/*
+ * IS_DEFAULT_IGNORABLE_CODE_POINT(c)
+ * Evaluates to non-zero if the code point c has the Unicode property
+ * Default_Ignorable_Code_Point, and to zero otherwise.
+ * When an ignorable code point is found and is unmappable, the default
+ * callbacks ignore it.
+ *
+ * To avoid a dependency on other code, the list is hard coded here. As such,
+ * it needs to be updated if the ignorable code point list ever changes.
+ *
+ * The argument is parenthesized at every use, so any expression may be passed
+ * (for example a | b). It is evaluated several times, however, so it must be
+ * free of side effects.
+ *
+ * Code points covered, in the order they are tested:
+ * 0x00AD (Latin-1 Punctuation and Symbols)
+ * 0x034F (Combining Diacritical Marks Grapheme Joiner)
+ * 0x061C (Arabic Format Character)
+ * 0x115F (Hangul Jamo Old Initial Consonants)
+ * 0x1160 (Hangul Jamo Medial Vowels)
+ * 0x17B4..0x17B5 (Khmer Inherent Vowels)
+ * 0x180B..0x180E (Mongolian Format Controls)
+ * 0x200B..0x200F (General Punctuation Format Characters)
+ * 0x202A..0x202E (General Punctuation Format Characters)
+ * 0x2060 (General Punctuation Format Characters)
+ * 0x2066..0x2069 (General Punctuation Format Characters)
+ * 0x2061..0x2064 (General Punctuation Invisible Operators)
+ * 0x206A..0x206F (General Punctuation Deprecated)
+ * 0x3164 (Hangul Compatibility Jamo)
+ * 0x0FE00..0x0FE0F (Variation Selectors)
+ * 0x0FEFF (Arabic Presentation Forms B)
+ * 0x0FFA0 (Halfwidth and Fullwidth Forms)
+ * 0x01BCA0..0x01BCA3 (Shorthand Format Controls)
+ * 0x01D173..0x01D17A (Musical Symbols)
+ * 0x0E0001 (Tag Identifiers)
+ * 0x0E0020..0x0E007F (Tag Components)
+ * 0x0E0100..0x0E01EF (Variation Selectors Supplement)
+ * 0x2065 (Unassigned)
+ * 0x0FFF0..0x0FFF8 (Unassigned)
+ * 0x0E0000 (Unassigned)
+ * 0x0E0002..0x0E001F (Unassigned)
+ * 0x0E0080..0x0E00FF (Unassigned)
+ * 0x0E01F0..0x0E0FFF (Unassigned)
+ */
+
+#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
+ ((c) == 0x00AD) || \
+ ((c) == 0x034F) || \
+ ((c) == 0x061C) || \
+ ((c) == 0x115F) || \
+ ((c) == 0x1160) || \
+ (0x17B4 <= (c) && (c) <= 0x17B5) || \
+ (0x180B <= (c) && (c) <= 0x180E) || \
+ (0x200B <= (c) && (c) <= 0x200F) || \
+ (0x202A <= (c) && (c) <= 0x202E) || \
+ ((c) == 0x2060) || \
+ (0x2066 <= (c) && (c) <= 0x2069) || \
+ (0x2061 <= (c) && (c) <= 0x2064) || \
+ (0x206A <= (c) && (c) <= 0x206F) || \
+ ((c) == 0x3164) || \
+ (0x0FE00 <= (c) && (c) <= 0x0FE0F) || \
+ ((c) == 0x0FEFF) || \
+ ((c) == 0x0FFA0) || \
+ (0x01BCA0 <= (c) && (c) <= 0x01BCA3) || \
+ (0x01D173 <= (c) && (c) <= 0x01D17A) || \
+ ((c) == 0x0E0001) || \
+ (0x0E0020 <= (c) && (c) <= 0x0E007F) || \
+ (0x0E0100 <= (c) && (c) <= 0x0E01EF) || \
+ ((c) == 0x2065) || \
+ (0x0FFF0 <= (c) && (c) <= 0x0FFF8) || \
+ ((c) == 0x0E0000) || \
+ (0x0E0002 <= (c) && (c) <= 0x0E001F) || \
+ (0x0E0080 <= (c) && (c) <= 0x0E00FF) || \
+ (0x0E01F0 <= (c) && (c) <= 0x0E0FFF) \
+ )
+
+
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_STOP (
UConverterCallbackReason reason,
UErrorCode * err)
{
+ if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * An unassigned code point that has the Unicode property
+ * Default_Ignorable_Code_Point is skipped: the error code is reset
+ * so that this code point does not make the conversion fail.
+ */
+ *err = U_ZERO_ERROR;
+ }
/* the caller must have set the error code accordingly */
return;
}
{
if (reason <= UCNV_IRREGULAR)
{
- if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
+ if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * Skip the code point if it has the Unicode property Default_Ignorable_Code_Point.
+ */
+ *err = U_ZERO_ERROR;
+ }
+ else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
{
*err = U_ZERO_ERROR;
}
{
if (reason <= UCNV_IRREGULAR)
{
- if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
+ if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * Skip the code point if it has the Unicode property Default_Ignorable_Code_Point.
+ */
+ *err = U_ZERO_ERROR;
+ }
+ else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
{
*err = U_ZERO_ERROR;
ucnv_cbFromUWriteSub(fromArgs, 0, err);
{
return;
}
+ else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * Skip the code point if it has the Unicode property Default_Ignorable_Code_Point.
+ */
+ *err = U_ZERO_ERROR;
+ return;
+ }
ucnv_setFromUCallBack (fromArgs->converter,
(UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
/*
*******************************************************************************
*
-* Copyright (C) 2003-2013, International Business Machines
+* Copyright (C) 2003-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
+ case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break;
#else
case 0:
case 1:
- case 2: name="skip"; break;
+ case 2:
+ case 3: name="skip"; break;
#endif
- case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
+ case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
default: name=""; break; //needed to end loop
}
}
delete [] s0;
}
+// Test that every code point which has the default ignorable Unicode property is ignored
+// by the from-Unicode STOP callback when it has no mapping in the converter.
+// If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated
+void
+ConversionTest::TestDefaultIgnorableCallback() {
+ UErrorCode status = U_ZERO_ERROR;
+ const char *name = "euc-jp-2007";
+ const char *pattern = "[:Default_Ignorable_Code_Point:]";
+
+ // The set lives on the stack so that it is released on every return path.
+ UnicodeSet set(pattern, status);
+ if (U_FAILURE(status)) {
+ dataerrln("Unable to create Unicodeset: %s - %s\n", pattern, u_errorName(status));
+ return;
+ }
+ UConverter *cnv = cnv_open(name, status);
+ if (U_FAILURE(status)) {
+ errln("Unable to open converter: %s - %s\n", name, u_errorName(status));
+ return;
+ }
+ // set callback for the converter
+ ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
+ if (U_FAILURE(status)) {
+ // Without the STOP callback installed the loop below would test nothing useful.
+ errln("Unable to set callback for converter: %s - %s", name, u_errorName(status));
+ ucnv_close(cnv);
+ return;
+ }
+
+ char output[10];
+ int32_t size = set.size();
+ for (int32_t i = 0; i < size; i++) {
+ status = U_ZERO_ERROR;
+
+ // Convert one code point at a time so that a failure identifies the offender.
+ UChar32 c = set.charAt(i);
+ UnicodeString text(c);
+
+ ucnv_fromUChars(cnv, output, (int32_t)sizeof(output), text.getBuffer(), text.length(), &status);
+ if (U_FAILURE(status)) {
+ errln("Callback did not ignore code point: 0x%06X on failed conversion - %s", c, u_errorName(status));
+ }
+ }
+ ucnv_close(cnv);
+}
+
+
// open testdata or ICU data converter ------------------------------------- ***
UConverter *