/*
*******************************************************************************
*
-* Copyright (C) 2009-2010, International Business Machines
+* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
return set.contains(c) && norm2.getDecomposition(c, decomposition);
}
+/**
+ * Returns the combining class of c as seen through this filtered normalizer.
+ * Code points that the member set does not contain always yield 0;
+ * for all others the answer comes from norm2.getCombiningClass().
+ * @param c code point
+ * @return norm2's combining class for c, or 0 if set does not contain c
+ */
+uint8_t
+FilteredNormalizer2::getCombiningClass(UChar32 c) const {
+    if(!set.contains(c)) {
+        // Not covered by the set: norm2 is not consulted at all.
+        return 0;
+    }
+    return norm2.getCombiningClass(c);
+}
+
UBool
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(s, errorCode);
return TRUE;
}
+ virtual uint8_t  // Reports the combining class that impl assigns to code point c.
+ getCombiningClass(UChar32 c) const {
+ return impl.getCC(impl.getNorm16(c));  // take impl.getNorm16()'s result for c and let impl.getCC() turn it into the class
+ }
+
// quick checks
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
return NULL;
}
+uint8_t  // Base-class fallback, used when a subclass does not override getCombiningClass().
+Normalizer2::getCombiningClass(UChar32 /*c*/) const {
+ return 0;  // the code point is ignored (parameter left unnamed); every input gets combining class 0
+}
+
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
U_NAMESPACE_END
}
}
+/**
+ * C API wrapper: forwards to Normalizer2::getCombiningClass().
+ * norm2 is dereferenced without a NULL check, so callers must pass a valid instance.
+ * @param norm2 UNormalizer2 instance; reinterpreted as a pointer to a C++ Normalizer2
+ * @param c code point
+ * @return the combining class reported by that Normalizer2 for c
+ */
+U_DRAFT uint8_t U_EXPORT2
+unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
+    const Normalizer2 *cppNorm2=reinterpret_cast<const Normalizer2 *>(norm2);
+    return cppNorm2->getCombiningClass(c);
+}
+
U_DRAFT UBool U_EXPORT2
unorm2_isNormalized(const UNormalizer2 *norm2,
const UChar *s, int32_t length,
U_CAPI uint8_t U_EXPORT2
u_getCombiningClass(UChar32 c) {
UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+ const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
if(U_SUCCESS(errorCode)) {
- return impl->getCC(impl->getNorm16(c));
+ return nfd->getCombiningClass(c);
} else {
return 0;
}
virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
+ /**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param c code point
+ * @return c's combining class
+ * @draft ICU 49
+ */
+ virtual uint8_t
+ getCombiningClass(UChar32 c) const;
+
/**
* Tests if the string is normalized.
* Internally, in cases where the quickCheck() method would return "maybe"
virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const;
+ /**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param c code point
+ * @return c's combining class
+ * @draft ICU 49
+ */
+ virtual uint8_t
+ getCombiningClass(UChar32 c) const;
+
/**
* Tests if the string is normalized.
* For details see the Normalizer2 base class documentation.
/*
*******************************************************************************
*
-* Copyright (C) 2009-2010, International Business Machines
+* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
UChar32 c, UChar *decomposition, int32_t capacity,
UErrorCode *pErrorCode);
+/**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @return c's combining class
+ * @draft ICU 49
+ */
+U_DRAFT uint8_t U_EXPORT2
+unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
+
/**
* Tests if the string is normalized.
* Internally, in cases where the quickCheck() method would return "maybe"
UChar32 c;
int32_t j=i;
U16_PREV_UNSAFE(label, j, c);
- if(u_getCombiningClass(c)==9) {
+ if(uts46Norm2.getCombiningClass(c)==9) {
continue;
}
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
UChar32 c;
int32_t j=i;
U16_PREV_UNSAFE(label, j, c);
- if(u_getCombiningClass(c)!=9) {
+ if(uts46Norm2.getCombiningClass(c)!=9) {
return FALSE;
}
}
}
/* for each line of UnicodeData.txt, check some of the properties */
+/*
+ * Context handed to the UnicodeData.txt line callback through the
+ * parser's opaque context pointer.
+ *
+ * nfkc  - normalizer instance whose combining classes are compared against
+ *         the values read from the file (only when normalization is built in).
+ * dummy - stand-in member when UCONFIG_NO_NORMALIZATION is set;
+ *         presumably there only so that the struct is never empty.
+ */
+typedef struct UnicodeDataContext {
+#if !UCONFIG_NO_NORMALIZATION
+    const UNormalizer2 *nfkc;
+#else
+    const void *dummy;
+#endif
+} UnicodeDataContext;
+
/*
* ### TODO
* This test fails incorrectly if the First or Last code point of a repetitive area
int32_t i;
int8_t type;
+#if !UCONFIG_NO_NORMALIZATION
+ const UNormalizer2 *nfkc;
+#endif
+
/* get the character code, field 0 */
c=strtoul(fields[0][0], &end, 16);
if(end<=fields[0][0] || end!=fields[0][1]) {
if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
}
+ nfkc=((UnicodeDataContext *)context)->nfkc;
+ if(value!=unorm2_getCombiningClass(nfkc, c)) {
+ log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
+ }
#endif
/* get BiDi category, field 4 */
UChar32 c;
int8_t type;
+ UnicodeDataContext context;
+
u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
u_getUnicodeVersion(versionArray);
if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
}
errorCode=U_ZERO_ERROR;
- parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode);
+#if !UCONFIG_NO_NORMALIZATION
+ context.nfkc=unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ log_data_err("error: unable to open an NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
+ return;
+ }
+#endif
+ parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
if(U_FAILURE(errorCode)) {
return; /* if we couldn't parse UnicodeData.txt, we should return */
}
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2010, International Business Machines Corporation and
+ * Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
dataerrln("Normalizer2Factory::getNFCInstance() call failed - %s", u_errorName(status));
return;
}
- UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff]"), errorCode);
- UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]");
- UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]");
+ UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]"), errorCode);
FilteredNormalizer2 fn2(*nfcNorm2, filter);
UChar32 char32 = 0x0054;
errln("FilteredNormalizer2.hasBoundaryAfter() failed.");
}
+ UChar32 c;
+ for(c=0; c<=0x3ff; ++c) {
+ uint8_t expectedCC= filter.contains(c) ? nfcNorm2->getCombiningClass(c) : 0;
+ uint8_t cc=fn2.getCombiningClass(c);
+ if(cc!=expectedCC) {
+ errln(
+ UnicodeString("FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+")+
+ hex(c)+
+ ")==filtered NFC.getCC()");
+ }
+ }
+
+ UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]");
+ UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]");
fn2.append(newString1, newString2, errorCode);
if (U_FAILURE(errorCode)) {
errln("FilteredNormalizer2.append() failed.");