#******************************************************************************
#
-# Copyright (C) 1999-2012, International Business Machines
+# Copyright (C) 1999-2013, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
chariter.o schriter.o uchriter.o uiter.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
-uscript.o usc_impl.o unames.o \
+uscript.o uscript_props.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
<ClCompile Include="uprops.cpp" />\r
<ClCompile Include="usc_impl.c" />\r
<ClCompile Include="uscript.c" />\r
+ <ClCompile Include="uscript_props.cpp" />\r
<ClCompile Include="uset.cpp" />\r
<ClCompile Include="uset_props.cpp" />\r
<ClCompile Include="usetiter.cpp" />\r
<ClCompile Include="uscript.c">\r
<Filter>properties &amp; sets</Filter>\r
</ClCompile>\r
+ <ClCompile Include="uscript_props.cpp">\r
+ <Filter>properties &amp; sets</Filter>\r
+ </ClCompile>\r
<ClCompile Include="uset.cpp">\r
<Filter>properties &amp; sets</Filter>\r
</ClCompile>\r
/*
**********************************************************************
- * Copyright (C) 1997-2012, International Business Machines
+ * Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
UErrorCode *errorCode);
#endif /* U_HIDE_DRAFT_API */
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Script usage constants.
+ * See UAX #31 Unicode Identifier and Pattern Syntax.
+ * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
+ *
+ * @draft ICU 51
+ */
+typedef enum UScriptUsage {
+    /** The script is not encoded in Unicode. @draft ICU 51 */
+    USCRIPT_USAGE_NOT_ENCODED = 0,
+    /** The usage of the script is unknown. @draft ICU 51 */
+    USCRIPT_USAGE_UNKNOWN = 1,
+    /** The script is a Candidate for Exclusion from Identifiers. @draft ICU 51 */
+    USCRIPT_USAGE_EXCLUDED = 2,
+    /** The script is a Limited Use script. @draft ICU 51 */
+    USCRIPT_USAGE_LIMITED_USE = 3,
+    /** The script is an Aspirational Use script. @draft ICU 51 */
+    USCRIPT_USAGE_ASPIRATIONAL = 4,
+    /** The script is a Recommended script. @draft ICU 51 */
+    USCRIPT_USAGE_RECOMMENDED = 5
+} UScriptUsage;
+
+/**
+ * Writes the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @param dest output string array
+ * @param capacity number of UChars in the dest array
+ * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
+ * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
+ * @draft ICU 51
+ */
+U_DRAFT int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+class UnicodeString;
+U_NAMESPACE_END
+
+/**
+ * Returns the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @return the sample character string
+ * @draft ICU 51
+ */
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script);
+
+#endif
+
+/**
+ * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
+ * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
+ *
+ * @param script script code
+ * @return script usage
+ * @see UScriptUsage
+ * @draft ICU 51
+ */
+U_DRAFT UScriptUsage U_EXPORT2
+uscript_getUsage(UScriptCode script);
+
+/**
+ * Returns TRUE if the script is written right-to-left.
+ * For example, Arab and Hebr.
+ *
+ * @param script script code
+ * @return TRUE if the script is right-to-left
+ * @draft ICU 51
+ */
+U_DRAFT UBool U_EXPORT2
+uscript_isRightToLeft(UScriptCode script);
+
+/**
+ * Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
+ * Such a script typically requires dictionary-based line breaking.
+ * For example, Hani and Thai.
+ *
+ * @param script script code
+ * @return TRUE if the script allows line breaks between letters
+ * @draft ICU 51
+ */
+U_DRAFT UBool U_EXPORT2
+uscript_breaksBetweenLetters(UScriptCode script);
+
+/**
+ * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary.
+ * For example, Latn and Cyrl.
+ *
+ * @param script script code
+ * @return TRUE if the script is cased
+ * @draft ICU 51
+ */
+U_DRAFT UBool U_EXPORT2
+uscript_isCased(UScriptCode script);
+
+#endif /* U_HIDE_DRAFT_API */
+
#endif
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: uscript_props.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2013feb16
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uscript.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+namespace {
+
+// Script metadata (script properties).
+// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
+//
+// Each script's properties are packed into a single int32_t word. The
+// constants below are OR-ed together with a sample code point to build
+// one such word.
+
+// 0 = NOT_ENCODED, no sample character, default false script properties.
+// Bits 20.. 0: sample character
+
+// Bits 23..21: usage
+// uscript_getUsage() extracts this 3-bit field and casts it directly to
+// UScriptUsage, so the field values 1..5 must stay in the same order as
+// the enumerators of that enum (0 is the "not encoded" default).
+const int32_t UNKNOWN = 1 << 21;
+const int32_t EXCLUSION = 2 << 21;
+const int32_t LIMITED_USE = 3 << 21;
+const int32_t ASPIRATIONAL = 4 << 21;
+const int32_t RECOMMENDED = 5 << 21;
+
+// Bits 31..24: Single-bit flags
+// RTL is tested by uscript_isRightToLeft().
+const int32_t RTL = 1 << 24;
+// LB_LETTERS is tested by uscript_breaksBetweenLetters().
+const int32_t LB_LETTERS = 1 << 25;
+// CASED is tested by uscript_isCased().
+const int32_t CASED = 1 << 26;
+
+// Packed script properties, indexed by the numeric UScriptCode value
+// (getScriptProps() reads SCRIPT_PROPS[script]).
+// Each non-zero entry is a sample code point OR-ed with one usage value and
+// any applicable single-bit flags. A plain 0 entry marks a script code that
+// has no sample character, usage NOT_ENCODED and all flags false.
+// The entries are generated; regenerate them with the tool named below
+// rather than editing them by hand.
+const int32_t SCRIPT_PROPS[] = {
+ // Begin copy-paste output from
+ // icu/tools/trunk/unicode/py/parsescriptmetadata.py
+ 0x0040 | UNKNOWN, // Zyyy
+ 0x0308 | UNKNOWN, // Zinh
+ 0x0628 | RECOMMENDED | RTL, // Arab
+ 0x0531 | RECOMMENDED | CASED, // Armn
+ 0x0995 | RECOMMENDED, // Beng
+ 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
+ 0x13C4 | LIMITED_USE, // Cher
+ 0x03E2 | EXCLUSION | CASED, // Copt
+ 0x042F | RECOMMENDED | CASED, // Cyrl
+ 0x10414 | EXCLUSION | CASED, // Dsrt
+ 0x0905 | RECOMMENDED, // Deva
+ 0x12A0 | RECOMMENDED, // Ethi
+ 0x10D3 | RECOMMENDED, // Geor
+ 0x10330 | EXCLUSION, // Goth
+ 0x03A9 | RECOMMENDED | CASED, // Grek
+ 0x0A95 | RECOMMENDED, // Gujr
+ 0x0A15 | RECOMMENDED, // Guru
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
+ 0xAC00 | RECOMMENDED, // Hang
+ 0x05D0 | RECOMMENDED | RTL, // Hebr
+ 0x304B | RECOMMENDED | LB_LETTERS, // Hira
+ 0x0C95 | RECOMMENDED, // Knda
+ 0x30AB | RECOMMENDED | LB_LETTERS, // Kana
+ 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
+ 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
+ 0x004C | RECOMMENDED | CASED, // Latn
+ 0x0D15 | RECOMMENDED, // Mlym
+ 0x1826 | ASPIRATIONAL, // Mong
+ 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
+ 0x168F | EXCLUSION, // Ogam
+ 0x10300 | EXCLUSION, // Ital
+ 0x0B15 | RECOMMENDED, // Orya
+ 0x16A0 | EXCLUSION, // Runr
+ 0x0D85 | RECOMMENDED, // Sinh
+ 0x0710 | LIMITED_USE | RTL, // Syrc
+ 0x0B95 | RECOMMENDED, // Taml
+ 0x0C15 | RECOMMENDED, // Telu
+ 0x078C | RECOMMENDED | RTL, // Thaa
+ 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
+ 0x0F40 | RECOMMENDED, // Tibt
+ 0x14C0 | ASPIRATIONAL, // Cans
+ 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii
+ 0x1703 | EXCLUSION, // Tglg
+ 0x1723 | EXCLUSION, // Hano
+ 0x1743 | EXCLUSION, // Buhd
+ 0x1763 | EXCLUSION, // Tagb
+ 0x2800 | UNKNOWN, // Brai
+ 0x10800 | EXCLUSION | RTL, // Cprt
+ 0x1900 | LIMITED_USE, // Limb
+ 0x10000 | EXCLUSION, // Linb
+ 0x10480 | EXCLUSION, // Osma
+ 0x10450 | EXCLUSION, // Shaw
+ 0x1950 | LIMITED_USE | LB_LETTERS, // Tale
+ 0x10380 | EXCLUSION, // Ugar
+ 0,
+ 0x1A00 | EXCLUSION, // Bugi
+ 0x2C00 | EXCLUSION | CASED, // Glag
+ 0x10A00 | EXCLUSION | RTL, // Khar
+ 0xA800 | LIMITED_USE, // Sylo
+ 0x1980 | LIMITED_USE | LB_LETTERS, // Talu
+ 0x2D30 | ASPIRATIONAL, // Tfng
+ 0x103A0 | EXCLUSION, // Xpeo
+ 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali
+ 0x1BC0 | LIMITED_USE, // Batk
+ 0,
+ 0x11005 | EXCLUSION, // Brah
+ 0xAA00 | LIMITED_USE, // Cham
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x13153 | EXCLUSION, // Egyp
+ 0,
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
+ 0,
+ 0,
+ 0,
+ 0xA984 | LIMITED_USE | LB_LETTERS, // Java
+ 0xA90A | LIMITED_USE, // Kali
+ 0,
+ 0,
+ 0x1C00 | LIMITED_USE, // Lepc
+ 0,
+ 0x0840 | LIMITED_USE | RTL, // Mand
+ 0,
+ 0x10980 | EXCLUSION | RTL, // Mero
+ 0x07CA | LIMITED_USE | RTL, // Nkoo
+ 0x10C00 | EXCLUSION | RTL, // Orkh
+ 0,
+ 0xA840 | EXCLUSION, // Phag
+ 0x10900 | EXCLUSION | RTL, // Phnx
+ 0x16F00 | ASPIRATIONAL, // Plrd
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0xA549 | LIMITED_USE, // Vaii
+ 0,
+ 0x12000 | EXCLUSION, // Xsux
+ 0,
+ 0xFFFF | UNKNOWN, // Zzzz
+ 0x102A0 | EXCLUSION, // Cari
+ 0x304B | RECOMMENDED | LB_LETTERS, // Jpan
+ 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
+ 0x10280 | EXCLUSION, // Lyci
+ 0x10920 | EXCLUSION | RTL, // Lydi
+ 0x1C5A | LIMITED_USE, // Olck
+ 0xA930 | EXCLUSION, // Rjng
+ 0xA882 | LIMITED_USE, // Saur
+ 0,
+ 0x1B83 | LIMITED_USE, // Sund
+ 0,
+ 0xABC0 | LIMITED_USE, // Mtei
+ 0x10840 | EXCLUSION | RTL, // Armi
+ 0x10B00 | EXCLUSION | RTL, // Avst
+ 0x11103 | LIMITED_USE, // Cakm
+ 0xAC00 | RECOMMENDED, // Kore
+ 0x11083 | EXCLUSION, // Kthi
+ 0,
+ 0x10B60 | EXCLUSION | RTL, // Phli
+ 0,
+ 0,
+ 0x10B40 | EXCLUSION | RTL, // Prti
+ 0x0800 | EXCLUSION | RTL, // Samr
+ 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
+ 0,
+ 0,
+ 0xA6A0 | LIMITED_USE, // Bamu
+ 0xA4D0 | LIMITED_USE, // Lisu
+ 0,
+ 0x10A60 | EXCLUSION | RTL, // Sarb
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x109A0 | EXCLUSION | RTL, // Merc
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x11183 | EXCLUSION, // Shrd
+ 0x110D0 | EXCLUSION, // Sora
+ 0x11680 | EXCLUSION, // Takr
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ // End copy-paste from parsescriptmetadata.py
+};
+
+// Fetches the packed property word for a script code.
+// Any code that does not index into SCRIPT_PROPS (negative, or at/after the
+// end of the table) yields 0, i.e. the "not encoded" defaults.
+int32_t getScriptProps(UScriptCode script) {
+    if (script < 0 || script >= LENGTHOF(SCRIPT_PROPS)) {
+        return 0;
+    }
+    return SCRIPT_PROPS[script];
+}
+
+} // namespace
+
+// Writes the script's sample code point into dest as UTF-16 and returns its
+// length in UChars (0 when the script has no sample character).
+// The sample is only stored when it fits into capacity; termination and the
+// resulting warning/overflow status are delegated to u_terminateUChars().
+U_CAPI int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(capacity < 0 || (dest == NULL && capacity > 0)) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // The low 21 bits of the property word hold the sample code point.
+    int32_t codePoint = getScriptProps(script) & 0x1fffff;
+    int32_t length = 0;
+    if(codePoint != 0) {
+        length = U16_LENGTH(codePoint);
+        if(capacity >= length) {
+            int32_t offset = 0;
+            U16_APPEND_UNSAFE(dest, offset, codePoint);
+        }
+    }
+    return u_terminateUChars(dest, capacity, length, pErrorCode);
+}
+
+// Returns the script's sample code point as a UnicodeString; the string is
+// empty when the property word carries no sample character.
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script) {
+    icu::UnicodeString result;
+    // The low 21 bits of the property word hold the sample code point.
+    int32_t codePoint = getScriptProps(script) & 0x1fffff;
+    if(codePoint == 0) {
+        return result;
+    }
+    result.append(codePoint);
+    return result;
+}
+
+// Extracts the 3-bit usage field (bits 23..21) of the property word and
+// returns it as a UScriptUsage value.
+U_CAPI UScriptUsage U_EXPORT2
+uscript_getUsage(UScriptCode script) {
+    const int32_t usageBits = (getScriptProps(script) >> 21) & 7;
+    return static_cast<UScriptUsage>(usageBits);
+}
+
+// Reports whether the RTL flag is set in the script's property word.
+U_CAPI UBool U_EXPORT2
+uscript_isRightToLeft(UScriptCode script) {
+    const int32_t props = getScriptProps(script);
+    return (props & RTL) != 0;
+}
+
+// Reports whether the LB_LETTERS flag is set in the script's property word.
+U_CAPI UBool U_EXPORT2
+uscript_breaksBetweenLetters(UScriptCode script) {
+    const int32_t props = getScriptProps(script);
+    return (props & LB_LETTERS) != 0;
+}
+
+// Reports whether the CASED flag is set in the script's property word.
+U_CAPI UBool U_EXPORT2
+uscript_isCased(UScriptCode script) {
+    const int32_t props = getScriptProps(script);
+    return (props & CASED) != 0;
+}
/********************************************************************
- * Copyright (c) 1997-2012, International Business Machines
+ * Copyright (c) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************/
}
}
+/*
+ * API and code-coverage test for the C script metadata functions:
+ * uscript_getSampleString(), uscript_getUsage(), uscript_isRightToLeft(),
+ * uscript_breaksBetweenLetters() and uscript_isCased().
+ * Each sub-check starts from a clean error code so that one failure does not
+ * make the following checks fail as well.
+ */
+void TestScriptMetadataAPI() {
+    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
+    UErrorCode errorCode=U_ZERO_ERROR;
+    UChar sample[8];
+
+    /* Normal call: one Latin sample code unit, followed by a terminating NUL. */
+    if(uscript_getSampleString(USCRIPT_LATIN, sample, LENGTHOF(sample), &errorCode)!=1 ||
+            U_FAILURE(errorCode) ||
+            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
+            sample[1]!=0) {
+        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
+    }
+    /* Preflighting with capacity 0: length is still returned, dest is untouched. */
+    errorCode=U_ZERO_ERROR;
+    sample[0]=0xfffe;
+    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
+            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
+            sample[0]!=0xfffe) {
+        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
+    }
+    /* Script code below the valid range: empty, NUL-terminated string. */
+    errorCode=U_ZERO_ERROR;
+    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, LENGTHOF(sample), &errorCode)!=0 ||
+            U_FAILURE(errorCode) ||
+            sample[0]!=0) {
+        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
+    }
+    /* Script code above the valid range with capacity 0: empty, cannot be terminated. */
+    errorCode=U_ZERO_ERROR;
+    sample[0]=0xfffe;
+    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
+            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
+            sample[0]!=0xfffe) {
+        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
+    }
+
+    /*
+     * One script per usage category, plus both out-of-range bounds:
+     * USCRIPT_INVALID_CODE below the valid range and USCRIPT_CODE_LIMIT above it.
+     */
+    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
+            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
+            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
+            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
+            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
+            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
+            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
+        log_err("uscript_getUsage() failed\n");
+    }
+
+    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
+            uscript_isRightToLeft(USCRIPT_CIRTH) ||
+            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
+            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
+        log_err("uscript_isRightToLeft() failed\n");
+    }
+
+    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
+            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
+            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
+            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
+        log_err("uscript_breaksBetweenLetters() failed\n");
+    }
+
+    if(uscript_isCased(USCRIPT_CIRTH) ||
+            uscript_isCased(USCRIPT_HAN) ||
+            !uscript_isCased(USCRIPT_LATIN) ||
+            !uscript_isCased(USCRIPT_GREEK)) {
+        log_err("uscript_isCased() failed\n");
+    }
+}
+
void TestBinaryValues() {
/*
* Unicode 5.1 explicitly defines binary property value aliases.
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2003-2010, International Business Machines Corporation and
+ * Copyright (c) 2003-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
void TestUScriptCodeAPI(void);
void TestHasScript(void);
void TestGetScriptExtensions(void);
+void TestScriptMetadataAPI(void);
void TestBinaryValues(void);
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2012, International Business Machines Corporation and
+ * Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*******************************************************************************
addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
+ addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2011, International Business Machines Corporation and
+ * Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/putil.h"
+#include "unicode/uscript.h"
#include "cstring.h"
#include "hash.h"
#include "patternprops.h"
TESTCASE_AUTO(TestBinaryValues);
TESTCASE_AUTO(TestConsistency);
TESTCASE_AUTO(TestPatternProperties);
+ TESTCASE_AUTO(TestScriptMetadata);
TESTCASE_AUTO_END;
}
}
return same;
}
+
+namespace {
+
+/**
+ * Maps a special script code to the most common script of its encoded characters:
+ * Hans and Hant map to Hani, Jpan to Hira, and Kore to Hang.
+ * Every other script code is returned unchanged.
+ */
+UScriptCode getCharScript(UScriptCode script) {
+    if(script == USCRIPT_SIMPLIFIED_HAN || script == USCRIPT_TRADITIONAL_HAN) {
+        return USCRIPT_HAN;
+    }
+    if(script == USCRIPT_JAPANESE) {
+        return USCRIPT_HIRAGANA;
+    }
+    if(script == USCRIPT_KOREAN) {
+        return USCRIPT_HANGUL;
+    }
+    return script;
+}
+
+} // namespace
+
+// Cross-checks the script metadata API against Unicode character properties
+// for every script code below USCRIPT_CODE_LIMIT.
+// Two working sets start out holding all right-to-left characters and all
+// uppercase characters (minus the exclusions below). Each script that reports
+// itself as RTL or cased removes its own characters from the matching set;
+// both sets must be empty at the end, which shows that no RTL or cased
+// character belongs to a script that lacks the corresponding flag.
+void UnicodeTest::TestScriptMetadata() {
+ IcuTestErrorCode errorCode(*this, "TestScriptMetadata()");
+ // Assigned characters with a right-to-left bidi class, excluding script Common.
+ UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode);
+ // So far, sample characters are uppercase.
+ // Georgian is special.
+ UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode);
+ for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) {
+ UScriptCode sc = (UScriptCode)sci;
+ // Run the test with -v to see which script has failures:
+ // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL
+ logln(uscript_getShortName(sc));
+ UScriptUsage usage = uscript_getUsage(sc);
+ UnicodeString sample = uscript_getSampleUnicodeString(sc);
+ // All characters whose Script property value is sc.
+ UnicodeSet scriptSet;
+ scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode);
+ if(usage == USCRIPT_USAGE_NOT_ENCODED) {
+ // A script that is not encoded must report all-default metadata
+ // and must not have any characters assigned to it.
+ assertTrue("not encoded, no sample", sample.isEmpty());
+ assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc));
+ assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc));
+ assertFalse("not encoded, not cased", uscript_isCased(sc));
+ assertTrue("not encoded, no characters", scriptSet.isEmpty());
+ } else {
+ assertFalse("encoded, has a sample character", sample.isEmpty());
+ UChar32 firstChar = sample.char32At(0);
+ // Special codes (Hans, Hant, Jpan, Kore) borrow the sample of another script.
+ UScriptCode charScript = getCharScript(sc);
+ assertEquals("script(sample(script))",
+ charScript, uscript_getScript(firstChar, errorCode));
+ // The flags must agree with the properties of the sample character.
+ // These checks run before this script's characters are removed below.
+ assertEquals("RTL vs. set", rtl.contains(firstChar), uscript_isRightToLeft(sc));
+ assertEquals("cased vs. set", cased.contains(firstChar), uscript_isCased(sc));
+ // Only scripts that map to themselves are expected to own characters.
+ assertEquals("encoded, has characters", sc == charScript, !scriptSet.isEmpty());
+ if(uscript_isRightToLeft(sc)) {
+ rtl.removeAll(scriptSet);
+ }
+ if(uscript_isCased(sc)) {
+ cased.removeAll(scriptSet);
+ }
+ }
+ }
+ // Anything left over belongs to a script that did not claim the property.
+ UnicodeString pattern;
+ assertEquals("no remaining RTL characters",
+ UnicodeString("[]"), rtl.toPattern(pattern));
+ assertEquals("no remaining cased characters",
+ UnicodeString("[]"), cased.toPattern(pattern));
+
+ // The line-break flag has no character property to compare against in this
+ // test, so it is only spot-checked.
+ assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN));
+ assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI));
+ assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN));
+}
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2011, International Business Machines Corporation and
+ * Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
void TestBinaryValues();
void TestConsistency();
void TestPatternProperties();
+ void TestScriptMetadata();
private: