return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
}
-/* Checks if the Unicode character can start a Unicode identifier.*/
-U_CAPI UBool U_EXPORT2
-u_isIDStart(UChar32 c) {
- /* same as u_isalpha() */
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
-}
-
-/* Checks if the Unicode character can be a Unicode identifier part other than starting the
- identifier.*/
-U_CAPI UBool U_EXPORT2
-u_isIDPart(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(
- (CAT_MASK(props)&
- (U_GC_ND_MASK|U_GC_NL_MASK|
- U_GC_L_MASK|
- U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
- )!=0 ||
- u_isIDIgnorable(c));
-}
-
/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
U_CAPI UBool U_EXPORT2
u_isIDIgnorable(UChar32 c) {
/**
* Determines if the specified character is permissible as the
- * first character in an identifier according to Unicode
- * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
- * True for characters with general categories "L" (letters) and "Nl" (letter numbers).
+ * first character in an identifier as ID_Start according to
+ * Unicode® Standard Annex #31 UNICODE IDENTIFIER AND PATTERN SYNTAX
*
* Same as java.lang.Character.isUnicodeIdentifierStart().
* Same as UCHAR_ID_START
u_isIDStart(UChar32 c);
/**
- * Determines if the specified character is permissible
- * in an identifier according to Java.
- * True for characters with general categories "L" (letters),
- * "Nl" (letter numbers), "Nd" (decimal digits),
- * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
- * u_isIDIgnorable(c).
+ * Determines if the specified character is permissible as a
+ * character other than the first character in an identifier as ID_Continue
+ * according to Unicode® Standard Annex #31 UNICODE IDENTIFIER AND PATTERN SYNTAX
*
* Same as java.lang.Character.isUnicodeIdentifierPart().
* Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
* u_isIDIgnorable(c).
*
* @param c the code point to be tested
- * @return true if the code point may occur in an identifier according to Java
+ * @return true if the code point may occur in an identifier other than the
+ * first character.
*
* @see UCHAR_ID_CONTINUE
* @see u_isIDStart
}
}
+/* Checks whether c can start a Unicode identifier: returns its ID_Start binary property (UCHAR_ID_START). */
+U_CAPI UBool U_EXPORT2
+u_isIDStart(UChar32 c) {
+ return u_hasBinaryProperty(c, UCHAR_ID_START);
+}
+
+/* Checks whether c can be part of a Unicode identifier after its first character:
+ returns the ID_Continue binary property (UCHAR_ID_CONTINUE) of c. */
+U_CAPI UBool U_EXPORT2
+u_isIDPart(UChar32 c) {
+ return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE);
+}
+
U_CAPI UBool U_EXPORT2
u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which) {
if (s == nullptr && length != 0) { return false; }
const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
- const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
- const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
- const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
- const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
+ const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061, 0x1885, 0x212e, 0x309b};
+ const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019, 0x2e2f};
+ const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045, 0x1886, 0x212e, 0x309c};
+ const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020, 0x2019, 0x2e2f};
const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
group: icu_utility_with_props
util_props.o
deps
- icu_utility uchar ucase
+ icu_utility uchar ucase uprops
group: icu_utility
util.o
*/
public static boolean isUnicodeIdentifierPart(int ch)
{
- // if props == 0, it will just fall through and return false
- // cat == format
- return ((1 << getType(ch))
- & ((1 << UCharacterCategory.UPPERCASE_LETTER)
- | (1 << UCharacterCategory.LOWERCASE_LETTER)
- | (1 << UCharacterCategory.TITLECASE_LETTER)
- | (1 << UCharacterCategory.MODIFIER_LETTER)
- | (1 << UCharacterCategory.OTHER_LETTER)
- | (1 << UCharacterCategory.LETTER_NUMBER)
- | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
- | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
- | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
- | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
- || isIdentifierIgnorable(ch);
+ return hasBinaryProperty(ch, UProperty.ID_CONTINUE); // tests the ID_Continue binary property of the single code point ch
}
/**
*/
public static boolean isUnicodeIdentifierStart(int ch)
{
- /*int cat = getType(ch);*/
- // if props == 0, it will just fall through and return false
- return ((1 << getType(ch))
- & ((1 << UCharacterCategory.UPPERCASE_LETTER)
- | (1 << UCharacterCategory.LOWERCASE_LETTER)
- | (1 << UCharacterCategory.TITLECASE_LETTER)
- | (1 << UCharacterCategory.MODIFIER_LETTER)
- | (1 << UCharacterCategory.OTHER_LETTER)
- | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
+ return hasBinaryProperty(ch, UProperty.ID_START); // tests the ID_Start binary property of the single code point ch
}
/**
@Test
public void TestIdentifier()
{
- int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061};
- int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019};
- int unicodeidpart[] = {0x005f, 0x000032, 0x000045};
- int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020};
+ int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061, 0x001885, 0x00212e, 0x00309b};
+ int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019, 0x002e2f};
+ int unicodeidpart[] = {0x005f, 0x000032, 0x000045, 0x001886, 0x00212e, 0x00309c};
+ int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020, 0x002019, 0x002e2f};
int idignore[] = {0x0006, 0x0010, 0x206b};
int nonidignore[] = {0x0075, 0x0000a3, 0x000061};
- int size = unicodeidstart.length;
- for (int i = 0; i < size; i ++)
+ for (int i = 0; i < unicodeidstart.length; i ++)
{
if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i]))
{
" expected to be a unicode identifier start character");
break;
}
+ }
+ for (int i = 0; i < nonunicodeidstart.length; i ++)
+ {
if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i]))
{
errln("FAIL \\u" + hex(nonunicodeidstart[i]) +
"character");
break;
}
+ }
+ for (int i = 0; i < unicodeidpart.length; i ++)
+ {
if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i]))
{
errln("FAIL \\u" + hex(unicodeidpart[i]) +
" expected to be a unicode identifier part character");
break;
}
+ }
+ for (int i = 0; i < nonunicodeidpart.length; i ++)
+ {
if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i]))
{
errln("FAIL \\u" + hex(nonunicodeidpart[i]) +
"character");
break;
}
+ }
+ for (int i = 0; i < idignore.length; i ++)
+ {
if (!UCharacter.isIdentifierIgnorable(idignore[i]))
{
errln("FAIL \\u" + hex(idignore[i]) +
" expected to be a ignorable unicode character");
break;
}
+ }
+ for (int i = 0; i < nonidignore.length; i ++)
+ {
if (UCharacter.isIdentifierIgnorable(nonidignore[i]))
{
errln("FAIL \\u" + hex(nonidignore[i]) +
" expected not to be a ignorable unicode character");
break;
}
- logln("Ok \\u" + hex(unicodeidstart[i]) + " and \\u" +
- hex(nonunicodeidstart[i]) + " and \\u" +
- hex(unicodeidpart[i]) + " and \\u" +
- hex(nonunicodeidpart[i]) + " and \\u" +
- hex(idignore[i]) + " and \\u" + hex(nonidignore[i]));
}
}