granicus.if.org Git - icu/commitdiff
ICU-10706 Spoof Check, bug fix & test for Identifier Restriction Level; update spoof...
author Andy Heninger <andy.heninger@gmail.com>
Wed, 19 Feb 2014 23:53:30 +0000 (23:53 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Wed, 19 Feb 2014 23:53:30 +0000 (23:53 +0000)
X-SVN-Rev: 35175

icu4c/source/i18n/identifier_info.cpp
icu4c/source/i18n/identifier_info.h
icu4c/source/i18n/uspoof.cpp
icu4c/source/test/intltest/itspoof.cpp

index af2a19f24d913043046f57d5602e65bbbcab64d0..87ddda1a793262736fb84670f1ff9a4e9533aa25 100644 (file)
@@ -20,16 +20,17 @@ U_NAMESPACE_BEGIN
 
 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
 
-static UMutex gInitMutex = U_MUTEX_INITIALIZER;
-static UBool gStaticsAreInitialized = FALSE;
+static UnicodeSet *ASCII;
+static ScriptSet *JAPANESE;
+static ScriptSet *CHINESE;
+static ScriptSet *KOREAN;
+static ScriptSet *CONFUSABLE_WITH_LATIN;
+static UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER;
 
-UnicodeSet *IdentifierInfo::ASCII;
-ScriptSet *IdentifierInfo::JAPANESE;
-ScriptSet *IdentifierInfo::CHINESE;
-ScriptSet *IdentifierInfo::KOREAN;
-ScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN;
 
-UBool IdentifierInfo::cleanup() {
+U_CDECL_BEGIN
+static UBool U_CALLCONV
+IdentifierInfo_cleanup(void) {
     delete ASCII;
     ASCII = NULL;
     delete JAPANESE;
@@ -40,14 +41,30 @@ UBool IdentifierInfo::cleanup() {
     KOREAN = NULL;
     delete CONFUSABLE_WITH_LATIN;
     CONFUSABLE_WITH_LATIN = NULL;
-    gStaticsAreInitialized = FALSE;
+    gIdentifierInfoInitOnce.reset(); 
     return TRUE;
 }
 
-U_CDECL_BEGIN
-static UBool U_CALLCONV
-IdentifierInfo_cleanup(void) {
-    return IdentifierInfo::cleanup();
+static void U_CALLCONV
+IdentifierInfo_init(UErrorCode &status) {
+    ASCII    = new UnicodeSet(0, 0x7f);
+    JAPANESE = new ScriptSet();
+    CHINESE  = new ScriptSet();
+    KOREAN   = new ScriptSet();
+    CONFUSABLE_WITH_LATIN = new ScriptSet();
+    if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL 
+            || CONFUSABLE_WITH_LATIN == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    ASCII->freeze();
+    JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
+             .set(USCRIPT_KATAKANA, status);
+    CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
+    KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
+    CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
+              .set(USCRIPT_CHEROKEE, status);
+    ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
 }
 U_CDECL_END
 
@@ -55,33 +72,11 @@ U_CDECL_END
 IdentifierInfo::IdentifierInfo(UErrorCode &status):
          fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL), 
          fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {
+    umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status);
     if (U_FAILURE(status)) {
         return;
     }
-    {
-        Mutex lock(&gInitMutex);
-        if (!gStaticsAreInitialized) {
-            ASCII    = new UnicodeSet(0, 0x7f);
-            JAPANESE = new ScriptSet();
-            CHINESE  = new ScriptSet();
-            KOREAN   = new ScriptSet();
-            CONFUSABLE_WITH_LATIN = new ScriptSet();
-            if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL 
-                    || CONFUSABLE_WITH_LATIN == NULL) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return;
-            }
-            ASCII->freeze();
-            JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
-                     .set(USCRIPT_KATAKANA, status);
-            CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
-            KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
-            CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
-                      .set(USCRIPT_CHEROKEE, status);
-            ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
-            gStaticsAreInitialized = TRUE;
-        }
-    }
+    
     fIdentifier = new UnicodeString();
     fRequiredScripts = new ScriptSet();
     fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);
index c7cab61792697fb178eefc8ba6c0ee75a082e976..ab0b8b23a8501d37051f2cd99ef869ed9c295b43 100644 (file)
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2013, International Business Machines
+*   Copyright (C) 2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
@@ -171,11 +171,6 @@ class U_I18N_API IdentifierInfo : public UMemory {
      */
     static UnicodeString &displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status);
 
-    /**
-     * Static memory cleanup function.
-     * @internal
-     */
-    static UBool      cleanup();
   private:
 
     IdentifierInfo  & clear();
@@ -187,15 +182,6 @@ class U_I18N_API IdentifierInfo : public UMemory {
     ScriptSet         *fCommonAmongAlternates;
     UnicodeSet        *fNumerics;
     UnicodeSet        *fIdentifierProfile;
-
-    static UnicodeSet *ASCII;
-    static ScriptSet  *JAPANESE;
-    static ScriptSet  *CHINESE;
-    static ScriptSet  *KOREAN;
-    static ScriptSet  *CONFUSABLE_WITH_LATIN;
-
-
-
 };
 
 U_NAMESPACE_END
index 23a9de844a89023065d9858108df0fcae6eb62e1..b9051b26b43ba07314d0215fa2d5fb339054e3a3 100644 (file)
@@ -198,7 +198,7 @@ uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
 
     // Verify that the requested checks are all ones (bits) that 
     //   are acceptable, known values.
-    if (checks & ~USPOOF_ALL_CHECKS) {
+    if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
         *status = U_ILLEGAL_ARGUMENT_ERROR; 
         return;
     }
index 527f2f031d46b9b39b46a80f5315e16c636de9f8..7e5c8550fd2ad86f905bdf2bee89687748bfa156 100644 (file)
@@ -682,8 +682,9 @@ void IntlTestSpoof::testRestrictionLevel() {
     };
     char msgBuffer[100];
 
-    URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_HIGHLY_RESTRICTIVE, 
-         USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE, USPOOF_UNRESTRICTIVE};
+    URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_SINGLE_SCRIPT_RESTRICTIVE, 
+         USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE, 
+         USPOOF_UNRESTRICTIVE};
     
     UErrorCode status = U_ZERO_ERROR;
     IdentifierInfo idInfo(status);
@@ -706,14 +707,30 @@ void IntlTestSpoof::testRestrictionLevel() {
             uspoof_setChecks(sc, USPOOF_RESTRICTION_LEVEL, &status);
             uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status);
             uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);
-            UBool actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status) != 0;
-
+            int32_t actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status);
+            
             // we want to fail if the text is (say) MODERATE and the testLevel is ASCII
-            UBool expectedFailure = expectedLevel > levelSetInSpoofChecker ||
-                                    !uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString);
-            sprintf(msgBuffer, "testNum = %d, levelIndex = %d", testNum, levelIndex);
-            TEST_ASSERT_MSG(expectedFailure == actualValue, msgBuffer);
+            int32_t expectedValue = 0;
+            if (expectedLevel > levelSetInSpoofChecker) {
+                expectedValue |= USPOOF_RESTRICTION_LEVEL;
+            }
+            if (!uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString)) {
+                expectedValue |= USPOOF_CHAR_LIMIT;
+            }
+            sprintf(msgBuffer, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x",
+                    testNum, levelIndex, expectedValue, actualValue);
+            TEST_ASSERT_MSG(expectedValue == actualValue, msgBuffer);
+            TEST_ASSERT_SUCCESS(status);
+
+            // Run the same check again, with the Spoof Checker configured to return
+            // the actual restriction level.
+            uspoof_setChecks(sc, USPOOF_AUX_INFO | USPOOF_RESTRICTION_LEVEL, &status);
+            uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status);
+            uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);
+            int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status);
             TEST_ASSERT_SUCCESS(status);
+            TEST_ASSERT_EQ(expectedLevel, result & USPOOF_RESTRICTION_LEVEL_MASK);
+            TEST_ASSERT_EQ(expectedValue, result & USPOOF_ALL_CHECKS);
             uspoof_close(sc);
         }
     }