granicus.if.org Git - icu/commitdiff
ICU-8711 Move Thai and Khmer test data from dedicated file to standard test data...
author Andy Heninger <andy.heninger@gmail.com>
Fri, 20 Jan 2012 02:35:00 +0000 (02:35 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Fri, 20 Jan 2012 02:35:00 +0000 (02:35 +0000)
X-SVN-Rev: 31234

icu4c/source/test/intltest/Makefile.in
icu4c/source/test/intltest/dicttest.cpp [deleted file]
icu4c/source/test/intltest/dicttest.h [deleted file]
icu4c/source/test/intltest/intltest.vcxproj
icu4c/source/test/intltest/intltest.vcxproj.filters
icu4c/source/test/intltest/itrbbi.cpp
icu4c/source/test/testdata/rbbitst.txt

diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index 295e83141b0451dc1becb171bfdb0c9664c6a732..d4f7e7c313172fa303600ce389355e41a9ab6a6e 100644 (file)
@@ -1,6 +1,6 @@
 #******************************************************************************
 #
-#   Copyright (C) 1999-2011, International Business Machines
+#   Copyright (C) 1999-2012, International Business Machines
 #   Corporation and others.  All Rights Reserved.
 #
 #******************************************************************************
@@ -48,7 +48,7 @@ tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \
 tsmthred.o tsnmfmt.o tsputil.o tstnrapi.o tstnorm.o tzbdtest.o         \
 tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o strcase.o transtst.o strtest.o thcoll.o \
 bytestrietest.o ucharstrietest.o \
-itrbbi.o rbbiapts.o dicttest.o rbbitst.o ittrans.o transapi.o cpdtrtst.o \
+itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o \
 testutil.o transrt.o trnserr.o normconf.o sfwdchit.o \
 jamotest.o srchtest.o reptest.o regextst.o \
 itrbnf.o itrbnfrt.o itrbnfp.o ucaconf.o icusvtst.o \
diff --git a/icu4c/source/test/intltest/dicttest.cpp b/icu4c/source/test/intltest/dicttest.cpp
deleted file mode 100644 (file)
index c985a28..0000000
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
-**********************************************************************
-* Copyright (C) 2011-2011, International Business Machines Corporation 
-* and others.  All Rights Reserved.
-**********************************************************************
-************************************************************************
-*   Date          Name        Description
-*   05/14/2011    grhoten     Creation.
-************************************************************************/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "dicttest.h"
-#include "textfile.h"
-#include "uvector.h"
-#include "unicode/rbbi.h"
-
-void DictionaryWordTest::TestThaiBreaks() {
-    UErrorCode status=U_ZERO_ERROR;
-    BreakIterator* b;
-    Locale locale = Locale("th");
-    int32_t p, index;
-    UChar c[]= { 
-            0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B, 
-            0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19, 
-            0x0E16, 0x0E49, 0x0E33, 0x0000
-    };
-    int32_t expectedWordResult[] = {
-            2, 3, 6, 10, 11, 15, 17, 20, 22
-    };
-    int32_t expectedLineResult[] = {
-            3, 6, 11, 15, 17, 20, 22
-    };
-
-    int32_t size = u_strlen(c);
-    UnicodeString text=UnicodeString(c);
-    
-    b = BreakIterator::createWordInstance(locale, status);
-    if (U_FAILURE(status)) {
-        errcheckln(status, "Unable to create thai word break iterator. - %s", u_errorName(status));
-        return;
-    }
-    b->setText(text);
-    p = index = 0;
-    while ((p=b->next())!=BreakIterator::DONE && p < size) {
-        if (p != expectedWordResult[index++]) {
-            errln("Incorrect break given by thai word break iterator. Expected: %d  Got: %d", expectedWordResult[index-1], p);
-        }
-    }
-    delete b;
-    
-    b = BreakIterator::createLineInstance(locale, status);
-    if (U_FAILURE(status)) {
-        errln("Unable to create thai line break iterator.");
-        return;
-    }
-    b->setText(text);
-    p = index = 0;
-    while ((p=b->next())!=BreakIterator::DONE && p < size) {
-        if (p != expectedLineResult[index++]) {
-            errln("Incorrect break given by thai line break iterator. Expected: %d  Got: %d", expectedLineResult[index-1], p);
-        }
-    }
-
-    delete b;
-}
-
-#define DICTIONARY_TEST_FILE "wordsegments.txt"
-
-void DictionaryWordTest::TestWordBoundaries() {
-    UErrorCode      status  = U_ZERO_ERROR;
-
-    TextFile phrases(DICTIONARY_TEST_FILE, "UTF8", status);
-    if (U_FAILURE(status)) {
-        dataerrln("Can't open "DICTIONARY_TEST_FILE": %s; skipping test",
-              u_errorName(status));
-        return;
-    }
-
-    // Due to how the word break iterator works,
-    // scripts for languages that use no spaces should use the correct dictionary by default.
-    BreakIterator *wb = BreakIterator::createWordInstance("en", status);
-    if (U_FAILURE(status)) {
-        dataerrln("Word break iterator can not be opened: %s; skipping test",
-              u_errorName(status));
-        return;
-    }
-
-    int32_t pos, pIdx;
-    int32_t testLines = 0;
-    UnicodeString phrase;
-    while (phrases.readLineSkippingComments(phrase, status, FALSE) && U_SUCCESS(status)) {
-        UVector breaks(status);
-
-        for (pIdx = 0; pIdx < phrase.length(); pIdx++) {
-            if (phrase.charAt(pIdx) == 0x007C /* | */) {
-                breaks.addElement(pIdx, status);
-                phrase.remove(pIdx, 1);
-            }
-        }
-        breaks.addElement(pIdx, status);
-
-        wb->setText(phrase);
-        int32_t brkArrPos = 0;
-        while ((pos=wb->next())!=BreakIterator::DONE) {
-            int32_t expectedPos = breaks.elementAti(brkArrPos);
-            if (expectedPos != pos) {
-                errln("Incorrect forward word break on line %d. Expected: %d  Got: %d",
-                    phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos);
-            }
-            brkArrPos++;
-        }
-        brkArrPos = breaks.size() - 1;
-        while ((pos=wb->previous())!=BreakIterator::DONE) {
-            brkArrPos--;
-            int32_t expectedPos = breaks.elementAti(brkArrPos);
-            if (expectedPos != pos) {
-                errln("Incorrect backward word break on line %d. Expected: %d  Got: %d",
-                    phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos);
-            }
-        }
-        testLines++;
-    }
-    delete wb;
-    logln("%d tests were run.", testLines);
-}
-
-void DictionaryWordTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
-{
-    if (exec) logln("TestSuite DictionaryWordTest: ");
-    TESTCASE_AUTO_BEGIN;
-    TESTCASE_AUTO(TestThaiBreaks);
-    TESTCASE_AUTO(TestWordBoundaries);
-    TESTCASE_AUTO_END;
-}
-
-
-#endif
diff --git a/icu4c/source/test/intltest/dicttest.h b/icu4c/source/test/intltest/dicttest.h
deleted file mode 100644 (file)
index ffce470..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-**********************************************************************
-* Copyright (C) 2011-2011, International Business Machines Corporation 
-* and others.  All Rights Reserved.
-**********************************************************************
-************************************************************************
-*   Date          Name        Description
-*   05/14/2011    grhoten     Creation.
-************************************************************************/
-
-#ifndef DICTTEST_H
-#define DICTTEST_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "intltest.h"
-
-
-class DictionaryWordTest: public IntlTest {
-public:
-    void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL );
-    void TestWordBoundaries();
-    void TestThaiBreaks();
-};
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif
-
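diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj
index 613d63564931417d65bb3f0da2a4758d2f000ec0..5a527d6c8151d6c555322423b2ac90a392b28a93 100644 (file)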
   </ItemDefinitionGroup>\r
   <ItemGroup>\r
     <ClCompile Include="bytestrietest.cpp" />\r
-    <ClCompile Include="dicttest.cpp" />\r
     <ClCompile Include="ucharstrietest.cpp" />\r
     <ClCompile Include="itrbbi.cpp" />\r
     <ClCompile Include="rbbiapts.cpp" />\r
     <ClCompile Include="bidiconf.cpp" />\r
   </ItemGroup>\r
   <ItemGroup>\r
-    <ClInclude Include="dicttest.h" />\r
     <ClInclude Include="itrbbi.h" />\r
     <ClInclude Include="rbbiapts.h" />\r
     <ClInclude Include="rbbitst.h" />\r
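diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters
index 11738b65b5dbf60171cbd07a19beba7eddd7a0dc..962d3df31c501cad4a2097ac8b29e6f1b88ecf63 100644 (file)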
     <ClCompile Include="alphaindextst.cpp">\r
       <Filter>collation</Filter>\r
     </ClCompile>\r
-    <ClCompile Include="dicttest.cpp">\r
-      <Filter>break iteration</Filter>\r
-    </ClCompile>\r
   </ItemGroup>\r
   <ItemGroup>\r
     <ClInclude Include="itrbbi.h">\r
     <ClInclude Include="alphaindextst.h">\r
       <Filter>collation</Filter>\r
     </ClInclude>\r
-    <ClInclude Include="dicttest.h">\r
-      <Filter>break iteration</Filter>\r
-    </ClInclude>\r
   </ItemGroup>\r
 </Project>\r
diff --git a/icu4c/source/test/intltest/itrbbi.cpp b/icu4c/source/test/intltest/itrbbi.cpp
index c6deee06c3b405701f90a18c4d5c3b287988f85e..b99a405beae2d82676b61013c6d1e9e8ece31c49 100644 (file)
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-* Copyright (C) 1998-2011, International Business Machines Corporation 
+* Copyright (C) 1998-2012, International Business Machines Corporation 
 * and others.  All Rights Reserved.
 **********************************************************************
 */
@@ -19,7 +19,6 @@
 #include "itrbbi.h"
 #include "rbbiapts.h"
 #include "rbbitst.h"
-#include "dicttest.h"
 
 #define TESTCLASS(n,classname)        \
     case n:                           \
@@ -39,7 +38,6 @@ void IntlTestRBBI::runIndexedTest( int32_t index, UBool exec, const char* &name,
     switch (index) {
         TESTCLASS(0, RBBIAPITest);
         TESTCLASS(1, RBBITest);
-        TESTCLASS(2, DictionaryWordTest);
         default: name=""; break;
     }
 }
diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt
index 8c185980bf68afe58a87b3d3fe7feec9162c7265..63c6d7de698d6bbd3b37ff60860824909698d0ea 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (c) 2001-2011 International Business Machines
+# Copyright (c) 2001-2012 International Business Machines
 # Corporation and others. All Rights Reserved.
 #
 # RBBI Test Data
@@ -561,6 +561,55 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
 \u0E14\u0E49\u0E27\u0E22<200>\u0e2b\u0e25\u0e32\u0e22<200>\
 \u0e1e\u0e22\u0e32\u0e07\u0e04\u0e4c<200></data>
 
+# Test data originally from http://bugs.icu-project.org/trac/search?q=r30327
+<data>•กู<200> •กิน<200>กุ้ง<200> •ปิ้่<200>งอ<200>ยู่<200>ใน<200>ถ้ำ<200></data>
+
+<data>•\u0E01\u0E39<200>\u0020•\u0E01\u0E34\u0E19<200>\u0E01\u0E38\u0E49\u0E07<200>\
+\u0020•\u0E1B\u0E34\u0E49\u0E48<200>\u0E07\u0E2D<200>\u0E22\u0E39\u0E48<200>\
+\u0E43\u0E19<200>\u0E16\u0E49\u0E33<200></data>
+
+<line>
+<data>•\u0E01\u0E39\u0020•\u0E01\u0E34\u0E19•\u0E01\u0E38\u0E49\u0E07\
+\u0020•\u0E1B\u0E34\u0E49\u0E48•\u0E07\u0E2D•\u0E22\u0E39\u0E48•\
+\u0E43\u0E19•\u0E16\u0E49\u0E33•</data>
+
+##########################################################################################
+#
+#   Khmer Tests
+#
+##########################################################################################
+
+# Test data originally from http://bugs.icu-project.org/trac/search?q=r30327
+#  from the file testdata/wordsegments.txt
+<locale en>
+<word>
+
+<data>•តើ<200>លោក<200>មក<200>ពី<200>ប្រទេស<200>ណា<200></data>
+<data>•សណ្ដូក<200>ក<200>បណ្ដែត<200>ខ្លួន<200></data>
+<data>•ពណ៌ស<200>ម្ដេច<200>ថា<200>ខ្មៅ<200></data>
+#ប្រយោគ|ពី|របៀប|រួបរួម|និង|ភាព|ផ្សេងគ្នា|ដែល|អាច|ចូល<200></data>
+<data>•ប្រយោគ<200>ពី<200>របៀប<200>ដែល<200>និង<200>ភាព<200>ផ្សេងគ្នា<200>ដែល<200>អាច<200>ចូល<200></data>
+#ប្រយោគ|ពី|របៀប|ជា|មួយ|និង|ភាព|ផ្សេងគ្នា|ដែល|អាច|ចូល<200></data>
+<data>•សូម<200>ចំណាយពេល<200>បន្តិច<200>ដើម្បី<200>អធិស្ឋាន<200>អរព្រះគុណ<200>ដល់<200>ព្រះអង្គ<200></data>
+<data>•ការ<200>ថោកទាប<200>បរិប្បូណ៌<200>ដោយ<200></data>
+<data>•ប្រើប្រាស់<200>ស្អាត<200>ទាំង<200>ចិត្ត<200>សិស្ស<200>នោះ<200></data>
+<data>•បើ<200>អ្នក<200>ប្រព្រឺត្ត<200>អំពើអាក្រក់<200>មុខ<200>ជា<200>មាន<200></data>
+<data>•ប្រដាប់<200>ប្រដា<200>រ<200>រៀនសូត្រ<200>បន្ទប់<200>រៀន<200></data>
+<data>•ដើរតួ<200>មនុស្សគ<200>ឥត<200>បញ្ចេញ<200>យោបល់<200>សោះ<200>ឡើយ<200></data>
+<data>•មិន<200>អាច<200>ឲ្យ<200>យើង<200>ធ្វើ<200>កសិកម្ម<200>បាន<200>ឡើយ<200></data>
+<data>•បន្ត<200>សេចក្ត<200>ទៅទៀត<200></data>
+<data>•ក្រុម<200>ប៉ូលិស<200>បណ្តាក់<200>គ្នា<200></data>
+<data>•គ្មាន<200>សុខ<200>សំរាន្ត<200>ដង<200>ណា<200></data>
+<data>•បាន<200>សុខភាព<200>បរិប្បូណ៌<200></data>
+<data>•ជា<200>មេចោរ<200>ខ្ញុំ<200>នឹង<200>ស្លាប់<200>ទៅវិញ<200>ជា<200>មេចោរ<200></data>
+<data>•ឯ<200>ការ<200>វាយ<200>ផ្ចាល<200>ដែល<200>នាំ<200></data>
+<data>•គេ<200>ដឹក<200>ទៅ<200>សំឡាប់<200></data>
+#អ្នក|ដែល|ជា|មន្ត្រី|ធំ|លើ|គាត់|ទេ<200></data>
+<data>•យក<200>ទៅ<200>សម្លាប់ចោល<200>ស្ងាត់<200></data>
+<data>•ត្រូវ<200>បាន<200>គេ<200>សម្លាប់<200></data>
+<data>•នៅក្នុង<200>ស្រុក<200>ខ្ល<200>ងហ្ស៊ុន<200></data>
+
+
 #
 #  Jitterbug 3671 Test Case
 #