]> granicus.if.org Git - icu/commitdiff
ICU-10286 load suppression data from locales. For now, only test English because...
author Steven R. Loomis <srl@icu-project.org>
Thu, 6 Mar 2014 12:51:31 +0000 (12:51 +0000)
committer Steven R. Loomis <srl@icu-project.org>
Thu, 6 Mar 2014 12:51:31 +0000 (12:51 +0000)
X-SVN-Rev: 35359

icu4c/source/i18n/filteredbrk.cpp
icu4c/source/test/intltest/rbbiapts.cpp

index 84fad492c1973a1179a73272b434485049048479..33989b901a2d6a4b78f427b374b44e126620ed63 100644 (file)
@@ -14,6 +14,8 @@
 #include <set>
 #include <string>
 #include <functional>
+#include "uresimp.h"
+#include "ubrkimpl.h"
 
 U_NAMESPACE_BEGIN
 
@@ -230,8 +232,25 @@ SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
   : fSet()
 {
-  // TODO: load, set
-  status = U_UNSUPPORTED_ERROR;
+  if(U_SUCCESS(status)) { // Locale constructor: do nothing if the caller already passed in a failure; otherwise load suppressions from resource data.
+    LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &status)); // open the U_ICUDATA_BRKITR bundle for the locale's base name
+    LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &status)); // look up the "exceptions" entry in that bundle
+    LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &status)); // then its "SentenceBreak" child, which is iterated below
+    if(U_FAILURE(status)) return; // any failure in the three lookups above is reported via status; nothing has been added to the builder yet, so it stays empty.
+
+    LocalUResourceBundlePointer strs; // owns the child resource currently being visited
+    UErrorCode subStatus = status; // separate code for the iteration, so its terminating error can be filtered before touching the caller's status
+    do { // visit each child resource of `breaks`
+      strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus)); // release the previous child, hand it back as the second (fill-in) argument, and re-adopt whatever is returned
+      if(strs.isValid() && U_SUCCESS(subStatus)) {
+        UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status)); // note: string extraction reports into the caller's status, not subStatus
+        suppressBreakAfter(str, status); // register the string as a suppression; its return value is ignored here
+      }
+    } while (strs.isValid() && U_SUCCESS(subStatus)); // stop once no resource is held or the iteration reports a failure
+    if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) { // U_INDEX_OUTOFBOUNDS_ERROR is deliberately not propagated (presumably the normal end-of-iteration signal); an earlier failure in status is not overwritten
+      status = subStatus;
+    }
+  }
 }
 
 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder()
@@ -339,7 +358,7 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
   if(revCount>0) {
     backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
     if(U_FAILURE(status)) {
-      //if(debug) u_printf("Error %s building backwards\n", u_errorName(status));
+      printf("Error %s building backwards\n", u_errorName(status));
       return NULL;
     }
   }
@@ -347,7 +366,7 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
   if(fwdCount>0) {
     forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
     if(U_FAILURE(status)) {
-      //if(debug) u_printf("Error %s building forwards\n", u_errorName(status));
+      printf("Error %s building forwards\n", u_errorName(status));
       return NULL;
     }
   }
index f26fcf36c038e2281a6feebc874ebb88e462113c..5102cf5c724672a17bdb92934db8c94bc33aa801 100644 (file)
@@ -1310,6 +1310,84 @@ void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
     prtbrks(filteredBI.getAlias(), text, *this);
   }
 
+
+  {
+    logln("Constructing English builder\n");
+    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    logln("Constructing base BI\n");
+    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    logln("unsuppressing 'Capt'");
+    TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
+
+    logln("Building new BI\n");
+    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    if(filteredBI.isValid()) {
+      logln("Testing:");
+      filteredBI->setText(text);
+      TEST_ASSERT(84 == filteredBI->next());
+      TEST_ASSERT(90 == filteredBI->next());
+      TEST_ASSERT(278 == filteredBI->next());
+      filteredBI->first();
+      prtbrks(filteredBI.getAlias(), text, *this);
+    }
+  }
+
+
+  {
+    logln("Constructing English builder\n");
+    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    logln("Constructing base BI\n");
+    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    logln("Building new BI\n");
+    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    if(filteredBI.isValid()) {
+      logln("Testing:");
+      filteredBI->setText(text);
+      TEST_ASSERT(84 == filteredBI->next());
+      TEST_ASSERT(278 == filteredBI->next());
+      filteredBI->first();
+      prtbrks(filteredBI.getAlias(), text, *this);
+    }
+  }
+
+#if 0
+  // reenable once french is in
+  {
+    logln("Constructing French builder");
+    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    logln("Constructing base BI\n");
+    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    logln("Building new BI\n");
+    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    if(filteredBI.isValid()) {
+      logln("Testing:");
+      filteredBI->setText(text);
+      TEST_ASSERT(20 == filteredBI->next());
+      TEST_ASSERT(84 == filteredBI->next());
+      filteredBI->first();
+      prtbrks(filteredBI.getAlias(), text, *this);
+    }
+  }
+#endif
+
 #else
   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING");
 #endif