From: Steven R. Loomis Date: Tue, 8 Jul 2014 00:30:21 +0000 (+0000) Subject: ICU-10990 remove STL deps from FilteredBreakIterator implementation X-Git-Tag: milestone-59-0-1~1800 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=68b3eed7cbb435d650896773fbe7baa845895623;p=icu ICU-10990 remove STL deps from FilteredBreakIterator implementation X-SVN-Rev: 36008 --- diff --git a/icu4c/source/common/unicode/uconfig.h b/icu4c/source/common/unicode/uconfig.h index c22c211cca3..d6dcc669e84 100644 --- a/icu4c/source/common/unicode/uconfig.h +++ b/icu4c/source/common/unicode/uconfig.h @@ -417,7 +417,10 @@ * @internal */ #ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION -# define UCONFIG_NO_FILTERED_BREAK_ITERATION 1 +# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0 + + + #endif #endif diff --git a/icu4c/source/i18n/filteredbrk.cpp b/icu4c/source/i18n/filteredbrk.cpp index 01ea106d14e..2f47cbe327c 100644 --- a/icu4c/source/i18n/filteredbrk.cpp +++ b/icu4c/source/i18n/filteredbrk.cpp @@ -11,15 +11,107 @@ #include -#include -#include -#include #include "uresimp.h" #include "ubrkimpl.h" - +#include "uvector.h" U_NAMESPACE_BEGIN -using namespace std; +#ifndef FB_DEBUG +#define FB_DEBUG 0 +#endif + +#if FB_DEBUG +#include +static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) { + char buf[2048]; + if(s) { + s->extract(0,s->length(),buf,2048); + } else { + strcpy(buf,"NULL"); + } + fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", + f, l, m, buf, (const void*)s, b?'T':'F',(int)d); +} + +#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) +#else +#define FB_TRACE(m,s,b,d) +#endif + +static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { + const UnicodeString &a = *(const UnicodeString*)t1.pointer; + const UnicodeString &b = *(const UnicodeString*)t2.pointer; + return a.compare(b); +} + +/** + * A UVector which implements a set of strings. + */ +class U_I18N_API UStringSet : public UVector { + public: + UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, + uhash_compareUnicodeString, + 1, + status) {} + virtual ~UStringSet(); + /** + * Is this UnicodeSet contained? + */ + inline UBool contains(const UnicodeString& s) { + return contains((void*) &s); + } + using UVector::contains; + /** + * Return the ith UnicodeString alias + */ + inline const UnicodeString* getStringAt(int32_t i) const { + return (const UnicodeString*)elementAt(i); + } + /** + * Adopt the UnicodeString if not already contained. + * Caller no longer owns the pointer in any case. + * @return true if adopted successfully, false otherwise (error, or else duplicate) + */ + inline UBool adopt(UnicodeString *str, UErrorCode &status) { + if(U_FAILURE(status) || contains(*str)) { + delete str; + return false; + } else { + sortedInsert(str, compareUnicodeString, status); + if(U_FAILURE(status)) { + delete str; + return false; + } + return true; + } + } + /** + * Add by value. + * @return true if successfully adopted. + */ + inline UBool add(const UnicodeString& str, UErrorCode &status) { + if(U_FAILURE(status)) return false; + UnicodeString *t = new UnicodeString(str); + if(t==NULL) { + status = U_MEMORY_ALLOCATION_ERROR; return false; + } + return adopt(t, status); + } + /** + * Remove this string. + * @return true if successfully removed, false otherwise (error, or else it wasn't there) + */ + inline UBool remove(const UnicodeString &s, UErrorCode &status) { + if(U_FAILURE(status)) return false; + return removeElement((void*) &s); + } +}; + +/** + * Virtual, won't be inlined + */ +UStringSet::~UStringSet() {} + static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie static const int32_t kMATCH = (1<<1); //< exact match - skip this one. @@ -27,11 +119,11 @@ static const int32_t kSuppressInReverse = (1<<0); static const int32_t kAddToForward = (1<<1); static const UChar kFULLSTOP = 0x002E; // '.' -class ULISentenceBreakIterator : public BreakIterator { +class SimpleFilteredSentenceBreakIterator : public BreakIterator { public: - ULISentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status); - virtual ~ULISentenceBreakIterator() {} - ULISentenceBreakIterator(const ULISentenceBreakIterator& other); + SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status); + virtual ~SimpleFilteredSentenceBreakIterator() {} + SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other); private: LocalPointer fDelegate; LocalUTextPointer fText; @@ -48,7 +140,7 @@ public: status = U_SAFECLONE_ALLOCATED_WARNING; return clone(); } - virtual BreakIterator* clone(void) const { return new ULISentenceBreakIterator(*this); } + virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); } virtual UClassID getDynamicClassID(void) const { return NULL; } virtual UBool operator==(const BreakIterator& o) const { if(*this==o) return true; return false; } @@ -77,7 +169,7 @@ public: }; -ULISentenceBreakIterator::ULISentenceBreakIterator(const ULISentenceBreakIterator& other) +SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other) : BreakIterator(other), fDelegate(other.fDelegate->clone()) { /* @@ -92,7 +184,7 @@ ULISentenceBreakIterator::ULISentenceBreakIterator(const ULISentenceBreakIterato } -ULISentenceBreakIterator::ULISentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) : +SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) : BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)), fDelegate(adopt), fBackwardsTrie(backwards), @@ -101,7 +193,7 @@ ULISentenceBreakIterator::ULISentenceBreakIterator(BreakIterator *adopt, UCharsT // all set.. } -int32_t ULISentenceBreakIterator::next() { +int32_t SimpleFilteredSentenceBreakIterator::next() { int32_t n = fDelegate->next(); if(n == UBRK_DONE || // at end or fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions @@ -196,40 +288,32 @@ int32_t ULISentenceBreakIterator::next() { return n; } -U_NAMESPACE_END - -#if 0 -// Would improve performance - but, platform issues. -// for the 'set' -namespace std { - template <> struct hash { - size_t operator()( const UnicodeString& str ) const { - return (size_t)str.hashCode(); - } - }; -} -#endif - -U_NAMESPACE_BEGIN - +/** + * Concrete implementation of builder class. + */ class SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder { public: virtual ~SimpleFilteredBreakIteratorBuilder(); SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status); - SimpleFilteredBreakIteratorBuilder(); + SimpleFilteredBreakIteratorBuilder(UErrorCode &status); virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status); virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status); virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status); private: - set fSet; + UStringSet fSet; }; SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder() { } +SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status) + : fSet(status) +{ +} + SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status) - : fSet() + : fSet(status) { if(U_SUCCESS(status)) { LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &status)); @@ -252,23 +336,20 @@ SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Loc } } -SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder() - : fSet() -{ -} - UBool SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) { - if( U_FAILURE(status) ) return FALSE; - return fSet.insert(exception).second; + UBool r = fSet.add(exception, status); + FB_TRACE("suppressBreakAfter",&exception,r,0); + return r; } UBool SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) { - if( U_FAILURE(status) ) return FALSE; - return ((fSet.erase(exception)) != 0); + UBool r = fSet.remove(exception, status); + FB_TRACE("unsuppressBreakAfter",&exception,r,0); + return r; } BreakIterator * @@ -293,11 +374,19 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr LocalPointer forwardsPartialTrie; // Has ".a" for "a.M." int n=0; - for ( set::iterator i = fSet.begin(); - i != fSet.end(); + for ( int32_t i = 0; + i-1 && (nn+1)!=ustrs[i].length()) { - //if(true) u_printf("Is a partial: /%S/\n", ustrs[i].getTerminatedBuffer()); + FB_TRACE("partial",&ustrs[i],FALSE,i); // is partial. // is it unique? int sameAs = -1; for(int j=0;jadd(prefix, kPARTIAL, status); revCount++; - //if(debug2) u_printf("Added Partial: /%S/ from /%S/ status=%s\n", prefix.getTerminatedBuffer(), ustrs[i].getTerminatedBuffer(), u_errorName(status)); + FB_TRACE("Added partial",&prefix,FALSE, i); + FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); partials[i] = kSuppressInReverse | kAddToForward; } else { - //if(debug2) u_printf(" // not adding partial for /%S/ from /%S/\n", prefix.getTerminatedBuffer(), ustrs[i].getTerminatedBuffer()); + FB_TRACE("NOT adding partial",&prefix,FALSE, i); + FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); } } } @@ -341,9 +433,9 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr ustrs[i].reverse(); builder->add(ustrs[i], kMATCH, status); revCount++; - //if(debug2) u_printf("Added: /%S/ status=%s\n", ustrs[i].getTerminatedBuffer(), u_errorName(status)); + FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i); } else { - //if(debug2) u_printf(" Adding fwd: /%S/\n", ustrs[i].getTerminatedBuffer()); + FB_TRACE("Adding fwd",&ustrs[i], FALSE, i); // an optimization would be to only add the portion after the '.' // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward, @@ -355,12 +447,12 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status)); } } - //if(debug) u_printf(" %s has %d abbrs.\n", fJSONSource.c_str(), subCount); + FB_TRACE("AbbrCount",NULL,FALSE, subCount); if(revCount>0) { backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status)); if(U_FAILURE(status)) { - //printf("Error %s building backwards\n", u_errorName(status)); + FB_TRACE(u_errorName(status),NULL,FALSE, -1); return NULL; } } @@ -368,16 +460,16 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr if(fwdCount>0) { forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status)); if(U_FAILURE(status)) { - //printf("Error %s building forwards\n", u_errorName(status)); + FB_TRACE(u_errorName(status),NULL,FALSE, -1); return NULL; } } - return new ULISentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status); + return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status); } -// ----------- +// ----------- Base class implementation FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { } @@ -388,18 +480,16 @@ FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { FilteredBreakIteratorBuilder * FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) { if(U_FAILURE(status)) return NULL; - LocalPointer ret(new SimpleFilteredBreakIteratorBuilder(where, status)); - if(!ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; + if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; return ret.orphan(); } FilteredBreakIteratorBuilder * FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { if(U_FAILURE(status)) return NULL; - - LocalPointer ret(new SimpleFilteredBreakIteratorBuilder()); - if(!ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; + LocalPointer ret(new SimpleFilteredBreakIteratorBuilder(status)); + if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; return ret.orphan(); } diff --git a/icu4c/source/i18n/unicode/filteredbrk.h b/icu4c/source/i18n/unicode/filteredbrk.h index c452f33ff89..01117e500a2 100644 --- a/icu4c/source/i18n/unicode/filteredbrk.h +++ b/icu4c/source/i18n/unicode/filteredbrk.h @@ -10,7 +10,7 @@ #include "unicode/brkiter.h" -#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION +#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION U_NAMESPACE_BEGIN