* tables object that is passed in as a parameter.
*/
RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
- : fSCharIter(UnicodeString())
+ : RuleBasedBreakIterator(status)
{
- init(status);
fData = new RBBIDataWrapper(data, status); // status checked in constructor
if (U_FAILURE(status)) {return;}
if(fData == nullptr) {
RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
uint32_t ruleLength,
UErrorCode &status)
- : fSCharIter(UnicodeString())
+ : RuleBasedBreakIterator(status)
{
- init(status);
if (U_FAILURE(status)) {
return;
}
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
- : fSCharIter(UnicodeString())
+ : RuleBasedBreakIterator(status)
{
- init(status);
fData = new RBBIDataWrapper(udm, status); // status checked in constructor
if (U_FAILURE(status)) {return;}
if(fData == nullptr) {
RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
UParseError &parseError,
UErrorCode &status)
- : fSCharIter(UnicodeString())
+ : RuleBasedBreakIterator(status)
{
- init(status);
if (U_FAILURE(status)) {return;}
RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)
RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status);
// of rules.
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator()
- : fSCharIter(UnicodeString())
+ : RuleBasedBreakIterator(fErrorCode) // status aliases fErrorCode (default-initialized to U_ZERO_ERROR before the delegated body runs), so failures land in the member
{
- UErrorCode status = U_ZERO_ERROR;
- init(status);
+}
+
+/**
+ * Simple Constructor with an error code.
+ * Handles common initialization for all other constructors.
+ *
+ * Opens fText over an empty string and allocates the dictionary cache and
+ * the break cache. If status indicates failure after these steps, the
+ * failure code is also saved in fErrorCode and the constructor returns
+ * without storing either cache in its member pointer.
+ *
+ * @param status  In/out error code; holds a failure code on return if
+ *                common initialization did not succeed.
+ */
+RuleBasedBreakIterator::RuleBasedBreakIterator(UErrorCode &status) {
+ utext_openUChars(&fText, nullptr, 0, &status); // empty text: null buffer, length 0
+ LocalPointer<DictionaryCache> lpDictionaryCache(new DictionaryCache(this, status), status);
+ LocalPointer<BreakCache> lpBreakCache(new BreakCache(this, status), status);
+ if (U_FAILURE(status)) {
+ fErrorCode = status; // keep the failure on the object as well as in the caller's status
+ return; // no orphan() here: the LocalPointers keep ownership of whatever was allocated
+ }
+ fDictionaryCache = lpDictionaryCache.orphan(); // success: ownership moves to the member pointers
+ fBreakCache = lpBreakCache.orphan();
+
+#ifdef RBBI_DEBUG
+ static UBool debugInitDone = false; // function-local static: the environment is inspected only on the first construction
+ if (debugInitDone == false) {
+ char *debugEnv = getenv("U_RBBIDEBUG");
+ if (debugEnv && uprv_strstr(debugEnv, "trace")) {
+ gTrace = true; // tracing is switched on when U_RBBIDEBUG contains "trace"
+ }
+ debugInitDone = true;
+ }
+#endif
}
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
-: BreakIterator(other),
- fSCharIter(UnicodeString())
+: RuleBasedBreakIterator() // build an empty iterator first; the assignment in the body then copies other
{
- UErrorCode status = U_ZERO_ERROR;
- this->init(status);
*this = other;
}
return *this;
}
-
-
-//-----------------------------------------------------------------------------
-//
-// init() Shared initialization routine. Used by all the constructors.
-// Initializes all fields, leaving the object in a consistent state.
-//
-//-----------------------------------------------------------------------------
-void RuleBasedBreakIterator::init(UErrorCode &status) {
- fCharIter = nullptr;
- fData = nullptr;
- fPosition = 0;
- fRuleStatusIndex = 0;
- fDone = false;
- fDictionaryCharCount = 0;
- fLanguageBreakEngines = nullptr;
- fUnhandledBreakEngine = nullptr;
- fBreakCache = nullptr;
- fDictionaryCache = nullptr;
- fLookAheadMatches = nullptr;
- fIsPhraseBreaking = false;
-
- // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
- // fText = UTEXT_INITIALIZER;
- static const UText initializedUText = UTEXT_INITIALIZER;
- uprv_memcpy(&fText, &initializedUText, sizeof(UText));
-
- if (U_FAILURE(status)) {
- return;
- }
-
- utext_openUChars(&fText, nullptr, 0, &status);
- fDictionaryCache = new DictionaryCache(this, status);
- fBreakCache = new BreakCache(this, status);
- if (U_SUCCESS(status) && (fDictionaryCache == nullptr || fBreakCache == nullptr)) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
-
-#ifdef RBBI_DEBUG
- static UBool debugInitDone = false;
- if (debugInitDone == false) {
- char *debugEnv = getenv("U_RBBIDEBUG");
- if (debugEnv && uprv_strstr(debugEnv, "trace")) {
- gTrace = true;
- }
- debugInitDone = true;
- }
-#endif
-}
-
-
-
//-----------------------------------------------------------------------------
//
// clone - Returns a newly-constructed RuleBasedBreakIterator with the same
// Return one over an empty string instead - this is the closest
// we can come to signaling a failure.
// (GetText() is obsolete, this failure is sort of OK)
- fSCharIter.setText(UnicodeString());
+ fSCharIter.setText(u"", 0);
if (fCharIter != &fSCharIter) {
// existing fCharIter was adopted from the outside. Delete it now.
// Needed in case someone calls getText().
// Can not, unfortunately, do this lazily on the (probably never)
// call to getText(), because getText is const.
- fSCharIter.setText(newText);
+ fSCharIter.setText(newText.getBuffer(), newText.length());
if (fCharIter != &fSCharIter) {
// old fCharIter was adopted from the outside. Delete it.
* The UText through which this BreakIterator accesses the text
* @internal (private)
*/
- UText fText;
+ UText fText = UTEXT_INITIALIZER;
#ifndef U_HIDE_INTERNAL_API
public:
* Not for general use; Public only for testing purposes.
* @internal
*/
- RBBIDataWrapper *fData;
+ RBBIDataWrapper *fData = nullptr;
+
private:
+ /**
+ * The saved error code associated with this break iterator.
+ * This is the value to be returned by copyErrorTo().
+ * Starts out as U_ZERO_ERROR; the common-initialization constructor
+ * stores its failure code here when initialization does not succeed.
+ */
+ UErrorCode fErrorCode = U_ZERO_ERROR;
/**
* The current position of the iterator. Pinned, 0 < fPosition <= text.length.
* Never has the value UBRK_DONE (-1).
*/
- int32_t fPosition;
+ int32_t fPosition = 0;
/**
* TODO:
*/
- int32_t fRuleStatusIndex;
+ int32_t fRuleStatusIndex = 0;
/**
* Cache of previously determined boundary positions.
*/
class BreakCache;
- BreakCache *fBreakCache;
+ BreakCache *fBreakCache = nullptr;
/**
* Cache of boundary positions within a region of text that has been
* sub-divided by dictionary based breaking.
*/
class DictionaryCache;
- DictionaryCache *fDictionaryCache;
+ DictionaryCache *fDictionaryCache = nullptr;
/**
*
* handle a given character.
* @internal (private)
*/
- UStack *fLanguageBreakEngines;
+ UStack *fLanguageBreakEngines = nullptr;
/**
*
* LanguageBreakEngine.
* @internal (private)
*/
- UnhandledEngine *fUnhandledBreakEngine;
+ UnhandledEngine *fUnhandledBreakEngine = nullptr;
/**
* Counter for the number of characters encountered with the "dictionary"
* flag set.
* @internal (private)
*/
- uint32_t fDictionaryCharCount;
+ uint32_t fDictionaryCharCount = 0;
/**
* A character iterator that refers to the same text as the UText, above.
* Only included for compatibility with old API, which was based on CharacterIterators.
* Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
*/
- CharacterIterator *fCharIter;
+ CharacterIterator *fCharIter = &fSCharIter;
/**
* When the input text is provided by a UnicodeString, this will point to
* a characterIterator that wraps that data. Needed only for the
* implementation of getText(), a backwards compatibility issue.
*/
- StringCharacterIterator fSCharIter;
+ UCharCharacterIterator fSCharIter {u"", 0};
/**
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
*/
- UBool fDone;
+ bool fDone = false;
/**
* Array of look-ahead tentative results.
*/
- int32_t *fLookAheadMatches;
+ int32_t *fLookAheadMatches = nullptr;
/**
* A flag to indicate if phrase based breaking is enabled.
*/
- UBool fIsPhraseBreaking;
+ UBool fIsPhraseBreaking = false;
//=======================================================================
// constructors
/** @internal */
friend class BreakIterator;
+ /**
+ * Default constructor with an error code parameter.
+ * Aside from error handling, identical to the default constructor.
+ * Internally, handles common initialization for other constructors.
+ * @param status  In/out error code; set to a failure code if the
+ *                common initialization fails.
+ * @internal (private)
+ */
+ RuleBasedBreakIterator(UErrorCode &status);
+
public:
/** Default constructor. Creates an empty shell of an iterator, with no
- * rules or text to iterate over. Object can subsequently be assigned to.
+ * rules or text to iterate over. Object can subsequently be assigned to,
+ * but is otherwise unusable.
* @stable ICU 2.2
*/
RuleBasedBreakIterator();
* @return true if both BreakIterators are not same.
* @stable ICU 2.0
*/
- inline bool operator!=(const BreakIterator& that) const;
+ inline bool operator!=(const BreakIterator& that) const {
+ return !operator==(that); // inequality is exactly the negation of this object's operator== on the same argument
+ }
/**
* Returns a newly-constructed RuleBasedBreakIterator with the same
* </p>
* <p>
* When the break iterator is operating on text supplied via a UText,
- * this function will fail. Lacking any way to signal failures, it
- * returns an CharacterIterator containing no text.
+ * this function will fail, returning a CharacterIterator containing no text.
* The function getUText() provides similar functionality,
* is reliable, and is more efficient.
* </p>
//=======================================================================
// implementation
//=======================================================================
- /**
- * Common initialization function, used by constructors and bufferClone.
- * @internal (private)
- */
- void init(UErrorCode &status);
-
/**
* Iterate backwards from an arbitrary position in the input text using the
* synthesized Safe Reverse rules.
#endif /* U_HIDE_INTERNAL_API */
};
-//------------------------------------------------------------------------------
-//
-// Inline Functions Definitions ...
-//
-//------------------------------------------------------------------------------
-
-inline bool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
- return !operator==(that);
-}
-
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */