* Constructs a RuleBasedBreakIterator that uses the already-created
* tables object that is passed in as a parameter.
*/
-RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) {
+RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
+ : fSCharIter(UnicodeString())
+{
init(status);
fData = new RBBIDataWrapper(data, status); // status checked in constructor
if (U_FAILURE(status)) {return;}
//
RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
uint32_t ruleLength,
- UErrorCode &status) {
+ UErrorCode &status)
+ : fSCharIter(UnicodeString())
+{
init(status);
if (U_FAILURE(status)) {
return;
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
+ : fSCharIter(UnicodeString())
{
init(status);
fData = new RBBIDataWrapper(udm, status); // status checked in constructor
RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
UParseError &parseError,
UErrorCode &status)
+ : fSCharIter(UnicodeString())
{
init(status);
if (U_FAILURE(status)) {return;}
// Used when creating a RuleBasedBreakIterator from a set
// of rules.
//-------------------------------------------------------------------------------
-RuleBasedBreakIterator::RuleBasedBreakIterator() {
+RuleBasedBreakIterator::RuleBasedBreakIterator()
+ : fSCharIter(UnicodeString())
+{
UErrorCode status = U_ZERO_ERROR;
init(status);
}
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
-: BreakIterator(other)
+: BreakIterator(other),
+ fSCharIter(UnicodeString())
{
UErrorCode status = U_ZERO_ERROR;
this->init(status);
* Destructor
*/
RuleBasedBreakIterator::~RuleBasedBreakIterator() {
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
// fCharIter was adopted from the outside.
delete fCharIter;
}
fCharIter = NULL;
- delete fSCharIter;
- fSCharIter = NULL;
- delete fDCharIter;
- fDCharIter = NULL;
utext_close(fText);
UErrorCode status = U_ZERO_ERROR;
fText = utext_clone(fText, that.fText, FALSE, TRUE, &status);
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
delete fCharIter;
}
fCharIter = NULL;
- if (that.fCharIter != NULL ) {
+ if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
// This is a little bit tricky - it will initially appear that
// this->fCharIter is adopted, even if that->fCharIter was
// not adopted. That's ok.
fCharIter = that.fCharIter->clone();
}
+ fSCharIter = that.fSCharIter;
+ if (fCharIter == NULL) {
+ fCharIter = &fSCharIter;
+ }
if (fData != NULL) {
fData->removeReference();
void RuleBasedBreakIterator::init(UErrorCode &status) {
fText = NULL;
fCharIter = NULL;
- fSCharIter = NULL;
- fDCharIter = NULL;
fData = NULL;
fPosition = 0;
fRuleStatusIndex = 0;
// Return one over an empty string instead - this is the closest
// we can come to signaling a failure.
// (GetText() is obsolete, this failure is sort of OK)
- if (fDCharIter == NULL) {
- static const UChar c = 0;
- fDCharIter = new UCharCharacterIterator(&c, 0);
- if (fDCharIter == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- }
+ fSCharIter.setText(UnicodeString());
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
// existing fCharIter was adopted from the outside. Delete it now.
delete fCharIter;
}
- fCharIter = fDCharIter;
+ fCharIter = &fSCharIter;
this->first();
}
RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
// If we are holding a CharacterIterator adopted from a
// previous call to this function, delete it now.
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
delete fCharIter;
}
// Needed in case someone calls getText().
// Can not, unfortunately, do this lazily on the (probably never)
// call to getText(), because getText is const.
- if (fSCharIter == NULL) {
- fSCharIter = new StringCharacterIterator(newText);
- } else {
- fSCharIter->setText(newText);
- }
+ fSCharIter.setText(newText);
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
// old fCharIter was adopted from the outside. Delete it.
delete fCharIter;
}
- fCharIter = fSCharIter;
+ fCharIter = &fSCharIter;
this->first();
}
*/
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
- fBI(bi), fBreaks(NULL), fPositionInCache(-1),
+ fBI(bi), fBreaks(status), fPositionInCache(-1),
fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
- fBreaks = new UVector32(status);
}
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
- delete fBreaks;
- fBreaks = NULL;
}
void RuleBasedBreakIterator::DictionaryCache::reset() {
fLimit = 0;
fFirstRuleStatusIndex = 0;
fOtherRuleStatusIndex = 0;
- fBreaks->removeAllElements();
+ fBreaks.removeAllElements();
}
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
// Sequential iteration, move from previous boundary to the following
int32_t r = 0;
- if (fPositionInCache >= 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
+ if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
++fPositionInCache;
- if (fPositionInCache >= fBreaks->size()) {
+ if (fPositionInCache >= fBreaks.size()) {
fPositionInCache = -1;
return FALSE;
}
- r = fBreaks->elementAti(fPositionInCache);
+ r = fBreaks.elementAti(fPositionInCache);
U_ASSERT(r > fromPos);
*result = r;
*statusIndex = fOtherRuleStatusIndex;
// Random indexing. Linear search for the boundary following the given position.
- for (fPositionInCache = 0; fPositionInCache < fBreaks->size(); ++fPositionInCache) {
- r= fBreaks->elementAti(fPositionInCache);
+ for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
+ r= fBreaks.elementAti(fPositionInCache);
if (r > fromPos) {
*result = r;
*statusIndex = fOtherRuleStatusIndex;
}
if (fromPos == fLimit) {
- fPositionInCache = fBreaks->size() - 1;
+ fPositionInCache = fBreaks.size() - 1;
if (fPositionInCache >= 0) {
- U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos);
+ U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
}
}
int32_t r;
- if (fPositionInCache > 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
+ if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
--fPositionInCache;
- r = fBreaks->elementAti(fPositionInCache);
+ r = fBreaks.elementAti(fPositionInCache);
U_ASSERT(r < fromPos);
*result = r;
*statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
return FALSE;
}
- for (fPositionInCache = fBreaks->size()-1; fPositionInCache >= 0; --fPositionInCache) {
- r = fBreaks->elementAti(fPositionInCache);
+ for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
+ r = fBreaks.elementAti(fPositionInCache);
if (r < fromPos) {
*result = r;
*statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
// Ask the language object if there are any breaks. It will add them to the cache and
// leave the text pointer on the other side of its range, ready to search for the next one.
if (lbe != NULL) {
- foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
+ foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, fBreaks);
}
// Reload the loop variables for the next go-round
// printf("foundBreakCount = %d\n", foundBreakCount);
if (foundBreakCount > 0) {
- U_ASSERT(foundBreakCount == fBreaks->size());
- if (startPos < fBreaks->elementAti(0)) {
+ U_ASSERT(foundBreakCount == fBreaks.size());
+ if (startPos < fBreaks.elementAti(0)) {
// The dictionary did not place a boundary at the start of the segment of text.
// Add one now. This should not commonly happen, but it would be easy for interactions
// of the rules for dictionary segments and the break engine implementations to
// inadvertently cause it. Cover it here, just in case.
- fBreaks->insertElementAt(startPos, 0, status);
+ fBreaks.insertElementAt(startPos, 0, status);
}
- if (endPos > fBreaks->peeki()) {
- fBreaks->push(endPos, status);
+ if (endPos > fBreaks.peeki()) {
+ fBreaks.push(endPos, status);
}
fPositionInCache = 0;
// Note: Dictionary matching may extend beyond the original limit.
- fStart = fBreaks->elementAti(0);
- fLimit = fBreaks->peeki();
+ fStart = fBreaks.elementAti(0);
+ fLimit = fBreaks.peeki();
} else {
// there were no language-based breaks, even though the segment contained
// dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
RuleBasedBreakIterator *fBI;
- UVector32 *fBreaks; // A vector containing the boundaries.
+ UVector32 fBreaks; // A vector containing the boundaries.
int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
// or preceding(). Optimizes sequential access.
int32_t fStart; // Text position of first boundary in cache.
#include "unicode/udata.h"
#include "unicode/parseerr.h"
#include "unicode/schriter.h"
-#include "unicode/uchriter.h"
U_NAMESPACE_BEGIN
* a characterIterator that wraps that data. Needed only for the
* implementation of getText(), a backwards compatibility issue.
*/
- StringCharacterIterator *fSCharIter;
-
- /**
- * When the input text is provided by a UText, this
- * dummy CharacterIterator over an empty string will
- * be returned from getText()
- */
- UCharCharacterIterator *fDCharIter;
+ StringCharacterIterator fSCharIter;
/**
* The rule data for this BreakIterator instance