From 059f862c4ab584cff0ef089e05ca7fdab52fc0d7 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Mon, 10 Feb 2014 20:12:03 +0000 Subject: [PATCH] ICU-10015 Dictionary Break, sync between ICU4C & J. Most changes for this ticket are in ICU4J. X-SVN-Rev: 35115 --- icu4c/source/common/dictbe.cpp | 5 ++++- icu4c/source/common/dictionarydata.cpp | 4 ++-- icu4c/source/common/rbbi.cpp | 11 ++++++----- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/icu4c/source/common/dictbe.cpp b/icu4c/source/common/dictbe.cpp index 45104265b6f..20447f45c8a 100644 --- a/icu4c/source/common/dictbe.cpp +++ b/icu4c/source/common/dictbe.cpp @@ -1,6 +1,6 @@ /** ******************************************************************************* - * Copyright (C) 2006-2013, International Business Machines Corporation + * Copyright (C) 2006-2014, International Business Machines Corporation * and others. All Rights Reserved. ******************************************************************************* */ @@ -49,6 +49,9 @@ DictionaryBreakEngine::findBreaks( UText *text, int32_t result = 0; // Find the span of characters included in the set. + // The span to break begins at the current position in the text, and + // extends towards the start or end of the text, depending on 'reverse'. + int32_t start = (int32_t)utext_getNativeIndex(text); int32_t current; int32_t rangeStart; diff --git a/icu4c/source/common/dictionarydata.cpp b/icu4c/source/common/dictionarydata.cpp index 039871b5be8..f174f9d4508 100644 --- a/icu4c/source/common/dictionarydata.cpp +++ b/icu4c/source/common/dictionarydata.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2013, International Business Machines +* Copyright (C) 2014, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * dictionarydata.h @@ -118,7 +118,7 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t if (count < limit) { if (values != NULL) { values[count] = bt.getValue(); - } + } lengths[count++] = numChars; } if (result == USTRINGTRIE_FINAL_VALUE) { diff --git a/icu4c/source/common/rbbi.cpp b/icu4c/source/common/rbbi.cpp index 6ab57a7c117..f091a3ac49e 100644 --- a/icu4c/source/common/rbbi.cpp +++ b/icu4c/source/common/rbbi.cpp @@ -1,6 +1,6 @@ /* *************************************************************************** -* Copyright (C) 1999-2013 International Business Machines Corporation +* Copyright (C) 1999-2014 International Business Machines Corporation * and others. All rights reserved. *************************************************************************** */ @@ -592,6 +592,7 @@ int32_t RuleBasedBreakIterator::next(void) { } int32_t startPos = current(); + fDictionaryCharCount = 0; int32_t result = handleNext(fData->fForwardTable); if (fDictionaryCharCount > 0) { result = checkDictionary(startPos, result, FALSE); @@ -646,7 +647,6 @@ int32_t RuleBasedBreakIterator::previous(void) { // break position before the current position (we back our internal // iterator up one step to prevent handlePrevious() from returning // the current position), but not necessarily the last one before - // where we started int32_t start = current(); @@ -679,11 +679,11 @@ int32_t RuleBasedBreakIterator::previous(void) { // the result position that we are to return (in lastResult.) If // the backwards rules overshot and the above loop had to do two or more // next()s to move up to the desired return position, we will have a valid - // tag value. But, if handlePrevious() took us to exactly the correct result positon, + // tag value. But, if handlePrevious() took us to exactly the correct result position, // we wont have a tag value for that position, which is only set by handleNext(). - // set the current iteration position to be the last break position - // before where we started, and then return that value + // Set the current iteration position to be the last break position + // before where we started, and then return that value. utext_setNativeIndex(fText, lastResult); fLastRuleStatusIndex = lastTag; // for use by getRuleStatus() fLastStatusIndexValid = breakTagValid; @@ -1703,6 +1703,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos, // If we found breaks, build a new break cache. The first and last entries must // be the original starting and ending position. if (foundBreakCount > 0) { + U_ASSERT(foundBreakCount == breaks.size()); int32_t totalBreaks = foundBreakCount; if (startPos < breaks.elementAti(0)) { totalBreaks += 1; -- 2.40.0