#define SECOND_LAST_BYTE_SHIFT_ 8
#define SUPPLEMENTARY_MIN_VALUE_ 0x10000
-static const Normalizer2Impl *g_nfcImpl = NULL;
+static const Normalizer2Impl *g_nfcImpl = nullptr;
// internal methods -------------------------------------------------
*/
static
inline void setColEIterOffset(UCollationElements *elems,
- int32_t offset)
+ int32_t offset,
+ UErrorCode &status) // receives any error reported by ucol_setOffset; replaces the former local status, which was never returned to the caller
{
// Note: Not "fast" any more after the 2013 collation rewrite.
// We do not want to expose more internals than necessary.
- UErrorCode status = U_ZERO_ERROR;
ucol_setOffset(elems, offset, &status);
}
U_CDECL_BEGIN
static UBool U_CALLCONV
usearch_cleanup(void) {
- g_nfcImpl = NULL;
+ g_nfcImpl = nullptr; // forget the cached NFC implementation; initializeFCD() fetches it again when it sees nullptr
return TRUE;
}
U_CDECL_END
static
inline void initializeFCD(UErrorCode *status)
{
- if (g_nfcImpl == NULL) {
+ if (g_nfcImpl == nullptr) { // nothing cached yet: fetch the NFC implementation and register usearch_cleanup, which resets the cache
g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);
ucln_i18n_registerCleanup(UCLN_I18N_USEARCH, usearch_cleanup);
}
}
/**
-* Allocate a memory and returns NULL if it failed.
+* Allocates memory; returns nullptr if the allocation failed.
* Internal method, status assumed to be a success.
* @param size to allocate
* @param status output error if any, caller to check status before calling
* method, status assumed to be success when passed in.
-* @return newly allocated array, NULL otherwise
+* @return newly allocated array, nullptr otherwise
*/
static
inline void * allocateMemory(uint32_t size, UErrorCode *status)
{
uint32_t *result = (uint32_t *)uprv_malloc(size);
- if (result == NULL) {
+ if (result == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
}
return result;
* Creates a new array if we run out of space. The caller will have to
* manually deallocate the newly allocated array.
* Internal method, status assumed to be success, caller has to check status
-* before calling this method. destination not to be NULL and has at least
+* before calling this method. destination not to be nullptr and has at least
* size destinationlength.
* @param destination target array
* @param offset destination offset to add value
int32_t *temp = (int32_t *)allocateMemory(
sizeof(int32_t) * newlength, status);
if (U_FAILURE(*status)) {
- return NULL;
+ return nullptr;
}
uprv_memcpy(temp, destination, sizeof(int32_t) * (size_t)offset);
*destinationlength = newlength;
* Creates a new array if we run out of space. The caller will have to
* manually deallocate the newly allocated array.
* Internal method, status assumed to be success, caller has to check status
-* before calling this method. destination not to be NULL and has at least
+* before calling this method. destination not to be nullptr and has at least
* size destinationlength.
* @param destination target array
* @param offset destination offset to add value
sizeof(int64_t) * newlength, status);
if (U_FAILURE(*status)) {
- return NULL;
+ return nullptr;
}
uprv_memcpy(temp, destination, sizeof(int64_t) * (size_t)offset);
uint32_t patternlength = pattern->textLength;
UCollationElements *coleiter = strsrch->utilIter;
- if (coleiter == NULL) {
+ if (coleiter == nullptr) {
coleiter = ucol_openElements(strsrch->collator, pattern->text,
patternlength, status);
// status will be checked in ucol_next(..) later and if it is an
*/
static
inline void initializePatternPCETable(UStringSearch *strsrch,
- UErrorCode *status)
+ UErrorCode *status)
{
UPattern *pattern = &(strsrch->pattern);
uint32_t pcetablesize = INITIAL_ARRAY_SIZE_;
uint32_t patternlength = pattern->textLength;
UCollationElements *coleiter = strsrch->utilIter;
- if (coleiter == NULL) {
+ if (coleiter == nullptr) {
coleiter = ucol_openElements(strsrch->collator, pattern->text,
patternlength, status);
- // status will be checked in ucol_next(..) later and if it is an
- // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be
+ // status will be checked in nextProcessed(..) later and if it is an error
+ // then UCOL_PROCESSED_NULLORDER is returned by nextProcessed(..), so 0 will be
// returned.
strsrch->utilIter = coleiter;
} else {
return;
}
- if (pattern->pces != pcetable && pattern->pces != NULL) {
+ if (pattern->pces != pcetable && pattern->pces != nullptr) {
uprv_free(pattern->pces);
}
// ** Should processed CEs be signed or unsigned?
// ** (the rest of the code in this file seems to play fast-and-loose with
// ** whether a CE is signed or unsigned. For example, look at routine above this one.)
- while ((pce = iter.nextProcessed(NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
+ while ((pce = iter.nextProcessed(nullptr, nullptr, status)) != UCOL_PROCESSED_NULLORDER &&
U_SUCCESS(*status)) {
int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize,
pce,
}
// ** HACK **
- if (strsrch->pattern.pces != NULL) {
+ if (strsrch->pattern.pces != nullptr) {
if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
uprv_free(strsrch->pattern.pces);
}
- strsrch->pattern.pces = NULL;
+ strsrch->pattern.pces = nullptr;
}
initializePatternCETable(strsrch, status);
initializePattern(strsrch, status);
}
+#if !UCONFIG_NO_BREAK_ITERATION
+// Returns the caller-provided break iterator if one is set; otherwise returns the internal
+// character break iterator, creating it lazily on first use. Returns nullptr if status is already a failure.
+static UBreakIterator* getBreakIterator(UStringSearch *strsrch, UErrorCode &status)
+{
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ if (strsrch->search->breakIter != nullptr) {
+ return strsrch->search->breakIter;
+ }
+
+ if (strsrch->search->internalBreakIter != nullptr) {
+ return strsrch->search->internalBreakIter;
+ }
+
+ // Need to create the internal break iterator: a UBRK_CHARACTER iterator over the search text, opened with the collator's valid locale.
+ strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER,
+ ucol_getLocaleByType(strsrch->collator, ULOC_VALID_LOCALE, &status),
+ strsrch->search->text, strsrch->search->textLength, &status);
+
+ return strsrch->search->internalBreakIter; // whatever ubrk_open returned; callers check status before using the result
+}
+#endif
+
/**
-* sets match not found
+* Sets the match result to "not found" (matchedIndex = USEARCH_DONE, matchedLength = 0), regardless of the incoming error status,
+* and sets the text iterator offset to the text length (forward search) or to 0 (backward search).
+* An error raised while setting that offset is reported back only if status was not already a failure.
* @param strsrch string search data
+* @param status in/out error code: an existing failure is left untouched, otherwise it receives any error from setting the iterator offset.
*/
static
-inline void setMatchNotFound(UStringSearch *strsrch)
+inline void setMatchNotFound(UStringSearch *strsrch, UErrorCode &status)
{
- // this method resets the match result regardless of the error status.
+ UErrorCode localStatus = U_ZERO_ERROR; // separate status, so the offset calls below do not receive the caller's incoming status
+
strsrch->search->matchedIndex = USEARCH_DONE;
strsrch->search->matchedLength = 0;
if (strsrch->search->isForwardSearching) {
- setColEIterOffset(strsrch->textIter, strsrch->search->textLength);
+ setColEIterOffset(strsrch->textIter, strsrch->search->textLength, localStatus);
}
else {
- setColEIterOffset(strsrch->textIter, 0);
+ setColEIterOffset(strsrch->textIter, 0, localStatus);
+ }
+
+ // If an error occurred while setting the result to not found (e.g. OOM),
+ // then we want to report that error back to the caller. A failure already in status is never overwritten.
+ if (U_SUCCESS(status) && U_FAILURE(localStatus)) {
+ status = localStatus;
}
}
* @return TRUE if identical match is found
*/
static
-inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start,
- int32_t end)
+inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start, int32_t end)
{
if (strsrch->strength != UCOL_IDENTICAL) {
return TRUE;
UErrorCode *status)
{
if (U_FAILURE(*status)) {
- return NULL;
+ return nullptr;
}
#if UCONFIG_NO_BREAK_ITERATION
- if (breakiter != NULL) {
+ if (breakiter != nullptr) {
*status = U_UNSUPPORTED_ERROR;
- return NULL;
+ return nullptr;
}
#endif
if (locale) {
patternlength, text, textlength,
collator, breakiter, status);
- if (result == NULL || U_FAILURE(*status)) {
+ if (result == nullptr || U_FAILURE(*status)) {
if (collator) {
ucol_close(collator);
}
- return NULL;
+ return nullptr;
}
else {
result->ownCollator = TRUE;
return result;
}
*status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
+ return nullptr;
}
U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
UErrorCode *status)
{
if (U_FAILURE(*status)) {
- return NULL;
+ return nullptr;
}
#if UCONFIG_NO_BREAK_ITERATION
- if (breakiter != NULL) {
+ if (breakiter != nullptr) {
*status = U_UNSUPPORTED_ERROR;
- return NULL;
+ return nullptr;
}
#endif
- if (pattern == NULL || text == NULL || collator == NULL) {
+ if (pattern == nullptr || text == nullptr || collator == nullptr) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
+ return nullptr;
}
// string search does not really work when numeric collation is turned on
if(ucol_getAttribute(collator, UCOL_NUMERIC_COLLATION, status) == UCOL_ON) {
*status = U_UNSUPPORTED_ERROR;
- return NULL;
+ return nullptr;
}
if (U_SUCCESS(*status)) {
initializeFCD(status);
if (U_FAILURE(*status)) {
- return NULL;
+ return nullptr;
}
UStringSearch *result;
}
if (textlength <= 0 || patternlength <= 0) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
+ return nullptr;
}
result = (UStringSearch *)uprv_malloc(sizeof(UStringSearch));
- if (result == NULL) {
+ if (result == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
+ return nullptr;
}
result->collator = collator;
if (U_FAILURE(*status)) {
uprv_free(result);
- return NULL;
+ return nullptr;
}
result->search = (USearch *)uprv_malloc(sizeof(USearch));
- if (result->search == NULL) {
+ if (result->search == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(result);
- return NULL;
+ return nullptr;
}
result->search->text = text;
result->pattern.text = pattern;
result->pattern.textLength = patternlength;
- result->pattern.ces = NULL;
- result->pattern.pces = NULL;
+ result->pattern.ces = nullptr;
+ result->pattern.pces = nullptr;
result->search->breakIter = breakiter;
#if !UCONFIG_NO_BREAK_ITERATION
- result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status);
+ result->search->internalBreakIter = nullptr; // Lazily created.
if (breakiter) {
ubrk_setText(breakiter, text, textlength, status);
}
result->ownCollator = FALSE;
result->search->matchedLength = 0;
result->search->matchedIndex = USEARCH_DONE;
- result->utilIter = NULL;
+ result->utilIter = nullptr;
result->textIter = ucol_openElements(collator, text,
textlength, status);
- result->textProcessedIter = NULL;
+ result->textProcessedIter = nullptr;
if (U_FAILURE(*status)) {
usearch_close(result);
- return NULL;
+ return nullptr;
}
result->search->isOverlap = FALSE;
if (U_FAILURE(*status)) {
usearch_close(result);
- return NULL;
+ return nullptr;
}
return result;
}
- return NULL;
+ return nullptr;
}
U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
uprv_free(strsrch->pattern.ces);
}
- if (strsrch->pattern.pces != NULL &&
+ if (strsrch->pattern.pces != nullptr &&
strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
uprv_free(strsrch->pattern.pces);
}
}
#if !UCONFIG_NO_BREAK_ITERATION
- if (strsrch->search->internalBreakIter) {
+ if (strsrch->search->internalBreakIter != nullptr) {
ubrk_close(strsrch->search->internalBreakIter);
}
#endif
UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) {
if (U_FAILURE(*status)) { return FALSE; }
- if (strsrch->textProcessedIter == NULL) {
+ if (strsrch->textProcessedIter == nullptr) {
strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
- if (strsrch->textProcessedIter == NULL) {
+ if (strsrch->textProcessedIter == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
// set and get methods --------------------------------------------------
U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
- int32_t position,
+ int32_t position,
UErrorCode *status)
{
if (U_SUCCESS(*status) && strsrch) {
*status = U_INDEX_OUTOFBOUNDS_ERROR;
}
else {
- setColEIterOffset(strsrch->textIter, position);
+ setColEIterOffset(strsrch->textIter, position, *status);
}
strsrch->search->matchedIndex = USEARCH_DONE;
strsrch->search->matchedLength = 0;
return USEARCH_DONE;
}
-U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
- USearchAttribute attribute,
- USearchAttributeValue value,
- UErrorCode *status)
+U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
+ USearchAttribute attribute,
+ USearchAttributeValue value,
+ UErrorCode *status)
{
if (U_SUCCESS(*status) && strsrch) {
switch (attribute)
U_CAPI int32_t U_EXPORT2 usearch_getMatchedStart(
const UStringSearch *strsrch)
{
- if (strsrch == NULL) {
+ if (strsrch == nullptr) {
return USEARCH_DONE;
}
return strsrch->search->matchedIndex;
if (U_FAILURE(*status)) {
return USEARCH_DONE;
}
- if (strsrch == NULL || resultCapacity < 0 || (resultCapacity > 0 &&
- result == NULL)) {
+ if (strsrch == nullptr || resultCapacity < 0 || (resultCapacity > 0 &&
+ result == nullptr)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return USEARCH_DONE;
}
if (strsrch) {
return strsrch->search->breakIter;
}
- return NULL;
+ return nullptr;
}
#endif
UErrorCode *status)
{
if (U_SUCCESS(*status)) {
- if (strsrch == NULL || text == NULL || textlength < -1 ||
+ if (strsrch == nullptr || text == nullptr || textlength < -1 ||
textlength == 0) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
strsrch->search->matchedLength = 0;
strsrch->search->reset = TRUE;
#if !UCONFIG_NO_BREAK_ITERATION
- if (strsrch->search->breakIter != NULL) {
+ if (strsrch->search->breakIter != nullptr) {
ubrk_setText(strsrch->search->breakIter, text,
textlength, status);
}
- ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status);
+ if (strsrch->search->internalBreakIter != nullptr) {
+ ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status);
+ }
#endif
}
}
*length = strsrch->search->textLength;
return strsrch->search->text;
}
- return NULL;
+ return nullptr;
}
U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
UErrorCode *status)
{
if (U_SUCCESS(*status)) {
- if (collator == NULL) {
+ if (collator == nullptr) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (strsrch) {
delete strsrch->textProcessedIter;
- strsrch->textProcessedIter = NULL;
+ strsrch->textProcessedIter = nullptr;
ucol_closeElements(strsrch->textIter);
ucol_closeElements(strsrch->utilIter);
- strsrch->textIter = strsrch->utilIter = NULL;
+ strsrch->textIter = strsrch->utilIter = nullptr;
if (strsrch->ownCollator && (strsrch->collator != collator)) {
ucol_close((UCollator *)strsrch->collator);
strsrch->ownCollator = FALSE;
strsrch->strength = ucol_getStrength(collator);
strsrch->ceMask = getMask(strsrch->strength);
#if !UCONFIG_NO_BREAK_ITERATION
- ubrk_close(strsrch->search->internalBreakIter);
- strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status),
- strsrch->search->text, strsrch->search->textLength, status);
+ if (strsrch->search->internalBreakIter != nullptr) {
+ ubrk_close(strsrch->search->internalBreakIter);
+ strsrch->search->internalBreakIter = nullptr; // Lazily created.
+ }
#endif
// if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
strsrch->toShift =
if (strsrch) {
return (UCollator *)strsrch->collator;
}
- return NULL;
+ return nullptr;
}
U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
UErrorCode *status)
{
if (U_SUCCESS(*status)) {
- if (strsrch == NULL || pattern == NULL) {
+ if (strsrch == nullptr || pattern == nullptr) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
else {
U_CAPI const UChar* U_EXPORT2
usearch_getPattern(const UStringSearch *strsrch,
- int32_t *length)
+ int32_t *length)
{
if (strsrch) {
*length = strsrch->pattern.textLength;
return strsrch->pattern.text;
}
- return NULL;
+ return nullptr;
}
// miscellaneous methods --------------------------------------------------
U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
- UErrorCode *status)
+ UErrorCode *status)
{
if (strsrch && U_SUCCESS(*status)) {
strsrch->search->isForwardSearching = TRUE;
}
U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
- int32_t position,
- UErrorCode *status)
+ int32_t position,
+ UErrorCode *status)
{
if (strsrch && U_SUCCESS(*status)) {
strsrch->search->isForwardSearching = TRUE;
}
U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
- UErrorCode *status)
+ UErrorCode *status)
{
if (strsrch && U_SUCCESS(*status)) {
strsrch->search->isForwardSearching = FALSE;
}
U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
- int32_t position,
- UErrorCode *status)
+ int32_t position,
+ UErrorCode *status)
{
if (strsrch && U_SUCCESS(*status)) {
strsrch->search->isForwardSearching = FALSE;
* element iterator before using this method.
*/
U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
- UErrorCode *status)
+ UErrorCode *status)
{
if (U_SUCCESS(*status) && strsrch) {
// note offset is either equivalent to the start of the previous match
(search->matchedIndex != USEARCH_DONE &&
offset + search->matchedLength > textlength))) {
// not enough characters to match
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return USEARCH_DONE;
}
}
}
search->matchedLength = 0;
- setColEIterOffset(strsrch->textIter, search->matchedIndex);
+ setColEIterOffset(strsrch->textIter, search->matchedIndex, *status);
// status checked below
if (search->matchedIndex == textlength) {
search->matchedIndex = USEARCH_DONE;
}
U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
- UErrorCode *status)
+ UErrorCode *status)
{
if (U_SUCCESS(*status) && strsrch) {
int32_t offset;
offset = search->textLength;
search->isForwardSearching = FALSE;
search->reset = FALSE;
- setColEIterOffset(strsrch->textIter, offset);
+ setColEIterOffset(strsrch->textIter, offset, *status);
}
else {
offset = usearch_getOffset(strsrch);
// Could check pattern length, but the
// linear search will do the right thing
if (offset == 0 || matchedindex == 0) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return USEARCH_DONE;
}
}
search->matchedIndex =
(matchedindex == USEARCH_DONE ? offset : matchedindex);
if (search->matchedIndex == 0) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
// status checked below
}
else { // move by codepoints
U16_BACK_1(search->text, 0, search->matchedIndex);
- setColEIterOffset(strsrch->textIter, search->matchedIndex);
+ setColEIterOffset(strsrch->textIter, search->matchedIndex, *status);
// status checked below
search->matchedLength = 0;
}
if (bufSize>DEFAULT_CEBUFFER_SIZE) {
buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI));
- if (buf == NULL) {
+ if (buf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
}
}
// was being hit, so it was changed back to U_ASSERT per ICU-20680. In ICU 70,
// we now use the new UPRV_UNREACHABLE_ASSERT to better indicate the situation.
// ICU-20792 tracks the follow-up work/further investigation on this.
- return NULL;
+ return nullptr;
}
// Manage the circular CE buffer indexing
// was being hit, so it was changed back to U_ASSERT per ICU-20680. In ICU 70,
// we now use the new UPRV_UNREACHABLE_ASSERT to better indicate the situation.
// ICU-20792 tracks the follow-up work/further investigation on this.
- return NULL;
+ return nullptr;
}
// Manage the circular CE buffer indexing
* has an external break iterator, use that. Otherwise use the internal character
* break iterator.
*/
-static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
+static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return startIndex;
+ }
#if 0
const UChar *text = strsrch->search->text;
int32_t textLen = strsrch->search->textLength;
}
return indexOfLastCharChecked;
#elif !UCONFIG_NO_BREAK_ITERATION
- UBreakIterator *breakiterator = strsrch->search->breakIter;
-
- if (breakiterator == NULL) {
- breakiterator = strsrch->search->internalBreakIter;
- }
-
- if (breakiterator != NULL) {
- return ubrk_following(breakiterator, startIndex);
+ UBreakIterator *breakiterator = getBreakIterator(strsrch, status);
+ if (U_FAILURE(status)) {
+ return startIndex;
}
- return startIndex;
+ return ubrk_following(breakiterator, startIndex);
#else
// **** or should we use the original code? ****
return startIndex;
* has an external break iterator, test using that, otherwise test
* using the internal character break iterator.
*/
-static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) {
+static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return TRUE;
+ }
#if 0
const UChar *text = strsrch->search->text;
int32_t textLen = strsrch->search->textLength;
UBool combining = !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR);
return !combining;
#elif !UCONFIG_NO_BREAK_ITERATION
- UBreakIterator *breakiterator = strsrch->search->breakIter;
-
- if (breakiterator == NULL) {
- breakiterator = strsrch->search->internalBreakIter;
+ UBreakIterator *breakiterator = getBreakIterator(strsrch, status);
+ if (U_FAILURE(status)) {
+ return TRUE;
}
- return (breakiterator != NULL && ubrk_isBoundary(breakiterator, index));
+ return ubrk_isBoundary(breakiterator, index);
#else
// **** or use the original code? ****
return TRUE;
}
#if 0
-static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end)
+static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end, UErrorCode &status)
{
-#if !UCONFIG_NO_BREAK_ITERATION
- UBreakIterator *breakiterator = strsrch->search->breakIter;
+ if (U_FAILURE(status)) {
+ return TRUE;
+ }
- if (breakiterator != NULL) {
+#if !UCONFIG_NO_BREAK_ITERATION
+ UBreakIterator *breakiterator = getBreakIterator(strsrch, status);
+ if (U_SUCCESS(status)) {
int32_t startindex = ubrk_first(breakiterator);
int32_t endindex = ubrk_last(breakiterator);
// TODO: reject search patterns beginning with a combining char.
#ifdef USEARCH_DEBUG
- if (getenv("USEARCH_DEBUG") != NULL) {
+ if (getenv("USEARCH_DEBUG") != nullptr) {
printf("Pattern CEs\n");
for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
printf(" %8x", strsrch->pattern.ces[ii]);
if(strsrch->pattern.cesLength == 0 ||
startIdx < 0 ||
startIdx > strsrch->search->textLength ||
- strsrch->pattern.ces == NULL) {
+ strsrch->pattern.ces == nullptr) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
- if (strsrch->pattern.pces == NULL) {
+ if (strsrch->pattern.pces == nullptr) {
initializePatternPCETable(strsrch, status);
}
ucol_setOffset(strsrch->textIter, startIdx, status);
CEIBuffer ceb(strsrch, status);
+ // An out-of-memory (OOM) failure can occur in the initializePatternPCETable function
+ // or CEIBuffer constructor above, so we need to check the status.
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
int32_t targetIx = 0;
- const CEI *targetCEI = NULL;
+ const CEI *targetCEI = nullptr;
int32_t patIx;
UBool found;
int64_t patCE = 0;
// For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer
// (compared to the last CE fetched for the previous targetIx value) as we need to go
- // for this targetIx value, so if it is non-NULL then other ceb.get calls should be OK.
+ // for this targetIx value, so if it is non-null then other ceb.get calls should be OK.
const CEI *firstCEI = ceb.get(targetIx);
- if (firstCEI == NULL) {
+ if (firstCEI == nullptr) {
*status = U_INTERNAL_PROGRAM_ERROR;
found = FALSE;
break;
}
targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
- if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
+ if (!found && ((targetCEI == nullptr) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
// No match at this targetIx. Try again at the next.
continue;
}
// to something else.
// This type of match should be rejected for not completely consuming a
// combining sequence.
- if (!isBreakBoundary(strsrch, mStart)) {
+ if (!isBreakBoundary(strsrch, mStart, *status)) {
found = FALSE;
}
+ if (U_FAILURE(*status)) {
+ break;
+ }
// Check for the start of the match being within an Collation Element Expansion,
// meaning that the first char of the match is only partially matched.
// tests in any case)
// * the match limit is a normalization boundary
UBool allowMidclusterMatch = FALSE;
- if (strsrch->search->text != NULL && strsrch->search->textLength > maxLimit) {
+ if (strsrch->search->text != nullptr && strsrch->search->textLength > maxLimit) {
allowMidclusterMatch =
- strsrch->search->breakIter == NULL &&
- nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
+ strsrch->search->breakIter == nullptr &&
+ nextCEI != nullptr && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
maxLimit >= lastCEI->highIndex && nextCEI->highIndex > maxLimit &&
(strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) ||
strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit)));
// at the break boundary, move the position to the next boundary will result
// incorrect match length when there are ignorable characters exist between
// the position and the next character produces CE(s). See ticket#8482.
- if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit)) {
+ if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit, *status)) {
mLimit = minLimit;
} else {
- int32_t nba = nextBoundaryAfter(strsrch, minLimit);
+ int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status);
// Note that we can have nba < maxLimit && nba >= minLImit, in which
// case we want to set mLimit to nba regardless of allowMidclusterMatch
// (i.e. we back off mLimit to the previous breakIterator boundary).
}
}
+ if (U_FAILURE(*status)) {
+ break;
+ }
+
#ifdef USEARCH_DEBUG
- if (getenv("USEARCH_DEBUG") != NULL) {
+ if (getenv("USEARCH_DEBUG") != nullptr) {
printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
}
#endif
found = FALSE;
}
- if (!isBreakBoundary(strsrch, mLimit)) {
+ if (!isBreakBoundary(strsrch, mLimit, *status)) {
found = FALSE;
}
+ if (U_FAILURE(*status)) {
+ break;
+ }
}
if (! checkIdentical(strsrch, mStart, mLimit)) {
}
#ifdef USEARCH_DEBUG
- if (getenv("USEARCH_DEBUG") != NULL) {
+ if (getenv("USEARCH_DEBUG") != nullptr) {
printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
int32_t lastToPrint = ceb.limitIx+2;
for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
// All Done. Store back the match bounds to the caller.
//
+
+ if (U_FAILURE(*status)) {
+ found = FALSE; // No match if a failure occurred.
+ }
+
if (found==FALSE) {
mLimit = -1;
mStart = -1;
}
- if (matchStart != NULL) {
+ if (matchStart != nullptr) {
*matchStart= mStart;
}
- if (matchLimit != NULL) {
+ if (matchLimit != nullptr) {
*matchLimit = mLimit;
}
// TODO: reject search patterns beginning with a combining char.
#ifdef USEARCH_DEBUG
- if (getenv("USEARCH_DEBUG") != NULL) {
+ if (getenv("USEARCH_DEBUG") != nullptr) {
printf("Pattern CEs\n");
for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
printf(" %8x", strsrch->pattern.ces[ii]);
// Input parameter sanity check.
// TODO: should input indices clip to the text length
// in the same way that UText does.
- if(strsrch->pattern.cesLength == 0 ||
+ if(strsrch->pattern.cesLength == 0 ||
startIdx < 0 ||
startIdx > strsrch->search->textLength ||
- strsrch->pattern.ces == NULL) {
+ strsrch->pattern.ces == nullptr) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
- if (strsrch->pattern.pces == NULL) {
+ if (strsrch->pattern.pces == nullptr) {
initializePatternPCETable(strsrch, status);
}
* consider for the match.
*/
if (startIdx < strsrch->search->textLength) {
- UBreakIterator *bi = strsrch->search->internalBreakIter;
- int32_t next = ubrk_following(bi, startIdx);
+ UBreakIterator *breakiterator = getBreakIterator(strsrch, *status);
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
+ int32_t next = ubrk_following(breakiterator, startIdx);
ucol_setOffset(strsrch->textIter, next, status);
ucol_setOffset(strsrch->textIter, startIdx, status);
}
+ // An out-of-memory (OOM) failure can occur above, so we need to check the status.
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
- const CEI *targetCEI = NULL;
+ const CEI *targetCEI = nullptr;
int32_t patIx;
UBool found;
found = TRUE;
// For targetIx > limitIx, this ceb.getPrevious gets a CE that is as far back in the ring buffer
// (compared to the last CE fetched for the previous targetIx value) as we need to go
- // for this targetIx value, so if it is non-NULL then other ceb.getPrevious calls should be OK.
+ // for this targetIx value, so if it is non-null then other ceb.getPrevious calls should be OK.
const CEI *lastCEI = ceb.getPrevious(targetIx);
- if (lastCEI == NULL) {
+ if (lastCEI == nullptr) {
*status = U_INTERNAL_PROGRAM_ERROR;
found = FALSE;
break;
}
}
- if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
+ if (!found && ((targetCEI == nullptr) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
// No match at this targetIx. Try again at the next.
continue;
}
// to something else.
// This type of match should be rejected for not completely consuming a
// combining sequence.
- if (!isBreakBoundary(strsrch, mStart)) {
+ if (!isBreakBoundary(strsrch, mStart, *status)) {
found = FALSE;
}
+ if (U_FAILURE(*status)) {
+ break;
+ }
// Look at the high index of the first CE in the match. If it's the same as the
// low index, the first CE in the match is in the middle of an expansion.
// tests in any case)
// * the match limit is a normalization boundary
UBool allowMidclusterMatch = FALSE;
- if (strsrch->search->text != NULL && strsrch->search->textLength > maxLimit) {
+ if (strsrch->search->text != nullptr && strsrch->search->textLength > maxLimit) {
allowMidclusterMatch =
- strsrch->search->breakIter == NULL &&
- nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
+ strsrch->search->breakIter == nullptr &&
+ nextCEI != nullptr && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
maxLimit >= lastCEI->highIndex && nextCEI->highIndex > maxLimit &&
(strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) ||
strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit)));
// Advance the match end position to the first acceptable match boundary.
// This advances the index over any combining characters.
if (minLimit < maxLimit) {
- int32_t nba = nextBoundaryAfter(strsrch, minLimit);
+ int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status);
// Note that we can have nba < maxLimit && nba >= minLImit, in which
// case we want to set mLimit to nba regardless of allowMidclusterMatch
// (i.e. we back off mLimit to the previous breakIterator boundary).
}
// Make sure the end of the match is on a break boundary
- if (!isBreakBoundary(strsrch, mLimit)) {
+ if (!isBreakBoundary(strsrch, mLimit, *status)) {
found = FALSE;
}
+ if (U_FAILURE(*status)) {
+ break;
+ }
}
} else {
// The maximum position is detected by boundary after
// the last non-ignorable CE. Combining sequence
// across the start index will be truncated.
- int32_t nba = nextBoundaryAfter(strsrch, minLimit);
+ int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status);
mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx;
}
#ifdef USEARCH_DEBUG
- if (getenv("USEARCH_DEBUG") != NULL) {
+ if (getenv("USEARCH_DEBUG") != nullptr) {
printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
}
#endif
}
#ifdef USEARCH_DEBUG
- if (getenv("USEARCH_DEBUG") != NULL) {
+ if (getenv("USEARCH_DEBUG") != nullptr) {
printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
int32_t lastToPrint = ceb.limitIx+2;
for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
// All Done. Store back the match bounds to the caller.
//
+
+ if (U_FAILURE(*status)) {
+ found = FALSE; // No match if a failure occurred.
+ }
+
if (found==FALSE) {
mLimit = -1;
mStart = -1;
}
- if (matchStart != NULL) {
+ if (matchStart != nullptr) {
*matchStart= mStart;
}
- if (matchLimit != NULL) {
+ if (matchLimit != nullptr) {
*matchLimit = mLimit;
}
UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
{
if (U_FAILURE(*status)) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
strsrch->search->matchedLength = end - start;
return TRUE;
} else {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
}
UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
{
if (U_FAILURE(*status)) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
strsrch->search->matchedLength = end - start;
return TRUE;
} else {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
}
UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
{
if (U_FAILURE(*status)) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
// move the start position at the end of possible match
initializePatternPCETable(strsrch, status);
if (!initTextProcessedIter(strsrch, status)) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
- int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
+ int64_t pce = strsrch->textProcessedIter->nextProcessed(nullptr, nullptr, status);
if (pce == UCOL_PROCESSED_NULLORDER) {
// at the end of the text
break;
}
}
if (U_FAILURE(*status)) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
textOffset = ucol_getOffset(strsrch->textIter);
strsrch->search->matchedLength = end - start;
return TRUE;
} else {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
}
UErrorCode *status)
{
if (U_FAILURE(*status)) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
// move the start position at the end of possible match
initializePatternPCETable(strsrch, status);
if (!initTextProcessedIter(strsrch, status)) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
- int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
+ int64_t pce = strsrch->textProcessedIter->nextProcessed(nullptr, nullptr, status);
if (pce == UCOL_PROCESSED_NULLORDER) {
// at the end of the text
break;
}
}
if (U_FAILURE(*status)) {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
textOffset = ucol_getOffset(strsrch->textIter);
strsrch->search->matchedLength = end - start;
return TRUE;
} else {
- setMatchNotFound(strsrch);
+ setMatchNotFound(strsrch, *status);
return FALSE;
}
}