case URX_CTR_INIT:
{
U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
- fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
+ fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
// Pick up the three extra operands that CTR_INIT has, and
// skip the pattern location counter past
if (minCount == 0) {
fp = StateSave(fp, loopLoc+1, status);
}
- if (maxCount == 0) {
+ if (maxCount == -1) {
+ fp->fExtra[opValue+1] = fp->fInputIdx; // For loop breaking.
+ } else if (maxCount == 0) {
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
}
}
int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
int32_t minCount = (int32_t)pat[opValue+2];
int32_t maxCount = (int32_t)pat[opValue+3];
- // Increment the counter. Note: we DIDN'T worry about counter
- // overflow, since the data comes from UnicodeStrings, which
- // stores its length in an int32_t. Do we have to think about
- // this now that we're using UText? Probably not, since the length
- // in UChar32s is still an int32_t.
(*pCounter)++;
- U_ASSERT(*pCounter > 0);
- if ((uint64_t)*pCounter >= (uint32_t)maxCount) {
- U_ASSERT(*pCounter == maxCount || maxCount == -1);
+ if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
+ U_ASSERT(*pCounter == maxCount);
break;
}
if (*pCounter >= minCount) {
+ if (maxCount == -1) {
+ // Loop has no hard upper bound.
+ // Check that it is progressing through the input, break if it is not.
+ int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1];
+ if (fp->fInputIdx == *pLastInputIdx) {
+ break;
+ } else {
+ *pLastInputIdx = fp->fInputIdx;
+ }
+ }
fp = StateSave(fp, fp->fPatIdx, status);
}
fp->fPatIdx = opValue + 4; // Loop back.
{
// Initialize a non-greedy loop
U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
- fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
+ fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
- // Pick up the three extra operands that CTR_INIT has, and
+ // Pick up the three extra operands that CTR_INIT_NG has, and
// skip the pattern location counter past
int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
fp->fPatIdx += 3;
U_ASSERT(minCount>=0);
U_ASSERT(maxCount>=minCount || maxCount==-1);
U_ASSERT(loopLoc>fp->fPatIdx);
+ if (maxCount == -1) {
+ fp->fExtra[opValue+1] = fp->fInputIdx; // Save initial input index for loop breaking.
+ }
if (minCount == 0) {
if (maxCount != 0) {
int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
int32_t minCount = (int32_t)pat[opValue+2];
int32_t maxCount = (int32_t)pat[opValue+3];
- // Increment the counter. Note: we DIDN'T worry about counter
- // overflow, since the data comes from UnicodeStrings, which
- // stores its length in an int32_t. Do we have to think about
- // this now that we're using UText? Probably not, since the length
- // in UChar32s is still an int32_t.
- (*pCounter)++;
- U_ASSERT(*pCounter > 0);
- if ((uint64_t)*pCounter >= (uint32_t)maxCount) {
+ (*pCounter)++;
+ if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
// The loop has matched the maximum permitted number of times.
// Break out of here with no action. Matching will
// continue with the following pattern.
- U_ASSERT(*pCounter == maxCount || maxCount == -1);
+ U_ASSERT(*pCounter == maxCount);
break;
}
fp->fPatIdx = opValue + 4; // Loop back.
} else {
// We do have the minimum number of matches.
- // Fall into the following pattern, but first do
- // a state save to the top of the loop, so that a failure
+
+ // If there is no upper bound on the loop iterations, check that the input index
+ // is progressing, and stop the loop if it is not.
+ if (maxCount == -1) {
+ int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1];
+ if (fp->fInputIdx == *pLastInputIdx) {
+ break;
+ }
+ *pLastInputIdx = fp->fInputIdx;
+ }
+
+ // Loop Continuation: we will fall into the pattern following the loop
+ // (non-greedy, don't execute loop body first), but first do
+ // a state save to the top of the loop, so that a match failure
// in the following pattern will try another iteration of the loop.
fp = StateSave(fp, opValue + 4, status);
}
case URX_CTR_INIT:
{
U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
- fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
+ fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
// Pick up the three extra operands that CTR_INIT has, and
// skip the pattern location counter past
if (minCount == 0) {
fp = StateSave(fp, loopLoc+1, status);
}
- if (maxCount == 0) {
+ if (maxCount == -1) {
+ fp->fExtra[opValue+1] = fp->fInputIdx; // For loop breaking.
+ } else if (maxCount == 0) {
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
}
}
int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
int32_t minCount = (int32_t)pat[opValue+2];
int32_t maxCount = (int32_t)pat[opValue+3];
- // Increment the counter. Note: we DIDN'T worry about counter
- // overflow, since the data comes from UnicodeStrings, which
- // stores its length in an int32_t. Do we have to think about
- // this now that we're using UText? Probably not, since the length
- // in UChar32s is still an int32_t.
(*pCounter)++;
- U_ASSERT(*pCounter > 0);
- if ((uint64_t)*pCounter >= (uint32_t)maxCount) {
- U_ASSERT(*pCounter == maxCount || maxCount == -1);
+ if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
+ U_ASSERT(*pCounter == maxCount);
break;
}
if (*pCounter >= minCount) {
+ if (maxCount == -1) {
+ // Loop has no hard upper bound.
+ // Check that it is progressing through the input, break if it is not.
+ int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1];
+ if (fp->fInputIdx == *pLastInputIdx) {
+ break;
+ } else {
+ *pLastInputIdx = fp->fInputIdx;
+ }
+ }
fp = StateSave(fp, fp->fPatIdx, status);
}
fp->fPatIdx = opValue + 4; // Loop back.
{
// Initialize a non-greedy loop
U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
- fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
+ fp->fExtra[opValue] = 0; // Set the loop counter variable to zero
- // Pick up the three extra operands that CTR_INIT has, and
+ // Pick up the three extra operands that CTR_INIT_NG has, and
// skip the pattern location counter past
int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
fp->fPatIdx += 3;
U_ASSERT(minCount>=0);
U_ASSERT(maxCount>=minCount || maxCount==-1);
U_ASSERT(loopLoc>fp->fPatIdx);
+ if (maxCount == -1) {
+ fp->fExtra[opValue+1] = fp->fInputIdx; // Save initial input index for loop breaking.
+ }
if (minCount == 0) {
if (maxCount != 0) {
int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
int32_t minCount = (int32_t)pat[opValue+2];
int32_t maxCount = (int32_t)pat[opValue+3];
- // Increment the counter. Note: we DIDN'T worry about counter
- // overflow, since the data comes from UnicodeStrings, which
- // stores its length in an int32_t. Do we have to think about
- // this now that we're using UText? Probably not, since the length
- // in UChar32s is still an int32_t.
+
(*pCounter)++;
- U_ASSERT(*pCounter > 0);
-
- if ((uint64_t)*pCounter >= (uint32_t)maxCount) {
+ if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
// The loop has matched the maximum permitted number of times.
// Break out of here with no action. Matching will
// continue with the following pattern.
- U_ASSERT(*pCounter == maxCount || maxCount == -1);
+ U_ASSERT(*pCounter == maxCount);
break;
}
fp->fPatIdx = opValue + 4; // Loop back.
} else {
// We do have the minimum number of matches.
- // Fall into the following pattern, but first do
- // a state save to the top of the loop, so that a failure
+
+ // If there is no upper bound on the loop iterations, check that the input index
+ // is progressing, and stop the loop if it is not.
+ if (maxCount == -1) {
+ int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1];
+ if (fp->fInputIdx == *pLastInputIdx) {
+ break;
+ }
+ *pLastInputIdx = fp->fInputIdx;
+ }
+
+ // Loop Continuation: we will fall into the pattern following the loop
+ // (non-greedy, don't execute loop body first), but first do
+ // a state save to the top of the loop, so that a match failure
// in the following pattern will try another iteration of the loop.
fp = StateSave(fp, opValue + 4, status);
}