From: Andrei Zmievski
Date: Mon, 10 Jul 2006 21:18:01 +0000 (+0000)
Subject: Implement following() for combining sequences.
X-Git-Tag: RELEASE_1_0_0RC1~2506
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f2cbf6f9502367a68217b566bfbdf47fcca0f3b7;p=php

Implement following() for combining sequences.
---

diff --git a/ext/unicode/unicode_iterators.c b/ext/unicode/unicode_iterators.c
index adec5f06ab..c8fc9c0714 100644
--- a/ext/unicode/unicode_iterators.c
+++ b/ext/unicode/unicode_iterators.c
@@ -424,12 +424,12 @@ static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC)
 	if (flags & ITER_REVERSE) {
 		text_iter_helper_move(0, object->text, object->text_len,
 							  &object->u.cs.start, &object->u.cs.start_cp_offset);
-		object->u.cs.end = object->u.cs.start;
 	} else {
 		text_iter_helper_move(1, object->text, object->text_len,
 							  &object->u.cs.start, &object->u.cs.start_cp_offset);
-		object->u.cs.end = object->u.cs.start;
 	}
+	object->u.cs.end = object->u.cs.start;
+	object->u.cs.end_cp_offset = object->u.cs.start_cp_offset;
 	object->u.cs.index++;
 }
 
@@ -446,6 +446,64 @@ static void text_iter_cs_rewind(text_iter_obj *object, long flags TSRMLS_DC)
 	object->u.cs.index = 0; /* because _next increments index */
 }
 
+static void text_iter_cs_following(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC)
+{
+	int32_t k, tmp;
+
+	if (offset < 0) {
+		offset = 0;
+	}
+
+	/*
+	 * On invalid iterator we always want to start looking for the code unit
+	 * offset from the beginning of the string.
+	 */
+	if (object->u.cs.start_cp_offset == UBRK_DONE) {
+		object->u.cs.start_cp_offset = 0;
+		object->u.cs.start = 0;
+	}
+
+	/*
+	 * Try to locate the code unit position relative to the last known codepoint
+	 * offset.
+	 */
+	k = object->u.cs.start;
+	if (offset > object->u.cs.start_cp_offset) {
+		U16_FWD_N(object->text, k, object->text_len, offset - object->u.cs.start_cp_offset);
+	} else {
+		U16_BACK_N(object->text, 0, k, object->u.cs.start_cp_offset - offset);
+	}
+
+	/*
+	 * Locate the actual boundary.
+	 */
+	if (flags & ITER_REVERSE) {
+		/*
+		 * If offset was at or beyond the length of text, we need to adjust it
+		 * to the number of codepoints in the text.
+		 */
+		if (k == object->text_len) {
+			offset = u_countChar32(object->text, object->text_len);
+		}
+		text_iter_helper_move(0, object->text, object->text_len, &k, &offset);
+	} else {
+		text_iter_helper_move(1, object->text, object->text_len, &k, &offset);
+	}
+
+	if (k == object->u.cs.start) {
+		return;
+	}
+
+	object->u.cs.start = k;
+	object->u.cs.start_cp_offset = offset;
+	object->u.cs.end = object->u.cs.start;
+}
+
+static zend_bool text_iter_cs_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC)
+{
+	return 1;
+}
+
 static text_iter_ops text_iter_cs_ops = {
 	text_iter_cs_valid,
 	text_iter_cs_current,
@@ -453,6 +511,8 @@ static text_iter_ops text_iter_cs_ops = {
 	text_iter_cs_offset,
 	text_iter_cs_next,
 	text_iter_cs_rewind,
+	text_iter_cs_following,
+	text_iter_cs_isBoundary,
 };
 
 
@@ -598,7 +658,6 @@ static void text_iter_brk_following(text_iter_obj *object, int32_t offset, long
 	} else {
 		object->u.brk.bound = ubrk_following(object->u.brk.iter, k);
 	}
-	object->u.brk.next = object->u.brk.bound;
 
 	/*
 	 * If boundary is the same one as where we were at before, simply return.
@@ -607,6 +666,8 @@ static void text_iter_brk_following(text_iter_obj *object, int32_t offset, long
 		return;
 	}
 
+	object->u.brk.next = object->u.brk.bound;
+
 	/*
 	 * Adjust the internal codepoint offset based on how far we've moved.
 	 */