*
* The length can be negative for a NUL-terminated string.
*
- * If the offset points to a single, unpaired surrogate, then that itself
- * will be returned as the code point.
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to that unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* @param s const UChar * string
} \
}
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
+ *
+ * Implementation notes:
+ * - This is a statement-block macro; s, i and c are each expanded more than
+ *   once, so the arguments should be free of side effects.
+ * - The forward lookup is skipped only when i+1 equals length; the backward
+ *   lookup is skipped unless i is greater than start. A trail surrogate
+ *   at offset start therefore always yields U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @see U16_GET
+ * @draft ICU 60
+ */
+#define U16_GET_OR_FFFD(s, start, i, length, c) { \
+ (c)=(s)[i]; /* fetch the code unit at the offset; i itself is never modified */ \
+ if(U16_IS_SURROGATE(c)) { /* non-surrogate code units are returned as-is */ \
+ uint16_t __c2; /* receives the neighboring code unit */ \
+ if(U16_IS_SURROGATE_LEAD(c)) { /* lead surrogate: its partner would follow */ \
+ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { /* boundary check first, then read the next unit */ \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); /* (lead, trail) order */ \
+ } else { /* lead surrogate without a following trail */ \
+ (c)=0xfffd; \
+ } \
+ } else { /* trail surrogate: its partner would precede */ \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* do not read before start */ \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); /* (lead, trail) order */ \
+ } else { /* trail surrogate without a preceding lead */ \
+ (c)=0xfffd; \
+ } \
+ } \
+ } \
+}
+
+#endif // U_HIDE_DRAFT_API
+
/* definitions with forward iteration --------------------------------------- */
/**
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
- * to a single, unpaired lead surrogate, then that itself
- * will be returned as the code point.
+ * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
*
* @param s const UChar * string
* @param i string offset, must be i<length
} \
}
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to U+FFFD.
+ *
+ * Implementation notes:
+ * - i is an in/out argument and must be a modifiable lvalue. It is advanced
+ *   by two code units when a lead/trail pair is consumed, and by one
+ *   code unit otherwise (including when c is set to U+FFFD).
+ * - This is a statement-block macro; s, i and c are each expanded more than
+ *   once, so the arguments should be free of side effects.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @see U16_NEXT
+ * @draft ICU 60
+ */
+#define U16_NEXT_OR_FFFD(s, i, length, c) { \
+ (c)=(s)[(i)++]; /* read one code unit and step past it */ \
+ if(U16_IS_SURROGATE(c)) { /* non-surrogate code units are returned as-is */ \
+ uint16_t __c2; /* receives the following code unit */ \
+ if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { /* i already points at the next unit; check the boundary before reading it */ \
+ ++(i); /* consume the trail surrogate too */ \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } else { /* trail surrogate, or lead surrogate without a following trail */ \
+ (c)=0xfffd; \
+ } \
+ } \
+}
+
+#endif // U_HIDE_DRAFT_API
+
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
- * trail surrogate, then that itself
- * will be returned as the code point.
+ * trail surrogate, then c is set to that unpaired surrogate.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
} \
}
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to U+FFFD.
+ *
+ * Implementation notes:
+ * - i is an in/out argument and must be a modifiable lvalue. It is moved back
+ *   by two code units when a lead/trail pair is consumed, and by one
+ *   code unit otherwise (including when c is set to U+FFFD).
+ * - The macro takes no length argument; it only reads code units at offsets
+ *   below the input value of i, and never below start.
+ * - This is a statement-block macro; s, i and c are each expanded more than
+ *   once, so the arguments should be free of side effects.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @see U16_PREV
+ * @draft ICU 60
+ */
+#define U16_PREV_OR_FFFD(s, start, i, c) { \
+ (c)=(s)[--(i)]; /* step back one code unit and read it */ \
+ if(U16_IS_SURROGATE(c)) { /* non-surrogate code units are returned as-is */ \
+ uint16_t __c2; /* receives the preceding code unit */ \
+ if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* do not read before start */ \
+ --(i); /* consume the lead surrogate too */ \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); /* (lead, trail) order */ \
+ } else { /* lead surrogate, or trail surrogate without a preceding lead */ \
+ (c)=0xfffd; \
+ } \
+ } \
+}
+
+#endif // U_HIDE_DRAFT_API
+
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
0x11734, 0xd800, UTF_ERROR_VALUE
};
uint16_t i=0;
- UChar32 c;
+ UChar32 c, expected;
uint16_t offset=0;
for(offset=0; offset<UPRV_LENGTHOF(input); offset++) {
if(0<offset && offset<UPRV_LENGTHOF(input)-1){
}
UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, FALSE);
- if(c != result[i+1]){
- log_err("ERROR: UTF16_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
+ expected=result[i+1];
+ if(c != expected) {
+ log_err("ERROR: UTF16_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
}
U16_GET(input, 0, offset, UPRV_LENGTHOF(input), c);
- if(c != result[i+1]){
- log_err("ERROR: U16_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
+ if(c != expected) {
+ log_err("ERROR: U16_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
+ }
+
+ U16_GET_OR_FFFD(input, 0, offset, UPRV_LENGTHOF(input), c);
+ if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
+ if(c != expected) {
+ log_err("ERROR: U16_GET_OR_FFFD failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
}
UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, TRUE);
};
- UChar32 c=0x0000;
+ UChar32 c=0x0000, expected;
uint16_t i=0;
uint16_t offset=0, setOffset=0;
for(offset=0; offset<UPRV_LENGTHOF(input); offset++){
log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
offset, movedOffset[i+1], setOffset);
}
- if(c != result[i+1]){
- log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
- }
+ expected=result[i+1];
+ if(c != expected) {
+ log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
+ }
setOffset=offset;
U16_NEXT(input, setOffset, UPRV_LENGTHOF(input), c);
log_err("ERROR: U16_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
offset, movedOffset[i+1], setOffset);
}
- if(c != result[i+1]){
- log_err("ERROR: U16_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
- }
+ if(c != expected){
+ log_err("ERROR: U16_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
+ }
+
+ setOffset=offset;
+ U16_NEXT_OR_FFFD(input, setOffset, UPRV_LENGTHOF(input), c);
+ if(setOffset != movedOffset[i+1]){
+ log_err("ERROR: U16_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+ offset, movedOffset[i+1], setOffset);
+ }
+ if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
+ if(c != expected){
+ log_err("ERROR: U16_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
+ }
setOffset=offset;
UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, TRUE);
log_err("ERROR: U16_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
offset, movedOffset[i+4], setOffset);
}
- if(c != result[i+4]){
- log_err("ERROR: U16_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
- }
+ expected = result[i+4];
+ if(c != expected) {
+ log_err("ERROR: U16_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
+ }
+
+ setOffset=offset;
+ U16_PREV_OR_FFFD(input, 0, setOffset, c);
+ if(setOffset != movedOffset[i+4]){
+ log_err("ERROR: U16_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+ offset, movedOffset[i+4], setOffset);
+ }
+ if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
+ if(c != expected) {
+ log_err("ERROR: U16_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
+ }
setOffset=offset;
UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, TRUE);
0
};
- UChar32 c, c2;
+ UChar32 c, c2, expected;
int32_t i0, i=0, j, k, expectedIndex;
int32_t cpIndex=0;
do {
i0=i;
U16_NEXT(input, i, -1, c);
- if(c!=result[cpIndex]) {
- log_err("U16_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, result[cpIndex]);
+ expected=result[cpIndex];
+ if(c!=expected) {
+ log_err("U16_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected);
+ }
+ j=i0;
+ U16_NEXT_OR_FFFD(input, j, -1, c);
+ if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
+ if(c!=expected) {
+ log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected);
+ }
+ if(j!=i) {
+ log_err("U16_NEXT_OR_FFFD() moved to index %d but U16_NEXT() moved to %d\n", j, i);
}
j=i0;
U16_FWD_1(input, j, -1);
if(c2!=c) {
log_err("U16_NEXT(from %d)=U+%04x != U+%04x=U16_GET(at %d)\n", i0, c, c2, j);
}
+ U16_GET_OR_FFFD(input, 0, j, -1, c2);
+ expected= U_IS_SURROGATE(c) ? 0xfffd : c;
+ if(c2!=expected) {
+ log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U16_GET_OR_FFFD(at %d)\n", i0, expected, c2, j);
+ }
/* U16_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
k=j+1;
U16_SET_CP_LIMIT(input, 0, k, -1);