From: Markus Scherer Date: Tue, 5 Dec 2017 19:19:34 +0000 (+0000) Subject: ICU-13467 U8_NEXT() handles all in macro, no function call X-Git-Tag: release-61-rc~172 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=56fd8a74c5ee81026a9a2e2ffeb852c386d8f38b;p=icu ICU-13467 U8_NEXT() handles all in macro, no function call X-SVN-Rev: 40698 --- diff --git a/icu4c/source/common/unicode/utf8.h b/icu4c/source/common/unicode/utf8.h index df08d341ab8..021fdcf1f24 100644 --- a/icu4c/source/common/unicode/utf8.h +++ b/icu4c/source/common/unicode/utf8.h @@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT_UNSAFE * @stable ICU 2.4 */ -#define U8_NEXT(s, i, length, c) { \ - (c)=(uint8_t)(s)[(i)++]; \ - if(!U8_IS_SINGLE(c)) { \ - uint8_t __t1, __t2; \ - if( /* handle U+0800..U+FFFF inline */ \ - (0xe0<=(c) && (c)<0xf0) && \ - (((i)+1)<(length) || (length)<0) && \ - U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ - (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ - (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(s)[i]-0x80)<=0x3f) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ - } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \ - } \ - } \ -} +#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) /** * Get a code point from a string at a code point boundary offset, @@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT * @stable ICU 51 */ -#define U8_NEXT_OR_FFFD(s, i, length, c) { \ +#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) + +/** @internal */ +#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \ (c)=(uint8_t)(s)[(i)++]; \ if(!U8_IS_SINGLE(c)) { \ - uint8_t __t1, __t2; \ - if( /* handle U+0800..U+FFFF inline */ \ - (0xe0<=(c) && (c)<0xf0) && \ - (((i)+1)<(length) || (length)<0) && \ - U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ - (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ - (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(s)[i]-0x80)<=0x3f) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ + uint8_t __t; \ + if((i)!=(length) && \ + /* fetch/validate/assemble all but last trail byte */ \ + ((c)>=0xe0 ? \ + ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ + (__t&=0x3f, 1) \ + : /* U+10000..U+10FFFF */ \ + ((c)-=0xf0)<=4 && \ + U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ + ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ + (__t=(s)[i]-0x80)<=0x3f) && \ + /* valid second-to-last trail byte */ \ + ((c)=((c)<<6)|__t, ++(i)!=(length)) \ + : /* U+0080..U+07FF */ \ + (c)>=0xc2 && ((c)&=0x1f, 1)) && \ + /* last trail byte */ \ + (__t=(s)[i]-0x80)<=0x3f && \ + ((c)=((c)<<6)|__t, ++(i), 1)) { \ } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \ + (c)=(sub); /* ill-formed*/ \ } \ } \ } @@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_APPEND_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ } else { \ - if((uint32_t)(c)<=0x7ff) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + if(__uc<=0x7ff) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ } else { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + if(__uc<=0xffff) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ } else { \ - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ } \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ } \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } \ } @@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_APPEND(s, i, capacity, c, isError) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else if(__uc<=0x7ff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } else { \ - (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \ + (isError)=TRUE; \ } \ }