From: K.Kosako Date: Fri, 22 Mar 2019 06:14:25 +0000 (+0900) Subject: use onigenc_wb_is_break_position() X-Git-Tag: v6.9.2_rc1~55 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a753b56e4799a92f564f8b5b5b6b2a31688438b7;p=onig use onigenc_wb_is_break_position() --- diff --git a/src/regenc.h b/src/regenc.h index 8a3397d..c980d59 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -114,6 +114,7 @@ struct PropertyNameCtype { /* #define USE_CRNL_AS_LINE_TERMINATOR */ #define USE_UNICODE_PROPERTIES #define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER +#define USE_UNICODE_WORD_BREAK /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ @@ -174,7 +175,7 @@ extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, u extern struct PropertyNameCtype* onigenc_euc_jp_lookup_property_name P_((register const char *str, register size_t len)); extern struct PropertyNameCtype* onigenc_sjis_lookup_property_name P_((register const char *str, register size_t len)); -/* in enc/unicode.c */ +/* in unicode.c */ extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[])); @@ -182,7 +183,9 @@ extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, Onig extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end)); - +#ifdef USE_UNICODE_WORD_BREAK +extern int onigenc_wb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end)); +#endif #define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) #define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) diff --git a/src/regexec.c b/src/regexec.c index bad4567..2863faa 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -3259,6 +3259,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end); break; +#ifdef USE_UNICODE_WORD_BREAK + case WORD_BOUNDARY: + is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end); + break; +#endif default: goto bytecode_error; break;