onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
return EncLen_BIG5[*p];
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* BIG5 entry for the is_valid_mbc_string slot of the encoding table.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_BIG5, s, end);
+}
+
static OnigCodePoint
big5_mbc_to_code(const UChar* p, const UChar* end)
{
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
return EncLen_EUCJP[*p];
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* EUC-JP entry for the is_valid_mbc_string slot of the encoding table.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_JP, s, end);
+}
+
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{
left_adjust_char_head,
is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
return EncLen_EUCKR[*p];
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* EUC-KR entry for the is_valid_mbc_string slot; the same function is
+   * listed in both encoding tables that follow in this file.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_KR, s, end);
+}
+
static OnigCodePoint
euckr_mbc_to_code(const UChar* p, const UChar* end)
{
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
/* Same with OnigEncodingEUC_KR except the name */
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
return EncLen_EUCTW[*p];
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* EUC-TW entry for the is_valid_mbc_string slot of the encoding table.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_TW, s, end);
+}
+
static OnigCodePoint
euctw_mbc_to_code(const UChar* p, const UChar* end)
{
euctw_left_adjust_char_head,
euctw_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
return 2;
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* GB18030 entry for the is_valid_mbc_string slot of the encoding table.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_GB18030, s, end);
+}
+
static OnigCodePoint
gb18030_mbc_to_code(const UChar* p, const UChar* end)
{
gb18030_left_adjust_char_head,
gb18030_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string
};
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
int (*init)(void);
int (*is_initialized)(void);
+ int (*is_valid_mbc_string)(const OnigUChar* s, const OnigUChar* end);
} OnigEncodingType;
typedef OnigEncodingType* OnigEncoding;
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
+#define ONIGENC_IS_VALID_MBC_STRING(enc,s,end) \
+ (enc)->is_valid_mbc_string(s,end) /* invokes enc's is_valid_mbc_string hook on the bytes from s up to end */
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
(enc)->apply_all_case_fold(case_fold_flag,f,arg)
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
regenc.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
return FALSE;
}
+extern int
+onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED,
+ const UChar* end ARG_UNUSED)
+{ /* Validator that accepts every input: both arguments are ignored and
+   * TRUE is returned unconditionally. */
+ return TRUE;
+}
+
+/*
+ * Length-based validity check shared by the multi-byte encodings.
+ *
+ * Walks the bytes from p up to (not including) end, advancing by the
+ * character length that enclen(enc, p) reports at each position.
+ *
+ *   enc  encoding whose length function drives the walk
+ *   p    first byte of the string
+ *   end  one past the last byte of the string
+ *
+ * Returns TRUE when the walk lands exactly on end (an empty range counts),
+ * FALSE when the final character claims more bytes than are available.
+ * Only lengths are examined; byte values are not checked here.
+ */
+extern int
+onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
+                                         const UChar* p, const UChar* end)
+{
+  while (p < end) {
+    int len = enclen(enc, p);
+
+    /* Reject a truncated final character before advancing: moving p more
+       than one element past end would be undefined pointer arithmetic. */
+    if (len > end - p)
+      return FALSE;
+
+    p += len;
+  }
+
+  return TRUE;
+}
+
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
{
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
+ONIG_EXTERN int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end));
+ONIG_EXTERN int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end));
/* methods for multi byte encoding */
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
return 0;
}
-static int
-check_incomplete_multibyte_string(OnigEncoding enc, const UChar* p, const UChar* end)
-{
- while (p < end) {
- p += enclen(enc, p);
- }
-
- if (p != end)
- return -1;
- else
- return 0;
-}
-
extern int
onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
regex_t* reg, ScanEnv* env)
*root = NULL;
- if (check_incomplete_multibyte_string(env->enc, pattern, end) != 0)
- return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+ if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
p = (UChar* )pattern;
r = parse_regexp(root, &p, (UChar* )end, env);
return EncLen_SJIS[*p];
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* Shift_JIS entry for the is_valid_mbc_string slot of the encoding table.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_SJIS, s, end);
+}
+
static int
code_to_mbclen(OnigCodePoint code)
{
left_adjust_char_head,
is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
return EncLen_UTF16[*p];
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* UTF-16BE entry for the is_valid_mbc_string slot of the encoding table.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF16_BE, s, end);
+}
+
static int
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
{
utf16be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
return EncLen_UTF16[*(p+1)];
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* UTF-16LE entry for the is_valid_mbc_string slot of the encoding table;
+   * returns the shared length-check result for the bytes from s up to end.
+   * NOTE(review): the length function just above this one reads *(p+1). If
+   * that is what enclen() resolves to for this encoding, an odd-length input
+   * would be read one byte past end when p == end-1 -- confirm, and guard
+   * the odd-length case before delegating if so. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF16_LE, s, end);
+}
+
static int
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
{
utf16le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
return 4;
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* UTF-32BE entry for the is_valid_mbc_string slot of the encoding table.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF32_BE, s, end);
+}
+
static int
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
{
utf32be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
return 4;
}
+static int
+is_valid_mbc_string(const UChar* s, const UChar* end)
+{ /* UTF-32LE entry for the is_valid_mbc_string slot of the encoding table.
+   * Returns the shared length-check result for the bytes from s up to end. */
+ return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF32_LE, s, end);
+}
+
static int
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
{
utf32le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};
#endif
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
+#define utf8_istail(c) ((UChar )((c) & 0xc0) == 0x80) /* true when the top two bits of c are 10 */
static const int EncLen_UTF8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
return EncLen_UTF8[*p];
}
+/*
+ * Structural validity check for a UTF-8 byte string running from p up to
+ * (not including) end.
+ *
+ * Every character must start with a byte that is not of the 10xxxxxx form
+ * and must be followed, before end, by exactly mbc_enc_len() - 1 bytes that
+ * are of that form.  Nothing beyond this lead/tail shape is examined.
+ *
+ * Returns TRUE when the whole range passes (an empty range passes), FALSE
+ * otherwise.
+ */
+static int
+is_valid_mbc_string(const UChar* p, const UChar* end)
+{
+  int i, len;
+
+  while (p < end) {
+    if (! utf8_islead(*p))
+      return FALSE;
+
+    len = mbc_enc_len(p++);
+    for (i = 1; i < len; i++) {
+      /* input ran out in the middle of a character */
+      if (p == end)
+        return FALSE;
+
+      if (! utf8_istail(*p++))
+        return FALSE;
+    }
+  }
+
+  /* p is advanced only right after a p < end or p != end test, so it can
+     never step past end; leaving the loop therefore means p == end and no
+     separate final comparison is needed. */
+  return TRUE;
+}
+
static int
is_mbc_newline(const UChar* p, const UChar* end)
{
left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
- NULL /* is_initialized */
+ NULL, /* is_initialized */
+ is_valid_mbc_string
};