From 99211d04442b5d92ceff94ccd01e6b57ef23f796 Mon Sep 17 00:00:00 2001 From: Seiji Masugata Date: Thu, 24 Aug 2006 17:20:58 +0000 Subject: [PATCH] Initial commit. Updated bundled oniguruma library (used for multibyte regular expression) to 4.3.1. --- ext/mbstring/oniguruma/doc/API | 586 +++++++++++++++++++++++++++++ ext/mbstring/oniguruma/doc/API.ja | 593 ++++++++++++++++++++++++++++++ ext/mbstring/oniguruma/doc/FAQ | 33 ++ ext/mbstring/oniguruma/doc/FAQ.ja | 115 ++++++ ext/mbstring/oniguruma/doc/RE | 412 +++++++++++++++++++++ ext/mbstring/oniguruma/doc/RE.ja | 424 +++++++++++++++++++++ 6 files changed, 2163 insertions(+) create mode 100644 ext/mbstring/oniguruma/doc/API create mode 100644 ext/mbstring/oniguruma/doc/API.ja create mode 100644 ext/mbstring/oniguruma/doc/FAQ create mode 100644 ext/mbstring/oniguruma/doc/FAQ.ja create mode 100644 ext/mbstring/oniguruma/doc/RE create mode 100644 ext/mbstring/oniguruma/doc/RE.ja diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API new file mode 100644 index 0000000000..7374f65bd4 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/API @@ -0,0 +1,586 @@ +Oniguruma API Version 4.1.0 2006/05/15 + +#include + + +# int onig_init(void) + + Initialize library. + + You don't have to call it explicitly, because it is called in onig_new(). + + +# int onig_error_code_to_str(UChar* err_buf, int err_code, ...) + + Get error message string. + If this function is used for onig_new(), + don't call this after the pattern argument of onig_new() is freed. + + normal return: error message string length + + arguments + 1 err_buf: error message string buffer. + (required size: ONIG_MAX_ERROR_MESSAGE_LEN) + 2 err_code: error code returned by other API functions. + 3 err_info (optional): error info returned by onig_new(). + + +# void onig_set_warn_func(OnigWarnFunc func) + + Set warning function. + + WARNING: + '[', '-', ']' in character class without escape. + ']' in pattern without escape. 
+ + arguments + 1 func: function pointer. void (*func)(char* warning_message) + + +# void onig_set_verb_warn_func(OnigWarnFunc func) + + Set verbose warning function. + + WARNING: + redundant nested repeat operator. + + arguments + 1 func: function pointer. void (*func)(char* warning_message) + + +# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + Create a regex object. + + normal return: ONIG_NORMAL + + arguments + 1 reg: return regex object's address. + 2 pattern: regex pattern string. + 3 pattern_end: terminate address of pattern. (pattern + pattern length) + 4 option: compile time options. + + ONIG_OPTION_NONE no option + ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z' + ONIG_OPTION_MULTILINE '.' match with newline + ONIG_OPTION_IGNORECASE ambiguity match on + ONIG_OPTION_EXTEND extended pattern form + ONIG_OPTION_FIND_LONGEST find longest match + ONIG_OPTION_FIND_NOT_EMPTY ignore empty match + ONIG_OPTION_NEGATE_SINGLELINE + clear ONIG_OPTION_SINGLELINE which is enabled on + ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA + + ONIG_OPTION_DONT_CAPTURE_GROUP only named group captured. + ONIG_OPTION_CAPTURE_GROUP named and no-named group captured. + + 5 enc: character encoding. 
+ + ONIG_ENCODING_ASCII ASCII + ONIG_ENCODING_ISO_8859_1 ISO 8859-1 + ONIG_ENCODING_ISO_8859_2 ISO 8859-2 + ONIG_ENCODING_ISO_8859_3 ISO 8859-3 + ONIG_ENCODING_ISO_8859_4 ISO 8859-4 + ONIG_ENCODING_ISO_8859_5 ISO 8859-5 + ONIG_ENCODING_ISO_8859_6 ISO 8859-6 + ONIG_ENCODING_ISO_8859_7 ISO 8859-7 + ONIG_ENCODING_ISO_8859_8 ISO 8859-8 + ONIG_ENCODING_ISO_8859_9 ISO 8859-9 + ONIG_ENCODING_ISO_8859_10 ISO 8859-10 + ONIG_ENCODING_ISO_8859_11 ISO 8859-11 + ONIG_ENCODING_ISO_8859_13 ISO 8859-13 + ONIG_ENCODING_ISO_8859_14 ISO 8859-14 + ONIG_ENCODING_ISO_8859_15 ISO 8859-15 + ONIG_ENCODING_ISO_8859_16 ISO 8859-16 + ONIG_ENCODING_UTF8 UTF-8 + ONIG_ENCODING_UTF16_BE UTF-16BE + ONIG_ENCODING_UTF16_LE UTF-16LE + ONIG_ENCODING_UTF32_BE UTF-32BE + ONIG_ENCODING_UTF32_LE UTF-32LE + ONIG_ENCODING_EUC_JP EUC-JP + ONIG_ENCODING_EUC_TW EUC-TW + ONIG_ENCODING_EUC_KR EUC-KR + ONIG_ENCODING_EUC_CN EUC-CN + ONIG_ENCODING_SJIS Shift_JIS + ONIG_ENCODING_KOI8 KOI8 + ONIG_ENCODING_KOI8_R KOI8-R + ONIG_ENCODING_BIG5 Big5 + ONIG_ENCODING_GB18030 GB 18030 + + or any OnigEncodingType data address defined by user. + + 6 syntax: address of pattern syntax definition. + + ONIG_SYNTAX_ASIS plain text + ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE + ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE + ONIG_SYNTAX_EMACS Emacs + ONIG_SYNTAX_GREP grep + ONIG_SYNTAX_GNU_REGEX GNU regex + ONIG_SYNTAX_JAVA Java (Sun java.util.regex) + ONIG_SYNTAX_PERL Perl + ONIG_SYNTAX_PERL_NG Perl + named group + ONIG_SYNTAX_RUBY Ruby + ONIG_SYNTAX_DEFAULT default (== Ruby) + onig_set_default_syntax() + + or any OnigSyntaxType data address defined by user. + + 7 err_info: address for return optional error info. + Use this value as 3rd argument of onig_error_code_to_str(). + + + +# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) + + Create a regex object. + This function is deluxe version of onig_new(). 
+ + normal return: ONIG_NORMAL + + arguments + 1 reg: return address of regex object. + 2 pattern: regex pattern string. + 3 pattern_end: terminate address of pattern. (pattern + pattern length) + 4 ci: compile time info. + + ci->num_of_elements: number of elements in ci. (current version: 5) + ci->pattern_enc: pattern string character encoding. + ci->target_enc: target string character encoding. + ci->syntax: address of pattern syntax definition. + ci->option: compile time option. + ci->ambig_flag: character matching ambiguity bit flag for + ONIG_OPTION_IGNORECASE mode. + + ONIGENC_AMBIGUOUS_MATCH_NONE: exact + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ignore case for ASCII + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ignore case for non-ASCII + ONIGENC_AMBIGUOUS_MATCH_COMPOUND: grapheme cluster as a char + ONIGENC_AMBIGUOUS_MATCH_FULL: all ambiguity on + ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII) + onig_set_default_ambig_flag() + + 5 err_info: address for return optional error info. + Use this value as 3rd argument of onig_error_code_to_str(). + + + Different character encoding combination is allowed for + the following cases only. + + pattern_enc: ASCII, ISO_8859_1 + target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE + + pattern_enc: UTF16_BE/LE + target_enc: UTF16_LE/BE + + pattern_enc: UTF32_BE/LE + target_enc: UTF32_LE/BE + + +# void onig_free(regex_t* reg) + + Free memory used by regex object. + + arguments + 1 reg: regex object. + + +# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, + const UChar* range, OnigRegion* region, OnigOptionType option) + + Search string and return search result and matching region. + + normal return: match position offset (i.e. 
p - str >= 0) + not found: ONIG_MISMATCH (< 0) + + arguments + 1 reg: regex object + 2 str: target string + 3 end: terminate address of target string + 4 start: search start address of target string + 5 range: search terminate address of target string + in forward search (start <= searched string head < range) + in backward search (range <= searched string head <= start) + 6 region: address for return group match range info (NULL is allowed) + 7 option: search time option + + ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line + ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API. + + +# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, + OnigRegion* region, OnigOptionType option) + + Match string and return result and matching region. + + normal return: match length (>= 0) + not match: ONIG_MISMATCH ( < 0) + + arguments + 1 reg: regex object + 2 str: target string + 3 end: terminate address of target string + 4 at: match address of target string + 5 region: address for return group match range info (NULL is allowed) + 6 option: search time option + + ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line + ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API. + + +# OnigRegion* onig_region_new(void) + + Create a region. + + +# void onig_region_free(OnigRegion* region, int free_self) + + Free memory used by region. + + arguments + 1 region: target region + 2 free_self: [1: free all, 0: free memory used in region but not self] + + +# void onig_region_copy(OnigRegion* to, OnigRegion* from) + + Copy contents of region. + + arguments + 1 to: target region + 2 from: source region + + +# void onig_region_clear(OnigRegion* region) + + Clear contents of region. 
+ + arguments + 1 region: target region + + +# int onig_region_resize(OnigRegion* region, int n) + + Resize group range area of region. + + normal return: ONIG_NORMAL + + arguments + 1 region: target region + 2 n: new size + + +# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, + int** num_list) + + Return the group number list of the name. + Named subexp is defined by (?<name>....). + + normal return: number of groups for the name. + (ex. /(?<x>..)(?<x>..)/ ==> 2) + name not found: -1 + + arguments + 1 reg: regex object. + 2 name: group name. + 3 name_end: terminate address of group name. + 4 num_list: return list of group numbers. + + +# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end, + OnigRegion *region) + + Return the group number corresponding to the named backref (\k<name>). + If two or more regions for the groups of the name are effective, + the greatest number among them is returned. + + normal return: group number. + + arguments + 1 reg: regex object. + 2 name: group name. + 3 name_end: terminate address of group name. + 4 region: search/match result region. + + +# int onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) + + Iterate function call for all names. + + normal return: 0 + error: func's return value. + + arguments + 1 reg: regex object. + 2 func: callback function. + func(name, name_end, <number of groups>, <group number's list>, + reg, arg); + if func does not return 0, then iteration is stopped. + 3 arg: argument for func. + + +# int onig_number_of_names(regex_t* reg) + + Return the number of names defined in the pattern. + Multiple definitions of one name are counted as one. + + arguments + 1 reg: regex object. + + +# OnigEncoding onig_get_encoding(regex_t* reg) +# OnigOptionType onig_get_options(regex_t* reg) +# OnigAmbigType onig_get_ambig_flag(regex_t* reg) +# OnigSyntaxType* onig_get_syntax(regex_t* reg) + + Return a value of the regex object. 
+ + arguments + 1 reg: regex object. + + +# int onig_number_of_captures(regex_t* reg) + + Return the number of capture groups in the pattern. + + arguments + 1 reg: regex object. + + +# int onig_number_of_capture_histories(regex_t* reg) + + Return the number of capture histories defined in the pattern. + + You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY + is disabled in the pattern syntax. (It is disabled in the default syntax.) + + arguments + 1 reg: regex object. + + + +# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region) + + Return the root node of capture history data tree. + + This value is undefined if matching has failed. + + arguments + 1 region: matching result. + + +# int onig_capture_tree_traverse(OnigRegion* region, int at, + int(*func)(int,int,int,int,int,void*), void* arg) + + Traverse and callback in capture history data tree. + + normal return: 0 + error: callback func's return value. + + arguments + 1 region: match region data. + 2 at: callback position. + + ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children. + ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback. + ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children, + and at last callback again. + + 3 func: callback function. + if func does not return 0, then traversal is stopped. + + int func(int group, int beg, int end, int level, int at, + void* arg) + + group: group number + beg: capture start position + end: capture end position + level: nest level (from 0) + at: callback position + ONIG_TRAVERSE_CALLBACK_AT_FIRST + ONIG_TRAVERSE_CALLBACK_AT_LAST + arg: optional callback argument + + 4 arg: optional callback argument. + + +# int onig_noname_group_capture_is_active(regex_t* reg) + + Return noname group capture activity. + + active: 1 + inactive: 0 + + arguments + 1 reg: regex object. 
+ + if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON + --> inactive + + if the regex pattern have named group + and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON + and option ONIG_OPTION_CAPTURE_GROUP == OFF + --> inactive + + else --> active + + +# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) + + Return previous character head address. + + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + Return left-adjusted head address of a character. + + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + Return right-adjusted head address of a character. + + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) +# int onigenc_strlen_null(OnigEncoding enc, const UChar* s) + + Return number of characters in the string. + + +# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) + + Return number of bytes in the string. + + +# int onig_set_default_syntax(OnigSyntaxType* syntax) + + Set default syntax. + + arguments + 1 syntax: address of pattern syntax definition. + + +# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) + + Copy syntax. + + arguments + 1 to: destination address. + 2 from: source address. 
+ + +# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax) +# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax) + +# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) + + Get/Set elements of the syntax. + + arguments + 1 syntax: syntax + 2 op, op2, behavior, options: value of element. + + +# void onig_copy_encoding(OnigEncoding to, OnigEncoding from) + + Copy encoding. + + arguments + 1 to: destination address. + 2 from: source address. + + +# int onig_set_meta_char(OnigEncoding enc, unsigned int what, + OnigCodePoint code) + + Set a variable meta character to the code point value. + Except for the escape character, this meta character specification + does not work unless ONIG_SYN_OP_VARIABLE_META_CHARACTERS is enabled + in the syntax. (It is not enabled in the built-in syntaxes.) + + normal return: ONIG_NORMAL + + arguments + 1 enc: target encoding + 2 what: specifies which meta character it is. + + ONIG_META_CHAR_ESCAPE + ONIG_META_CHAR_ANYCHAR + ONIG_META_CHAR_ANYTIME + ONIG_META_CHAR_ZERO_OR_ONE_TIME + ONIG_META_CHAR_ONE_OR_MORE_TIME + ONIG_META_CHAR_ANYCHAR_ANYTIME + + 3 code: meta character or ONIG_INEFFECTIVE_META_CHAR. + + +# OnigAmbigType onig_get_default_ambig_flag() + + Get default ambig flag. + + +# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag) + + Set default ambig flag. + + 1 ambig_flag: ambiguity flag + + +# unsigned int onig_get_match_stack_limit_size(void) + + Return the maximum stack size. + (default: 0 == unlimited) + + +# int onig_set_match_stack_limit_size(unsigned int size) + + Set the maximum stack size. 
+ (size = 0: unlimited) + + normal return: ONIG_NORMAL + + +# int onig_end(void) + + The use of this library is finished. + + normal return: ONIG_NORMAL + + It is not allowed to use regex objects which created + before onig_end() call. + + +# const char* onig_version(void) + + Return version string. (ex. "2.2.8") + +// END diff --git a/ext/mbstring/oniguruma/doc/API.ja b/ext/mbstring/oniguruma/doc/API.ja new file mode 100644 index 0000000000..2682da4808 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/API.ja @@ -0,0 +1,593 @@ +µ´¼Ö¥¤¥ó¥¿¡¼¥Õ¥§¡¼¥¹ Version 4.1.0 2006/05/15 + +#include + + +# int onig_init(void) + + ¥é¥¤¥Ö¥é¥ê¤Î½é´ü²½ + + onig_new()¤ÎÃæ¤Ç¸Æ¤Ó½Ð¤µ¤ì¤ë¤Î¤Ç¡¢¤³¤Î´Ø¿ô¤òÌÀ¼¨Åª¤Ë¸Æ¤Ó½Ð¤µ¤Ê¤¯¤Æ¤â¤è¤¤¡£ + + +# int onig_error_code_to_str(UChar* err_buf, int err_code, ...) + + ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸¤ò¼èÆÀ¤¹¤ë¡£ + + ¤³¤Î´Ø¿ô¤ò¡¢onig_new()¤Î·ë²Ì¤ËÂФ·¤Æ¸Æ¤Ó½Ð¤¹¾ì¹ç¤Ë¤Ï¡¢onig_new()¤Îpattern°ú¿ô¤ò + ¥á¥â¥ê²òÊü¤¹¤ë¤è¤ê¤âÁ°¤Ë¸Æ¤Ó½Ð¤µ¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸Ê¸»úÎó¤Î¥Ð¥¤¥ÈĹ + + °ú¿ô + 1 err_buf: ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸¤ò³ÊǼ¤¹¤ëÎΰè + (ɬÍפʥµ¥¤¥º: ONIG_MAX_ERROR_MESSAGE_LEN) + 2 err_code: ¥¨¥é¡¼¥³¡¼¥É + 3 err_info (optional): onig_new()¤Îerr_info + + +# void onig_set_warn_func(OnigWarnFunc func) + + ·Ù¹ðÄÌÃδؿô¤ò¥»¥Ã¥È¤¹¤ë¡£ + + ·Ù¹ð: + '[', '-', ']' in character class without escape. + ']' in pattern without escape. + + °ú¿ô + 1 func: ·Ù¹ð´Ø¿ô void (*func)(char* warning_message) + + +# void onig_set_verb_warn_func(OnigWarnFunc func) + + ¾ÜºÙ·Ù¹ðÄÌÃδؿô¤ò¥»¥Ã¥È¤¹¤ë¡£ + + ¾ÜºÙ·Ù¹ð: + redundant nested repeat operator. 
+ + °ú¿ô + 1 func: ¾ÜºÙ·Ù¹ð´Ø¿ô void (*func)(char* warning_message) + + +# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + °ú¿ô + 1 reg: ºîÀ®¤µ¤ì¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹ + 2 pattern: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó + 3 pattern_end: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹(pattern + pattern length) + 4 option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó + + ONIG_OPTION_NONE ¥ª¥×¥·¥ç¥ó¤Ê¤· + ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z' + ONIG_OPTION_MULTILINE '.'¤¬²þ¹Ô¤Ë¥Þ¥Ã¥Á¤¹¤ë + ONIG_OPTION_IGNORECASE Û£Ëæ¥Þ¥Ã¥Á ¥ª¥ó + ONIG_OPTION_EXTEND ¥Ñ¥¿¡¼¥ó³ÈÄ¥·Á¼° + ONIG_OPTION_FIND_LONGEST ºÇĹ¥Þ¥Ã¥Á + ONIG_OPTION_FIND_NOT_EMPTY ¶õ¥Þ¥Ã¥Á¤ò̵»ë + ONIG_OPTION_NEGATE_SINGLELINE + ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA¤Ç + ¥Ç¥Õ¥©¥ë¥È¤ÇÍ­¸ú¤ÊONIG_OPTION_SINGLELINE¤ò¥¯¥ê¥¢¤¹¤ë¡£ + + ONIG_OPTION_DONT_CAPTURE_GROUP ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Î¤ßÊá³Í + ONIG_OPTION_CAPTURE_GROUP ̾Á°Ìµ¤·Êá³Í¼°½¸¹ç¤âÊá³Í + + 5 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + + ONIG_ENCODING_ASCII ASCII + ONIG_ENCODING_ISO_8859_1 ISO 8859-1 + ONIG_ENCODING_ISO_8859_2 ISO 8859-2 + ONIG_ENCODING_ISO_8859_3 ISO 8859-3 + ONIG_ENCODING_ISO_8859_4 ISO 8859-4 + ONIG_ENCODING_ISO_8859_5 ISO 8859-5 + ONIG_ENCODING_ISO_8859_6 ISO 8859-6 + ONIG_ENCODING_ISO_8859_7 ISO 8859-7 + ONIG_ENCODING_ISO_8859_8 ISO 8859-8 + ONIG_ENCODING_ISO_8859_9 ISO 8859-9 + ONIG_ENCODING_ISO_8859_10 ISO 8859-10 + ONIG_ENCODING_ISO_8859_11 ISO 8859-11 + ONIG_ENCODING_ISO_8859_13 ISO 8859-13 + ONIG_ENCODING_ISO_8859_14 ISO 8859-14 + ONIG_ENCODING_ISO_8859_15 ISO 8859-15 + ONIG_ENCODING_ISO_8859_16 ISO 8859-16 + ONIG_ENCODING_UTF8 UTF-8 + ONIG_ENCODING_UTF16_BE UTF-16BE + ONIG_ENCODING_UTF16_LE UTF-16LE + ONIG_ENCODING_UTF32_BE UTF-32BE + ONIG_ENCODING_UTF32_LE UTF-32LE + ONIG_ENCODING_EUC_JP EUC-JP 
+ ONIG_ENCODING_EUC_TW EUC-TW + ONIG_ENCODING_EUC_KR EUC-KR + ONIG_ENCODING_EUC_CN EUC-CN + ONIG_ENCODING_SJIS Shift_JIS + ONIG_ENCODING_KOI8 KOI8 + ONIG_ENCODING_KOI8_R KOI8-R + ONIG_ENCODING_BIG5 Big5 + ONIG_ENCODING_GB18030 GB 18030 + + ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigEncodingType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹ + + 6 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ + + ONIG_SYNTAX_ASIS plain text + ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE + ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE + ONIG_SYNTAX_EMACS Emacs + ONIG_SYNTAX_GREP grep + ONIG_SYNTAX_GNU_REGEX GNU regex + ONIG_SYNTAX_JAVA Java (Sun java.util.regex) + ONIG_SYNTAX_PERL Perl + ONIG_SYNTAX_PERL_NG Perl + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç + ONIG_SYNTAX_RUBY Ruby + ONIG_SYNTAX_DEFAULT default (== Ruby) + onig_set_default_syntax() + + ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigSyntaxType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹ + + 7 err_info: ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹ + onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë + + +# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£ + ¤³¤Î´Ø¿ô¤Ï¡¢onig_new()¤Î¥Ç¥é¥Ã¥¯¥¹ÈÇ¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + °ú¿ô + 1 reg: ºîÀ®¤µ¤ì¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹ + 2 pattern: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó + 3 pattern_end: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹(pattern + pattern length) + 4 ci: ¥³¥ó¥Ñ¥¤¥ë¾ðÊó + + ci->num_of_elements: ci¤ÎÍ×ÁÇ¿ô (¸½ºß¤ÎÈǤǤÏ: 5) + ci->pattern_enc: ¥Ñ¥¿¡¼¥óʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + ci->target_enc: ÂоÝʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + ci->syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ + ci->option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó + ci->ambig_flag: ONIG_OPTION_IGNORECASE¥â¡¼¥É¤Ç¤Î + ʸ»úÛ£Ëæ¥Þ¥Ã¥Á»ØÄê¥Ó¥Ã¥È¥Õ¥é¥° + + ONIGENC_AMBIGUOUS_MATCH_NONE: Û£Ëæ̵¤· + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ASCII¤ÎÂçʸ»ú¾®Ê¸»ú + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ASCII°Ê³°¤ÎÂçʸ»ú¾®Ê¸»ú + ONIGENC_AMBIGUOUS_MATCH_COMPOUND: ¹çÀ®Ê¸»ú + ONIGENC_AMBIGUOUS_MATCH_FULL: Á´¤Æ¤ÎÛ£Ëæ¥Õ¥é¥°Í­¸ú + ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII) + 
onig_set_default_ambig_flag() + + 5 err_info: ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹ + onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë + + + °Û¤Ê¤ëʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ÎÁȤ߹ç¤ï¤»¤Ï¡¢°Ê²¼¤Î¾ì¹ç¤Ë¤Î¤ßµö¤µ¤ì¤ë¡£ + + pattern_enc: ASCII, ISO_8859_1 + target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE + + pattern_enc: UTF16_BE/LE + target_enc: UTF16_LE/BE + + pattern_enc: UTF32_BE/LE + target_enc: UTF32_LE/BE + + +# void onig_free(regex_t* reg) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤Î¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + + +# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, + const UChar* range, OnigRegion* region, OnigOptionType option) + + Àµµ¬É½¸½¤Çʸ»úÎó¤ò¸¡º÷¤·¡¢¸¡º÷·ë²Ì¤È¥Þ¥Ã¥ÁÎΰè¤òÊÖ¤¹¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥Þ¥Ã¥Á°ÌÃÖ (p - str >= 0) + ¸¡º÷¼ºÇÔ: ONIG_MISMATCH (< 0) + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 str: ¸¡º÷ÂоÝʸ»úÎó + 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹ + 4 start: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèƬ°ÌÃÖ³«»Ï¥¢¥É¥ì¥¹ + 5 range: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèƬ°ÌÃÖ½ªÃ¼¥¢¥É¥ì¥¹ + Á°Êýõº÷ (start <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèƬ < range) + ¸åÊýõº÷ (range <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèƬ <= start) + 6 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë) + 7 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó + + ONIG_OPTION_NOTBOL ʸ»úÎó¤ÎÀèƬ(str)¤ò¹ÔƬ¤È´ÇÐö¤µ¤Ê¤¤ + ONIG_OPTION_NOTEOL ʸ»úÎó¤Î½ªÃ¼(end)¤ò¹ÔËö¤È´ÇÐö¤µ¤Ê¤¤ + ONIG_OPTION_POSIX_REGION region°ú¿ô¤òPOSIX API¤Îregmatch_t[]¤Ë¤¹¤ë + + +# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, + OnigRegion* region, OnigOptionType option) + + ʸ»úÎó¤Î»ØÄê°ÌÃ֤ǥޥåÁ¥ó¥°¤ò¹Ô¤¤¡¢·ë²Ì¤È¥Þ¥Ã¥ÁÎΰè¤òÊÖ¤¹¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥Þ¥Ã¥Á¤·¤¿¥Ð¥¤¥ÈĹ (>= 0) + not match: ONIG_MISMATCH ( < 0) + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 str: ¸¡º÷ÂоÝʸ»úÎó + 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹ + 4 at: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷¥¢¥É¥ì¥¹ + 5 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë) + 6 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó + + ONIG_OPTION_NOTBOL ʸ»úÎó¤ÎÀèƬ(str)¤ò¹ÔƬ¤È´ÇÐö¤µ¤Ê¤¤ + ONIG_OPTION_NOTEOL ʸ»úÎó¤Î½ªÃ¼(end)¤ò¹ÔËö¤È´ÇÐö¤µ¤Ê¤¤ + ONIG_OPTION_POSIX_REGION 
region°ú¿ô¤òPOSIX API¤Îregmatch_t[]¤Ë¤¹¤ë + + +# OnigRegion* onig_region_new(void) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤òºîÀ®¤¹¤ë¡£ + + +# void onig_region_free(OnigRegion* region, int free_self) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤Ç»ÈÍѤµ¤ì¤Æ¤¤¤ë¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£ + + °ú¿ô + 1 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊ󥪥֥¸¥§¥¯¥È + 2 free_self: [1: region¼«¿È¤ò´Þ¤á¤ÆÁ´¤Æ²òÊü, 0: region¼«¿È¤Ï²òÊü¤·¤Ê¤¤] + + +# void onig_region_copy(OnigRegion* to, OnigRegion* from) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤òÊ£À½¤¹¤ë¡£ + + °ú¿ô + 1 to: ÂоÝÎΰè + 2 from: ¸µÎΰè + + +# void onig_region_clear(OnigRegion* region) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤ÎÃæÌ£¤ò¥¯¥ê¥¢¤¹¤ë¡£ + + °ú¿ô + 1 region: ÂоÝÎΰè + + +# int onig_region_resize(OnigRegion* region, int n) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤ÎÊá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¿ô¤òÊѹ¹¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + °ú¿ô + 1 region: ÂоÝÎΰè + 2 n: ¿·¤·¤¤¥µ¥¤¥º + + +# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, + int** num_list) + + »ØÄꤷ¤¿Ì¾Á°¤ËÂФ¹¤ë̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¤Î + ¥°¥ë¡¼¥×ÈÖ¹æ¥ê¥¹¥È¤òÊÖ¤¹¡£ + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï¡¢(?....)¤Ë¤è¤Ã¤ÆÄêµÁ¤Ç¤­¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: »ØÄꤵ¤ì¤¿Ì¾Á°¤ËÂФ¹¤ë¥°¥ë¡¼¥×¿ô + (Îã /(?..)(?..)/ ==> 2) + ̾Á°¤ËÂФ¹¤ë¥°¥ë¡¼¥×¤¬Â¸ºß¤·¤Ê¤¤: -1 + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 name: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾ + 3 name_end: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾¤Î½ªÃ¼¥¢¥É¥ì¥¹ + 4 num_list: ÈÖ¹æ¥ê¥¹¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹ + + +# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end, + OnigRegion *region) + + »ØÄꤵ¤ì¤¿Ì¾Á°¤Î¸åÊý»²¾È(\k)¤ËÂФ¹¤ëÊá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¤ÎÈÖ¹æ¤òÊÖ¤¹¡£ + ̾Á°¤ËÂФ·¤Æ¡¢Ê£¿ô¤Î¥Þ¥Ã¥ÁÎΰ褬ͭ¸ú¤Ç¤¢¤ì¤Ð¡¢¤½¤ÎÃæ¤ÎºÇÂç¤ÎÈÖ¹æ¤òÊÖ¤¹¡£ + ̾Á°¤ËÂФ¹¤ëÊá³Í¼°½¸¹ç¤¬°ì¸Ä¤·¤«¤Ê¤¤¤È¤­¤Ë¤Ï¡¢Âбþ¤¹¤ë¥Þ¥Ã¥ÁÎΰ褬ͭ¸ú¤« + ¤É¤¦¤«¤Ë´Ø·¸¤Ê¤¯¡¢¤½¤ÎÈÖ¹æ¤òÊÖ¤¹¡£(½¾¤Ã¤Æ¡¢region¤Ë¤ÏNULL¤òÅϤ·¤Æ¤â¤è¤¤¡£) + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ÈÖ¹æ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 name: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾ + 3 name_end: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾¤Î½ªÃ¼¥¢¥É¥ì¥¹ + 4 region: search/match·ë²Ì¤Î¥Þ¥Ã¥ÁÎΰè + + +# int onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, 
const UChar*, int,int*,regex_t*,void*), + void* arg) + + Á´¤Æ¤Î̾Á°¤ËÂФ·¤Æ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¸Æ¤Ó½Ð¤·¤ò¼Â¹Ô¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: 0 + ¥¨¥é¡¼: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¤ÎÌá¤êÃÍ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 func: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô + func(name, name_end, , , + reg, arg); + + func¤¬0°Ê³°¤ÎÃͤòÊÖ¤¹¤È¡¢¤½¤ì°Ê¹ß¤Î¥³¡¼¥ë¥Ð¥Ã¥¯¤Ï¹Ô¤Ê¤ï¤º¤Ë + ½ªÎ»¤¹¤ë¡£ + + 3 arg: func¤ËÂФ¹¤ëÄɲðú¿ô + + +# int onig_number_of_names(regex_t* reg) + + ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Ì¾Á°¤Î¿ô¤òÊÖ¤¹¡£ + °ì¸Ä¤Î̾Á°¤Î¿½ÅÄêµÁ¤Ï°ì¸Ä¤È´ÇÐö¤¹¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + +# OnigEncoding onig_get_encoding(regex_t* reg) +# OnigOptionType onig_get_options(regex_t* reg) +# OnigAmbigType onig_get_ambig_flag(regex_t* reg) +# OnigSyntaxType* onig_get_syntax(regex_t* reg) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤ËÂФ·¤Æ¡¢Âбþ¤¹¤ëÃͤòÊÖ¤¹¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + +# int onig_number_of_captures(regex_t* reg) + + ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Êá³Í¥°¥ë¡¼¥×¤Î¿ô¤òÊÖ¤¹¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + +# int onig_number_of_capture_histories(regex_t* reg) + + ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Êá³ÍÍúÎò(?@...)¤Î¿ô¤òÊÖ¤¹¡£ + + »ÈÍѤ¹¤ëʸˡ¤ÇÊá³ÍÍúÎòµ¡Ç½¤¬Í­¸ú(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY) + ¤Ç¤Ê¤±¤ì¤Ð¡¢Êá³ÍÍúÎòµ¡Ç½¤Ï»ÈÍѤǤ­¤Ê¤¤¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + +# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region) + + Êá³ÍÍúÎò¥Ç¡¼¥¿¤Î¥ë¡¼¥È¥Î¡¼¥É¤òÊÖ¤¹¡£ + + ¥Þ¥Ã¥Á¤¬¼ºÇÔ¤·¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢¤³¤ÎÃͤÏÉÔÄê¤Ç¤¢¤ë¡£ + + °ú¿ô + 1 region: ¥Þ¥Ã¥ÁÎΰè + + +# int onig_capture_tree_traverse(OnigRegion* region, int at, + int(*func)(int,int,int,int,int,void*), void* arg) + + Êá³ÍÍúÎò¥Ç¡¼¥¿ÌÚ¤ò½ä²ó¤·¤Æ¥³¡¼¥ë¥Ð¥Ã¥¯¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: 0 + ¥¨¥é¡¼: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¤ÎÌá¤êÃÍ + + °ú¿ô + 1 region: ¥Þ¥Ã¥ÁÎΰè + 2 at: ¥³¡¼¥ë¥Ð¥Ã¥¯¤ò¹Ô¤Ê¤¦¥¿¥¤¥ß¥ó¥° + + ONIG_TRAVERSE_CALLBACK_AT_FIRST: + ºÇ½é¤Ë¥³¡¼¥ë¥Ð¥Ã¥¯¤·¤Æ¡¢»Ò¥Î¡¼¥É¤ò½ä²ó + ONIG_TRAVERSE_CALLBACK_AT_LAST: + »Ò¥Î¡¼¥É¤ò½ä²ó¤·¤Æ¡¢¥³¡¼¥ë¥Ð¥Ã¥¯ + ONIG_TRAVERSE_CALLBACK_AT_BOTH: + ºÇ½é¤Ë¥³¡¼¥ë¥Ð¥Ã¥¯¤·¤Æ¡¢»Ò¥Î¡¼¥É¤ò½ä²ó¡¢ºÇ¸å¤Ë¤â¤¦°ìÅÙ¥³¡¼¥ë¥Ð¥Ã¥¯ + + 3 func: 
¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô + func¤¬0°Ê³°¤ÎÃͤòÊÖ¤¹¤È¡¢¤½¤ì°Ê¹ß¤Î½ä²ó¤Ï¹Ô¤Ê¤ï¤º¤Ë + ½ªÎ»¤¹¤ë¡£ + + int func(int group, int beg, int end, int level, int at, + void* arg) + group: ¥°¥ë¡¼¥×ÈÖ¹æ + beg: ¥Þ¥Ã¥Á³«»Ï°ÌÃÖ + end ¥Þ¥Ã¥Á½ªÎ»°ÌÃÖ + level: ¥Í¥¹¥È¥ì¥Ù¥ë (0¤«¤é) + at: ¥³¡¼¥ë¥Ð¥Ã¥¯¤¬¸Æ¤Ó½Ð¤µ¤ì¤¿¥¿¥¤¥ß¥ó¥° + ONIG_TRAVERSE_CALLBACK_AT_FIRST + ONIG_TRAVERSE_CALLBACK_AT_LAST + arg: Äɲðú¿ô + + 4 arg; func¤ËÂФ¹¤ëÄɲðú¿ô + + +# int onig_noname_group_capture_is_active(regex_t* reg) + + ̾Á°¤Ê¤·¼°½¸¹ç¤ÎÊá³Íµ¡Ç½¤¬Í­¸ú¤«¤É¤¦¤«¤òÊÖ¤¹¡£ + + Í­¸ú: 1 + ̵¸ú: 0 + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + + ¥ª¥×¥·¥ç¥ó¤ÎONIG_OPTION_DONT_CAPTURE_GROUP¤¬ON --> ̵¸ú + + ¥Ñ¥¿¡¼¥ó¤¬Ì¾Á°¤Ä¤­¼°½¸¹ç¤ò»ÈÍѤ·¤Æ¤¤¤ë + AND »ÈÍÑʸˡ¤Ç¡¢ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP¤¬ON + AND ¥ª¥×¥·¥ç¥ó¤ÎONIG_OPTION_CAPTURE_GROUP¤¬OFF + --> ̵¸ú + + ¾åµ­°Ê³°¤Î¾ì¹ç --> Í­¸ú + + +# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) + + ʸ»ú°ì¸ÄʬÁ°¤Îʸ»úÎó°ÌÃÖ¤òÊÖ¤¹¡£ + + °ú¿ô + 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + 2 start: ʸ»úÎó¤ÎÀèƬ¥¢¥É¥ì¥¹ + 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ + + +# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + ʸ»ú¤ÎÀèƬ¥Ð¥¤¥È°ÌÃ֤ˤʤë¤è¤¦¤Ëº¸Â¦¤ËÄ´À°¤·¤¿¥¢¥É¥ì¥¹¤òÊÖ¤¹¡£ + + °ú¿ô + 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + 2 start: ʸ»úÎó¤ÎÀèƬ¥¢¥É¥ì¥¹ + 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ + + +# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + ʸ»ú¤ÎÀèƬ¥Ð¥¤¥È°ÌÃ֤ˤʤë¤è¤¦¤Ë±¦Â¦¤ËÄ´À°¤·¤¿¥¢¥É¥ì¥¹¤òÊÖ¤¹¡£ + + °ú¿ô + 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + 2 start: ʸ»úÎó¤ÎÀèƬ¥¢¥É¥ì¥¹ + 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ + + +# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) +# int onigenc_strlen_null(OnigEncoding enc, const UChar* s) + + ʸ»úÎó¤Îʸ»ú¿ô¤òÊÖ¤¹¡£ + + +# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) + + ʸ»úÎó¤Î¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£ + + +# int onig_set_default_syntax(OnigSyntaxType* syntax) + + ¥Ç¥Õ¥©¥ë¥È¤ÎÀµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ò¥»¥Ã¥È¤¹¤ë¡£ + + °ú¿ô + 1 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ + + +# void 
onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) + + Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ò¥³¥Ô¡¼¤¹¤ë¡£ + + °ú¿ô + 1 to: ÂÐ¾Ý + 2 from: ¸µ + + +# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax) +# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax) + +# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) + + Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ÎÍ×ÁǤò»²¾È/¼èÆÀ¤¹¤ë¡£ + + °ú¿ô + 1 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ + 2 op, op2, behavior, options: Í×ÁǤÎÃÍ + + +# void onig_copy_encoding(OnigEncoding to, OnigOnigEncoding from) + + ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ò¥³¥Ô¡¼¤¹¤ë¡£ + + °ú¿ô + 1 to: ÂÐ¾Ý + 2 from: ¸µ + + +# int onig_set_meta_char(OnigEncoding enc, unsigned int what, + OnigCodePoint code) + + ¥á¥¿Ê¸»ú¤ò»ØÄꤷ¤¿¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃͤ˥»¥Ã¥È¤¹¤ë¡£ + ONIG_SYN_OP_VARIABLE_META_CHARACTERS¤¬Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ÇÍ­¸ú¤Ë + ¤Ê¤Ã¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢¥¨¥¹¥±¡¼¥×ʸ»ú¤ò½ü¤¤¤Æ¡¢¤³¤³¤Ç»ØÄꤷ¤¿¥á¥¿Ê¸»ú¤Ï + µ¡Ç½¤·¤Ê¤¤¡£(Áȹþ¤ß¤Îʸˡ¤Ç¤ÏÍ­¸ú¤Ë¤·¤Æ¤¤¤Ê¤¤¡£) + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + °ú¿ô + 1 enc: ÂоÝʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + 2 what: ¥á¥¿Ê¸»úµ¡Ç½¤Î»ØÄê + + ONIG_META_CHAR_ESCAPE + ONIG_META_CHAR_ANYCHAR + ONIG_META_CHAR_ANYTIME + ONIG_META_CHAR_ZERO_OR_ONE_TIME + ONIG_META_CHAR_ONE_OR_MORE_TIME + ONIG_META_CHAR_ANYCHAR_ANYTIME + + 3 code: ¥á¥¿Ê¸»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È ¤Þ¤¿¤Ï ONIG_INEFFECTIVE_META_CHAR. 
+ + +# OnigAmbigType onig_get_default_ambig_flag() + + ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¼èÆÀ¤¹¤ë¡£ + + +# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag) + + ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¥»¥Ã¥È¤¹¤ë¡£ + + °ú¿ô + 1 ambig_flag: Û£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥° + + +# unsigned int onig_get_match_stack_limit_size(void) + + ¥Þ¥Ã¥Á¥¹¥¿¥Ã¥¯¥µ¥¤¥º¤ÎºÇÂçÃͤòÊÖ¤¹¡£ + (¥Ç¥Õ¥©¥ë¥È: 0 == ̵À©¸Â) + + +# int onig_set_match_stack_limit_size(unsigned int size) + + ¥Þ¥Ã¥Á¥¹¥¿¥Ã¥¯¥µ¥¤¥º¤ÎºÇÂçÃͤò»ØÄꤹ¤ë¡£ + (size = 0: ̵À©¸Â) + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + +# int onig_end(void) + + ¥é¥¤¥Ö¥é¥ê¤Î»ÈÍѤò½ªÎ»¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + onig_init()¤òºÆÅٸƤӽФ·¤Æ¤â¡¢°ÊÁ°¤ËºîÀ®¤·¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + ¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¤Ç¤­¤Ê¤¤¡£ + + +# const char* onig_version(void) + + ¥Ð¡¼¥¸¥ç¥óʸ»úÎó¤òÊÖ¤¹¡£(Îã "2.2.8") + +// END diff --git a/ext/mbstring/oniguruma/doc/FAQ b/ext/mbstring/oniguruma/doc/FAQ new file mode 100644 index 0000000000..1621a359eb --- /dev/null +++ b/ext/mbstring/oniguruma/doc/FAQ @@ -0,0 +1,33 @@ +FAQ 2006/05/15 + +1. Lognest match + + You can execute longest match by using ONIG_OPTION_FIND_LONGEST option + in onig_new(). + + +2. Thread safe + + In order to make thread safe, which of (A) or (B) must be done. + + (A) Oniguruma Layer + + Define the macro below at NOT_RUBY case in oniguruma/regint.h. + + USE_MULTI_THREAD_SYSTEM + THREAD_ATOMIC_START + THREAD_ATOMIC_END + THREAD_PASS + + (B) Application Layer + + The plural threads should not do simultaneously that making + new regexp objects or re-compiling objects or freeing objects, + even if these objects are differ. + + +3. Mailing list + + There is no mailing list about Oniguruma. + +// END diff --git a/ext/mbstring/oniguruma/doc/FAQ.ja b/ext/mbstring/oniguruma/doc/FAQ.ja new file mode 100644 index 0000000000..5f61b09554 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/FAQ.ja @@ -0,0 +1,115 @@ +FAQ 2006/05/15 + +1. ºÇĹ¥Þ¥Ã¥Á + + onig_new()¤ÎÃæ¤Ç¡¢ONIG_OPTION_FIND_LONGEST¥ª¥×¥·¥ç¥ó + ¤ò»ÈÍѤ¹¤ì¤ÐºÇĹ¥Þ¥Ã¥Á¤Ë¤Ê¤ë¡£ + + +2. 
¥¹¥ì¥Ã¥É¥»¡¼¥Õ + + ¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë¤¹¤ë¤Ë¤Ï¡¢°Ê²¼¤Î(A)¤È(B)¤Î¤É¤Á¤é¤«¤ò¹Ô¤Ê¤¨¤Ð + ¤è¤¤¡£ + + (A) Oniguruma Layer + + oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤ÎÉôʬ¤Î°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤¹¤ë¡£ + + USE_MULTI_THREAD_SYSTEM + THREAD_ATOMIC_START + THREAD_ATOMIC_END + THREAD_PASS + + (B) Application Layer + + Ʊ»þ¤ËÊ£¿ô¤Î¥¹¥ì¥Ã¥É¤¬¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë¡¢ + ¤Þ¤¿¤Ï²òÊü¤¹¤ë¡¢¤³¤È¤ò¹Ô¤Ê¤Ã¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£ + ¤½¤ì¤é¤Î¥ª¥Ö¥¸¥§¥¯¥È¤¬Á´¤¯Ê̤Τâ¤Î¤Ç¤¢¤Ã¤Æ¤â¡£ + + ¤â¤¦¾¯¤·¾Ü¤·¤¤ÀâÌÀ¤Ï¡¢¤³¤Î¥É¥­¥å¥á¥ó¥È¤ÎÃæ¤Î + "¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä­"¤Ë½ñ¤¤¤Æ¤ª¤¤¤¿¡£ + + +3. ¥á¡¼¥ê¥ó¥°¥ê¥¹¥È + + µ´¼Ö¤Ë´Ø¤¹¤ë¥á¡¼¥ê¥ó¥°¥ê¥¹¥È¤Ï¸ºß¤·¤Ê¤¤¡£ + +//END + + + +¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä­ + +¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë¤¹¤ë¤Ë¤Ï¡¢¸ÄÊ̤Υ¢¥×¥ê¥±¡¼¥·¥ç¥ó¤ÎÃæ¤Ç¹Ô¤¦¤«¡¢ +Oniguruma¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ç¹Ô¤¦¤«¡¢¤É¤Á¤é¤«¤òÁª¤Ö¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£ +(Oniguruma¤ò»ÈÍѤ¹¤ë¦¤ÇÂн褹¤ë¤«¡¢Oniguruma¤ËÂн褵¤»¤ë¤« +¤É¤Á¤é¤«ÊÒÊý¤Ç¹Ô¤¦É¬Íפ¬¤¢¤ë¤È¤¤¤¦¤³¤È¤Ç¤¹¡£) + +¤³¤ì¤é¤ÎÊýË¡¤Ë¤Ä¤¤¤Æ¡¢°Ê²¼(A)¤È(B)¤ÇÀâÌÀ¤·¤Þ¤¹¡£ + +¥Þ¥ë¥Á¥¹¥ì¥Ã¥ÉAPI¤Ï¡¢¤½¤ì¤¾¤ì¤Î¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë¤è¤Ã¤Æ¤â +°Û¤Ê¤ê¤Þ¤¹¤Î¤Ç¡¢°Ê²¼¤ÎÀâÌÀ¤ÎÃæ¤Ç¶ñÂÎŪ¤Ë²¿¤ò¸Æ¤Ö¤Î¤«¤ò +½ñ¤¯¤³¤È¤Ï̵Íý¤Ç¤¹¡£¼ÂºÝ¤Ë»ÈÍѤµ¤ì¤ë¥Þ¥ë¥Á¥¹¥ì¥Ã¥ÉAPI¤Ç¡¢ +Âбþ¤¹¤ëµ¡Ç½¤Î¤â¤Î¤ò»ØÄꤷ¤Æ¤¯¤À¤µ¤¤¡£ + +(A) Oniguruma¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç + +oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤Ç°Ï¤Þ¤ì¤Æ¤¤¤ëÉôʬ¤ÎÃæ¤Ç +°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤ÆºÆ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£ + +USE_MULTI_THREAD_SYSTEM + + ñ¤ËÍ­¸ú¤Ë¤¹¤ì¤Ð¤è¤¤¤Ç¤¹¡£ + +THREAD_ATOMIC_START +THREAD_ATOMIC_END + + THREAD_ATOMIC_START¤«¤éTHREAD_ATOMIC_END¤Ç°Ï¤Þ¤ì¤¿ + ¥×¥í¥°¥é¥à¤Î¥³¡¼¥ÉÉôʬ¤ò¤¢¤ë¥¹¥ì¥Ã¥É¤¬¼Â¹ÔÃæ¤Ë¡¢Â¾¤Î + ¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤¬°ÜÆ°¤·¤Ê¤¤¤³¤È¤òÊݾ㤹¤ë¤â¤Î¤ËÄêµÁ + ¤·¤Æ¤¯¤À¤µ¤¤¡£ + (̾Á°¤ÎÄ̤ꡢ°Ï¤Þ¤ì¤¿¥³¡¼¥ÉÉôʬ¤ò¥¹¥ì¥Ã¥É¥¢¥È¥ß¥Ã¥¯¤Ë + ¤¹¤ë¤È¤¤¤¦°ÕÌ£) + +THREAD_PASS + + ¤³¤ì¤ò¼Â¹Ô¤·¤¿¥¹¥ì¥Ã¥É¤«¤é¡¢Â¾¤Î¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤ò°Ñ¾ù + ¤¹¤ë¤â¤Î¤ËÄêµÁ¤ò¤·¤Æ¤¯¤À¤µ¤¤¡£(ºÆ¥¹¥±¥¸¥å¡¼¥ë¤ò¸Æ¤Ó½Ð¤¹ + ¤È¤¤¤¦°ÕÌ£) + Âбþ¤¹¤ëµ¡Ç½¤¬Á´¤¯¤Ê¤±¤ì¤Ð¡¢¶õÄêµÁ¤Ë¤·¤Æ¤¯¤À¤µ¤¤¡£ + +(»²¹ÍÎã) +Ruby¤Î¾ì¹ç¤òÎã¤Ë¤¹¤ë¤È¡¢ +Ruby¤Ï¼«Ê¬¼«¿È¤ÇÆȼ«¤Î¥¹¥ì¥Ã¥Éµ¡Ç½¤ò¼ÂÁõ¤·¤Æ¤¤¤Þ¤¹¡£ +¤½¤Îµ¡Ç½¤ò»ÈÍѤ¹¤ë¤È¡¢°Ê²¼¤Î¤è¤¦¤ËÄêµÁ¤¹¤ì¤Ð¤è¤¤¤³¤È¤Ë +¤Ê¤ê¤Þ¤¹¡£ + +#define 
USE_MULTI_THREAD_SYSTEM +#define THREAD_ATOMIC_START DEFER_INTS +#define THREAD_ATOMIC_END ENABLE_INTS +#define THREAD_PASS rb_thread_schedule() + +Ruby¤Î¾ì¹ç¡¢¥¿¥¤¥Þ³ä¤ê¹þ¤ß¤ò»ÈÍѤ·¤Æ¡¢¥¹¥ì¥Ã¥É¤ÎÀÚ¤êÂؤ¨¤ò +¹Ô¤Ã¤Æ¤¤¤Þ¤¹¡£DEFER_INTS¤Ï³ä¤ê¹þ¤ß¥Ï¥ó¥É¥é¤Î¼Â¹Ô¤ò°ì»þŪ¤Ë +»ß¤á¤ë¤¿¤á¤Î¥Þ¥¯¥í¤Ç¤¹¡£ENABLE_INTS¥Þ¥¯¥í¤Ç³ä¤ê¹þ¤ß¥Ï¥ó¥É¥é +¤Î¼Â¹Ô¤òµö²Ä¤·¤Þ¤¹¡£ +¤³¤ì¤Ë¤è¤Ã¤Æ¡¢THREAD_ATOMIC_START¤«¤éTHREAD_ATOMIC_END +¤Ç°Ï¤Þ¤ì¤¿Éôʬ¤Î¼Â¹ÔÃæ¤Ë¡¢Â¾¤Î¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤¬°ÜÆ°¤·¤Þ¤»¤ó¡£ + + +(B) ¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç + +°Ê²¼¤òÊݾ㤹¤ë¤è¤¦¤Ë¡¢¥¹¥ì¥Ã¥É¤Î¼Â¹Ô¤òÀ©¸æ¤·¤Æ¤¯¤À¤µ¤¤¡£ + +Ʊ»þ¤ËÊ£¿ô¤Î¥¹¥ì¥Ã¥É¤¬¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë¡¢¤Þ¤¿¤Ï²òÊü¤¹¤ë¡¢¤³¤È¤ò +¹Ô¤Ê¤Ã¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£¤½¤ì¤é¤Î¥ª¥Ö¥¸¥§¥¯¥È¤¬Á´¤¯Ê̤Τâ¤Î¤Ç¤¢¤Ã¤Æ¤â¡£ + +onig_new(), onig_new_deluxe(), onig_free()¤Î¤É¤ì¤«¤Î¸Æ¤Ó½Ð¤·¤ò¡¢ +Ê£¿ô¤Î¥¹¥ì¥Ã¥É¤¬Æ±»þ¤Ë¼Â¹Ô¤¹¤ë¤³¤È¤òÈò¤±¤Æ¤¯¤À¤µ¤¤¡£Æ±»þ¤Ç¤Ê¤±¤ì¤ÐÊ̤ˤ«¤Þ¤¤¤Þ¤»¤ó¡£ + +¤³¤ì¤Ï²¿¸ÎɬÍפʤΤ«¤È¤¤¤¦¤È¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë +²áÄø¤Ç¡¢ÆâÉô¤Ç¶¦Ä̤˻²¾È¤¹¤ë¥Æ¡¼¥Ö¥ë¤¬¤¢¤ê¤Þ¤¹¡£ +¤³¤Î¥Æ¡¼¥Ö¥ë¤ËÂФ·¤Æ¤Î¥Ç¡¼¥¿ÅÐÏ¿½èÍý¤¬Ê£¿ô¤Î¥¹¥ì¥Ã¥É¤Ç¾×Æͤ·¤Æ +°Û¾ï¤Ê¾õÂ֤ˤʤé¤Ê¤¤¤¿¤á¤ËɬÍפǤ¹¡£ + +// END diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE new file mode 100644 index 0000000000..5a2783d167 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/RE @@ -0,0 +1,412 @@ +Oniguruma Regular Expressions Version 4.3.0 2006/08/17 + +syntax: ONIG_SYNTAX_RUBY (default) + + +1. Syntax elements + + \ escape (enable or disable meta character meaning) + | alternation + (...) group + [...] character class + + +2. 
Characters + + \t horizontal tab (0x09) + \v vertical tab (0x0B) + \n newline (0x0A) + \r return (0x0D) + \b back space (0x08) + \f form feed (0x0C) + \a bell (0x07) + \e escape (0x1B) + \nnn octal char (encoded byte value) + \xHH hexadecimal char (encoded byte value) + \x{7HHHHHHH} wide hexadecimal char (character code point value) + \cx control char (character code point value) + \C-x control char (character code point value) + \M-x meta (x|0x80) (character code point value) + \M-\C-x meta control char (character code point value) + + (* \b is effective in character class [...] only) + + +3. Character types + + . any character (except newline) + + \w word character + + Not Unicode: + alphanumeric, "_" and multibyte char. + + Unicode: + General_Category -- (Letter|Mark|Number|Connector_Punctuation) + + \W non word char + + \s whitespace char + + Not Unicode: + \t, \n, \v, \f, \r, \x20 + + Unicode: + 0009, 000A, 000B, 000C, 000D, 0085(NEL), + General_Category -- Line_Separator + -- Paragraph_Separator + -- Space_Separator + + \S non whitespace char + + \d decimal digit char + + Unicode: General_Category -- Decimal_Number + + \D non decimal digit char + + \h hexadecimal digit char [0-9a-fA-F] + + \H non hexadecimal digit char + + +4. Quantifier + + greedy + + ? 1 or 0 times + * 0 or more times + + 1 or more times + {n,m} at least n but not more than m times + {n,} at least n times + {,n} at least 0 but not more than n times ({0,n}) + {n} n times + + reluctant + + ?? 1 or 0 times + *? 0 or more times + +? 1 or more times + {n,m}? at least n but not more than m times + {n,}? at least n times + {,n}? at least 0 but not more than n times (== {0,n}?) + + possessive (greedy and does not backtrack after repeated) + + ?+ 1 or 0 times + *+ 0 or more times + ++ 1 or more times + + ({n,m}+, {n,}+, {n}+ are possessive op. in ONIG_SYNTAX_JAVA only) + + ex. /a*+/ === /(?>a*)/ + + +5. 
Anchors + + ^ beginning of the line + $ end of the line + \b word boundary + \B not word boundary + \A beginning of string + \Z end of string, or before newline at the end + \z end of string + \G matching start position (*) + + * Ruby Regexp: + previous end-of-match position + (This specification is not related to this library.) + + +6. Character class + + ^... negative class (lowest precedence operator) + x-y range from x to y + [...] set (character class in character class) + ..&&.. intersection (low precedence at the next of ^) + + ex. [a-w&&[^c-g]z] ==> ([a-w] AND ([^c-g] OR z)) ==> [abh-w] + + * If you want to use '[', '-', ']' as a normal character + in a character class, you should escape these characters by '\'. + + + POSIX bracket ([:xxxxx:], negate [:^xxxxx:]) + + Not Unicode Case: + + alnum alphabet or digit char + alpha alphabet + ascii code value: [0 - 127] + blank \t, \x20 + cntrl + digit 0-9 + graph include all of multibyte encoded characters + lower + print include all of multibyte encoded characters + punct + space \t, \n, \v, \f, \r, \x20 + upper + xdigit 0-9, a-f, A-F + + + Unicode Case: + + alnum Letter | Mark | Decimal_Number + alpha Letter | Mark + ascii 0000 - 007F + blank Space_Separator | 0009 + cntrl Control | Format | Unassigned | Private_Use | Surrogate + digit Decimal_Number + graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate + lower Lowercase_Letter + print [[:graph:]] | [[:space:]] + punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation | + Final_Punctuation | Initial_Punctuation | Other_Punctuation | + Open_Punctuation + space Space_Separator | Line_Separator | Paragraph_Separator | + 0009 | 000A | 000B | 000C | 000D | 0085 + upper Uppercase_Letter + xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066 + (0-9, a-f, A-F) + + +7. Extended groups + + (?#...) comment + + (?imx-imx) option on/off + i: ignore case + m: multi-line (dot(.) 
match newline) + x: extended form + (?imx-imx:subexp) option on/off for subexp + + (?:subexp) not captured group + (subexp) captured group + + (?=subexp) look-ahead + (?!subexp) negative look-ahead + (?<=subexp) look-behind + (?<!subexp) negative look-behind + (?>subexp) atomic group + don't backtrack in subexp. + + (?<name>subexp) define named group + (All characters of the name must be word characters, + and the first character must not be a digit or upper case.) + + Not only a name but also a number is assigned, as with a captured + group. + + Assigning the same name to two or more subexps is allowed. + In this case, a subexp call can not be performed although + the back reference is possible. + + +8. Back reference + + \n back reference by group number (n >= 1) + \k<name> back reference by group name + + In a back reference by a multiply defined name, + the subexp with the larger number is referred to preferentially. + (When it does not match, the group with the smaller number is referred to.) + + * Back reference by group number is forbidden if a named group is defined + in the pattern and ONIG_OPTION_CAPTURE_GROUP is not set. + + + back reference with nest level + + (This function is disabled in Ruby 1.9.) + + \k<name+n> n: 0, 1, 2, ... + \k<name-n> n: 0, 1, 2, ... + + Designates the nest level relative to the back reference position. + + ex 1. + + /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer") + + ex 2. + + r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED) + (?<element> \g<stag> \g<content>* \g<etag> ){0} + (?<stag> < \g<name> \s* > ){0} + (?<name> [a-zA-Z_:]+ ){0} + (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0} + (?<etag> </ \k<name+1> >){0} + \g<element> + __REGEXP__ + + p r.match('<foo>f<bar>bbb</bar>f</foo>').captures + + + +9. Subexp call ("Tanaka Akira special") + + \g<name> call by group name + \g<n> call by group number (n >= 1) + + * left-most recursive call is not allowed. + ex. (?<name>a|\g<name>b) => error + (?<name>a|b\g<name>c) => OK + + * Call by group number is forbidden if a named group is defined in the pattern + and ONIG_OPTION_CAPTURE_GROUP is not set. + + * If the option status of the called group is different from that of the calling position, + the called group's option is effective. 
+ + ex. (?-i:\g<name>)(?i:(?<name>a)){0} matches "A" + + +10. Captured group + + Behavior of the unnamed group (...) changes with the following conditions. + (But the behavior of a named group does not change.) + + case 1. /.../ (named group is not used, no option) + + (...) is treated as a captured group. + + case 2. /.../g (named group is not used, 'g' option) + + (...) is treated as a non-captured group (?:...). + + case 3. /..(?<name>..)../ (named group is used, no option) + + (...) is treated as a non-captured group (?:...). + numbered-backref/call is not allowed. + + case 4. /..(?<name>..)../G (named group is used, 'G' option) + + (...) is treated as a captured group. + numbered-backref/call is allowed. + + where + g: ONIG_OPTION_DONT_CAPTURE_GROUP + G: ONIG_OPTION_CAPTURE_GROUP + + ('g' and 'G' options were discussed on the ruby-dev ML) + + These options are not implemented at the Ruby level. + + +----------------------------- +A-1. Syntax-dependent options + + + ONIG_SYNTAX_RUBY + (?m): dot(.) matches newline + + + ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA + (?s): dot(.) matches newline + (?m): ^ matches after newline, $ matches before newline + + +A-2. Original extensions + + + hexadecimal digit char type \h, \H + + named group (?<name>...) + + named backref \k<name> + + subexp call \g<name>, \g<group-num> + + +A-3. Features lacking compared with Perl 5.8.0 + + + [:word:] + + \N{name} + + \l,\u,\L,\U, \X, \C + + (?{code}) + + (??{code}) + + (?(condition)yes-pat|no-pat) + + * \Q...\E + This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. + + * \p{property}, \P{property} + This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. + Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, + Print, Punct, Space, Upper, XDigit, ASCII are supported. + + The prefix 'Is' on a property name is allowed in ONIG_SYNTAX_PERL only. + ex. \p{IsXDigit}. + + The negation operator of a property is supported in ONIG_SYNTAX_PERL only. + \p{^...}, \P{^...} + + +A-4. 
Differences with Japanized GNU regex(version 0.12) of Ruby + + + add hexadecimal digit char type (\h, \H) + + add look-behind + (?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern) + + add possessive quantifiers (?+, *+, ++) + + add operators in character class ([...], &&) + ('[' must be escaped to be used as a usual char in a character class.) + + add named group and subexp call. + + an octal or hexadecimal number sequence in a character class is treated as + one multibyte-encoded char if a multibyte encoding is specified. + (ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1]) + + a range from a single byte char to a multibyte char is allowed in a character + class. + ex. /[a-<<any EUC-JP character>>]/ in EUC-JP encoding. + + effect range of isolated option is to next ')'. + ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b). + + isolated option is not transparent to previous pattern. + ex. a(?i)* is a syntax error pattern. + + an incomplete left brace is allowed as a usual string. + ex. /{/, /({)/, /a{2,3/ etc... + + negative POSIX bracket [:^xxxx:] is supported. + + POSIX bracket [:ascii:] is added. + + repeat of look-ahead is not allowed. + ex. /(?=a)*/, /(?!b){5}/ + + Ignore case option is effective for characters specified by number. + ex. /\x61/i =~ "A" + + In the range quantifier, the minimum number may be omitted. + /a{,n}/ == /a{0,n}/ + Omitting both the minimum and the maximum at the same time + is not allowed. (/a{,}/) + + /a{n}?/ is not a non-greedy operator. + /a{n}?/ == /(?:a{n})?/ + + an invalid back reference is checked and causes an error. + /\1/, /(a)\2/ + + Zero-length match in infinite repeat stops the repeat, + then changes of the capture group status are checked as stop condition. + /(?:()|())*\1\2/ =~ "" + /(?:\1a|())*/ =~ "a" + + +A-5. Functions disabled in the default syntax + + + capture history + + (?@...) and (?@<name>...) + + ex. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>] + + see sample/listcap.c file. + + +A-6. Problems + + + Invalid encoding byte sequence is not checked in UTF-8. + + * Invalid first byte is treated as a character. + /./u =~ "\xa3" + + * Incomplete byte sequence is not checked. + /\w+/ =~ "a\xf3\x8ec" + +// END diff --git a/ext/mbstring/oniguruma/doc/RE.ja b/ext/mbstring/oniguruma/doc/RE.ja new file mode 100644 index 0000000000..51681715c4 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/RE.ja @@ -0,0 +1,424 @@ +µ´¼Ö Àµµ¬É½¸½ Version 4.3.0 2006/08/17 + +»ÈÍÑʸˡ: ONIG_SYNTAX_RUBY (´ûÄêÃÍ) + + +1. 
´ðËÜÍ×ÁÇ + + \ ÂàÈò½¤¾þ (¥¨¥¹¥±¡¼¥×) Àµµ¬É½¸½µ­¹æ¤ÎÍ­¸ú/̵¸ú¤ÎÀ©¸æ + | ÁªÂò»Ò + (...) ¼°½¸¹ç (¥°¥ë¡¼¥×) + [...] ʸ»ú½¸¹ç (ʸ»ú¥¯¥é¥¹) + + +2. ʸ»ú + + \t ¿åÊ¿¥¿¥Ö (0x09) + \v ¿âľ¥¿¥Ö (0x0B) + \n ²þ¹Ô (0x0A) + \r Éüµ¢ (0x0D) + \b ¸åÂà¶õÇò (0x08) + \f ²þÊÇ (0x0C) + \a ¾â (0x07) + \e ÂàÈò½¤¾þ (0x1B) + \nnn Ȭ¿Ê¿ôɽ¸½ Éä¹æ²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô) + \xHH ½½Ï»¿Ê¿ôɽ¸½ Éä¹æ²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô) + \x{7HHHHHHH} ³ÈÄ¥½½Ï»¿Ê¿ôɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \cx À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \C-x À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \M-x Ķ (x|0x80) ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \M-\C-x Ķ + À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + + ¢¨ \b¤Ï¡¢Ê¸»ú½¸¹çÆâ¤Ç¤Î¤ßÍ­¸ú + + +3. ʸ»ú¼ï + + . Ǥ°Õʸ»ú (²þ¹Ô¤ò½ü¤¯) + + \w ñ¸ì¹½À®Ê¸»ú + + Unicode°Ê³°¤Î¾ì¹ç: + ±Ñ¿ô»ú, "_" ¤ª¤è¤Ó ¿¥Ð¥¤¥Èʸ»ú¡£ + + Unicode¤Î¾ì¹ç: + General_Category -- (Letter|Mark|Number|Connector_Punctuation) + + \W Èóñ¸ì¹½À®Ê¸»ú + + \s ¶õÇòʸ»ú + + Unicode°Ê³°¤Î¾ì¹ç: + \t, \n, \v, \f, \r, \x20 + + Unicode¤Î¾ì¹ç: + 0009, 000A, 000B, 000C, 000D, 0085(NEL), + General_Category -- Line_Separator + -- Paragraph_Separator + -- Space_Separator + + \S Èó¶õÇòʸ»ú + + \d 10¿Ê¿ô»ú + + Unicode¤Î¾ì¹ç: General_Category -- Decimal_Number + + \D Èó10¿Ê¿ô»ú + + \h 16¿Ê¿ô»ú [0-9a-fA-F] + + \H Èó16¿Ê¿ô»ú + + + +4. ÎÌ»ØÄê»Ò + + ÍßÄ¥¤ê + + ? °ì²ó¤Þ¤¿¤ÏÎí²ó + * Îí²ó°Ê¾å + + °ì²ó°Ê¾å + {n,m} n²ó°Ê¾åm²ó°Ê²¼ + {n,} n²ó°Ê¾å + {,n} Îí²ó°Ê¾ån²ó°Ê²¼ ({0,n}) + {n} n²ó + + ̵Íß + + ?? °ì²ó¤Þ¤¿¤ÏÎí²ó + *? Îí²ó°Ê¾å + +? °ì²ó°Ê¾å + {n,m}? n²ó°Ê¾åm²ó°Ê²¼ + {n,}? n²ó°Ê¾å + {,n}? Îí²ó°Ê¾ån²ó°Ê²¼ (== {0,n}?) + + ¶¯Íß (ÍßÄ¥¤ê¤Ç¡¢·«¤êÊÖ¤·¤ËÀ®¸ù¤·¤¿¸å¤Ï²ó¿ô¤ò¸º¤é¤¹¤è¤¦¤Ê¸åÂàºÆ»î¹Ô¤ò¤·¤Ê¤¤) + + ?+ °ì²ó¤Þ¤¿¤ÏÎí²ó + *+ Îí²ó°Ê¾å + ++ °ì²ó°Ê¾å + + ({n,m}+, {n,}+, {n}+ ¤Ï¡¢ONIG_SYNTAX_JAVA¤Ç¤Î¤ß¶¯ÍߤʻØÄê»Ò) + + Îã. /a*+/ === /(?>a*)/ + + +5. ÉÅ + + ^ ¹ÔƬ + $ ¹ÔËö + \b ñ¸ì¶­³¦ + \B Èóñ¸ì¶­³¦ + \A ʸ»úÎóÀèƬ + \Z ʸ»úÎóËöÈø¡¢¤Þ¤¿¤Ïʸ»úÎóËöÈø¤Î²þ¹Ô¤ÎľÁ° + \z ʸ»úÎóËöÈø + \G ¾È¹ç³«»Ï°ÌÃÖ(*) + + * Ruby Regexp: + Á°²ó¾È¹çÀ®¸ùËöÈø°ÌÃÖ + (¤³¤Î»ÅÍͤÏRuby¤Î¼ÂÁõ¤Ë´Ø¤¹¤ë¤â¤Î¤Ç¤¢¤ê¡¢ + Àµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤È¤Ï̵´Ø·¸) + + +6. ʸ»ú½¸¹ç + + ^... 
ÈÝÄê (ºÇÄãÍ¥ÀèÅٱ黻»Ò) + x-y ÈÏ°Ï (x¤«¤éy¤Þ¤Ç) + [...] ½¸¹ç (ʸ»ú½¸¹çÆâʸ»ú½¸¹ç) + ..&&.. Àѱ黻 (^¤Î¼¡¤ËÍ¥ÀèÅÙ¤¬Ä㤤±é»»»Ò) + + Îã. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w] + + ¢¨ '[', '-', ']'¤ò¡¢Ê¸»ú½¸¹çÆâ¤ÇÄ̾ïʸ»ú¤Î°ÕÌ£¤Ç»ÈÍѤ·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢ + ¤³¤ì¤é¤Îʸ»ú¤ò'\'¤ÇÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ + + + POSIX¥Ö¥é¥±¥Ã¥È ([:xxxxx:], ÈÝÄê [:^xxxxx:]) + + Unicode°Ê³°¤Î¾ì¹ç: + + alnum ±Ñ¿ô»ú + alpha ±Ñ»ú + ascii 0 - 127 + blank \t, \x20 + cntrl + digit 0-9 + graph ¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à + lower + print ¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à + punct + space \t, \n, \v, \f, \r, \x20 + upper + xdigit 0-9, a-f, A-F + + Unicode¤Î¾ì¹ç: + + alnum Letter | Mark | Decimal_Number + alpha Letter | Mark + ascii 0000 - 007F + blank Space_Separator | 0009 + cntrl Control | Format | Unassigned | Private_Use | Surrogate + digit Decimal_Number + graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate + lower Lowercase_Letter + print [[:graph:]] | [[:space:]] + punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation | + Final_Punctuation | Initial_Punctuation | Other_Punctuation | + Open_Punctuation + space Space_Separator | Line_Separator | Paragraph_Separator | + 0009 | 000A | 000B | 000C | 000D | 0085 + upper Uppercase_Letter + xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066 + (0-9, a-f, A-F) + + +7. ³ÈÄ¥¼°½¸¹ç + + (?#...) Ãí¼á + (?imx-imx) ¸ÉΩ¥ª¥×¥·¥ç¥ó + i: Âçʸ»ú¾®Ê¸»ú¾È¹ç + m: Ê£¿ô¹Ô + x: ³ÈÄ¥·Á¼° + (?imx-imx:¼°) ¼°¥ª¥×¥·¥ç¥ó + + (¼°) Êá³Í¼°½¸¹ç + (?:¼°) ÈóÊá³Í¼°½¸¹ç + + (?=¼°) ÀèÆÉ¤ß + (?!¼°) ÈÝÄêÀèÆÉ¤ß + (?<=¼°) Ìá¤êÆÉ¤ß + (?¼°) ¸¶»ÒŪ¼°½¸¹ç + ¼°Á´ÂΤòÄ̲ᤷ¤¿¤È¤­¡¢¼°¤ÎÃæ¤Ç¤Î¸åÂàºÆ»î¹Ô¤ò¹Ô¤Ê¤ï¤Ê¤¤ + + (?¼°) ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç + ¼°½¸¹ç¤Ë̾Á°¤ò³ä¤êÅö¤Æ¤ë(ÄêµÁ¤¹¤ë)¡£ + (̾Á°¤Ïñ¸ì¹½À®Ê¸»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ºÇ½é¤Îʸ»ú¤Ï + ±ÑÂçʸ»ú¤Ç¤¢¤Ã¤Æ¤Ï¤¤¤±¤Ê¤¤¡£) + + ̾Á°¤À¤±¤Ç¤Ê¤¯¡¢Êá³Í¼°½¸¹ç¤ÈƱÍͤËÈÖ¹æ¤â³ä¤êÅö¤Æ¤é¤ì¤ë¡£ + ÈÖ¹æ»ØÄ꤬¶Ø»ß¤µ¤ì¤Æ¤¤¤Ê¤¤¾õÂÖ (10. 
Êá³Í¼°½¸¹ç ¤ò»²¾È) + ¤Î¤È¤­¤Ï¡¢Ì¾Á°¤ò»È¤ï¤Ê¤¤¤ÇÈÖ¹æ¤Ç¤â»²¾È¤Ç¤­¤ë¡£ + + Ê£¿ô¤Î¼°½¸¹ç¤ËƱ¤¸Ì¾Á°¤òÍ¿¤¨¤ë¤³¤È¤Ïµö¤µ¤ì¤Æ¤¤¤ë¡£ + ¤³¤Î¾ì¹ç¤Ë¤Ï¡¢¤³¤Î̾Á°¤ò»ÈÍѤ·¤¿¸åÊý»²¾È¤Ï²Äǽ¤Ç¤¢¤ë¤¬¡¢ + Éôʬ¼°¸Æ½Ð¤·¤Ï¤Ç¤­¤Ê¤¤¡£ + + +8. ¸åÊý»²¾È + + \n ÈÖ¹æ»ØÄ껲¾È (n >= 1) + \k ̾Á°»ØÄ껲¾È + + ̾Á°»ØÄ껲¾È¤Ç¡¢¤½¤Î̾Á°¤¬Ê£¿ô¤Î¼°½¸¹ç¤Ç¿½ÅÄêµÁ¤µ¤ì¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢ + ÈÖ¹æ¤ÎÂ礭¤¤¼°½¸¹ç¤«¤éÍ¥ÀèŪ¤Ë»²¾È¤µ¤ì¤ë¡£ + (¥Þ¥Ã¥Á¤·¤Ê¤¤¤È¤­¤Ë¤ÏÈÖ¹æ¤Î¾®¤µ¤¤¼°½¸¹ç¤¬»²¾È¤µ¤ì¤ë) + + ¢¨ ÈÖ¹æ»ØÄ껲¾È¤Ï¡¢Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤¬ÄêµÁ¤µ¤ì¡¢ + ¤«¤Ä ONIG_OPTION_CAPTURE_GROUP¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢ + ¶Ø»ß¤µ¤ì¤ë¡£(10. Êá³Í¼°½¸¹ç ¤ò»²¾È) + + + ¥Í¥¹¥È¥ì¥Ù¥ëÉÕ¤­¸åÊý»²¾È + + ¤³¤Îµ¡Ç½¤Ï¸½ºß¡¢Ruby 1.9¤Ç¤Ï̵¸ú¤Ë¤·¤Æ¤¤¤ë¡£ + + \k n: 0, 1, 2, ... + \k n: 0, 1, 2, ... + + ¸åÊý»²¾È¤Î°ÌÃÖ¤«¤éÁêÂÐŪ¤ÊÉôʬ¼°¸Æ½Ð¤·¥Í¥¹¥È¥ì¥Ù¥ë¤ò»ØÄꤷ¤Æ¡¢¤½¤Î¥ì¥Ù¥ë¤Ç¤Î + Êá³ÍÃͤò»²¾È¤¹¤ë¡£ + + Îã-1. + + /\A(?|.|(?:(?.)\g\k))\z/.match("reer") + + Îã-2. + + r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED) + (? \g \g* \g ){0} + (? < \g \s* > ){0} + (? [a-zA-Z_:]+ ){0} + (? [^<&]+ (\g | [^<&]+)* ){0} + (? >){0} + \g + __REGEXP__ + + p r.match('fbbbf').captures + + + +9. Éôʬ¼°¸Æ½Ð¤· ("ÅÄÃæů¥¹¥Ú¥·¥ã¥ë") + + \g ̾Á°»ØÄê¸Æ½Ð¤· + \g ÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1) + + ¢¨ ºÇº¸°ÌÃ֤ǤκƵ¢¸Æ½Ð¤·¤Ï¶Ø»ß¤µ¤ì¤ë¡£ + Îã. (?a|\gb) => error + (?a|b\gc) => OK + + ¢¨ ÈÖ¹æ»ØÄê¸Æ½Ð¤·¤Ï¡¢Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤¬ÄêµÁ¤µ¤ì¡¢ + ¤«¤Ä ONIG_OPTION_CAPTURE_GROUP¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢ + ¶Ø»ß¤µ¤ì¤ë¡£ (10. Êá³Í¼°½¸¹ç ¤ò»²¾È) + + ¢¨ ¸Æ¤Ó½Ð¤µ¤ì¤¿¼°½¸¹ç¤Î¥ª¥×¥·¥ç¥ó¾õÂÖ¤¬¸Æ½Ð¤·Â¦¤Î¥ª¥×¥·¥ç¥ó¾õÂ֤ȰۤʤäƤ¤¤ë + ¤È¤­¡¢¸Æ¤Ó½Ð¤µ¤ì¤¿Â¦¤Î¥ª¥×¥·¥ç¥ó¾õÂÖ¤¬Í­¸ú¤Ç¤¢¤ë¡£ + + Îã. (?-i:\g)(?i:(?a)){0} ¤Ï "A" ¤Ë¾È¹çÀ®¸ù¤¹¤ë¡£ + + +10. Êá³Í¼°½¸¹ç + + Êá³Í¼°½¸¹ç(...)¤Ï¡¢°Ê²¼¤Î¾ò·ï¤Ë±þ¤¸¤Æ¿¶Éñ¤¬ÊѲ½¤¹¤ë¡£ + (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÊѲ½¤·¤Ê¤¤) + + case 1. /.../ (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÉÔ»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó¤Ê¤·) + + (...) ¤Ï¡¢Êá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£ + + case 2. /.../g (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÉÔ»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó 'g'¤ò»ØÄê) + + (...) ¤Ï¡¢ÈóÊá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£ + + case 3. /..(?..)../ (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó¤Ê¤·) + + (...) 
¤Ï¡¢ÈóÊá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£ + ÈÖ¹æ»ØÄ껲¾È/¸Æ¤Ó½Ð¤·¤ÏÉÔµö²Ä¡£ + + case 4. /..(?..)../G (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó 'G'¤ò»ØÄê) + + (...) ¤Ï¡¢Êá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£ + ÈÖ¹æ»ØÄ껲¾È/¸Æ¤Ó½Ð¤·¤Ïµö²Ä¡£ + + ⤷ + g: ONIG_OPTION_DONT_CAPTURE_GROUP + G: ONIG_OPTION_CAPTURE_GROUP + ('g'¤È'G'¥ª¥×¥·¥ç¥ó¤Ï¡¢ruby-dev ML¤ÇµÄÏÀ¤µ¤ì¤¿¡£) + + ¤³¤ì¤é¤Î¿¶Éñ¤Î°ÕÌ£¤Ï¡¢ + ̾Á°ÉÕ¤­Êá³Í¤È̾Á°Ìµ¤·Êá³Í¤òƱ»þ¤Ë»ÈÍѤ¹¤ëɬÁ³À­¤Î¤¢¤ë¾ìÌ̤Ͼ¯¤Ê¤¤¤Ç¤¢¤í¤¦ + ¤È¤¤¤¦Íýͳ¤«¤é¹Í¤¨¤é¤ì¤¿¤â¤Î¤Ç¤¢¤ë¡£ + ¤³¤ì¤é¤Î¥ª¥×¥·¥ç¥ó¤Ë¤Ä¤¤¤Æ¤Ï¡¢Ruby¤Ç¤Ï¸½ºß¼ÂÁõ¤µ¤ì¤Æ¤¤¤Ê¤¤¡£ + + +----------------------------- +Êäµ­ 1. ʸˡ°Í¸¥ª¥×¥·¥ç¥ó + + + ONIG_SYNTAX_RUBY + (?m): ½ª»ßÉäµ­¹æ(.)¤Ï²þ¹Ô¤È¾È¹çÀ®¸ù + + + ONIG_SYNTAX_PERL ¤È ONIG_SYNTAX_JAVA + (?s): ½ª»ßÉäµ­¹æ(.)¤Ï²þ¹Ô¤È¾È¹çÀ®¸ù + (?m): ^ ¤Ï²þ¹Ô¤Îľ¸å¤Ë¾È¹ç¤¹¤ë¡¢$ ¤Ï²þ¹Ô¤ÎľÁ°¤Ë¾È¹ç¤¹¤ë + + +Êäµ­ 2. Æȼ«³ÈÄ¥µ¡Ç½ + + + 16¿Ê¿ô¿ô»ú¡¢Èó16¿Ê¿ô»ú \h, \H + + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç (?...) + + ̾Á°»ØÄê¸åÊý»²¾È \k + + Éôʬ¼°¸Æ½Ð¤· \g, \g + + +Êäµ­ 3. Perl 5.8.0¤ÈÈæ³Ó¤·¤Æ¸ºß¤·¤Ê¤¤µ¡Ç½ + + + [:word:] + + \N{name} + + \l,\u,\L,\U, \X, \C + + (?{code}) + + (??{code}) + + (?(condition)yes-pat|no-pat) + + * \Q...\E + ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú + + * \p{property}, \P{property} + ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú + Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, + Print, Punct, Space, Upper, XDigit, ASCII¤¬»ØÄê¤Ç¤­¤ë¡£ + + ÆÃÀ­Ì¾¤ÎÁ°¤Ë 'Is'Á°ÃÖ»ì¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ß + µö¤µ¤ì¤Æ¤¤¤ë¡£ + ex. \p{IsXDigit}. + + ÆÃÀ­¤ÎÈÝÄê±é»»»Ò¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ßµö¤µ¤ì¤Æ¤¤¤ë¡£ + \p{^...}, \P{^...} + + +Êäµ­ 4. Ruby¤ÎÆüËܸ첽 GNU regex(version 0.12)¤È¤Î°ã¤¤ + + + 16¿Ê¿ô»ú¥¿¥¤¥×Äɲà (\h, \H) + + Ìá¤êÆɤߵ¡Ç½¤òÄɲà + + ¶¯Íߤʷ«¤êÊÖ¤·»ØÄê»Ò¤òÄɲà (?+, *+, ++) + + ʸ»ú½¸¹ç¤ÎÃæ¤Î±é»»»Ò¤òÄɲà ([...], &&) + ('[' ¤Ï¡¢Ê¸»ú½¸¹ç¤ÎÃæ¤ÇÄ̾ï¤Îʸ»ú¤È¤·¤Æ»ÈÍѤ¹¤ë¤È¤­¤Ë¤Ï + ÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤) + + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤È¡¢Éôʬ¼°¸Æ½Ð¤·µ¡Ç½Äɲà + + ¿¥Ð¥¤¥Èʸ»ú¥³¡¼¥É¤¬»ØÄꤵ¤ì¤Æ¤¤¤ë¤È¤­¡¢ + ʸ»ú½¸¹ç¤ÎÃæ¤ÇȬ¿Ê¿ô¤Þ¤¿¤Ï½½Ï»¿Ê¿ôɽ¸½¤ÎϢ³¤Ï¡¢Â¿¥Ð¥¤¥ÈÉä¹ç¤Çɽ¸½¤µ¤ì¤¿ + °ì¸Ä¤Îʸ»ú¤È²ò¼á¤µ¤ì¤ë + (Îã. 
[\xa1\xa2], [\xa1\xa7-\xa4\xa1]) + + ʸ»ú½¸¹ç¤ÎÃæ¤Ç¡¢°ì¥Ð¥¤¥Èʸ»ú¤È¿¥Ð¥¤¥Èʸ»ú¤ÎÈÏ°Ï»ØÄê¤Ïµö¤µ¤ì¤ë¡£ + ex. /[a-¤¢]/ + + ¸ÉΩ¥ª¥×¥·¥ç¥ó¤ÎÍ­¸úÈϰϤϡ¢¤½¤Î¸ÉΩ¥ª¥×¥·¥ç¥ó¤ò´Þ¤ó¤Ç¤¤¤ë¼°½¸¹ç¤Î + ½ª¤ï¤ê¤Þ¤Ç¤Ç¤¢¤ë + Îã. (?:(?i)a|b) ¤Ï (?:(?i:a|b)) ¤È²ò¼á¤µ¤ì¤ë¡¢(?:(?i:a)|b)¤Ç¤Ï¤Ê¤¤ + + ¸ÉΩ¥ª¥×¥·¥ç¥ó¤Ï¤½¤ÎÁ°¤Î¼°¤ËÂФ·¤ÆÆ©²áŪ¤Ç¤Ï¤Ê¤¤ + Îã. /a(?i)*/ ¤Ïʸˡ¥¨¥é¡¼¤È¤Ê¤ë + + ÉÔ´°Á´¤Ê·«¤êÊÖ¤·ÈÏ°Ï»ØÄê»Ò¤ÏÄ̾ï¤Îʸ»úÎó¤È¤·¤Æµö²Ä¤µ¤ì¤ë + Îã. /{/, /({)/, /a{2,3/ + + ÈÝÄêŪPOSIX¥Ö¥é¥±¥Ã¥È [:^xxxx:] ¤òÄɲà + + POSIX¥Ö¥é¥±¥Ã¥È [:ascii:] ¤òÄɲà + + ÀèÆɤߤ竤êÊÖ¤·¤ÏÉÔµö²Ä + Îã. /(?=a)*/, /(?!b){5}/ + + ¿ôÃͤǻØÄꤵ¤ì¤¿Ê¸»ú¤ËÂФ·¤Æ¤â¡¢Âçʸ»ú¾®Ê¸»ú¾È¹ç¥ª¥×¥·¥ç¥ó¤ÏÍ­¸ú + Îã. /\x61/i =~ "A" + + ·«¤êÊÖ¤·²ó¿ô»ØÄê¤Ç¡¢ºÇÄã²ó¿ô¤Î¾Êά(0²ó)¤¬¤Ç¤­¤ë + /a{,n}/ == /a{0,n}/ + ºÇÄã²ó¿ô¤ÈºÇÂç²ó¿ô¤ÎƱ»þ¾Êά¤Ïµö¤µ¤ì¤Ê¤¤¡£(/a{,}/) + + /a{n}?/¤Ï̵Íߤʱ黻»Ò¤Ç¤Ï¤Ê¤¤¡£ + /a{n}?/ == /(?:a{n})?/ + + ̵¸ú¤Ê¸åÊý»²¾È¤ò¥Á¥§¥Ã¥¯¤·¤Æ¥¨¥é¡¼¤Ë¤¹¤ë¡£ + /\1/, /(a)\2/ + + ̵¸Â·«¤êÊÖ¤·¤ÎÃæ¤Ç¡¢Ä¹¤µÎí¤Ç¤Î¾È¹çÀ®¸ù¤Ï·«¤êÊÖ¤·¤òÃæÃǤµ¤»¤ë¤¬¡¢ + ¤³¤Î¤È¤­¡¢ÃæÃǤ¹¤Ù¤­¤«¤É¤¦¤«¤ÎȽÄê¤È¤·¤Æ¡¢Êá³Í¼°½¸¹ç¤ÎÊá³Í¾õÂ֤Π+ ÊѲ½¤Þ¤Ç¹Íθ¤·¤Æ¤¤¤ë + /(?:()|())*\1\2/ =~ "" + /(?:\1a|())*/ =~ "a" + + + +Êäµ­ 5. ¼ÂÁõ¤µ¤ì¤Æ¤¤¤ë¤¬¡¢´ûÄêÃͤǤÏÍ­¸ú¤Ë¤·¤Æ¤¤¤Ê¤¤µ¡Ç½ + + + Êá³ÍÍúÎò»²¾È + + (?@...) ¤È (?@...) + + Îã. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>] + + »ÈÍÑÊýË¡¤Ï¡¢sample/listcap.c¤ò»²¾È + + Í­¸ú¤Ë¤·¤Æ¤¤¤Ê¤¤Íýͳ¤Ï¡¢¤É¤ÎÄøÅÙÌò¤ËΩ¤Ä¤«¤Ï¤Ã¤­¤ê¤·¤Ê¤¤¤¿¤á¡£ + + +Êäµ­ 6. ÌäÂêÅÀ + + + UTF-8¤Ç¡¢¥Ð¥¤¥ÈÃͤ¬Å¬Àµ¤Ê²Á¤«¤É¤¦¤«¤Î¥Á¥§¥Ã¥¯¤Ï¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤¡£ + + * ÀèƬ¥Ð¥¤¥È¤È¤·¤ÆÉÔÀµ¤Ê¥Ð¥¤¥È¤ò°ìʸ»ú¤È¤ß¤Ê¤¹ + /./u =~ "\xa3" + + * ÉÔ´°Á´¤Ê¥Ð¥¤¥È¥·¡¼¥±¥ó¥¹¤Î¥Á¥§¥Ã¥¯¤ò¤·¤Ê¤¤ + /\w+/ =~ "a\xf3\x8ec" + + ¤³¤ì¤òÄ´¤Ù¤ë¤³¤È¤Ï²Äǽ¤Ç¤Ï¤¢¤ë¤¬¡¢ÃÙ¤¯¤Ê¤ë¤Î¤Ç¹Ô¤Ê¤ï¤Ê¤¤¡£ + +½ª¤ê -- 2.40.0