From: K.Kosako Date: Mon, 10 Jul 2017 07:30:56 +0000 (+0900) Subject: implement \K X-Git-Tag: v6.5.0^2~129 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8b6c6304f587af8cce1bbc101211ec0531769965;p=onig implement \K --- diff --git a/src/regcomp.c b/src/regcomp.c index 27b10c4..596d40d 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -346,6 +346,24 @@ add_option(regex_t* reg, OnigOptionType option) return 0; } +static int +add_save_type(regex_t* reg, enum SaveType type) +{ + SaveType t = (SaveType )type; + + BBUF_ADD(reg, &t, SIZE_SAVE_TYPE); + return 0; +} + +static int +add_update_var_type(regex_t* reg, enum UpdateVarType type) +{ + UpdateVarType t = (UpdateVarType )type; + + BBUF_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE); + return 0; +} + static int add_opcode_rel_addr(regex_t* reg, int opcode, int addr) { @@ -1662,6 +1680,38 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) return r; } +static int +compile_gimmick_node(GimmickNode* node, regex_t* reg) +{ + int r; + + switch (node->type) { + case GIMMICK_KEEP: + r = add_opcode(reg, OP_PUSH_SAVE_VAL); + if (r != 0) return r; + r = add_save_type(reg, SAVE_KEEP); + if (r != 0) return r; + r = add_mem_num(reg, node->id); + break; + } + + return r; +} + +static int +compile_length_gimmick_node(GimmickNode* node, regex_t* reg) +{ + int len; + + switch (node->type) { + case GIMMICK_KEEP: + len = SIZE_OP_PUSH_SAVE_VAL; + break; + } + + return len; +} + static int compile_length_tree(Node* node, regex_t* reg) { @@ -1756,6 +1806,10 @@ compile_length_tree(Node* node, regex_t* reg) r = compile_length_anchor_node(ANCHOR_(node), reg); break; + case NODE_GIMMICK: + r = compile_length_gimmick_node(GIMMICK_(node), reg); + break; + default: return ONIGERR_TYPE_BUG; break; @@ -1939,6 +1993,10 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) r = compile_anchor_node(ANCHOR_(node), reg, env); break; + case NODE_GIMMICK: + r = compile_gimmick_node(GIMMICK_(node), reg); + break; + default: #ifdef 
ONIG_DEBUG fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node)); @@ -2358,6 +2416,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) break; case NODE_ANCHOR: + case NODE_GIMMICK: break; case NODE_BACKREF: @@ -2649,6 +2708,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) n = get_head_value_node(NODE_BODY(node), exact, reg); break; + case NODE_GIMMICK: default: break; } @@ -2707,6 +2767,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); break; + case NODE_GIMMICK: default: break; } @@ -2838,6 +2899,7 @@ get_min_len(Node* node, ScanEnv* env) break; case NODE_ANCHOR: + case NODE_GIMMICK: default: break; } @@ -2967,6 +3029,7 @@ get_max_len(Node* node, ScanEnv* env) break; case NODE_ANCHOR: + case NODE_GIMMICK: default: break; } @@ -4010,6 +4073,7 @@ quantifiers_memory_node_info(Node* node) case NODE_CTYPE: case NODE_CCLASS: case NODE_ANCHOR: + case NODE_GIMMICK: default: break; } @@ -4445,6 +4509,7 @@ setup_called_state(Node* node, int state) case NODE_STR: case NODE_CTYPE: case NODE_CCLASS: + case NODE_GIMMICK: default: break; } @@ -4730,6 +4795,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) #endif case NODE_CTYPE: case NODE_CCLASS: + case NODE_GIMMICK: default: break; } @@ -5724,6 +5790,9 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; + case NODE_GIMMICK: + break; + default: #ifdef ONIG_DEBUG fprintf(stderr, "optimize_node_left: undefined node type %d\n", NODE_TYPE(node)); @@ -6225,6 +6294,13 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = compile_tree(root, reg, &scan_env); if (r == 0) { + if (scan_env.keep_num > 0) { + r = add_opcode(reg, OP_UPDATE_VAR); + if (r != 0) goto err; + r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST); + if (r != 0) goto err; + } + r = add_opcode(reg, OP_END); #ifdef USE_CALL if 
(scan_env.num_call > 0) { @@ -6286,8 +6362,8 @@ static int onig_inited = 0; extern int onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, - OnigEncoding enc, OnigSyntaxType* syntax) + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, OnigSyntaxType* syntax) { int r; @@ -6603,6 +6679,8 @@ OnigOpInfoType OnigOpInfo[] = { { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, { OP_CALL, "call", ARG_ABSADDR }, { OP_RETURN, "return", ARG_NON }, + { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, + { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, @@ -6902,6 +6980,23 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, p_rel_addr(f, addr, bp, start); break; + case OP_PUSH_SAVE_VAL: + { + SaveType type; + GET_SAVE_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + + case OP_UPDATE_VAR: + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, bp); + fprintf(f, ":%d", type); + } + break; + default: fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); @@ -7110,6 +7205,15 @@ print_indent_tree(FILE* f, Node* node, int indent) print_indent_tree(f, NODE_BODY(node), indent + add); break; + case NODE_GIMMICK: + fprintf(f, "<gimmick:%p> ", node); + switch (GIMMICK_(node)->type) { + case GIMMICK_KEEP: + fprintf(f, "keep:%d", GIMMICK_(node)->id); + break; + } + break; + default: fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node)); break; diff --git a/src/regexec.c b/src/regexec.c index 973f5b7..1a79309 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -783,7 +783,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\ StackType *k = stk;\ - while (1) {\ + while (k > stk_base) {\ k--;\
STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\ @@ -2877,18 +2877,29 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL); - GET_MEMNUM_INC(mem, p); /* mem: save id */ - STACK_PUSH_SAVE_VAL(mem, SAVE_KEEP, keep); + { + SaveType type; + GET_SAVE_TYPE_INC(type, p); + GET_MEMNUM_INC(mem, p); /* mem: save id */ + switch ((enum SaveType )type) { + case SAVE_KEEP: + STACK_PUSH_SAVE_VAL(mem, type, s); + break; + } + } MOP_OUT; continue; break; case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR); - GET_MEMNUM_INC(mem, p); /* mem: update var type */ - switch ((enum UpdateVarType )mem) { - case UPDATE_VAR_KEEP_FROM_STACK_LAST: - STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep); - break; + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, p); + switch ((enum UpdateVarType )type) { + case UPDATE_VAR_KEEP_FROM_STACK_LAST: + STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep); + break; + } } MOP_OUT; continue; diff --git a/src/regint.h b/src/regint.h index 773459f..2605e81 100644 --- a/src/regint.h +++ b/src/regint.h @@ -563,6 +563,8 @@ typedef int RepeatNumType; typedef int MemNumType; typedef short int StateCheckNumType; typedef void* PointerType; +typedef int SaveType; +typedef int UpdateVarType; #define SIZE_OPCODE 1 #define SIZE_RELADDR sizeof(RelAddrType) @@ -574,7 +576,8 @@ typedef void* PointerType; #define SIZE_OPTION sizeof(OnigOptionType) #define SIZE_CODE_POINT sizeof(OnigCodePoint) #define SIZE_POINTER sizeof(PointerType) - +#define SIZE_SAVE_TYPE sizeof(SaveType) +#define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType) #define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) #define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) @@ -584,6 +587,8 @@ typedef void* PointerType; #define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) #define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) 
#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) +#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType) +#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType) /* code point's address must be aligned address. */ #define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) @@ -625,6 +630,8 @@ typedef void* PointerType; #define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE #define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) #define SIZE_OP_RETURN SIZE_OPCODE +#define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM) +#define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE) #ifdef USE_COMBINATION_EXPLOSION_CHECK #define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) diff --git a/src/regparse.c b/src/regparse.c index cd3a4ce..73cb749 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -1044,6 +1044,7 @@ scan_env_clear(ScanEnv* env) env->has_recursion = 0; #endif env->parse_depth = 0; + env->keep_num = 0; env->save_num = 0; env->save_alloc_num = 0; env->saves = 0; @@ -1487,7 +1488,8 @@ node_new_keep(Node** node, ScanEnv* env) NODE_SET_TYPE(*node, NODE_GIMMICK); GIMMICK_(*node)->id = id; - GIMMICK_(*node)->type = SAVE_KEEP; + GIMMICK_(*node)->type = GIMMICK_KEEP; + env->keep_num++; return ONIG_NORMAL; } diff --git a/src/regparse.h b/src/regparse.h index 42a2b04..440487e 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -46,6 +46,10 @@ typedef enum { NODE_GIMMICK = 10 } NodeType; +enum GimmickType { + GIMMICK_KEEP = 0, +}; + /* node type bit */ #define NODE_TYPE2BIT(type) (1<<(type)) @@ -309,7 +313,7 @@ typedef struct { int status; int id; - int type; + enum GimmickType type; } GimmickNode; typedef struct _Node { @@ -389,6 +393,7 @@ typedef struct { #endif unsigned int parse_depth; + int keep_num; int save_num; int save_alloc_num; SaveItem* saves;