granicus.if.org Git - onig/commitdiff
remove OP_EXTENDED_GRAPHEME_CLUSTER and add OP_TEXT_SEGMENT_BOUNDARY
author K.Kosako <kkosako0@gmail.com>
Fri, 22 Mar 2019 05:38:39 +0000 (14:38 +0900)
committer K.Kosako <kkosako0@gmail.com>
Fri, 22 Mar 2019 05:38:39 +0000 (14:38 +0900)
src/regcomp.c
src/regexec.c
src/regint.h

index 5001de7a21a678d39b993c66b9427e87799f4182..cc6eaa259fd50216eb6c31fde00b137808671fc5 100644 (file)
@@ -1567,11 +1567,11 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
 #endif
 
   case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
-    r = add_op(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
-    break;
-
   case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
-    r = add_op(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+    r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);
+    if (r != 0) return r;
+    COP(reg)->text_segment_boundary.not =
+      (node->type == ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY ? 1 : 0);
     break;
 
   case ANCR_PREC_READ:
index 21181f2152e1611411273b9679039f06e2f7baf4..bad45670fdaaa3b93720ccf3f2427490ea2d1c74 100644 (file)
@@ -185,8 +185,7 @@ static OpInfoType OpInfo[] = {
   { OP_NO_WORD_BOUNDARY,      "not-word-boundary" },
   { OP_WORD_BEGIN,            "word-begin" },
   { OP_WORD_END,              "word-end"   },
-  { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,    "extended-grapheme-cluster-boundary" },
-  { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary" },
+  { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary" },
   { OP_BEGIN_BUF,             "begin-buf"  },
   { OP_END_BUF,               "end-buf"    },
   { OP_BEGIN_LINE,            "begin-line" },
@@ -577,6 +576,11 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
     break;
 #endif
 
+  case OP_TEXT_SEGMENT_BOUNDARY:
+    if (p->text_segment_boundary.not != 0)
+      fprintf(f, ":not");
+    break;
+
   case OP_FINISH:
   case OP_END:
   case OP_ANYCHAR:
@@ -587,8 +591,6 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
   case OP_WORD_ASCII:
   case OP_NO_WORD:
   case OP_NO_WORD_ASCII:
-  case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
-  case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
   case OP_BEGIN_BUF:
   case OP_END_BUF:
   case OP_BEGIN_LINE:
@@ -2495,8 +2497,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
   &&L_NO_WORD_BOUNDARY,
   &&L_WORD_BEGIN,
   &&L_WORD_END,
-  &&L_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
-  &&L_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
+  &&L_TEXT_SEGMENT_BOUNDARY,
   &&L_BEGIN_BUF,
   &&L_END_BUF,
   &&L_BEGIN_LINE,
@@ -3250,19 +3251,30 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
       goto fail;
 #endif
 
-    CASE_OP(EXTENDED_GRAPHEME_CLUSTER_BOUNDARY)
-      if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) {
-        INC_OP;
-        JUMP_OUT;
-      }
-      goto fail;
+    CASE_OP(TEXT_SEGMENT_BOUNDARY)
+      {
+        int is_break;
 
-    CASE_OP(NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY)
-      if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))
-        goto fail;
+        switch (p->text_segment_boundary.type) {
+        case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+          is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
+          break;
+        default:
+          goto bytecode_error;
+          break;
+        }
 
-      INC_OP;
-      JUMP_OUT;
+        if (p->text_segment_boundary.not != 0)
+          is_break = ! is_break;
+
+        if (is_break != 0) {
+          INC_OP;
+          JUMP_OUT;
+        }
+        else {
+          goto fail;
+        }
+      }
 
     CASE_OP(BEGIN_BUF)
       if (! ON_STR_BEGIN(s)) goto fail;
index b6892e3d1e3257ecefb195ef543db5dc3aa2f73e..783e3b37496ce779118dfb357c1a058e215f7b0b 100644 (file)
@@ -533,8 +533,7 @@ enum OpCode {
   OP_WORD_BEGIN,
   OP_WORD_END,
 
-  OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
-  OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
+  OP_TEXT_SEGMENT_BOUNDARY,
 
   OP_BEGIN_BUF,
   OP_END_BUF,
@@ -615,6 +614,11 @@ enum UpdateVarType {
   UPDATE_VAR_RIGHT_RANGE_INIT         = 4,
 };
 
+enum TextSegmentBoundaryType { /* selects which boundary test OP_TEXT_SEGMENT_BOUNDARY runs (stored in text_segment_boundary.type) */
+  EXTENDED_GRAPHEME_CLUSTER_BOUNDARY = 0, /* tested via onigenc_egcb_is_break_position(); NOTE(review): compile_anchor_node assigns only .not, so .type presumably relies on a zero-initialized op -- confirm */
+  WORD_BOUNDARY = 1, /* no case for this value in match_at in this change: it falls through to bytecode_error */
+};
+
 typedef int RelAddrType;
 typedef int AbsAddrType;
 typedef int LengthType;
@@ -837,6 +841,10 @@ typedef struct {
     struct {
       ModeType mode;
     } word_boundary; /* OP_WORD_BOUNDARY, OP_NO_WORD_BOUNDARY, OP_WORD_BEGIN, OP_WORD_END */
+    struct {
+      enum TextSegmentBoundaryType type; /* which boundary test match_at performs */
+      int not; /* non-zero inverts the test result; set for ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY */
+    } text_segment_boundary; /* OP_TEXT_SEGMENT_BOUNDARY */
     struct {
       union {
         MemNumType  n1; /* num == 1 */