patch 8.2.0938: NFA regexp uses tolower() to compare ignore-case

author Bram Moolenaar <Bram@vim.org>

Tue, 9 Jun 2020 17:34:54 +0000 (19:34 +0200)

committer Bram Moolenaar <Bram@vim.org>

Tue, 9 Jun 2020 17:34:54 +0000 (19:34 +0200)
author Bram Moolenaar <Bram@vim.org>
Tue, 9 Jun 2020 17:34:54 +0000 (19:34 +0200)
committer Bram Moolenaar <Bram@vim.org>
Tue, 9 Jun 2020 17:34:54 +0000 (19:34 +0200)
diff --git a/src/diff.c b/src/diff.c

index 1b0ad47782181b1cff7d23249d570174f869efc0..bb95d5ed65f8831d70da0455ba321aba0ef315d3 100644 (file)
--- a/src/diff.c
+++ b/src/diff.c
@@ -747,7 +747,7 @@ diff_write_buffer(buf_T *buf, diffin_T *din)
  
                 // xdiff doesn't support ignoring case, fold-case the text.
                 c = PTR2CHAR(s);
-               c = enc_utf8 ? utf_fold(c) : MB_TOLOWER(c);
+               c = MB_CASEFOLD(c);
                 orig_len = mb_ptr2len(s);
                 if (mb_char2bytes(c, cbuf) != orig_len)
                     // TODO: handle byte length difference
diff --git a/src/macros.h b/src/macros.h

index 16421d26150987c3684fe19f512b52e3ce92790d..7604910a6d6e5a7adf5915f5a56638f1795ebc57 100644 (file)
--- a/src/macros.h
+++ b/src/macros.h
@@ -93,6 +93,7 @@
  #define MB_ISUPPER(c)  vim_isupper(c)
  #define MB_TOLOWER(c)  vim_tolower(c)
  #define MB_TOUPPER(c)  vim_toupper(c)
+#define MB_CASEFOLD(c) (enc_utf8 ? utf_fold(c) : MB_TOLOWER(c))
  
  // Use our own isdigit() replacement, because on MS-Windows isdigit() returns
  // non-zero for superscript 1.  Also avoids that isdigit() crashes for numbers
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c

index 465797dd3e9da284ca4c446f49aae3b9698ba878..a55750b19983d6e338c63dc229384c14edabd82e 100644 (file)
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -5459,7 +5459,7 @@ find_match_text(colnr_T startcol, int regstart, char_u *match_text)
         {
             c1 = PTR2CHAR(match_text + len1);
             c2 = PTR2CHAR(rex.line + col + len2);
-           if (c1 != c2 && (!rex.reg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2)))
+           if (c1 != c2 && (!rex.reg_ic || MB_CASEFOLD(c1) != MB_CASEFOLD(c2)))
             {
                 match = FALSE;
                 break;
@@ -6271,11 +6271,11 @@ nfa_regmatch(
                         }
                         if (rex.reg_ic)
                         {
-                           int curc_low = MB_TOLOWER(curc);
+                           int curc_low = MB_CASEFOLD(curc);
                             int done = FALSE;
  
                             for ( ; c1 <= c2; ++c1)
-                               if (MB_TOLOWER(c1) == curc_low)
+                               if (MB_CASEFOLD(c1) == curc_low)
                                 {
                                     result = result_if_matched;
                                     done = TRUE;
@@ -6287,8 +6287,8 @@ nfa_regmatch(
                     }
                     else if (state->c < 0 ? check_char_class(state->c, curc)
                                : (curc == state->c
-                                  || (rex.reg_ic && MB_TOLOWER(curc)
-                                                   == MB_TOLOWER(state->c))))
+                                  || (rex.reg_ic && MB_CASEFOLD(curc)
+                                                   == MB_CASEFOLD(state->c))))
                     {
                         result = result_if_matched;
                         break;
@@ -6713,7 +6713,7 @@ nfa_regmatch(
                 result = (c == curc);
  
                 if (!result && rex.reg_ic)
-                   result = MB_TOLOWER(c) == MB_TOLOWER(curc);
+                   result = MB_CASEFOLD(c) == MB_CASEFOLD(curc);
                 // If rex.reg_icombine is not set only skip over the character
                 // itself.  When it is set skip over composing characters.
                 if (result && enc_utf8 && !rex.reg_icombine)
@@ -6882,7 +6882,7 @@ nfa_regmatch(
                         // cheaper than adding a state that won't match.
                         c = PTR2CHAR(rex.input + clen);
                         if (c != prog->regstart && (!rex.reg_ic
-                              || MB_TOLOWER(c) != MB_TOLOWER(prog->regstart)))
+                            || MB_CASEFOLD(c) != MB_CASEFOLD(prog->regstart)))
                         {
  #ifdef ENABLE_LOG
                             fprintf(log_fd, "  Skipping start state, regstart does not match\n");
diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim

index e9d5507114bbe5791399d42d550ea2818c92a446..e8e67a62ce22a6a2991b84f23373c04fdff874ed 100644 (file)
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -355,4 +355,23 @@ func Test_ambiwidth()
    set regexpengine& ambiwidth&
  endfunc
  
+func Run_regexp_ignore_case()
+  call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))
+
+  call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
+  call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
+endfunc
+
+func Test_regexp_ignore_case()
+  set regexpengine=1
+  call Run_regexp_ignore_case()
+  set regexpengine=2
+  call Run_regexp_ignore_case()
+  set regexpengine&
+endfunc
+
  " vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c

index 922ab4605e556e35b9af8feb75be5d45ff52964c..9f1cd00aa9c923f5d5e3f29807c836557af5ab4a 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -754,6 +754,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    938,
  /**/
      937,
  /**/
author	Bram Moolenaar <Bram@vim.org>
	Tue, 9 Jun 2020 17:34:54 +0000 (19:34 +0200)
committer	Bram Moolenaar <Bram@vim.org>
	Tue, 9 Jun 2020 17:34:54 +0000 (19:34 +0200)
src/diff.c		patch \| blob \| history
src/macros.h		patch \| blob \| history
src/regexp_nfa.c		patch \| blob \| history
src/testdir/test_regexp_utf8.vim		patch \| blob \| history
src/version.c		patch \| blob \| history