patch 8.2.2607: strcharpart() cannot include composing characters

author Bram Moolenaar <Bram@vim.org>

Sun, 14 Mar 2021 18:46:45 +0000 (19:46 +0100)

committer Bram Moolenaar <Bram@vim.org>

Sun, 14 Mar 2021 18:46:45 +0000 (19:46 +0100)
author Bram Moolenaar <Bram@vim.org>
Sun, 14 Mar 2021 18:46:45 +0000 (19:46 +0100)
committer Bram Moolenaar <Bram@vim.org>
Sun, 14 Mar 2021 18:46:45 +0000 (19:46 +0100)
diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt

index 3e5c6ee44b1ec3da219c4b95bda5ba94503ae52c..723a7329b3fba4e4179faa21f46d13dfd06883ac 100644 (file)
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -1187,7 +1187,8 @@ byte under the cursor: >
  
  In Vim9 script:
  If expr8 is a String this results in a String that contains the expr1'th
-single character from expr8.  To use byte indexes use |strpart()|.
+single character (including any composing characters) from expr8.  To use byte
+indexes use |strpart()|.
  
  Index zero gives the first byte or character.  Careful: text column numbers
  start with one!
@@ -1217,8 +1218,9 @@ In legacy Vim script the indexes are byte indexes.  This doesn't recognize
  multibyte encodings, see |byteidx()| for computing the indexes.  If expr8 is
  a Number it is first converted to a String.
  
-In Vim9 script the indexes are character indexes.  To use byte indexes use
-|strpart()|.
+In Vim9 script the indexes are character indexes and include composing
+characters.  To use byte indexes use |strpart()|.  To use character indexes
+without including composing characters use |strcharpart()|.
  
  The item at index expr1b is included, it is inclusive.  For an exclusive index
  use the |slice()| function.
@@ -2924,7 +2926,7 @@ str2list({expr} [, {utf8}])       List    convert each character of {expr} to
  str2nr({expr} [, {base} [, {quoted}]])
                                 Number  convert String to Number
  strcharlen({expr})             Number  character length of the String {expr}
-strcharpart({str}, {start} [, {len}])
+strcharpart({str}, {start} [, {len} [, {skipcc}]])
                                 String  {len} characters of {str} at
                                         character {start}
  strchars({expr} [, {skipcc}])  Number  character count of the String {expr}
@@ -9919,7 +9921,7 @@ slice({expr}, {start} [, {end}])                  *slice()*
                 Similar to using a |slice| "expr[start : end]", but "end" is
                 used exclusive.  And for a string the indexes are used as
                 character indexes instead of byte indexes, like in
-               |vim9script|.
+               |vim9script|.  Also, composing characters are not counted.
                 When {end} is omitted the slice continues to the last item.
                 When {end} is -1 the last item is omitted.
  
@@ -10290,12 +10292,16 @@ strcharlen({expr})                                    *strcharlen()*
                         GetText()->strcharlen()
  
  
-strcharpart({src}, {start} [, {len}])                  *strcharpart()*
+strcharpart({src}, {start} [, {len} [, {skipcc}]])             *strcharpart()*
                 Like |strpart()| but using character index and length instead
-               of byte index and length.  Composing characters are counted
-               separately.
+               of byte index and length.
+               When {skipcc} is omitted or zero, composing characters are
+               counted separately.
+               When {skipcc} is set to 1, composing characters are ignored,
+               similar to |slice()|.
                 When a character index is used where a character does not
-               exist it is assumed to be one character.  For example: >
+               exist it is omitted and counted as one character.  For
+               example: >
                         strcharpart('abc', -1, 2)
  <              results in 'a'.
  
@@ -10309,7 +10315,7 @@ strchars({expr} [, {skipcc}])                                   *strchars()*
                 When {skipcc} is omitted or zero, composing characters are
                 counted separately.
                 When {skipcc} set to 1, Composing characters are ignored.
-               |strcharlen()| does the same.
+               |strcharlen()| always does this.
  
                 Also see |strlen()|, |strdisplaywidth()| and |strwidth()|.
  
diff --git a/src/evalfunc.c b/src/evalfunc.c

index 34369d77add16bfa76490512c0bbc4bfbdc2c97e..8fcdedbc0081741c9a03108843f623a7e0cf9997 100644 (file)
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -1575,7 +1575,7 @@ static funcentry_T global_functions[] =
                         ret_number,         f_str2nr},
      {"strcharlen",     1, 1, FEARG_1,      NULL,
                         ret_number,         f_strcharlen},
-    {"strcharpart",    2, 3, FEARG_1,      NULL,
+    {"strcharpart",    2, 4, FEARG_1,      NULL,
                         ret_string,         f_strcharpart},
      {"strchars",       1, 2, FEARG_1,      NULL,
                         ret_number,         f_strchars},
@@ -9316,6 +9316,7 @@ f_strcharpart(typval_T *argvars, typval_T *rettv)
      int                nchar;
      int                nbyte = 0;
      int                charlen;
+    int                skipcc = FALSE;
      int                len = 0;
      int                slen;
      int                error = FALSE;
@@ -9326,10 +9327,24 @@ f_strcharpart(typval_T *argvars, typval_T *rettv)
      nchar = (int)tv_get_number_chk(&argvars[1], &error);
      if (!error)
      {
+       if (argvars[2].v_type != VAR_UNKNOWN
+                                          && argvars[3].v_type != VAR_UNKNOWN)
+       {
+           skipcc = tv_get_bool(&argvars[3]);
+           if (skipcc < 0 || skipcc > 1)
+           {
+               semsg(_(e_using_number_as_bool_nr), skipcc);
+               return;
+           }
+       }
+
         if (nchar > 0)
             while (nchar > 0 && nbyte < slen)
             {
-               nbyte += MB_CPTR2LEN(p + nbyte);
+               if (skipcc)
+                   nbyte += mb_ptr2len(p + nbyte);
+               else
+                   nbyte += MB_CPTR2LEN(p + nbyte);
                 --nchar;
             }
         else
@@ -9344,7 +9359,12 @@ f_strcharpart(typval_T *argvars, typval_T *rettv)
                 if (off < 0)
                     len += 1;
                 else
-                   len += MB_CPTR2LEN(p + off);
+               {
+                   if (skipcc)
+                       len += mb_ptr2len(p + off);
+                   else
+                       len += MB_CPTR2LEN(p + off);
+               }
                 --charlen;
             }
         }
diff --git a/src/testdir/test_expr_utf8.vim b/src/testdir/test_expr_utf8.vim

index b5937b2087927d9c6bc5de8bebf8019cbf977f20..c6d2e4ed7e6cb679bb78e63aea06fbd2839cab00 100644 (file)
--- a/src/testdir/test_expr_utf8.vim
+++ b/src/testdir/test_expr_utf8.vim
@@ -31,6 +31,14 @@ func Test_strcharpart()
    call assert_equal('a', strcharpart('àxb', 0, 1))
    call assert_equal('̀', strcharpart('àxb', 1, 1))
    call assert_equal('x', strcharpart('àxb', 2, 1))
+
+
+  call assert_equal('a', strcharpart('àxb', 0, 1, 0))
+  call assert_equal('à', strcharpart('àxb', 0, 1, 1))
+  call assert_equal('x', strcharpart('àxb', 1, 1, 1))
+
+  call assert_fails("let v = strcharpart('abc', 0, 0, [])", 'E745:')
+  call assert_fails("let v = strcharpart('abc', 0, 0, 2)", 'E1023:')
  endfunc
  
  " vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c

index 918a34c98f2445e60fdf44c725d8814fff7d61c5..0b1ece6a6417f6dd0b018267006ba11817e777ed 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -750,6 +750,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    2607,
  /**/
      2606,
  /**/
author	Bram Moolenaar <Bram@vim.org>
	Sun, 14 Mar 2021 18:46:45 +0000 (19:46 +0100)
committer	Bram Moolenaar <Bram@vim.org>
	Sun, 14 Mar 2021 18:46:45 +0000 (19:46 +0100)
runtime/doc/eval.txt		patch | blob | history
src/evalfunc.c		patch | blob | history
src/testdir/test_expr_utf8.vim		patch | blob | history
src/version.c		patch | blob | history