patch 8.2.1933: cannot sort using locale ordering

author Bram Moolenaar <Bram@vim.org>

Sun, 1 Nov 2020 12:57:44 +0000 (13:57 +0100)

committer Bram Moolenaar <Bram@vim.org>

Sun, 1 Nov 2020 12:57:44 +0000 (13:57 +0100)
author Bram Moolenaar <Bram@vim.org>
Sun, 1 Nov 2020 12:57:44 +0000 (13:57 +0100)
committer Bram Moolenaar <Bram@vim.org>
Sun, 1 Nov 2020 12:57:44 +0000 (13:57 +0100)
diff --git a/runtime/doc/change.txt b/runtime/doc/change.txt

index fe62d1d95ffc1ec9eac688e83ff3fb0e5597868d..12e20a5ab1ebb415b926c044ceab613bbfb57c9f 100644 (file)
--- a/runtime/doc/change.txt
+++ b/runtime/doc/change.txt
@@ -1801,7 +1801,7 @@ Vim has a sorting function and a sorting command.  The sorting function can be
  found here: |sort()|, |uniq()|.
  
                                                         *:sor* *:sort*
-:[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/]
+:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
                         Sort lines in [range].  When no range is given all
                         lines are sorted.
  
@@ -1809,6 +1809,14 @@ found here: |sort()|, |uniq()|.
  
                         With [i] case is ignored.
  
+                       With [l] sort uses the current locale. See
+                       `language collate` to check or set the locale used
+                       for ordering. For example, with "en_US.UTF8",
+                       Ö will be ordered after O and before P,
+                       whereas with the Swedish locale "sv_SE.UTF8",
+                       it will be after Z.
+                       Case is typically ignored by the locale.
+
                         Options [n][f][x][o][b] are mutually exclusive.
  
                         With [n] sorting is done on the first decimal number
@@ -1875,8 +1883,7 @@ found here: |sort()|, |uniq()|.
  Note that using `:sort` with `:global` doesn't sort the matching lines, it's
  quite useless.
  
-The details about sorting depend on the library function used.  There is no
-guarantee that sorting obeys the current locale.  You will have to try it out.
+`:sort` does not use the current locale unless the l flag is used.
  Vim does do a "stable" sort.
  
  The sorting can be interrupted, but if you interrupt it too late in the
diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt

index b757e5f2f2ff796bcdc6538b7be3d88e177a809d..e487d3e28c6a62a2760fff14b20f70ad914b3467 100644 (file)
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -9700,6 +9700,13 @@ sort({list} [, {func} [, {dict}]])                       *sort()* *E702*
                 When {func} is given and it is '1' or 'i' then case is
                 ignored.
  
+               When {func} is given and it is 'l' then the current locale
+               is used for ordering. See `language collate` to check or set
+               the locale used for ordering.  For example, with "en_US.UTF8",
+               Ö will be ordered after O and before P, whereas with the
+               Swedish locale "sv_SE.UTF8", it will be after Z.
+               Case is typically ignored by the locale.
+
                 When {func} is given and it is 'n' then all items will be
                 sorted numerical (Implementation detail: This uses the
                 strtod() function to parse numbers, Strings, Lists, Dicts and
diff --git a/src/ex_cmds.c b/src/ex_cmds.c

index 8ddf238d34ba4586c81b86814cdf37134eb28c22..de4b806b27717c8c3e99e48b5648be4421039f33 100644 (file)
--- a/src/ex_cmds.c
+++ b/src/ex_cmds.c
@@ -277,6 +277,7 @@ linelen(int *has_tab)
  static char_u  *sortbuf1;
  static char_u  *sortbuf2;
  
+static int     sort_lc;        // sort using locale
  static int     sort_ic;        // ignore case
  static int     sort_nr;        // sort on number
  static int     sort_rx;        // sort on regex instead of skipping it
@@ -307,7 +308,13 @@ typedef struct
      } st_u;
  } sorti_T;
  
-static int sort_compare(const void *s1, const void *s2);
+    static int
+string_compare(const void *s1, const void *s2)	// compare two NUL-terminated strings according to the sort_lc / sort_ic flags
+{
+    if (sort_lc)	// locale collation is checked first; sort_ic is not consulted on this path
+       return strcoll((char *)s1, (char *)s2);
+    return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2);	// otherwise case-insensitive or plain comparison
+}
  
      static int
  sort_compare(const void *s1, const void *s2)
@@ -350,8 +357,7 @@ sort_compare(const void *s1, const void *s2)
                      l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
         sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = 0;
  
-       result = sort_ic ? STRICMP(sortbuf1, sortbuf2)
-                                                : STRCMP(sortbuf1, sortbuf2);
+       result = string_compare(sortbuf1, sortbuf2);
      }
  
      // If two lines have the same value, preserve the original line order.
@@ -398,7 +404,7 @@ ex_sort(exarg_T *eap)
      if (nrs == NULL)
         goto sortend;
  
-    sort_abort = sort_ic = sort_rx = sort_nr = 0;
+    sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0;
  #ifdef FEAT_FLOAT
      sort_flt = 0;
  #endif
@@ -409,6 +415,8 @@ ex_sort(exarg_T *eap)
             ;
         else if (*p == 'i')
             sort_ic = TRUE;
+       else if (*p == 'l')
+           sort_lc = TRUE;
         else if (*p == 'r')
             sort_rx = TRUE;
         else if (*p == 'n')
@@ -614,8 +622,7 @@ ex_sort(exarg_T *eap)
             change_occurred = TRUE;
  
         s = ml_get(get_lnum);
-       if (!unique || i == 0
-               || (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0)
+       if (!unique || i == 0 || string_compare(s, sortbuf1) != 0)
         {
             // Copy the line into a buffer, it may become invalid in
             // ml_append(). And it's needed for "unique".
diff --git a/src/list.c b/src/list.c

index 62c571c71413e120e8b7cb24273d9b6e1e385888..1da4f3d0c8b87386c658e672b29d3742c7e2b97d 100644 (file)
--- a/src/list.c
+++ b/src/list.c
@@ -1516,6 +1516,7 @@ typedef struct
  typedef struct
  {
      int                item_compare_ic;
+    int                item_compare_lc;
      int                item_compare_numeric;
      int                item_compare_numbers;
  #ifdef FEAT_FLOAT
@@ -1594,10 +1595,10 @@ item_compare(const void *s1, const void *s2)
         p2 = (char_u *)"";
      if (!sortinfo->item_compare_numeric)
      {
-       if (sortinfo->item_compare_ic)
-           res = STRICMP(p1, p2);
+       if (sortinfo->item_compare_lc)
+           res = strcoll((char *)p1, (char *)p2);
         else
-           res = STRCMP(p1, p2);
+           res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2);
      }
      else
      {
@@ -1706,6 +1707,7 @@ do_sort_uniq(typval_T *argvars, typval_T *rettv, int sort)
             goto theend;        // short list sorts pretty quickly
  
         info.item_compare_ic = FALSE;
+       info.item_compare_lc = FALSE;
         info.item_compare_numeric = FALSE;
         info.item_compare_numbers = FALSE;
  #ifdef FEAT_FLOAT
@@ -1773,6 +1775,11 @@ do_sort_uniq(typval_T *argvars, typval_T *rettv, int sort)
                         info.item_compare_func = NULL;
                         info.item_compare_ic = TRUE;
                     }
+                   else if (STRCMP(info.item_compare_func, "l") == 0)
+                   {
+                       info.item_compare_func = NULL;
+                       info.item_compare_lc = TRUE;
+                   }
                 }
             }
  
diff --git a/src/testdir/test_sort.vim b/src/testdir/test_sort.vim

index d76132ee57a694ea14341dceb85b0c604e08304e..93190a940ceb2da75c7cb376546d6d31c653ec0e 100644 (file)
--- a/src/testdir/test_sort.vim
+++ b/src/testdir/test_sort.vim
@@ -15,6 +15,25 @@ func Test_sort_strings()
    " numbers compared as strings
    call assert_equal([1, 2, 3], sort([3, 2, 1]))
    call assert_equal([13, 28, 3], sort([3, 28, 13]))
+
+  call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
+  \            sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ']))
+
+  call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
+  \            sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i'))
+
+  let lc = execute('language collate')
+  " With the following locales, the accented letters are ordered
+  " similarly to the unaccented letters...
+  if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
+    call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'],
+    \            sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
+  " ... whereas with a Swedish locale, the accented letters are ordered
+  " after Z.
+  elseif lc =~? '"sv.*utf-\?8"'
+    call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'],
+    \            sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
+  endif
  endfunc
  
  func Test_sort_numeric()
@@ -1204,6 +1223,57 @@ func Test_sort_cmd()
         \ },
         \ ]
  
+    " With the following locales, the accented letters are ordered
+    " similarly to the unaccented letters...
+    let lc = execute('language collate')
+    if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
+      let tests += [
+       \ {
+       \    'name' : 'sort with locale',
+       \    'cmd' : '%sort l',
+       \    'input' : [
+       \       'A',
+       \       'E',
+       \       'O',
+       \       'À',
+       \       'È',
+       \       'É',
+       \       'Ô',
+       \       'Œ',
+       \       'Z',
+       \       'a',
+       \       'e',
+       \       'o',
+       \       'à',
+       \       'è',
+       \       'é',
+       \       'ô',
+       \       'œ',
+       \       'z'
+       \    ],
+       \    'expected' : [
+       \       'a',
+       \       'A',
+       \       'à',
+       \       'À',
+       \       'e',
+       \       'E',
+       \       'é',
+       \       'É',
+       \       'è',
+       \       'È',
+       \       'o',
+       \       'O',
+       \       'ô',
+       \       'Ô',
+       \       'œ',
+       \       'Œ',
+       \       'z',
+       \       'Z'
+       \    ]
+       \ },
+       \ ]
+  endif
    if has('float')
      let tests += [
            \ {
diff --git a/src/version.c b/src/version.c

index 11d45607f119a6c56bc052467e7082cb305a8399..ca98d241c423adc8b82b2ddb492a92161673fdfd 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -750,6 +750,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    1933,
  /**/
      1932,
  /**/
author	Bram Moolenaar <Bram@vim.org>
	Sun, 1 Nov 2020 12:57:44 +0000 (13:57 +0100)
committer	Bram Moolenaar <Bram@vim.org>
	Sun, 1 Nov 2020 12:57:44 +0000 (13:57 +0100)
runtime/doc/change.txt		patch \| blob \| history
runtime/doc/eval.txt		patch \| blob \| history
src/ex_cmds.c		patch \| blob \| history
src/list.c		patch \| blob \| history
src/testdir/test_sort.vim		patch \| blob \| history
src/version.c		patch \| blob \| history