]> granicus.if.org Git - vim/commitdiff
patch 8.2.3921: the way xdiff is used is inefficient v8.2.3921
authorLewis Russell <lewis6991@gmail.com>
Tue, 28 Dec 2021 13:54:41 +0000 (13:54 +0000)
committerBram Moolenaar <Bram@vim.org>
Tue, 28 Dec 2021 13:54:41 +0000 (13:54 +0000)
Problem:    The way xdiff is used is inefficient.
Solution:   Use hunk_func instead of the out_line callback. (Lewis Russell,
            closes #9344)

src/diff.c
src/version.c

index e9367b568a517efa05d2b78d083d0f7c70c30b55..ceb73a6bd9af0bbcffc6d65cd86dac902e46a8ad 100644 (file)
@@ -64,6 +64,14 @@ typedef struct {
     garray_T   dout_ga;      // used for internal diff
 } diffout_T;
 
+// used for recording hunks from xdiff
+typedef struct {
+    linenr_T lnum_orig;
+    long     count_orig;
+    linenr_T lnum_new;
+    long     count_new;
+} diffhunk_T;
+
 // two diff inputs and one result
 typedef struct {
     diffin_T   dio_orig;     // original file input
@@ -84,12 +92,12 @@ static int diff_cmp(char_u *s1, char_u *s2);
 #ifdef FEAT_FOLDING
 static void diff_fold_update(diff_T *dp, int skip_idx);
 #endif
-static void diff_read(int idx_orig, int idx_new, diffout_T *fname);
+static void diff_read(int idx_orig, int idx_new, diffio_T *dio);
 static void diff_copy_entry(diff_T *dprev, diff_T *dp, int idx_orig, int idx_new);
 static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp);
-static int parse_diff_ed(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new);
-static int parse_diff_unified(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new);
-static int xdiff_out(void *priv, mmbuffer_t *mb, int nbuf);
+static int parse_diff_ed(char_u *line, diffhunk_T *hunk);
+static int parse_diff_unified(char_u *line, diffhunk_T *hunk);
+static int xdiff_out(long start_a, long count_a, long start_b, long count_b, void *priv);
 
 #define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \
     for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next)
@@ -880,7 +888,7 @@ diff_try_update(
            continue;
 
        // Read the diff output and add each entry to the diff list.
-       diff_read(idx_orig, idx_new, &dio->dio_diff);
+       diff_read(idx_orig, idx_new, dio);
 
        clear_diffin(&dio->dio_new);
        clear_diffout(&dio->dio_diff);
@@ -1118,7 +1126,7 @@ diff_file_internal(diffio_T *diffio)
 
     emit_cfg.ctxlen = 0; // don't need any diff_context here
     emit_cb.priv = &diffio->dio_diff;
-    emit_cb.out_line = xdiff_out;
+    emit_cfg.hunk_func = xdiff_out;
     if (xdl_diff(&diffio->dio_orig.din_mmfile,
                &diffio->dio_new.din_mmfile,
                &param, &emit_cfg, &emit_cb) < 0)
@@ -1482,9 +1490,9 @@ diff_win_options(
     wp->w_p_crb = TRUE;
     if (!(diff_flags & DIFF_FOLLOWWRAP))
     {
-        if (!wp->w_p_diff)
+       if (!wp->w_p_diff)
            wp->w_p_wrap_save = wp->w_p_wrap;
-        wp->w_p_wrap = FALSE;
+       wp->w_p_wrap = FALSE;
     }
 # ifdef FEAT_FOLDING
     if (!wp->w_p_diff)
@@ -1549,7 +1557,7 @@ ex_diffoff(exarg_T *eap)
                if (!(diff_flags & DIFF_FOLLOWWRAP))
                {
                    if (!wp->w_p_wrap)
-                       wp->w_p_wrap = wp->w_p_wrap_save;
+                       wp->w_p_wrap = wp->w_p_wrap_save;
                }
 #ifdef FEAT_FOLDING
                free_string_option(wp->w_p_fdm);
@@ -1607,20 +1615,21 @@ ex_diffoff(exarg_T *eap)
 diff_read(
     int                idx_orig,       // idx of original file
     int                idx_new,        // idx of new file
-    diffout_T  *dout)          // diff output
+    diffio_T   *dio)           // diff output
 {
     FILE       *fd = NULL;
     int                line_idx = 0;
     diff_T     *dprev = NULL;
     diff_T     *dp = curtab->tp_first_diff;
     diff_T     *dn, *dpl;
+    diffout_T   *dout = &dio->dio_diff;
     char_u     linebuf[LBUFLEN];   // only need to hold the diff line
     char_u     *line;
     long       off;
     int                i;
-    linenr_T   lnum_orig, lnum_new;
-    long       count_orig, count_new;
     int                notset = TRUE;      // block "*dp" not set yet
+    diffhunk_T *hunk;
+
     enum {
        DIFF_ED,
        DIFF_UNIFIED,
@@ -1641,75 +1650,94 @@ diff_read(
        }
     }
 
-    for (;;)
+    if (!dio->dio_internal)
     {
-       if (fd == NULL)
-       {
-           if (line_idx >= dout->dout_ga.ga_len)
-               break;      // did last line
-           line = ((char_u **)dout->dout_ga.ga_data)[line_idx++];
-       }
-       else
-       {
-           if (vim_fgets(linebuf, LBUFLEN, fd))
-               break;          // end of file
-           line = linebuf;
-       }
-
-       if (diffstyle == DIFF_NONE)
+       hunk = ALLOC_ONE(diffhunk_T);
+       if (hunk == NULL)
        {
-           // Determine diff style.
-           // ed like diff looks like this:
-           // {first}[,{last}]c{first}[,{last}]
-           // {first}a{first}[,{last}]
-           // {first}[,{last}]d{first}
-           //
-           // unified diff looks like this:
-           // --- file1       2018-03-20 13:23:35.783153140 +0100
-           // +++ file2       2018-03-20 13:23:41.183156066 +0100
-           // @@ -1,3 +1,5 @@
-           if (isdigit(*line))
-               diffstyle = DIFF_ED;
-           else if ((STRNCMP(line, "@@ ", 3) == 0))
-              diffstyle = DIFF_UNIFIED;
-           else if ((STRNCMP(line, "--- ", 4) == 0)
-                   && (vim_fgets(linebuf, LBUFLEN, fd) == 0)
-                   && (STRNCMP(line, "+++ ", 4) == 0)
-                   && (vim_fgets(linebuf, LBUFLEN, fd) == 0)
-                   && (STRNCMP(line, "@@ ", 3) == 0))
-               diffstyle = DIFF_UNIFIED;
-           else
-               // Format not recognized yet, skip over this line.  Cygwin diff
-               // may put a warning at the start of the file.
-               continue;
+           emsg(_("E98: Cannot read diff output"));
+           return;
        }
+    }
 
-       if (diffstyle == DIFF_ED)
-       {
-           if (!isdigit(*line))
-               continue;       // not the start of a diff block
-           if (parse_diff_ed(line, &lnum_orig, &count_orig,
-                                               &lnum_new, &count_new) == FAIL)
-               continue;
-       }
-       else if (diffstyle == DIFF_UNIFIED)
+    for (;;)
+    {
+       if (dio->dio_internal)
        {
-           if (STRNCMP(line, "@@ ", 3)  != 0)
-               continue;       // not the start of a diff block
-           if (parse_diff_unified(line, &lnum_orig, &count_orig,
-                                               &lnum_new, &count_new) == FAIL)
-               continue;
+           if (line_idx >= dout->dout_ga.ga_len) {
+               break;      // did last line
+           }
+           hunk = ((diffhunk_T **)dout->dout_ga.ga_data)[line_idx++];
        }
        else
        {
-           emsg(_("E959: Invalid diff format."));
-           break;
+           if (fd == NULL)
+           {
+               if (line_idx >= dout->dout_ga.ga_len)
+                   break;          // did last line
+               line = ((char_u **)dout->dout_ga.ga_data)[line_idx++];
+           }
+           else
+           {
+               if (vim_fgets(linebuf, LBUFLEN, fd))
+                   break;              // end of file
+               line = linebuf;
+           }
+
+           if (diffstyle == DIFF_NONE)
+           {
+               // Determine diff style.
+               // ed like diff looks like this:
+               // {first}[,{last}]c{first}[,{last}]
+               // {first}a{first}[,{last}]
+               // {first}[,{last}]d{first}
+               //
+               // unified diff looks like this:
+               // --- file1       2018-03-20 13:23:35.783153140 +0100
+               // +++ file2       2018-03-20 13:23:41.183156066 +0100
+               // @@ -1,3 +1,5 @@
+               if (isdigit(*line))
+                   diffstyle = DIFF_ED;
+               else if ((STRNCMP(line, "@@ ", 3) == 0))
+                   diffstyle = DIFF_UNIFIED;
+               else if ((STRNCMP(line, "--- ", 4) == 0)
+                       && (vim_fgets(linebuf, LBUFLEN, fd) == 0)
+                       && (STRNCMP(line, "+++ ", 4) == 0)
+                       && (vim_fgets(linebuf, LBUFLEN, fd) == 0)
+                       && (STRNCMP(line, "@@ ", 3) == 0))
+                   diffstyle = DIFF_UNIFIED;
+               else
+                   // Format not recognized yet, skip over this line.  Cygwin
+                   // diff may put a warning at the start of the file.
+                   continue;
+           }
+
+           if (diffstyle == DIFF_ED)
+           {
+               if (!isdigit(*line))
+                   continue;   // not the start of a diff block
+               if (parse_diff_ed(line, hunk) == FAIL)
+                   continue;
+           }
+           else if (diffstyle == DIFF_UNIFIED)
+           {
+               if (STRNCMP(line, "@@ ", 3)  != 0)
+                   continue;   // not the start of a diff block
+               if (parse_diff_unified(line, hunk) == FAIL)
+                   continue;
+           }
+           else
+           {
+               emsg(_("E959: Invalid diff format."));
+               break;
+           }
        }
 
        // Go over blocks before the change, for which orig and new are equal.
        // Copy blocks from orig to new.
        while (dp != NULL
-               && lnum_orig > dp->df_lnum[idx_orig] + dp->df_count[idx_orig])
+               && hunk->lnum_orig > dp->df_lnum[idx_orig]
+                                                     + dp->df_count[idx_orig])
        {
            if (notset)
                diff_copy_entry(dprev, dp, idx_orig, idx_new);
@@ -1719,41 +1747,43 @@ diff_read(
        }
 
        if (dp != NULL
-               && lnum_orig <= dp->df_lnum[idx_orig] + dp->df_count[idx_orig]
-               && lnum_orig + count_orig >= dp->df_lnum[idx_orig])
+               && hunk->lnum_orig <= dp->df_lnum[idx_orig]
+                                                      + dp->df_count[idx_orig]
+               && hunk->lnum_orig + hunk->count_orig >= dp->df_lnum[idx_orig])
        {
            // New block overlaps with existing block(s).
            // First find last block that overlaps.
            for (dpl = dp; dpl->df_next != NULL; dpl = dpl->df_next)
-               if (lnum_orig + count_orig < dpl->df_next->df_lnum[idx_orig])
+               if (hunk->lnum_orig + hunk->count_orig
+                                            < dpl->df_next->df_lnum[idx_orig])
                    break;
 
            // If the newly found block starts before the old one, set the
            // start back a number of lines.
-           off = dp->df_lnum[idx_orig] - lnum_orig;
+           off = dp->df_lnum[idx_orig] - hunk->lnum_orig;
            if (off > 0)
            {
                for (i = idx_orig; i < idx_new; ++i)
                    if (curtab->tp_diffbuf[i] != NULL)
                        dp->df_lnum[i] -= off;
-               dp->df_lnum[idx_new] = lnum_new;
-               dp->df_count[idx_new] = count_new;
+               dp->df_lnum[idx_new] = hunk->lnum_new;
+               dp->df_count[idx_new] = hunk->count_new;
            }
            else if (notset)
            {
                // new block inside existing one, adjust new block
-               dp->df_lnum[idx_new] = lnum_new + off;
-               dp->df_count[idx_new] = count_new - off;
+               dp->df_lnum[idx_new] = hunk->lnum_new + off;
+               dp->df_count[idx_new] = hunk->count_new - off;
            }
            else
                // second overlap of new block with existing block
-               dp->df_count[idx_new] += count_new - count_orig
+               dp->df_count[idx_new] += hunk->count_new - hunk->count_orig
                    + dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]
                    - (dp->df_lnum[idx_orig] + dp->df_count[idx_orig]);
 
            // Adjust the size of the block to include all the lines to the
            // end of the existing block or the new diff, whatever ends last.
-           off = (lnum_orig + count_orig)
+           off = (hunk->lnum_orig + hunk->count_orig)
                         - (dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]);
            if (off < 0)
            {
@@ -1785,10 +1815,10 @@ diff_read(
            if (dp == NULL)
                goto done;
 
-           dp->df_lnum[idx_orig] = lnum_orig;
-           dp->df_count[idx_orig] = count_orig;
-           dp->df_lnum[idx_new] = lnum_new;
-           dp->df_count[idx_new] = count_new;
+           dp->df_lnum[idx_orig] = hunk->lnum_orig;
+           dp->df_count[idx_orig] = hunk->count_orig;
+           dp->df_lnum[idx_new] = hunk->lnum_new;
+           dp->df_count[idx_new] = hunk->count_new;
 
            // Set values for other buffers, these must be equal to the
            // original buffer, otherwise there would have been a change
@@ -1811,6 +1841,9 @@ diff_read(
     }
 
 done:
+    if (!dio->dio_internal)
+       vim_free(hunk);
+
     if (fd != NULL)
        fclose(fd);
 }
@@ -3137,10 +3170,7 @@ diff_lnum_win(linenr_T lnum, win_T *wp)
     static int
 parse_diff_ed(
        char_u      *line,
-       linenr_T    *lnum_orig,
-       long        *count_orig,
-       linenr_T    *lnum_new,
-       long        *count_new)
+       diffhunk_T  *hunk)
 {
     char_u *p;
     long    f1, l1, f2, l2;
@@ -3175,23 +3205,23 @@ parse_diff_ed(
 
     if (difftype == 'a')
     {
-       *lnum_orig = f1 + 1;
-       *count_orig = 0;
+       hunk->lnum_orig = f1 + 1;
+       hunk->count_orig = 0;
     }
     else
     {
-       *lnum_orig = f1;
-       *count_orig = l1 - f1 + 1;
+       hunk->lnum_orig = f1;
+       hunk->count_orig = l1 - f1 + 1;
     }
     if (difftype == 'd')
     {
-       *lnum_new = f2 + 1;
-       *count_new = 0;
+       hunk->lnum_new = f2 + 1;
+       hunk->count_new = 0;
     }
     else
     {
-       *lnum_new = f2;
-       *count_new = l2 - f2 + 1;
+       hunk->lnum_new = f2;
+       hunk->count_new = l2 - f2 + 1;
     }
     return OK;
 }
@@ -3203,10 +3233,7 @@ parse_diff_ed(
     static int
 parse_diff_unified(
        char_u      *line,
-       linenr_T    *lnum_orig,
-       long        *count_orig,
-       linenr_T    *lnum_new,
-       long        *count_new)
+       diffhunk_T  *hunk)
 {
     char_u *p;
     long    oldline, oldcount, newline, newcount;
@@ -3245,10 +3272,10 @@ parse_diff_unified(
        if (newline == 0)
            newline = 1;
 
-       *lnum_orig = oldline;
-       *count_orig = oldcount;
-       *lnum_new = newline;
-       *count_new = newcount;
+       hunk->lnum_orig = oldline;
+       hunk->count_orig = oldcount;
+       hunk->lnum_new = newline;
+       hunk->count_new = newcount;
 
        return OK;
     }
@@ -3261,26 +3288,27 @@ parse_diff_unified(
  * Stores the diff output in a grow array.
  */
     static int
-xdiff_out(void *priv, mmbuffer_t *mb, int nbuf)
+xdiff_out(
+       long start_a,
+       long count_a,
+       long start_b,
+       long count_b,
+       void *priv)
 {
     diffout_T  *dout = (diffout_T *)priv;
-    char_u     *p;
-
-    // The header line always comes by itself, text lines in at least two
-    // parts.  We drop the text part.
-    if (nbuf > 1)
-       return 0;
+    diffhunk_T *p = ALLOC_ONE(diffhunk_T);
 
-    // sanity check
-    if (STRNCMP(mb[0].ptr, "@@ ", 3)  != 0)
-       return 0;
+    if (p == NULL)
+       return -1;
 
     if (ga_grow(&dout->dout_ga, 1) == FAIL)
        return -1;
-    p = vim_strnsave((char_u *)mb[0].ptr, mb[0].size);
-    if (p == NULL)
-       return -1;
-    ((char_u **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p;
+
+    p->lnum_orig  = start_a + 1;
+    p->count_orig = count_a;
+    p->lnum_new   = start_b + 1;
+    p->count_new  = count_b;
+    ((diffhunk_T **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p;
     return 0;
 }
 
index 8963062fcd150424fb29076287e531fc2e0f38bc..96359c11df71c7224b744e9a840d2e000ad29da2 100644 (file)
@@ -749,6 +749,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    3921,
 /**/
     3920,
 /**/