patch 8.2.4538: the find_tags_in_file() function is too long

author Yegappan Lakshmanan <yegappan@yahoo.com>

Thu, 10 Mar 2022 18:36:54 +0000 (18:36 +0000)

committer Bram Moolenaar <Bram@vim.org>

Thu, 10 Mar 2022 18:36:54 +0000 (18:36 +0000)
author Yegappan Lakshmanan <yegappan@yahoo.com>
Thu, 10 Mar 2022 18:36:54 +0000 (18:36 +0000)
committer Bram Moolenaar <Bram@vim.org>
Thu, 10 Mar 2022 18:36:54 +0000 (18:36 +0000)
diff --git a/src/tag.c b/src/tag.c

index 65723ed57364e835b03985e29fcfdfdbcc1b7a38..aa0f6e5d7e050520acc710d581b8351e9a70328f 100644 (file)
--- a/src/tag.c
+++ b/src/tag.c
@@ -38,6 +38,68 @@ typedef struct tag_pointers
      linenr_T   tagline;        // "line:" value
  } tagptrs_T;
  
+/*
+ * Return values used when reading lines from a tags file.
+ */
+typedef enum
+{
+    TAGS_READ_SUCCESS = 1,
+    TAGS_READ_EOF,
+    TAGS_READ_IGNORE,
+} tags_read_status_T;
+
+/*
+ * States used during a tags search
+ */
+typedef enum
+{
+    TS_START,          // at start of file
+    TS_LINEAR,         // linear searching forward, till EOF
+    TS_BINARY,         // binary searching
+    TS_SKIP_BACK,      // skipping backwards
+    TS_STEP_FORWARD    // stepping forwards
+} tagsearch_state_T;   // Current search state
+
+/*
+ * Binary search file offsets in a tags file
+ */
+typedef struct
+{
+    off_T      low_offset;     // offset for first char of first line that
+                               // could match
+    off_T      high_offset;    // offset of char after last line that could
+                               // match
+    off_T      curr_offset;    // Current file offset in search range
+    off_T      curr_offset_used; // curr_offset used when skipping back
+    off_T      match_offset;   // Where the binary search found a tag
+    int        low_char;               // first char at low_offset
+    int        high_char;              // first char at high_offset
+} tagsearch_info_T;
+
+/*
+ * Return values used when matching tags against a pattern.
+ */
+typedef enum
+{
+    TAG_MATCH_SUCCESS = 1,
+    TAG_MATCH_FAIL,
+    TAG_MATCH_STOP,
+    TAG_MATCH_NEXT
+} tagmatch_status_T;
+
+/*
+ * Arguments used for matching tags read from a tags file against a pattern.
+ */
+typedef struct
+{
+    int        matchoff;               // tag match offset
+    int        match_re;               // TRUE if the tag matches a regexp
+    int        match_no_ic;            // TRUE if the tag matches with case
+    int        has_re;                 // regular expression used
+    int        sortic;                 // tags file sorted ignoring case (foldcase)
+    int        sort_error;             // tags file not sorted
+} findtags_match_args_T;
+
  /*
   * The matching tags are first stored in one of the hash tables.  In
   * which one depends on the priority of the match.
@@ -1577,13 +1639,20 @@ find_tagfunc_tags(
  /*
   * State information used during a tag search
   */
-typedef struct {
+typedef struct
+{
+    tagsearch_state_T  state;          // tag search state
      char_u     *tag_fname;             // name of the tag file
+    FILE       *fp;                    // current tags file pointer
      pat_T      orgpat;                 // holds unconverted pattern info
-    int                name_only;              // get only tag names
-    int                get_searchpat;  // used for 'showfulltag'
+    int                flags;                  // flags used for tag search
+    int                tag_file_sorted;        // !_TAG_FILE_SORTED value
+    int                get_searchpat;          // used for 'showfulltag'
      int                help_only;              // only search for help tags
+    vimconv_T  vimconv;
  #ifdef FEAT_MULTI_LANG
+    char_u     help_lang[3];           // lang of current tags file
+    int                help_pri;               // help language priority
      char_u     *help_lang_find;        // lang to be found
      int                is_txt;                 // flag of file extension
  #endif
@@ -1594,6 +1663,7 @@ typedef struct {
      char_u     *lbuf;                  // line buffer
      int                lbuf_size;              // length of lbuf
  #ifdef FEAT_EMACS_TAGS
+    int                is_etag;                // current file is emaces style
      char_u     *ebuf;                  // additional buffer for etag fname
  #endif
      int                match_count;            // number of matches found
@@ -1615,13 +1685,17 @@ findtags_state_init(
      int                mtt;
  
      st->tag_fname = alloc(MAXPATHL + 1);
+    st->fp = NULL;
      st->orgpat.pat = pat;
      st->orgpat.len = (int)STRLEN(pat);
      st->orgpat.regmatch.regprog = NULL;
+    st->flags = flags;
+    st->tag_file_sorted = NUL;
      st->help_only = (flags & TAG_HELP);
-    st->name_only = (flags & TAG_NAMES);
      st->get_searchpat = FALSE;
  #ifdef FEAT_MULTI_LANG
+    st->help_lang[0] = NUL;
+    st->help_pri = 0;
      st->help_lang_find = NULL;
      st->is_txt = FALSE;
  #endif
@@ -1669,66 +1743,63 @@ findtags_state_free(findtags_state_T *st)
  
  #ifdef FEAT_MULTI_LANG
  /*
- * Initialize the state for searching tags in a Vim help file.
+ * Initialize the language and priority used for searching tags in a Vim help
+ * file.
   * Returns TRUE to process the help file and FALSE to skip the file.
   */
      static int
-findtags_in_help_init(
-    findtags_state_T   *st,
-    int                        flags,
-    char_u             *help_lang,
-    int                        *help_pri)
+findtags_in_help_init(findtags_state_T *st)
  {
      int                i;
      char_u     *s;
  
-    // Keep en if the file extension is .txt
+    // Keep 'en' as the language if the file extension is '.txt'
      if (st->is_txt)
-       STRCPY(help_lang, "en");
+       STRCPY(st->help_lang, "en");
      else
      {
-       // Prefer help tags according to 'helplang'.  Put the
-       // two-letter language name in help_lang[].
+       // Prefer help tags according to 'helplang'.  Put the two-letter
+       // language name in help_lang[].
         i = (int)STRLEN(st->tag_fname);
         if (i > 3 && st->tag_fname[i - 3] == '-')
-           STRCPY(help_lang, st->tag_fname + i - 2);
+           STRCPY(st->help_lang, st->tag_fname + i - 2);
         else
-           STRCPY(help_lang, "en");
+           STRCPY(st->help_lang, "en");
      }
-    // When searching for a specific language skip tags files
-    // for other languages.
+    // When searching for a specific language skip tags files for other
+    // languages.
      if (st->help_lang_find != NULL
-           && STRICMP(help_lang, st->help_lang_find) != 0)
+           && STRICMP(st->help_lang, st->help_lang_find) != 0)
         return FALSE;
  
-    // For CTRL-] in a help file prefer a match with the same
-    // language.
-    if ((flags & TAG_KEEP_LANG)
+    // For CTRL-] in a help file prefer a match with the same language.
+    if ((st->flags & TAG_KEEP_LANG)
             && st->help_lang_find == NULL
             && curbuf->b_fname != NULL
             && (i = (int)STRLEN(curbuf->b_fname)) > 4
             && curbuf->b_fname[i - 1] == 'x'
             && curbuf->b_fname[i - 4] == '.'
-           && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0)
-       *help_pri = 0;
+           && STRNICMP(curbuf->b_fname + i - 3, st->help_lang, 2) == 0)
+       st->help_pri = 0;
      else
      {
-       *help_pri = 1;
+       // search for the language in 'helplang'
+       st->help_pri = 1;
         for (s = p_hlg; *s != NUL; ++s)
         {
-           if (STRNICMP(s, help_lang, 2) == 0)
+           if (STRNICMP(s, st->help_lang, 2) == 0)
                 break;
-           ++*help_pri;
+           ++st->help_pri;
             if ((s = vim_strchr(s, ',')) == NULL)
                 break;
         }
         if (s == NULL || *s == NUL)
         {
-           // Language not in 'helplang': use last, prefer English,
-           // unless found already.
-           ++*help_pri;
-           if (STRICMP(help_lang, "en") != 0)
-               ++*help_pri;
+           // Language not in 'helplang': use last, prefer English, unless
+           // found already.
+           ++st->help_pri;
+           if (STRICMP(st->help_lang, "en") != 0)
+               ++st->help_pri;
         }
      }
  
@@ -1738,18 +1809,15 @@ findtags_in_help_init(
  
  #ifdef FEAT_EVAL
  /*
- * Use the 'tagfunc' (if configured and enabled) to get the tags.
+ * Use the function set in 'tagfunc' (if configured and enabled) to get the
+ * tags.
   * Return OK if at least 1 tag has been successfully found, NOTDONE if the
   * 'tagfunc' is not used or the 'tagfunc' returns v:null and FAIL otherwise.
   */
      static int
-findtags_apply_tfu(
-    char_u             *pat,
-    findtags_state_T   *st,
-    int                        flags,
-    char_u             *buf_ffname)
+findtags_apply_tfu(findtags_state_T *st, char_u *pat, char_u *buf_ffname)
  {
-    int         use_tfu = ((flags & TAG_NO_TAGFUNC) == 0);
+    int         use_tfu = ((st->flags & TAG_NO_TAGFUNC) == 0);
      int                retval;
  
      if (!use_tfu || tfu_in_use || *curbuf->b_p_tfu == NUL)
@@ -1757,7 +1825,7 @@ findtags_apply_tfu(
  
      tfu_in_use = TRUE;
      retval = find_tagfunc_tags(pat, st->ga_match, &st->match_count,
-                                                       flags, buf_ffname);
+                                               st->flags, buf_ffname);
      tfu_in_use = FALSE;
  
      return retval;
@@ -1799,14 +1867,14 @@ emacs_tags_incstack_free(void)
   * then returns a pointer to the new tags file. The old file pointer is saved
   * in incstack.
   */
-    static FILE *
-emacs_tags_new_filename(findtags_state_T *st, FILE *fp, int *is_etag)
+    static void
+emacs_tags_new_filename(findtags_state_T *st)
  {
      char_u     *p;
      char_u     *fullpath_ebuf;
  
-    if (vim_fgets(st->ebuf, LSIZE, fp))
-       return fp;
+    if (vim_fgets(st->ebuf, LSIZE, st->fp))
+       return;
  
      for (p = st->ebuf; *p && *p != ','; p++)
         ;
@@ -1817,41 +1885,41 @@ emacs_tags_new_filename(findtags_state_T *st, FILE *fp, int *is_etag)
      // include statement. Skip the included tags file if it exceeds the
      // maximum.
      if (STRNCMP(p + 1, "include", 7) != 0 || incstack_idx >= INCSTACK_SIZE)
-       return fp;
+       return;
  
      // Save current "fp" and "tag_fname" in the stack.
      incstack[incstack_idx].etag_fname = vim_strsave(st->tag_fname);
      if (incstack[incstack_idx].etag_fname == NULL)
-       return fp;
+       return;
  
-    incstack[incstack_idx].fp = fp;
-    fp = NULL;
+    incstack[incstack_idx].fp = st->fp;
+    st->fp = NULL;
  
      // Figure out "tag_fname" and "fp" to use for
      // included file.
      fullpath_ebuf = expand_tag_fname(st->ebuf, st->tag_fname, FALSE);
      if (fullpath_ebuf != NULL)
      {
-       fp = mch_fopen((char *)fullpath_ebuf, "r");
-       if (fp != NULL)
+       st->fp = mch_fopen((char *)fullpath_ebuf, "r");
+       if (st->fp != NULL)
         {
             if (STRLEN(fullpath_ebuf) > LSIZE)
                 semsg(_(e_tag_file_path_truncated_for_str), st->ebuf);
             vim_strncpy(st->tag_fname, fullpath_ebuf, MAXPATHL);
             ++incstack_idx;
-           *is_etag = 0; // we can include anything
+           st->is_etag = FALSE; // we can include anything
         }
         vim_free(fullpath_ebuf);
      }
-    if (fp == NULL)
+    if (st->fp == NULL)
      {
         // Can't open the included file, skip it and
         // restore old value of "fp".
-       fp = incstack[incstack_idx].fp;
+       st->fp = incstack[incstack_idx].fp;
         vim_free(incstack[incstack_idx].etag_fname);
      }
  
-    return fp;
+    return;
  }
  
  /*
@@ -1862,15 +1930,15 @@ emacs_tags_new_filename(findtags_state_T *st, FILE *fp, int *is_etag)
   * continue with the parent tags file. Otherwise returns FALSE.
   */
      static int
-emacs_tags_file_eof(findtags_state_T *st, FILE **fp)
+emacs_tags_file_eof(findtags_state_T *st)
  {
      if (!incstack_idx) // reached end of file. stop processing.
         return FALSE;
  
      // reached the end of an included tags file. pop it.
      --incstack_idx;
-    fclose(*fp);       // end of this file ...
-    *fp = incstack[incstack_idx].fp;
+    fclose(st->fp);    // end of this file ...
+    st->fp = incstack[incstack_idx].fp;
      STRCPY(st->tag_fname, incstack[incstack_idx].etag_fname);
      vim_free(incstack[incstack_idx].etag_fname);
  
@@ -1942,48 +2010,490 @@ etag_fail:
  }
  #endif
  
+/*
+ * Read the next line from a tags file.
+ * Returns TAGS_READ_SUCCESS if a tags line is successfully read and should be
+ * processed.
+ * Returns TAGS_READ_EOF if the end of file is reached.
+ * Returns TAGS_READ_IGNORE if the current line should be ignored (used when
+ * reached end of a emacs included tags file)
+ */
+    static tags_read_status_T
+findtags_get_next_line(findtags_state_T *st, tagsearch_info_T *sinfo_p)
+{
+    int                eof;
+    off_T      offset;
+
+    // For binary search: compute the next offset to use.
+    if (st->state == TS_BINARY)
+    {
+       offset = sinfo_p->low_offset + ((sinfo_p->high_offset
+                                               - sinfo_p->low_offset) / 2);
+       if (offset == sinfo_p->curr_offset)
+           return TAGS_READ_EOF; // End the binary search without a match.
+       else
+           sinfo_p->curr_offset = offset;
+    }
+
+    // Skipping back (after a match during binary search).
+    else if (st->state == TS_SKIP_BACK)
+    {
+       sinfo_p->curr_offset -= st->lbuf_size * 2;
+       if (sinfo_p->curr_offset < 0)
+       {
+           sinfo_p->curr_offset = 0;
+           rewind(st->fp);
+           st->state = TS_STEP_FORWARD;
+       }
+    }
+
+    // When jumping around in the file, first read a line to find the
+    // start of the next line.
+    if (st->state == TS_BINARY || st->state == TS_SKIP_BACK)
+    {
+       // Adjust the search file offset to the correct position
+       sinfo_p->curr_offset_used = sinfo_p->curr_offset;
+       vim_fseek(st->fp, sinfo_p->curr_offset, SEEK_SET);
+       eof = vim_fgets(st->lbuf, st->lbuf_size, st->fp);
+       if (!eof && sinfo_p->curr_offset != 0)
+       {
+           sinfo_p->curr_offset = vim_ftell(st->fp);
+           if (sinfo_p->curr_offset == sinfo_p->high_offset)
+           {
+               // oops, gone a bit too far; try from low offset
+               vim_fseek(st->fp, sinfo_p->low_offset, SEEK_SET);
+               sinfo_p->curr_offset = sinfo_p->low_offset;
+           }
+           eof = vim_fgets(st->lbuf, st->lbuf_size, st->fp);
+       }
+       // skip empty and blank lines
+       while (!eof && vim_isblankline(st->lbuf))
+       {
+           sinfo_p->curr_offset = vim_ftell(st->fp);
+           eof = vim_fgets(st->lbuf, st->lbuf_size, st->fp);
+       }
+       if (eof)
+       {
+           // Hit end of file.  Skip backwards.
+           st->state = TS_SKIP_BACK;
+           sinfo_p->match_offset = vim_ftell(st->fp);
+           sinfo_p->curr_offset = sinfo_p->curr_offset_used;
+           return TAGS_READ_IGNORE;
+       }
+    }
+    // Not jumping around in the file: Read the next line.
+    else
+    {
+       // skip empty and blank lines
+       do
+       {
+#ifdef FEAT_CSCOPE
+           if (st->flags & TAG_CSCOPE)
+               eof = cs_fgets(st->lbuf, st->lbuf_size);
+           else
+#endif
+           {
+               sinfo_p->curr_offset = vim_ftell(st->fp);
+               eof = vim_fgets(st->lbuf, st->lbuf_size, st->fp);
+           }
+       } while (!eof && vim_isblankline(st->lbuf));
+
+       if (eof)
+       {
+#ifdef FEAT_EMACS_TAGS
+           if (emacs_tags_file_eof(st) == TRUE)
+           {
+               // an included tags file. Continue processing the parent
+               // tags file.
+               st->is_etag = TRUE;     // (only etags can include)
+               return TAGS_READ_IGNORE;
+           }
+#endif
+           return TAGS_READ_EOF;
+       }
+    }
+
+    return TAGS_READ_SUCCESS;
+}
+
  /*
   * Parse a tags file header line in 'st->lbuf'.
- * Returns TRUE to read the next header line and FALSE to process the line.
+ * Returns TRUE if the current line in st->lbuf is not a tags header line and
+ * should be parsed as a regular tag line. Returns FALSE if the line is a
+ * header line and the next header line should be read.
   */
      static int
-tags_file_hdr_parse(findtags_state_T *st, vimconv_T *vcp, int *sorted_file)
+findtags_hdr_parse(findtags_state_T *st)
  {
      char_u     *p;
  
+    // Header lines in a tags file start with "!_TAG_"
      if (STRNCMP(st->lbuf, "!_TAG_", 6) != 0)
         // Non-header item before the header, e.g. "!" itself.
-       return FALSE;
+       return TRUE;
  
-    // Read header line.
+    // Process the header line.
      if (STRNCMP(st->lbuf, "!_TAG_FILE_SORTED\t", 18) == 0)
-       *sorted_file = st->lbuf[18];
+       st->tag_file_sorted = st->lbuf[18];
      if (STRNCMP(st->lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0)
      {
-       // Prepare to convert every line from the specified
-       // encoding to 'encoding'.
+       // Prepare to convert every line from the specified encoding to
+       // 'encoding'.
         for (p = st->lbuf + 20; *p > ' ' && *p < 127; ++p)
             ;
         *p = NUL;
-       convert_setup(vcp, st->lbuf + 20, p_enc);
+       convert_setup(&st->vimconv, st->lbuf + 20, p_enc);
      }
  
      // Read the next line.  Unrecognized flags are ignored.
+    return FALSE;
+}
+
+/*
+ * Handler to initialize the state when starting to process a new tags file.
+ * Called in the TS_START state when finding tags from a tags file.
+ * Returns TRUE if the line read from the tags file should be parsed and
+ * FALSE if the line should be ignored.
+ */
+    static int
+findtags_start_state_handler(
+    findtags_state_T   *st,
+    int                        *sortic,
+    tagsearch_info_T   *sinfo_p)
+{
+#ifdef FEAT_CSCOPE
+    int                use_cscope = (st->flags & TAG_CSCOPE);
+#endif
+    int                noic = (st->flags & TAG_NOIC);
+    off_T      filesize;
+
+    // The header ends when the line sorts below "!_TAG_".  When
+    // case is folded lower case letters sort before "_".
+    if (STRNCMP(st->lbuf, "!_TAG_", 6) <= 0
+           || (st->lbuf[0] == '!' && ASCII_ISLOWER(st->lbuf[1])))
+       return findtags_hdr_parse(st);
+
+    // Headers ends.
+
+    // When there is no tag head, or ignoring case, need to do a
+    // linear search.
+    // When no "!_TAG_" is found, default to binary search.  If
+    // the tag file isn't sorted, the second loop will find it.
+    // When "!_TAG_FILE_SORTED" found: start binary search if
+    // flag set.
+    // For cscope, it's always linear.
+# ifdef FEAT_CSCOPE
+    if (st->linear || use_cscope)
+# else
+    if (st->linear)
+# endif
+       st->state = TS_LINEAR;
+    else if (st->tag_file_sorted == NUL)
+       st->state = TS_BINARY;
+    else if (st->tag_file_sorted == '1')
+       st->state = TS_BINARY;
+    else if (st->tag_file_sorted == '2')
+    {
+       st->state = TS_BINARY;
+       *sortic = TRUE;
+       st->orgpat.regmatch.rm_ic = (p_ic || !noic);
+    }
+    else
+       st->state = TS_LINEAR;
+
+    if (st->state == TS_BINARY && st->orgpat.regmatch.rm_ic && !*sortic)
+    {
+       // Binary search won't work for ignoring case, use linear
+       // search.
+       st->linear = TRUE;
+       st->state = TS_LINEAR;
+    }
+
+    // When starting a binary search, get the size of the file and
+    // compute the first offset.
+    if (st->state == TS_BINARY)
+    {
+       if (vim_fseek(st->fp, 0L, SEEK_END) != 0)
+           // can't seek, don't use binary search
+           st->state = TS_LINEAR;
+       else
+       {
+           // Get the tag file size (don't use mch_fstat(), it's
+           // not portable).  Don't use lseek(), it doesn't work
+           // properly on MacOS Catalina.
+           filesize = vim_ftell(st->fp);
+           vim_fseek(st->fp, 0L, SEEK_SET);
+
+           // Calculate the first read offset in the file.  Start
+           // the search in the middle of the file.
+           sinfo_p->low_offset = 0;
+           sinfo_p->low_char = 0;
+           sinfo_p->high_offset = filesize;
+           sinfo_p->curr_offset = 0;
+           sinfo_p->high_char = 0xff;
+       }
+       return FALSE;
+    }
+
      return TRUE;
  }
  
+/*
+ * Parse a tag line read from a tags file.
+ * Returns OK if a tags line is successfully parsed.
+ * Returns FAIL if an error is encountered.
+ */
+    static int
+findtags_parse_line(findtags_state_T *st, tagptrs_T *tagpp)
+{
+    int                status;
+
+    // Figure out where the different strings are in this line.
+    // For "normal" tags: Do a quick check if the tag matches.
+    // This speeds up tag searching a lot!
+    if (st->orgpat.headlen
+#ifdef FEAT_EMACS_TAGS
+           && !st->is_etag
+#endif
+       )
+    {
+       CLEAR_FIELD(*tagpp);
+       tagpp->tagname = st->lbuf;
+       tagpp->tagname_end = vim_strchr(st->lbuf, TAB);
+       if (tagpp->tagname_end == NULL)
+       {
+           // Corrupted tag line.
+           return FAIL;
+       }
+
+       // Can be a matching tag, isolate the file name and command.
+       tagpp->fname = tagpp->tagname_end + 1;
+       tagpp->fname_end = vim_strchr(tagpp->fname, TAB);
+       if (tagpp->fname_end == NULL)
+           status = FAIL;
+       else
+       {
+           tagpp->command = tagpp->fname_end + 1;
+           status = OK;
+       }
+    }
+    else
+       status = parse_tag_line(st->lbuf,
+#ifdef FEAT_EMACS_TAGS
+               st->is_etag,
+#endif
+               tagpp);
+
+    if (status == FAIL)
+       return FAIL;
+
+#ifdef FEAT_EMACS_TAGS
+    if (st->is_etag)
+       tagpp->fname = st->ebuf;
+#endif
+
+    return OK;
+}
+
+/*
+ * Initialize the structure used for tag matching.
+ */
+    static void
+findtags_matchargs_init(findtags_match_args_T *margs, int flags)
+{
+    margs->matchoff = 0;                       // match offset
+    margs->match_re = FALSE;                   // match with regexp
+    margs->match_no_ic = FALSE;                        // matches with case
+    margs->has_re = (flags & TAG_REGEXP);      // regexp used
+    margs->sortic = FALSE;                     // tag file sorted in nocase
+    margs->sort_error = FALSE;                 // tags file not sorted
+}
+
+/*
+ * Compares the tag name in 'tagpp->tagname' with a search pattern in
+ * 'st->orgpat.head'.
+ * Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag
+ * doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a
+ * binary search) and TAG_MATCH_STOP if all the tags are processed without a
+ * match. Uses the values in 'margs' for doing the comparison.
+ */
+    static tagmatch_status_T
+findtags_match_tag(
+    findtags_state_T   *st,
+    tagptrs_T          *tagpp,
+    findtags_match_args_T *margs,
+    tagsearch_info_T   *sinfo_p)
+{
+    int                match = FALSE;
+    int                cmplen;
+    int                i;
+    int                tagcmp;
+
+    // Skip this line if the length of the tag is different and
+    // there is no regexp, or the tag is too short.
+    if (st->orgpat.headlen
+#ifdef FEAT_EMACS_TAGS
+           && !st->is_etag
+#endif
+       )
+    {
+       cmplen = (int)(tagpp->tagname_end - tagpp->tagname);
+       if (p_tl != 0 && cmplen > p_tl)     // adjust for 'taglength'
+           cmplen = p_tl;
+       if (margs->has_re && st->orgpat.headlen < cmplen)
+           cmplen = st->orgpat.headlen;
+       else if (st->state == TS_LINEAR && st->orgpat.headlen != cmplen)
+           return TAG_MATCH_FAIL;
+
+       if (st->state == TS_BINARY)
+       {
+           // Simplistic check for unsorted tags file.
+           i = (int)tagpp->tagname[0];
+           if (margs->sortic)
+               i = (int)TOUPPER_ASC(tagpp->tagname[0]);
+           if (i < sinfo_p->low_char || i > sinfo_p->high_char)
+               margs->sort_error = TRUE;
+
+           // Compare the current tag with the searched tag.
+           if (margs->sortic)
+               tagcmp = tag_strnicmp(tagpp->tagname, st->orgpat.head,
+                                                       (size_t)cmplen);
+           else
+               tagcmp = STRNCMP(tagpp->tagname, st->orgpat.head, cmplen);
+
+           // A match with a shorter tag means to search forward.
+           // A match with a longer tag means to search backward.
+           if (tagcmp == 0)
+           {
+               if (cmplen < st->orgpat.headlen)
+                   tagcmp = -1;
+               else if (cmplen > st->orgpat.headlen)
+                   tagcmp = 1;
+           }
+
+           if (tagcmp == 0)
+           {
+               // We've located the tag, now skip back and search
+               // forward until the first matching tag is found.
+               st->state = TS_SKIP_BACK;
+               sinfo_p->match_offset = sinfo_p->curr_offset;
+               return TAG_MATCH_NEXT;
+           }
+           if (tagcmp < 0)
+           {
+               sinfo_p->curr_offset = vim_ftell(st->fp);
+               if (sinfo_p->curr_offset < sinfo_p->high_offset)
+               {
+                   sinfo_p->low_offset = sinfo_p->curr_offset;
+                   if (margs->sortic)
+                       sinfo_p->low_char = TOUPPER_ASC(tagpp->tagname[0]);
+                   else
+                       sinfo_p->low_char = tagpp->tagname[0];
+                   return TAG_MATCH_NEXT;
+               }
+           }
+           if (tagcmp > 0 && sinfo_p->curr_offset != sinfo_p->high_offset)
+           {
+               sinfo_p->high_offset = sinfo_p->curr_offset;
+               if (margs->sortic)
+                   sinfo_p->high_char = TOUPPER_ASC(tagpp->tagname[0]);
+               else
+                   sinfo_p->high_char = tagpp->tagname[0];
+               return TAG_MATCH_NEXT;
+           }
+
+           // No match yet and are at the end of the binary search.
+           return TAG_MATCH_STOP;
+       }
+       else if (st->state == TS_SKIP_BACK)
+       {
+           if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
+               st->state = TS_STEP_FORWARD;
+           else
+               // Have to skip back more.  Restore the curr_offset
+               // used, otherwise we get stuck at a long line.
+               sinfo_p->curr_offset = sinfo_p->curr_offset_used;
+           return TAG_MATCH_NEXT;
+       }
+       else if (st->state == TS_STEP_FORWARD)
+       {
+           if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
+           {
+               if ((off_T)vim_ftell(st->fp) > sinfo_p->match_offset)
+                   return TAG_MATCH_STOP;      // past last match
+               else
+                   return TAG_MATCH_NEXT;      // before first match
+           }
+       }
+       else
+           // skip this match if it can't match
+           if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
+               return TAG_MATCH_FAIL;
+    }
+
+    // First try matching with the pattern literally (also when it is
+    // a regexp).
+    cmplen = (int)(tagpp->tagname_end - tagpp->tagname);
+    if (p_tl != 0 && cmplen > p_tl)        // adjust for 'taglength'
+       cmplen = p_tl;
+    // if tag length does not match, don't try comparing
+    if (st->orgpat.len != cmplen)
+       match = FALSE;
+    else
+    {
+       if (st->orgpat.regmatch.rm_ic)
+       {
+           match =
+               (MB_STRNICMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+           if (match)
+               margs->match_no_ic =
+                   (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+       }
+       else
+           match = (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+    }
+
+    // Has a regexp: Also find tags matching regexp.
+    margs->match_re = FALSE;
+    if (!match && st->orgpat.regmatch.regprog != NULL)
+    {
+       int     cc;
+
+       cc = *tagpp->tagname_end;
+       *tagpp->tagname_end = NUL;
+       match = vim_regexec(&st->orgpat.regmatch, tagpp->tagname, (colnr_T)0);
+       if (match)
+       {
+           margs->matchoff = (int)(st->orgpat.regmatch.startp[0] -
+                                                       tagpp->tagname);
+           if (st->orgpat.regmatch.rm_ic)
+           {
+               st->orgpat.regmatch.rm_ic = FALSE;
+               margs->match_no_ic = vim_regexec(&st->orgpat.regmatch,
+                       tagpp->tagname, (colnr_T)0);
+               st->orgpat.regmatch.rm_ic = TRUE;
+           }
+       }
+       *tagpp->tagname_end = cc;
+       margs->match_re = TRUE;
+    }
+
+    return match ? TAG_MATCH_SUCCESS : TAG_MATCH_FAIL;
+}
+
  /*
   * Convert the encoding of a line read from a tags file in 'st->lbuf'.
   * Converting the pattern from 'enc' to the tags file encoding doesn't work,
- * because characters are not recognized.
+ * because characters are not recognized. The converted line is saved in
+ * st->lbuf.
   */
      static void
-findtags_string_convert(findtags_state_T *st, vimconv_T *vcp)
+findtags_string_convert(findtags_state_T *st)
  {
      char_u     *conv_line;
      int                len;
  
-    conv_line = string_convert(vcp, st->lbuf, NULL);
+    conv_line = string_convert(&st->vimconv, st->lbuf, NULL);
      if (conv_line == NULL)
         return;
  
@@ -2002,23 +2512,23 @@ findtags_string_convert(findtags_state_T *st, vimconv_T *vcp)
      }
  }
  
+/*
+ * Add a matching tag found in a tags file to st->ht_match and st->ga_match.
+ * Returns OK if successfully added the match and FAIL on memory allocation
+ * failure.
+ */
      static int
  findtags_add_match(
      findtags_state_T   *st,
-    tagptrs_T          *tagp,
+    tagptrs_T          *tagpp,
+    findtags_match_args_T   *margs,
      char_u             *buf_ffname,
-    int                        flags UNUSED,
-    hash_T             *hash,
-    int                        match_re,
-    int                        match_no_ic,
-    int                        matchoff,
-    int                        is_etag UNUSED,
-    char_u             *help_lang UNUSED,
-    int                        help_pri UNUSED)
+    hash_T             *hash)
  {
  #ifdef FEAT_CSCOPE
-    int                use_cscope = (flags & TAG_CSCOPE);
+    int                use_cscope = (st->flags & TAG_CSCOPE);
  #endif
+    int                name_only = (st->flags & TAG_NAMES);
      int                mtt;
      int                len = 0;
      int                is_current;             // file name matches
@@ -2039,15 +2549,14 @@ findtags_add_match(
         // Decide in which array to store this match.
         is_current = test_for_current(
  #ifdef FEAT_EMACS_TAGS
-               is_etag,
+               st->is_etag,
  #endif
-               tagp->fname, tagp->fname_end, st->tag_fname,
-               buf_ffname);
+               tagpp->fname, tagpp->fname_end, st->tag_fname, buf_ffname);
  #ifdef FEAT_EMACS_TAGS
         is_static = FALSE;
-       if (!is_etag)   // emacs tags are never static
+       if (!st->is_etag)       // emacs tags are never static
  #endif
-           is_static = test_for_static(tagp);
+           is_static = test_for_static(tagpp);
  
         // decide in which of the sixteen tables to store this
         // match
@@ -2065,9 +2574,9 @@ findtags_add_match(
             else
                 mtt = MT_GL_OTH;
         }
-       if (st->orgpat.regmatch.rm_ic && !match_no_ic)
+       if (st->orgpat.regmatch.rm_ic && !margs->match_no_ic)
             mtt += MT_IC_OFF;
-       if (match_re)
+       if (margs->match_re)
             mtt += MT_RE_OFF;
      }
  
@@ -2085,35 +2594,36 @@ findtags_add_match(
         // sorting it later.  The heuristic is ignored for
         // detecting duplicates.
         // The format is {tagname}@{lang}NUL{heuristic}NUL
-       *tagp->tagname_end = NUL;
-       len = (int)(tagp->tagname_end - tagp->tagname);
+       *tagpp->tagname_end = NUL;
+       len = (int)(tagpp->tagname_end - tagpp->tagname);
         mfp = alloc(sizeof(char_u) + len + 10 + ML_EXTRA + 1);
         if (mfp != NULL)
         {
             int heuristic;
  
             p = mfp;
-           STRCPY(p, tagp->tagname);
+           STRCPY(p, tagpp->tagname);
  #ifdef FEAT_MULTI_LANG
             p[len] = '@';
-           STRCPY(p + len + 1, help_lang);
+           STRCPY(p + len + 1, st->help_lang);
  #endif
  
-           heuristic = help_heuristic(tagp->tagname,
-                   match_re ? matchoff : 0, !match_no_ic);
+           heuristic = help_heuristic(tagpp->tagname,
+                               margs->match_re ? margs->matchoff : 0,
+                               !margs->match_no_ic);
  #ifdef FEAT_MULTI_LANG
-           heuristic += help_pri;
+           heuristic += st->help_pri;
  #endif
             sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
                     heuristic);
         }
-       *tagp->tagname_end = TAB;
+       *tagpp->tagname_end = TAB;
      }
-    else if (st->name_only)
+    else if (name_only)
      {
         if (st->get_searchpat)
         {
-           char_u *temp_end = tagp->command;
+           char_u *temp_end = tagpp->command;
  
             if (*temp_end == '/')
                 while (*temp_end && *temp_end != '\r'
@@ -2121,12 +2631,12 @@ findtags_add_match(
                         && *temp_end != '$')
                     temp_end++;
  
-           if (tagp->command + 2 < temp_end)
+           if (tagpp->command + 2 < temp_end)
             {
-               len = (int)(temp_end - tagp->command - 2);
+               len = (int)(temp_end - tagpp->command - 2);
                 mfp = alloc(len + 2);
                 if (mfp != NULL)
-                   vim_strncpy(mfp, tagp->command + 2, len);
+                   vim_strncpy(mfp, tagpp->command + 2, len);
             }
             else
                 mfp = NULL;
@@ -2134,10 +2644,10 @@ findtags_add_match(
         }
         else
         {
-           len = (int)(tagp->tagname_end - tagp->tagname);
+           len = (int)(tagpp->tagname_end - tagpp->tagname);
             mfp = alloc(sizeof(char_u) + len + 1);
             if (mfp != NULL)
-               vim_strncpy(mfp, tagp->tagname, len);
+               vim_strncpy(mfp, tagpp->tagname, len);
  
             // if wanted, re-read line to get long form too
             if (State & INSERT)
@@ -2161,7 +2671,7 @@ findtags_add_match(
         // Here <mtt> is the "mtt" value plus 1 to avoid NUL.
         len = (int)tag_fname_len + (int)STRLEN(st->lbuf) + 3;
  #ifdef FEAT_EMACS_TAGS
-       if (is_etag)
+       if (st->is_etag)
         {
             ebuf_len = STRLEN(st->ebuf);
             len += (int)ebuf_len + 1;
@@ -2183,7 +2693,7 @@ findtags_add_match(
             p[tag_fname_len + 1] = TAG_SEP;
             s = p + 1 + tag_fname_len + 1;
  #ifdef FEAT_EMACS_TAGS
-           if (is_etag)
+           if (st->is_etag)
             {
                 STRCPY(s, st->ebuf);
                 s[ebuf_len] = TAG_SEP;
@@ -2222,12 +2732,10 @@ findtags_add_match(
                 st->stop_searching = TRUE;
                 return FAIL;
             }
-           else
-           {
-               ((char_u **)(st->ga_match[mtt].ga_data))
-                   [st->ga_match[mtt].ga_len++] = mfp;
-               st->match_count++;
-           }
+
+           ((char_u **)(st->ga_match[mtt].ga_data))
+               [st->ga_match[mtt].ga_len++] = mfp;
+           st->match_count++;
         }
         else
             // duplicate tag, drop it
@@ -2238,117 +2746,37 @@ findtags_add_match(
  }
  
  /*
- * Search for tags matching 'st->orgpat.pat' in the 'st->tag_fname' tags file.
- * Information needed to search for the tags is in the 'st' state structure.
- * The matching tags are returned in 'st'.
- * Returns OK if successfully processed the file and FAIL on memory allocation
- * failure.
+ * Read and get all the tags from file st->tag_fname.
+ * Returns OK if all the tags are processed successfully and FAIL is a tag
+ * format error is encountered.
   */
      static int
-find_tags_in_file(
-    findtags_state_T   *st,
-    int                        flags,
-    char_u             *buf_ffname)
+findtags_get_all_tags(
+    findtags_state_T           *st,
+    findtags_match_args_T      *margs,
+    char_u                     *buf_ffname)
  {
-    FILE       *fp = NULL;
-    tagptrs_T  tagp;
-    int                eof = FALSE;            // found end-of-file
-    int                i;
-#ifdef FEAT_MULTI_LANG
-    int                help_pri = 0;
-    char_u     help_lang[3] = "";      // lang of current tags file
-#endif
-    int                tag_file_sorted = NUL;  // !_TAG_FILE_SORTED value
-    off_T      filesize;
-    int                tagcmp;
-    off_T      offset;
-    enum
-    {
-       TS_START,               // at start of file
-       TS_LINEAR,              // linear searching forward, till EOF
-       TS_BINARY,              // binary searching
-       TS_SKIP_BACK,           // skipping backwards
-       TS_STEP_FORWARD         // stepping forwards
-    } state;                   // Current search state
-    struct tag_search_info     // Binary search file offsets
-    {
-       off_T   low_offset;     // offset for first char of first line that
-                               // could match
-       off_T   high_offset;    // offset of char after last line that could
-                               // match
-       off_T   curr_offset;    // Current file offset in search range
-       off_T   curr_offset_used; // curr_offset used when skipping back
-       off_T   match_offset;   // Where the binary search found a tag
-       int     low_char;       // first char at low_offset
-       int     high_char;      // first char at high_offset
-    } search_info;
-
-    int                cmplen;
-    int                match;          // matches
-    int                match_no_ic = 0;// matches with rm_ic == FALSE
-    int                match_re;       // match with regexp
-    int                matchoff = 0;
-
-    int                is_etag;                // current file is emaces style
-
-    hash_T     hash = 0;
-
-    int                sort_error = FALSE;             // tags file not sorted
-    int                sortic = FALSE;                 // tag file sorted in nocase
-    int                noic = (flags & TAG_NOIC);
-    int                line_error = FALSE;             // syntax error
-    int                has_re = (flags & TAG_REGEXP);  // regexp used
+    tagptrs_T          tagp;
+    tagsearch_info_T   search_info;
+    int                        retval;
  #ifdef FEAT_CSCOPE
-    int                use_cscope = (flags & TAG_CSCOPE);
+    int                        use_cscope = (st->flags & TAG_CSCOPE);
  #endif
-    vimconv_T  vimconv;
-
-    vimconv.vc_type = CONV_NONE;
+    hash_T             hash = 0;
  
      // This is only to avoid a compiler warning for using search_info
      // uninitialised.
      CLEAR_FIELD(search_info);
  
-    // A file that doesn't exist is silently ignored.  Only when not a
-    // single file is found, an error message is given (further on).
-#ifdef FEAT_CSCOPE
-    if (use_cscope)
-       fp = NULL;          // avoid GCC warning
-    else
-#endif
-    {
-#ifdef FEAT_MULTI_LANG
-       if (curbuf->b_help)
-       {
-           if (!findtags_in_help_init(st, flags, help_lang, &help_pri))
-               return OK;
-       }
-#endif
-
-       if ((fp = mch_fopen((char *)st->tag_fname, "r")) == NULL)
-           return OK;
-
-       if (p_verbose >= 5)
-       {
-           verbose_enter();
-           smsg(_("Searching tags file %s"), st->tag_fname);
-           verbose_leave();
-       }
-    }
-    st->did_open = TRUE;       // remember that we found at least one file
-
-    state = TS_START;          // we're at the start of the file
-    is_etag = 0;               // default is: not emacs style
-
      // Read and parse the lines in the file one by one
      for (;;)
      {
         // check for CTRL-C typed, more often when jumping around
-       if (state == TS_BINARY || state == TS_SKIP_BACK)
+       if (st->state == TS_BINARY || st->state == TS_SKIP_BACK)
             line_breakcheck();
         else
             fast_breakcheck();
-       if ((flags & TAG_INS_COMP))     // Double brackets for gcc
+       if ((st->flags & TAG_INS_COMP)) // Double brackets for gcc
             ins_compl_check_keys(30, FALSE);
         if (got_int || ins_compl_interrupted())
         {
@@ -2364,209 +2792,43 @@ find_tags_in_file(
         }
         if (st->get_searchpat)
             goto line_read_in;
-       // For binary search: compute the next offset to use.
-       if (state == TS_BINARY)
-       {
-           offset = search_info.low_offset + ((search_info.high_offset
-                       - search_info.low_offset) / 2);
-           if (offset == search_info.curr_offset)
-               break;  // End the binary search without a match.
-           else
-               search_info.curr_offset = offset;
-       }
-
-       /*
-        * Skipping back (after a match during binary search).
-        */
-       else if (state == TS_SKIP_BACK)
-       {
-           search_info.curr_offset -= st->lbuf_size * 2;
-           if (search_info.curr_offset < 0)
-           {
-               search_info.curr_offset = 0;
-               rewind(fp);
-               state = TS_STEP_FORWARD;
-           }
-       }
  
-       /*
-        * When jumping around in the file, first read a line to find the
-        * start of the next line.
-        */
-       if (state == TS_BINARY || state == TS_SKIP_BACK)
-       {
-           // Adjust the search file offset to the correct position
-           search_info.curr_offset_used = search_info.curr_offset;
-           vim_fseek(fp, search_info.curr_offset, SEEK_SET);
-           eof = vim_fgets(st->lbuf, st->lbuf_size, fp);
-           if (!eof && search_info.curr_offset != 0)
-           {
-               search_info.curr_offset = vim_ftell(fp);
-               if (search_info.curr_offset == search_info.high_offset)
-               {
-                   // oops, gone a bit too far; try from low offset
-                   vim_fseek(fp, search_info.low_offset, SEEK_SET);
-                   search_info.curr_offset = search_info.low_offset;
-               }
-               eof = vim_fgets(st->lbuf, st->lbuf_size, fp);
-           }
-           // skip empty and blank lines
-           while (!eof && vim_isblankline(st->lbuf))
-           {
-               search_info.curr_offset = vim_ftell(fp);
-               eof = vim_fgets(st->lbuf, st->lbuf_size, fp);
-           }
-           if (eof)
-           {
-               // Hit end of file.  Skip backwards.
-               state = TS_SKIP_BACK;
-               search_info.match_offset = vim_ftell(fp);
-               search_info.curr_offset = search_info.curr_offset_used;
-               continue;
-           }
-       }
-
-       /*
-        * Not jumping around in the file: Read the next line.
-        */
-       else
-       {
-           // skip empty and blank lines
-           do
-           {
-#ifdef FEAT_CSCOPE
-               if (use_cscope)
-                   eof = cs_fgets(st->lbuf, st->lbuf_size);
-               else
-#endif
-               {
-                   search_info.curr_offset = vim_ftell(fp);
-                   eof = vim_fgets(st->lbuf, st->lbuf_size, fp);
-               }
-           } while (!eof && vim_isblankline(st->lbuf));
+       retval = findtags_get_next_line(st, &search_info);
+       if (retval == TAGS_READ_IGNORE)
+           continue;
+       if (retval == TAGS_READ_EOF)
+           break;
  
-           if (eof)
-           {
-#ifdef FEAT_EMACS_TAGS
-               if (emacs_tags_file_eof(st, &fp) == TRUE)
-               {
-                   // an included tags file. Continue processing the parent
-                   // tags file.
-                   is_etag = 1;        // (only etags can include)
-                   continue;
-               }
-#endif
-               break;                  // end of file
-           }
-       }
  line_read_in:
  
-       if (vimconv.vc_type != CONV_NONE)
-           findtags_string_convert(st, &vimconv);
+       if (st->vimconv.vc_type != CONV_NONE)
+           findtags_string_convert(st);
  
  #ifdef FEAT_EMACS_TAGS
-       /*
-        * Emacs tags line with CTRL-L: New file name on next line.
-        * The file name is followed by a ','.
-        * Remember etag file name in ebuf.
-        */
+       // Emacs tags line with CTRL-L: New file name on next line.
+       // The file name is followed by a ','.
+       // Remember etag file name in ebuf.
         if (*st->lbuf == Ctrl_L
  # ifdef FEAT_CSCOPE
                 && !use_cscope
  # endif
            )
         {
-           is_etag = 1;                // in case at the start
-           state = TS_LINEAR;
-           fp = emacs_tags_new_filename(st, fp, &is_etag);
+           st->is_etag = TRUE;         // in case at the start
+           st->state = TS_LINEAR;
+           emacs_tags_new_filename(st);
             continue;
         }
  #endif
  
-       /*
-        * When still at the start of the file, check for Emacs tags file
-        * format, and for "not sorted" flag.
-        */
-       if (state == TS_START)
+       // When still at the start of the file, check for Emacs tags file
+       // format, and for "not sorted" flag.
+       if (st->state == TS_START)
         {
-           // The header ends when the line sorts below "!_TAG_".  When
-           // case is folded lower case letters sort before "_".
-           if (STRNCMP(st->lbuf, "!_TAG_", 6) <= 0
-                   || (st->lbuf[0] == '!' && ASCII_ISLOWER(st->lbuf[1])))
-           {
-               if (tags_file_hdr_parse(st, &vimconv, &tag_file_sorted))
-                   // Read the next line.  Unrecognized flags are ignored.
-                   continue;
-
-               goto parse_line;
-           }
-
-           // Headers ends.
-
-           /*
-            * When there is no tag head, or ignoring case, need to do a
-            * linear search.
-            * When no "!_TAG_" is found, default to binary search.  If
-            * the tag file isn't sorted, the second loop will find it.
-            * When "!_TAG_FILE_SORTED" found: start binary search if
-            * flag set.
-            * For cscope, it's always linear.
-            */
-# ifdef FEAT_CSCOPE
-           if (st->linear || use_cscope)
-# else
-           if (st->linear)
-# endif
-               state = TS_LINEAR;
-           else if (tag_file_sorted == NUL)
-               state = TS_BINARY;
-           else if (tag_file_sorted == '1')
-               state = TS_BINARY;
-           else if (tag_file_sorted == '2')
-           {
-               state = TS_BINARY;
-               sortic = TRUE;
-               st->orgpat.regmatch.rm_ic = (p_ic || !noic);
-           }
-           else
-               state = TS_LINEAR;
-
-           if (state == TS_BINARY && st->orgpat.regmatch.rm_ic && !sortic)
-           {
-               // Binary search won't work for ignoring case, use linear
-               // search.
-               st->linear = TRUE;
-               state = TS_LINEAR;
-           }
-
-           // When starting a binary search, get the size of the file and
-           // compute the first offset.
-           if (state == TS_BINARY)
-           {
-               if (vim_fseek(fp, 0L, SEEK_END) != 0)
-                   // can't seek, don't use binary search
-                   state = TS_LINEAR;
-               else
-               {
-                   // Get the tag file size (don't use mch_fstat(), it's
-                   // not portable).  Don't use lseek(), it doesn't work
-                   // properly on MacOS Catalina.
-                   filesize = vim_ftell(fp);
-                   vim_fseek(fp, 0L, SEEK_SET);
-
-                   // Calculate the first read offset in the file.  Start
-                   // the search in the middle of the file.
-                   search_info.low_offset = 0;
-                   search_info.low_char = 0;
-                   search_info.high_offset = filesize;
-                   search_info.curr_offset = 0;
-                   search_info.high_char = 0xff;
-               }
+           if (findtags_start_state_handler(st, &margs->sortic, &search_info) == FALSE)
                 continue;
-           }
         }
  
-parse_line:
         // When the line is too long the NUL will not be in the
         // last-but-one byte (see vim_fgets()).
         // Has been reported for Mozilla JS with extremely long names.
@@ -2582,244 +2844,100 @@ parse_line:
             st->lbuf = alloc(st->lbuf_size);
             if (st->lbuf == NULL)
             {
-               if (fp != NULL)
-                   fclose(fp);
-               return FAIL;
+               if (st->fp != NULL)
+                   fclose(st->fp);
+               break;
             }
  
-           if (state == TS_STEP_FORWARD)
+           if (st->state == TS_STEP_FORWARD)
                 // Seek to the same position to read the same line again
-               vim_fseek(fp, search_info.curr_offset, SEEK_SET);
+               vim_fseek(st->fp, search_info.curr_offset, SEEK_SET);
             // this will try the same thing again, make sure the offset is
             // different
             search_info.curr_offset = 0;
             continue;
         }
  
-       /*
-        * Figure out where the different strings are in this line.
-        * For "normal" tags: Do a quick check if the tag matches.
-        * This speeds up tag searching a lot!
-        */
-       if (st->orgpat.headlen
-#ifdef FEAT_EMACS_TAGS
-               && !is_etag
-#endif
-          )
-       {
-           CLEAR_FIELD(tagp);
-           tagp.tagname = st->lbuf;
-           tagp.tagname_end = vim_strchr(st->lbuf, TAB);
-           if (tagp.tagname_end == NULL)
-           {
-               // Corrupted tag line.
-               line_error = TRUE;
-               break;
-           }
-
-           /*
-            * Skip this line if the length of the tag is different and
-            * there is no regexp, or the tag is too short.
-            */
-           cmplen = (int)(tagp.tagname_end - tagp.tagname);
-           if (p_tl != 0 && cmplen > p_tl)         // adjust for 'taglength'
-               cmplen = p_tl;
-           if (has_re && st->orgpat.headlen < cmplen)
-               cmplen = st->orgpat.headlen;
-           else if (state == TS_LINEAR && st->orgpat.headlen != cmplen)
-               continue;
+       if (findtags_parse_line(st, &tagp) == FAIL)
+           return FAIL;
  
-           if (state == TS_BINARY)
-           {
-               /*
-                * Simplistic check for unsorted tags file.
-                */
-               i = (int)tagp.tagname[0];
-               if (sortic)
-                   i = (int)TOUPPER_ASC(tagp.tagname[0]);
-               if (i < search_info.low_char || i > search_info.high_char)
-                   sort_error = TRUE;
+       retval = findtags_match_tag(st, &tagp, margs, &search_info);
+       if (retval == TAG_MATCH_NEXT)
+           continue;
+       if (retval == TAG_MATCH_STOP)
+           break;
  
-               /*
-                * Compare the current tag with the searched tag.
-                */
-               if (sortic)
-                   tagcmp = tag_strnicmp(tagp.tagname, st->orgpat.head,
-                           (size_t)cmplen);
-               else
-                   tagcmp = STRNCMP(tagp.tagname, st->orgpat.head, cmplen);
+       // If a match is found, add it to ht_match[] and ga_match[].
+       if (retval == TAG_MATCH_SUCCESS)
+       {
+           if (findtags_add_match(st, &tagp, margs, buf_ffname, &hash)
+                                                               == FAIL)
+               break;
+       }
+    } // forever
  
-               /*
-                * A match with a shorter tag means to search forward.
-                * A match with a longer tag means to search backward.
-                */
-               if (tagcmp == 0)
-               {
-                   if (cmplen < st->orgpat.headlen)
-                       tagcmp = -1;
-                   else if (cmplen > st->orgpat.headlen)
-                       tagcmp = 1;
-               }
+    return OK;
+}
  
-               if (tagcmp == 0)
-               {
-                   // We've located the tag, now skip back and search
-                   // forward until the first matching tag is found.
-                   state = TS_SKIP_BACK;
-                   search_info.match_offset = search_info.curr_offset;
-                   continue;
-               }
-               if (tagcmp < 0)
-               {
-                   search_info.curr_offset = vim_ftell(fp);
-                   if (search_info.curr_offset < search_info.high_offset)
-                   {
-                       search_info.low_offset = search_info.curr_offset;
-                       if (sortic)
-                           search_info.low_char =
-                               TOUPPER_ASC(tagp.tagname[0]);
-                       else
-                           search_info.low_char = tagp.tagname[0];
-                       continue;
-                   }
-               }
-               if (tagcmp > 0
-                       && search_info.curr_offset != search_info.high_offset)
-               {
-                   search_info.high_offset = search_info.curr_offset;
-                   if (sortic)
-                       search_info.high_char =
-                           TOUPPER_ASC(tagp.tagname[0]);
-                   else
-                       search_info.high_char = tagp.tagname[0];
-                   continue;
-               }
+/*
+ * Search for tags matching 'st->orgpat.pat' in the 'st->tag_fname' tags file.
+ * Information needed to search for the tags is in the 'st' state structure.
+ * The matching tags are returned in 'st'.
+ * Returns OK if successfully processed the file and FAIL on memory allocation
+ * failure.
+ */
+    static int
+findtags_in_file(findtags_state_T *st, char_u *buf_ffname)
+{
+    findtags_match_args_T margs;
+    int                line_error = FALSE;             // syntax error
+#ifdef FEAT_CSCOPE
+    int                use_cscope = FALSE;
+#endif
  
-               // No match yet and are at the end of the binary search.
-               break;
-           }
-           else if (state == TS_SKIP_BACK)
-           {
-               if (MB_STRNICMP(tagp.tagname, st->orgpat.head, cmplen) != 0)
-                   state = TS_STEP_FORWARD;
-               else
-                   // Have to skip back more.  Restore the curr_offset
-                   // used, otherwise we get stuck at a long line.
-                   search_info.curr_offset = search_info.curr_offset_used;
-               continue;
-           }
-           else if (state == TS_STEP_FORWARD)
-           {
-               if (MB_STRNICMP(tagp.tagname, st->orgpat.head, cmplen) != 0)
-               {
-                   if ((off_T)vim_ftell(fp) > search_info.match_offset)
-                       break;  // past last match
-                   else
-                       continue;       // before first match
-               }
-           }
-           else
-               // skip this match if it can't match
-               if (MB_STRNICMP(tagp.tagname, st->orgpat.head, cmplen) != 0)
-                   continue;
+    st->vimconv.vc_type = CONV_NONE;
+    st->tag_file_sorted = NUL;
+    st->fp = NULL;
+    findtags_matchargs_init(&margs, st->flags);
  
-           /*
-            * Can be a matching tag, isolate the file name and command.
-            */
-           tagp.fname = tagp.tagname_end + 1;
-           tagp.fname_end = vim_strchr(tagp.fname, TAB);
-           if (tagp.fname_end == NULL)
-               i = FAIL;
-           else
-           {
-               tagp.command = tagp.fname_end + 1;
-               i = OK;
-           }
-       }
-       else
-           i = parse_tag_line(st->lbuf,
-#ifdef FEAT_EMACS_TAGS
-                   is_etag,
+    // A file that doesn't exist is silently ignored.  Only when not a
+    // single file is found, an error message is given (further on).
+#ifdef FEAT_CSCOPE
+    use_cscope = (st->flags & TAG_CSCOPE);
+    if (use_cscope)
+       st->fp = NULL;      // avoid GCC warning
+    else
  #endif
-                   &tagp);
-       if (i == FAIL)
+    {
+#ifdef FEAT_MULTI_LANG
+       if (curbuf->b_help)
         {
-           line_error = TRUE;
-           break;
+           if (!findtags_in_help_init(st))
+               return OK;
         }
-
-#ifdef FEAT_EMACS_TAGS
-       if (is_etag)
-           tagp.fname = st->ebuf;
  #endif
-       /*
-        * First try matching with the pattern literally (also when it is
-        * a regexp).
-        */
-       cmplen = (int)(tagp.tagname_end - tagp.tagname);
-       if (p_tl != 0 && cmplen > p_tl)     // adjust for 'taglength'
-           cmplen = p_tl;
-       // if tag length does not match, don't try comparing
-       if (st->orgpat.len != cmplen)
-           match = FALSE;
-       else
-       {
-           if (st->orgpat.regmatch.rm_ic)
-           {
-               match = (MB_STRNICMP(tagp.tagname, st->orgpat.pat, cmplen) == 0);
-               if (match)
-                   match_no_ic = (STRNCMP(tagp.tagname, st->orgpat.pat,
-                               cmplen) == 0);
-           }
-           else
-               match = (STRNCMP(tagp.tagname, st->orgpat.pat, cmplen) == 0);
-       }
  
-       /*
-        * Has a regexp: Also find tags matching regexp.
-        */
-       match_re = FALSE;
-       if (!match && st->orgpat.regmatch.regprog != NULL)
-       {
-           int cc;
-
-           cc = *tagp.tagname_end;
-           *tagp.tagname_end = NUL;
-           match = vim_regexec(&st->orgpat.regmatch, tagp.tagname, (colnr_T)0);
-           if (match)
-           {
-               matchoff = (int)(st->orgpat.regmatch.startp[0] - tagp.tagname);
-               if (st->orgpat.regmatch.rm_ic)
-               {
-                   st->orgpat.regmatch.rm_ic = FALSE;
-                   match_no_ic = vim_regexec(&st->orgpat.regmatch, tagp.tagname,
-                           (colnr_T)0);
-                   st->orgpat.regmatch.rm_ic = TRUE;
-               }
-           }
-           *tagp.tagname_end = cc;
-           match_re = TRUE;
-       }
+       st->fp = mch_fopen((char *)st->tag_fname, "r");
+       if (st->fp == NULL)
+           return OK;
  
-       // If a match is found, add it to ht_match[] and ga_match[].
-       if (match)
+       if (p_verbose >= 5)
         {
-           if (findtags_add_match(st, &tagp, buf_ffname, flags, &hash,
-                       match_re, match_no_ic, matchoff,
-                       is_etag,
-#ifdef FEAT_MULTI_LANG
-                       help_lang, help_pri
-#else
-                       (char_u *)"", 0
-#endif
-                       ) == FAIL)
-               break;
+           verbose_enter();
+           smsg(_("Searching tags file %s"), st->tag_fname);
+           verbose_leave();
         }
-#ifdef FEAT_CSCOPE
-       if (use_cscope && eof)
-           break;
+    }
+    st->did_open = TRUE;       // remember that we found at least one file
+
+    st->state = TS_START;      // we're at the start of the file
+#ifdef FEAT_EMACS_TAGS
+    st->is_etag = FALSE;       // default is: not emacs style
  #endif
-    } // forever
+
+    // Read and parse the lines in the file one by one
+    if (findtags_get_all_tags(st, &margs, buf_ffname) == FAIL)
+       line_error = TRUE;
  
      if (line_error)
      {
@@ -2827,31 +2945,23 @@ parse_line:
  #ifdef FEAT_CSCOPE
         if (!use_cscope)
  #endif
-           semsg(_("Before byte %ld"), (long)vim_ftell(fp));
+           semsg(_("Before byte %ld"), (long)vim_ftell(st->fp));
         st->stop_searching = TRUE;
         line_error = FALSE;
      }
  
-#ifdef FEAT_CSCOPE
-    if (!use_cscope)
-#endif
-       fclose(fp);
+    if (st->fp != NULL)
+       fclose(st->fp);
  #ifdef FEAT_EMACS_TAGS
      emacs_tags_incstack_free();
  #endif
-    if (vimconv.vc_type != CONV_NONE)
-       convert_setup(&vimconv, NULL, NULL);
+    if (st->vimconv.vc_type != CONV_NONE)
+       convert_setup(&st->vimconv, NULL, NULL);
  
-    tag_file_sorted = NUL;
-    if (sort_error)
-    {
+    if (margs.sort_error)
         semsg(_(e_tags_file_not_sorted_str), st->tag_fname);
-       sort_error = FALSE;
-    }
  
-    /*
-     * Stop searching if sufficient tags have been found.
-     */
+    // Stop searching if sufficient tags have been found.
      if (st->match_count >= st->mincount)
         st->stop_searching = TRUE;
  
@@ -2860,13 +2970,12 @@ parse_line:
  
  /*
   * Copy the tags found by find_tags() to 'matchesp'.
+ * Returns the number of matches copied.
   */
-    static void
-findtags_copy_matches(
-    findtags_state_T   *st,
-    char_u             ***matchesp,
-    int                        *num_matches)
+    static int
+findtags_copy_matches(findtags_state_T *st, char_u ***matchesp)
  {
+    int                name_only = (st->flags & TAG_NAMES);
      char_u     **matches;
      int                mtt;
      int                i;
@@ -2887,7 +2996,7 @@ findtags_copy_matches(
                 vim_free(mfp);
             else
             {
-               if (!st->name_only)
+               if (!name_only)
                 {
                     // Change mtt back to zero-based.
                     *mfp = *mfp - 1;
@@ -2906,7 +3015,7 @@ findtags_copy_matches(
      }
  
      *matchesp = matches;
-    *num_matches = st->match_count;
+    return st->match_count;
  }
  
  /*
@@ -2941,7 +3050,7 @@ find_tags(
      int                *num_matches,           // return: number of matches found
      char_u     ***matchesp,            // return: array of matches found
      int                flags,
-    int                mincount,               //  MAXCOL: find all matches
+    int                mincount,               // MAXCOL: find all matches
                                         // other: minimal number of matches
      char_u     *buf_ffname)            // name of buffer for priority
  {
@@ -3035,7 +3144,7 @@ find_tags(
         goto findtag_end;
  
  #ifdef FEAT_EVAL
-    retval = findtags_apply_tfu(pat, &st, flags, buf_ffname);
+    retval = findtags_apply_tfu(&st, pat, buf_ffname);
      if (retval != NOTDONE)
         goto findtag_end;
  
@@ -3079,7 +3188,7 @@ find_tags(
                 get_tagfname(&tn, first_file, st.tag_fname) == OK;
                                                            first_file = FALSE)
        {
-         if (find_tags_in_file(&st, flags, buf_ffname) == FAIL)
+         if (findtags_in_file(&st, buf_ffname) == FAIL)
               goto findtag_end;
           if (st.stop_searching
  #ifdef FEAT_CSCOPE
@@ -3128,7 +3237,7 @@ findtag_end:
      if (retval == FAIL)
         st.match_count = 0;
  
-    findtags_copy_matches(&st, matchesp, num_matches);
+    *num_matches = findtags_copy_matches(&st, matchesp);
  
      curbuf->b_help = help_save;
  #ifdef FEAT_MULTI_LANG
diff --git a/src/testdir/test_tagjump.vim b/src/testdir/test_tagjump.vim

index 95652ef65fc614f8c0ae7bc4dfcb28b9e19a079a..1e5365c794e8da30580aac7bd0807dd1c4ffa538 100644 (file)
--- a/src/testdir/test_tagjump.vim
+++ b/src/testdir/test_tagjump.vim
@@ -811,11 +811,11 @@ endfunc
  
  " Test for an unsorted tags file
  func Test_tag_sort()
-  call writefile([
+  let l = [
          \ "first\tXfoo\t1",
          \ "ten\tXfoo\t3",
-        \ "six\tXfoo\t2"],
-        \ 'Xtags')
+        \ "six\tXfoo\t2"]
+  call writefile(l, 'Xtags')
    set tags=Xtags
    let code =<< trim [CODE]
      int first() {}
@@ -826,7 +826,14 @@ func Test_tag_sort()
  
    call assert_fails('tag first', 'E432:')
  
+  " When multiple tag files are not sorted, then message should be displayed
+  " multiple times
+  call writefile(l, 'Xtags2')
+  set tags=Xtags,Xtags2
+  call assert_fails('tag first', ['E432:', 'E432:'])
+
    call delete('Xtags')
+  call delete('Xtags2')
    call delete('Xfoo')
    set tags&
    %bwipe
diff --git a/src/version.c b/src/version.c

index 6aa798f67e1a34f8187e83a9522e4d449a6e640f..48f2f012a7b9d3bc2f028deaf24fd17b0b311cb1 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -750,6 +750,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    4538,
  /**/
      4537,
  /**/
author	Yegappan Lakshmanan <yegappan@yahoo.com>
	Thu, 10 Mar 2022 18:36:54 +0000 (18:36 +0000)
committer	Bram Moolenaar <Bram@vim.org>
	Thu, 10 Mar 2022 18:36:54 +0000 (18:36 +0000)
src/tag.c		patch \| blob \| history
src/testdir/test_tagjump.vim		patch \| blob \| history
src/version.c		patch \| blob \| history