From: Aron Griffis <agriffis@n01se.net>
Date: Thu, 10 Jul 2008 23:30:00 +0000 (-0400)
Subject: Unify parser for message-ids
X-Git-Tag: mutt-1-5-19-rel~174
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5a27978a5767a0174193cfc653d2e79a1145ad8e;p=mutt

Unify parser for message-ids

Rewrite mutt_extract_message_id and change mutt_parse_references to use it.  The
parser in mutt_extract_message_id is looser than the old one in
mutt_parse_references; it just looks for <[^\s>]+> and doesn't worry about
@-signs.  Additionally it doesn't use strtok, so the input string can be const.

Closes #3090, #1935, #1116 (both the stated bug and the conversation following).

Signed-off-by: Aron Griffis <agriffis@n01se.net>
---

diff --git a/parse.c b/parse.c
index 6c43a8e6..badadbc1 100644
--- a/parse.c
+++ b/parse.c
@@ -92,66 +92,17 @@ char *mutt_read_rfc822_line (FILE *f, char *line, size_t *linelen)
 static LIST *mutt_parse_references (char *s, int in_reply_to)
 {
   LIST *t, *lst = NULL;
-  int m, n = 0;
-  char *o = NULL, *new, *at;
+  char *m, *sp;
 
-  while ((s = strtok (s, " \t;")) != NULL)
+  for (; (m = mutt_extract_message_id (s, &sp)) != NULL; s = NULL)
   {
-    /*
-     * some mail clients add other garbage besides message-ids, so do a quick
-     * check to make sure this looks like a valid message-id
-     * some idiotic clients also break their message-ids between lines, deal
-     * with that too (give up if it's more than two lines, though)
-     */
-    t = NULL;
-    new = NULL;
-
-    if (*s == '<')
-    {
-      n = strlen (s);
-      if (s[n-1] != '>')
-      {
-	o = s;
-	s = NULL;
-	continue;
-      }
-
-      new = safe_strdup (s);
-    }
-    else if (o)
-    {
-      m = strlen (s);
-      if (s[m - 1] == '>')
-      {
-	new = safe_malloc (sizeof (char) * (n + m + 1));
-	strcpy (new, o);	/* __STRCPY_CHECKED__ */
-	strcpy (new + n, s);	/* __STRCPY_CHECKED__ */
-      }
-    }
-    if (new)
-    {
-      /* make sure that this really does look like a message-id.
-       * it should have exactly one @, and if we're looking at
-       * an in-reply-to header, make sure that the part before
-       * the @ has more than eight characters or it's probably
-       * an email address
-       */
-      if (!(at = strchr (new, '@')) || strchr (at + 1, '@')
-	  || (in_reply_to && at - new <= 8))
-	FREE (&new);
-      else
-      {
-	t = (LIST *) safe_malloc (sizeof (LIST));
-	t->data = new;
-	t->next = lst;
-	lst = t;
-      }
-    }
-    o = NULL;
-    s = NULL;
+    t = safe_malloc (sizeof (LIST));
+    t->data = m;
+    t->next = lst;
+    lst = t;
   }
 
-  return (lst);
+  return lst;
 }
 
 int mutt_check_encoding (const char *c)
@@ -925,20 +876,62 @@ time_t mutt_parse_date (const char *s, HEADER *h)
   return (mutt_mktime (&tm, 0) + tz_offset);
 }
 
-/* extract the first substring that looks like a message-id */
-char *mutt_extract_message_id (const char *s)
+/* extract the first substring that looks like a message-id.
+ * call back with NULL for more (like strtok).
+ */
+char *mutt_extract_message_id (const char *s, const char **saveptr)
 {
-  const char *p;
-  char *r;
-  size_t l;
+  const char *o, *onull, *p;
+  char *ret = NULL;
 
-  if ((s = strchr (s, '<')) == NULL || (p = strchr (s, '>')) == NULL)
-    return (NULL);
-  l = (size_t)(p - s) + 1;
-  r = safe_malloc (l + 1);
-  memcpy (r, s, l);
-  r[l] = 0;
-  return (r);
+  if (s)
+    p = s;
+  else if (saveptr)
+    p = *saveptr;
+
+  for (s = NULL, o = NULL, onull = NULL;
+       (p = strpbrk (p, "<> \t;")) != NULL; ++p)
+  {
+    if (*p == '<')
+    {
+      s = p; 
+      o = onull = NULL;
+      continue;
+    }
+
+    if (!s)
+      continue;
+
+    if (*p == '>')
+    {
+      size_t olen = onull - o, slen = p - s + 1;
+      ret = safe_malloc (olen + slen + 1);
+      if (o)
+	memcpy (ret, o, olen);
+      memcpy (ret + olen, s, slen);
+      ret[olen + slen] = '\0';
+      if (saveptr)
+	*saveptr = p + 1; /* next call starts after '>' */
+      return ret;
+    }
+
+    /* some idiotic clients break their message-ids between lines */
+    if (s == p) 
+      /* step past another whitespace */
+      s = p + 1;
+    else if (o)
+      /* more than two lines, give up */
+      s = o = onull = NULL;
+    else
+    {
+      /* remember the first line, start looking for the second */
+      o = s;
+      onull = p;
+      s = p + 1;
+    }
+  }
+
+  return NULL;
 }
 
 void mutt_parse_mime_message (CONTEXT *ctx, HEADER *cur)
@@ -1132,7 +1125,7 @@ int mutt_parse_rfc822_line (ENVELOPE *e, HEADER *hdr, char *line, char *p, short
     {
       /* We add a new "Message-ID:" when building a message */
       FREE (&e->message_id);
-      e->message_id = mutt_extract_message_id (p);
+      e->message_id = mutt_extract_message_id (p, NULL);
       matched = 1;
     }
     else if (!ascii_strncasecmp (line + 1, "ail-", 4))
diff --git a/protos.h b/protos.h
index bb80973a..a9edf4cb 100644
--- a/protos.h
+++ b/protos.h
@@ -90,7 +90,7 @@ void set_quadoption (int, int);
 int query_quadoption (int, const char *);
 int quadoption (int);
 
-char* mutt_extract_message_id (const char *);
+char* mutt_extract_message_id (const char *, const char **);
 
 ADDRESS *mutt_default_from (void);
 ADDRESS *mutt_get_address (ENVELOPE *, char **);
diff --git a/send.c b/send.c
index 2c00de08..fd3349c0 100644
--- a/send.c
+++ b/send.c
@@ -299,7 +299,7 @@ static void process_user_header (ENVELOPE *env)
     }
     else if (ascii_strncasecmp ("message-id:", uh->data, 11) == 0)
     {
-      char *tmp = mutt_extract_message_id (uh->data + 11);
+      char *tmp = mutt_extract_message_id (uh->data + 11, NULL);
       if (rfc822_valid_msgid (tmp) >= 0)
       {
 	FREE(&env->message_id);