From: Fletcher T. Penney <fletcher@fletcherpenney.net>
Date: Sat, 18 Mar 2017 13:47:10 +0000 (-0400)
Subject: FIXED: Improve token_split() algorithm and searching for abbreviations and glossary terms in text
X-Git-Tag: 6.0.0-rc2^2~17
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=81736c907c3d07a2597c63f02374462c007fda35;p=multimarkdown

FIXED: Improve token_split() algorithm and searching for abbreviations and glossary terms in text
---

diff --git a/Sources/libMultiMarkdown/token.c b/Sources/libMultiMarkdown/token.c
index 8a30ffe..32a719f 100644
--- a/Sources/libMultiMarkdown/token.c
+++ b/Sources/libMultiMarkdown/token.c
@@ -632,27 +632,78 @@ void token_split_on_char(token * t, const char * source, const char c) {
 }
 
 
-// Split a token and create 
+// Split token t so the range [start, start + len) gets type new_type; does nothing if t is NULL or the range is not inside t
 void token_split(token * t, size_t start, size_t len, unsigned short new_type) {
 	if (!t)
 		return;
 
-	token * u = token_new(new_type, start, len);
 	size_t stop = start + len;
 
-	if (t->start + t->len > stop) {
-		token * v = token_new(t->type, stop, t->start + t->len - stop);
+	if (start < t->start)
+		return;
 
-		u->next = v;
-		v->prev = u;
-		v->next = t->next;
-	} else {
-		u->next = t->next;
-	}
+	if (stop > t->start + t->len)
+		return;
+
+	token * A;		// New token created by the split; left unassigned when the range covers all of t
+	bool inset_start = false;
+	bool inset_stop = false;
 
-	t->next = u;
-	u->prev = t;
+	// Does the range start after t starts? If so, t keeps the leading part
+	if (start > t->start)
+		inset_start = true;
+
+	// Does the range stop before t ends? If so, a trailing token is needed
+	if (stop < t->start + t->len)
+		inset_stop = true;
+
+
+	if (inset_start) {
+		A = token_new(new_type, start, len);
+		if (inset_stop) {
+			// Range is strictly inside t: the chain becomes t -> A -> T2
+
+			// T2 keeps the type of t and covers the span after the range (NOTE(review): token_new() results are used without a NULL check -- confirm it cannot fail)
+			token * T2 = token_new(t->type, stop, t->start + t->len - stop);
+			T2->next = t->next;
+	
+			if (t->next)
+				t->next->prev = T2;
 
-	t->len = start - t->start;
+			A->next = T2;
+			T2->prev = A;
+		} else {
+			// Range reaches the end of t: the chain becomes t -> A
+			A->next = t->next;
+
+			if (t->next)
+				t->next->prev = A;
+		}
+
+		t->next = A;
+		A->prev = t;
+
+		t->len = start - t->start;
+	} else {
+		if (inset_stop) {
+			// Range begins where t begins and a remainder follows it.
+			// Instead of inserting a new token in front of t, t itself is
+			// shortened and retyped to new_type, and the new token A takes the
+			// original type and the remainder, so pointers to t still reach this chain
+			A = token_new(t->type, stop, t->start + t->len - stop);
+			A->prev = t;
+			A->next = t->next;
+			t->next = A;
+
+			if (A->next)
+				A->next->prev = A;
+
+			t->len = stop - t->start;
+			t->type = new_type;
+		} else {
+			// Range covers all of t: no new token is created, t is simply retyped
+			t->type = new_type;
+		}
+	}
 }
 
diff --git a/Sources/libMultiMarkdown/writer.c b/Sources/libMultiMarkdown/writer.c
index 64bc876..55629d3 100644
--- a/Sources/libMultiMarkdown/writer.c
+++ b/Sources/libMultiMarkdown/writer.c
@@ -1537,8 +1537,9 @@ void automatic_search_text(mmd_engine * e, token * t, trie * ac) {
 		while (walker) {
 			token_split(tok, walker->start, walker->len, walker->match_type);
 
-			// Advance token to section after the split (if present)
-			tok = tok->next->next;
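+			// Advance tok to the first token that starts at or after the end of this match (NOTE(review): tok is dereferenced again after tok = tok->next with no NULL check -- confirm a following token always exists)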
+			while (tok->start < walker->start + walker->len)
+				tok = tok->next;
 
 			// Advance to next match (if present)
 			walker = walker->next;