From 04c2748fbb3cd8472e353815677ca4444e0d6f61 Mon Sep 17 00:00:00 2001
From: "Fletcher T. Penney" <fletcher@fletcherpenney.net>
Date: Thu, 9 Mar 2017 19:37:09 -0500
Subject: [PATCH] ADDED: Add functionality to automatically identify
 abbreviations and glossary terms in source

---
 CMakeLists.txt                           |   2 +
 Sources/libMultiMarkdown/aho-corasick.c  | 591 +++++++++++++++++++++++
 Sources/libMultiMarkdown/aho-corasick.h  | 117 +++++
 Sources/libMultiMarkdown/html.c          |  18 +-
 Sources/libMultiMarkdown/include/token.h |   2 +
 Sources/libMultiMarkdown/latex.c         |   2 +
 Sources/libMultiMarkdown/lexer.c         |   5 +-
 Sources/libMultiMarkdown/lexer.re        |   2 +-
 Sources/libMultiMarkdown/mmd.c           |  51 +-
 Sources/libMultiMarkdown/odf.c           |  14 +-
 Sources/libMultiMarkdown/token.c         |  25 +
 Sources/libMultiMarkdown/writer.c        | 191 ++++----
 tests/MMD6Tests/Abbreviations.fodt       |  10 +-
 tests/MMD6Tests/Abbreviations.html       |   8 +
 tests/MMD6Tests/Abbreviations.htmlc      |   8 +
 tests/MMD6Tests/Abbreviations.tex        |   8 +
 tests/MMD6Tests/Abbreviations.text       |   8 +
 tests/MMD6Tests/Glossaries.fodt          |   4 +
 tests/MMD6Tests/Glossaries.html          |   4 +
 tests/MMD6Tests/Glossaries.htmlc         |   4 +
 tests/MMD6Tests/Glossaries.tex           |   4 +
 tests/MMD6Tests/Glossaries.text          |   5 +
 22 files changed, 936 insertions(+), 147 deletions(-)
 create mode 100644 Sources/libMultiMarkdown/aho-corasick.c
 create mode 100644 Sources/libMultiMarkdown/aho-corasick.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 214ebde..4029546 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -172,6 +172,7 @@ configure_file (
 
 # src_files are the primary files, and will be included in doxygen documentation
 set(src_files
+	Sources/libMultiMarkdown/aho-corasick.c
 	Sources/libMultiMarkdown/beamer.c
 	Sources/libMultiMarkdown/char.c
 	Sources/libMultiMarkdown/d_string.c
@@ -194,6 +195,7 @@ set(src_files
 
 # Primary header files, also for doxygen documentation
 set(header_files
+	Sources/libMultiMarkdown/aho-corasick.h
 	Sources/libMultiMarkdown/beamer.h
 	Sources/libMultiMarkdown/char.h
 	Sources/libMultiMarkdown/include/d_string.h
diff --git a/Sources/libMultiMarkdown/aho-corasick.c b/Sources/libMultiMarkdown/aho-corasick.c
new file mode 100644
index 0000000..7445aa4
--- /dev/null
+++ b/Sources/libMultiMarkdown/aho-corasick.c
@@ -0,0 +1,591 @@
+/**
+
+	C-Template -- Boilerplate c project with cmake support, CuTest unit testing, and more.
+
+	@file aho-corasick.c
+
+	@brief C implementation of the Aho-Corasick algorithm for searching text
+	for multiple strings simultaneously in a single pass without backtracking.
+
+	<https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm>
+
+
+	@author	Fletcher T. Penney
+	@bug	
+
+**/
+
+/*
+
+	Copyright Â© 2015-2017 Fletcher T. Penney.
+
+
+	The `c-template` project is released under the MIT License.
+	
+	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+	
+		https://github.com/fletcher/MultiMarkdown-4/
+	
+	MMD 4 is released under both the MIT License and GPL.
+	
+	
+	CuTest is released under the zlib/libpng license. See CuTest.c for the text
+	of the license.
+	
+	
+	## The MIT License ##
+	
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+	
+	The above copyright notice and this permission notice shall be included in
+	all copies or substantial portions of the Software.
+	
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+	THE SOFTWARE.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "aho-corasick.h"
+
+#define kTrieStartingSize 256
+
+void trie_to_graphviz(trie * a);
+
+
+trie * trie_new(size_t startingSize) {
+	trie * a = malloc(sizeof(trie));
+
+	if (a) {
+		if (startingSize <= 1)
+			startingSize = kTrieStartingSize;
+
+		a->node = malloc(sizeof(trie_node) * startingSize);
+
+		if (!a->node) {
+			free(a);
+			return NULL;
+		}
+
+		// Clear memory
+		memset(a->node, 0, sizeof(trie_node) * startingSize);
+
+		// All tries have a root node
+		a->size = 1;
+		a->capacity = startingSize;
+	}
+
+	return a;
+}
+
+
+void trie_free(trie * a) {
+	free(a->node);
+	free(a);
+}
+
+
+bool trie_node_insert(trie * a, size_t s, const unsigned char * key, unsigned short match_type, unsigned short depth) {
+	// Get node for state s
+	trie_node * n = &a->node[s];
+
+	size_t i;
+
+	if (key[0] == '\0') {
+		// We've hit end of key
+		n->match_type = match_type;
+		n->len = depth;
+		return true;		// Success
+	}
+
+	if (n->child[key[0]] != 0) {
+		// First character already in trie, advance forward
+		return trie_node_insert(a, n->child[key[0]], key + 1, match_type, ++depth);
+	} else {
+		// Create new node
+
+		// Ensure capacity
+		if (a->size == a->capacity) {
+			a->capacity *= 2;
+			a->node = realloc(a->node, a->capacity * sizeof(trie_node));
+
+			// Set n to new location
+			n = &(a->node[s]);
+		}
+
+		// Current node points to next node
+		i = a->size;
+		n->child[key[0]] = i;
+
+		// Initialize new node to 0
+		n = &a->node[i];
+		memset(n, 0, sizeof(trie_node));
+
+		// Set char for new node
+		n->c = key[0];
+
+		// Incremement size
+		a->size++;
+
+		// Advance forward
+		return trie_node_insert(a, i, key + 1, match_type, ++depth);
+	}
+}
+
+
+bool trie_insert(trie * a, const char * key, unsigned short match_type) {
+	if (a && key && (key[0] != '\0')) {
+		return trie_node_insert(a, 0, (const unsigned char *)key, match_type, 0);
+	}
+
+	return false;
+}
+
+
+#ifdef TEST
+void Test_trie_insert(CuTest* tc) {
+	trie * a = trie_new(0);
+
+	CuAssertIntEquals(tc, kTrieStartingSize, a->capacity);
+	CuAssertIntEquals(tc, 1, a->size);
+
+	trie_insert(a, "foo", 42);
+
+	trie_node * n = &a->node[0];
+	CuAssertIntEquals(tc, 0, n->match_type);
+	CuAssertIntEquals(tc, 1, n->child['f']);
+	CuAssertIntEquals(tc, '\0', n->c);
+
+	n = &a->node[1];
+	CuAssertIntEquals(tc, 0, n->match_type);
+	CuAssertIntEquals(tc, 2, n->child['o']);
+	CuAssertIntEquals(tc, 'f', n->c);
+
+	n = &a->node[2];
+	CuAssertIntEquals(tc, 0, n->match_type);
+	CuAssertIntEquals(tc, 3, n->child['o']);
+	CuAssertIntEquals(tc, 'o', n->c);
+
+	n = &a->node[3];
+	CuAssertIntEquals(tc, 42, n->match_type);
+	CuAssertIntEquals(tc, 3, n->len);
+	CuAssertIntEquals(tc, 'o', n->c);
+
+	trie_free(a);
+}
+#endif
+
+
+size_t trie_node_search(trie * a, size_t s, const char * query) {
+	if (query[0] == '\0') {
+		// Found matching state
+		return s;
+	}
+
+	if (a->node[s].child[query[0]] == 0) {
+		// Failed to match
+		return -1;
+	}
+
+	// Partial match, keep going
+	return trie_node_search(a, a->node[s].child[query[0]], query + 1);
+}
+
+
+size_t trie_search(trie * a, const char * query) {
+	if (a && query) {
+		return trie_node_search(a, 0, query);
+	}
+
+	return 0;
+}
+
+
+unsigned short trie_search_match_type(trie * a, const char * query) {
+	size_t s = trie_search(a, query);
+
+	if (s == -1)
+		return -1;
+
+	return a->node[s].match_type;
+}
+
+
+#ifdef TEST
+void Test_trie_search(CuTest* tc) {
+	trie * a = trie_new(0);
+
+	trie_insert(a, "foo", 42);
+	trie_insert(a, "bar", 41);
+	trie_insert(a, "food", 40);
+
+	CuAssertIntEquals(tc, 3, trie_search(a, "foo"));
+	CuAssertIntEquals(tc, 42, trie_search_match_type(a, "foo"));
+
+	CuAssertIntEquals(tc, 6, trie_search(a, "bar"));
+	CuAssertIntEquals(tc, 41, trie_search_match_type(a, "bar"));
+
+	CuAssertIntEquals(tc, 7, trie_search(a, "food"));
+	CuAssertIntEquals(tc, 40, trie_search_match_type(a, "food"));
+
+	CuAssertIntEquals(tc, -1, trie_search(a, "foot"));
+	CuAssertIntEquals(tc, (unsigned short) -1, trie_search_match_type(a, "foot"));
+
+	trie_free(a);
+}
+#endif
+
+
+void ac_trie_node_prepare(trie * a, size_t s, char * buffer, unsigned short depth, size_t last_match_state) {
+
+	buffer[depth] = '\0';
+	buffer[depth + 1] = '\0';
+
+	// Current node
+	trie_node * n = &(a->node[s]);
+
+	char * suffix = buffer;
+
+	// No suffix for first level matches
+	unsigned short last_match_depth = a->node[last_match_state].len;
+
+	if (depth == 1) {
+		last_match_depth = 1;
+	}
+
+	// Longest match seen so far??
+	suffix += 1;
+
+	// Find valid suffixes for failure path
+	while ((suffix[0] != '\0') && (n->ac_fail == 0)) {
+		n->ac_fail = trie_search(a, suffix);
+
+		if (n->ac_fail == -1)
+			n->ac_fail = 0;
+
+		if (n->ac_fail == s) {
+			// Something went wrong
+			fprintf(stderr, "Recursive trie fallback detected at state %lu('%c') - suffix:'%s'!\n", s, n->c, suffix);
+			n->ac_fail = 0;
+		}
+
+		suffix++;
+	}
+
+
+	// Prepare children
+	for (int i = 0; i < 256; ++i)
+	{
+		if ((n->child[i] != 0) &&
+			(n->child[i] != s)) {
+			buffer[depth] = i;
+
+			ac_trie_node_prepare(a, n->child[i], buffer, depth + 1, last_match_state);
+		}
+	}
+}
+
+/// Prepare trie for Aho-Corasick search algorithm by mapping failure connections
+void ac_trie_prepare(trie * a) {
+	// Clear old pointers
+	for (size_t i = 0; i < a->size; ++i)
+	{
+		a->node[i].ac_fail = 0;
+	}
+
+	// Create a buffer to use
+	char buffer[a->capacity];
+
+	ac_trie_node_prepare(a, 0, buffer, 0, 0);
+}
+
+
+
+#ifdef TEST
+void Test_trie_prepare(CuTest* tc) {
+	trie * a = trie_new(0);
+
+	trie_insert(a, "a", 1);
+	trie_insert(a, "aa", 2);
+	trie_insert(a, "aaa", 3);
+	trie_insert(a, "aaaa", 4);
+
+	ac_trie_prepare(a);
+
+	trie_free(a);
+}
+#endif
+
+
+match * match_new(size_t start, size_t len, unsigned short match_type) {
+	match * m = malloc(sizeof(match));
+
+	if (m) {
+		m->start = start;
+		m->len = len;
+		m->match_type = match_type;
+		m->next = NULL;
+	}
+
+	return m;
+}
+
+
+void match_free(match * m) {
+	if (m) {
+		if (m->next) {
+			match_free(m->next);
+		}
+
+		free(m);
+	}
+}
+
+
+match * match_add(match * last, size_t start, size_t len, unsigned short match_type) {
+	if (last) {
+		last->next = match_new(start, len, match_type);
+		last->next->prev = last;
+		return last->next;
+	} else {
+		return match_new(start, len, match_type);
+	}
+
+	return NULL;
+}
+
+
+match * ac_trie_search(trie * a, const char * source, size_t len) {
+
+	// Store results in a linked list
+//	match * result = match_new(0, 0, 0);
+	match * result = NULL;
+	match * m = result;
+
+	// Keep track of our state
+	size_t state = 0;
+	size_t temp_state;
+
+	// Character being compared
+	char test_value;
+	size_t counter = 0;
+
+	while ((counter < len) && (source[counter] != '\0')) {
+		// Read next character
+		test_value = source[counter++];
+
+		// Check for path that allows us to match next character
+		while (state != 0 && a->node[state].child[test_value] == 0) {
+			state = a->node[state].ac_fail;
+		}
+
+		// Advance state for the next character
+		state = a->node[state].child[test_value];
+
+		// Check for partial matches
+		temp_state = state;
+
+		while (temp_state != 0) {
+			if (a->node[temp_state].match_type) {
+				// This is a match
+				if (!m) {
+					result = match_new(0, 0, 0);
+					m = result;
+				}
+				m = match_add(m, counter - a->node[temp_state].len,
+					a->node[temp_state].len, a->node[temp_state].match_type);
+			}
+
+			// Iterate to find shorter matches
+			temp_state = a->node[temp_state].ac_fail;
+		}
+	}
+
+	return result;
+}
+
+
+void match_excise(match * m) {
+	if (m->prev) {
+		m->prev->next = m->next;
+	}
+
+	if (m->next) {
+		m->next->prev = m->prev;
+	}
+
+	free(m);
+}
+
+int match_count(match * m) {
+	int result = 0;
+	m = m->next;	// Skip header
+
+	while (m) {
+		result++;
+		m = m->next;
+	}
+
+	return result;
+}
+
+
+void match_describe(match * m, const char * source) {
+	fprintf(stderr, "'%.*s'(%d) at %lu:%lu\n", (int)m->len, &source[m->start], 
+		m->match_type, m->start, m->start + m->len);
+}
+
+
+void match_set_describe(match * m, const char * source) {
+	m = m->next;	// Skip header
+	while (m) {
+		match_describe(m, source);
+		m = m->next;
+	}
+}
+
+
+void match_set_filter_leftmost_longest(match * header) {
+	// Filter results to include only leftmost/longest results
+	match * m = header->next; 	// Skip header
+	match * n;
+
+	while (m) {
+		if (m->next) {
+			if (m->start == m->next->start) {
+				// The next match is longer than this one
+				n = m;
+				m = m->next;
+				match_excise(n);
+				continue;
+			}
+
+			while (m->next && 
+				m->next->start > m->start &&
+				m->next->start < m->start + m->len) {
+				// This match is "lefter" than next
+				match_excise(m->next);
+			}
+
+			while (m->next &&
+				m->next->start < m->start) {
+				// Next match is "lefter" than us
+				n = m;
+				m = m->prev;
+				match_excise(n);
+			}
+		}
+
+		while (m->prev->len && 
+			m->prev->start >= m->start) {
+			// We are "lefter" than previous
+			n = m->prev;
+			match_excise(n);
+		}
+
+		m = m->next;
+	}
+}
+
+
+match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t len) {
+	match * result = ac_trie_search(a, source, len);
+
+	if (result)
+		match_set_filter_leftmost_longest(result);
+
+	return result;
+}
+
+
+#ifdef TEST
+void Test_aho_trie_search(CuTest* tc) {
+	trie * a = trie_new(0);
+
+	trie_insert(a, "foo", 42);
+	trie_insert(a, "bar", 41);
+	trie_insert(a, "food", 40);
+
+	ac_trie_prepare(a);
+
+	match * m = ac_trie_search(a, "this is a bar that serves food.", 31);
+
+	match_free(m);
+	trie_free(a);
+
+
+	a = trie_new(0);
+
+	trie_insert(a, "A", 1);
+	trie_insert(a, "AB", 2);
+	trie_insert(a, "ABC", 3);
+	trie_insert(a, "BC", 4);
+	trie_insert(a, "BCD", 5);
+	trie_insert(a, "E", 6);
+	trie_insert(a, "EFGHIJ", 7);
+	trie_insert(a, "F", 8);
+	trie_insert(a, "ZABCABCZ", 9);
+	trie_insert(a, "ZAB", 10);
+
+	ac_trie_prepare(a);
+
+	m = ac_trie_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 26);
+	fprintf(stderr, "Finish with %d matches\n", match_count(m));
+	match_set_describe(m, "ABCDEFGGGAZABCABCDZABCABCZ");
+	match_free(m);
+
+	m = ac_trie_leftmost_longest_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 26);
+	fprintf(stderr, "Finish with %d matches\n", match_count(m));
+	match_set_describe(m, "ABCDEFGGGAZABCABCDZABCABCZ");
+	match_free(m);
+
+	// trie_to_graphviz(a);
+
+	trie_free(a);
+}
+#endif
+
+
+void trie_node_to_graphviz(trie * a, size_t s) {
+	trie_node * n = &a->node[s];
+
+	if (n->match_type)
+		fprintf(stderr, "\"%lu\" [shape=doublecircle]\n", s);
+
+	for (int i = 0; i < 256; ++i)
+	{
+		if (n->child[i]) {
+			switch (i) {
+				default:
+					fprintf(stderr, "\"%lu\" -> \"%lu\" [label=\"%c\"]\n", s, n->child[i], (char)i);
+			}
+		}
+	}
+
+	if (n->ac_fail)
+		fprintf(stderr, "\"%lu\" -> \"%lu\" [label=\"fail\"]\n", s, n->ac_fail);
+}
+
+
+void trie_to_graphviz(trie * a) {
+	fprintf(stderr, "digraph dfa {\n");
+	for (int i = 0; i <  a->size; ++i)
+	{
+		trie_node_to_graphviz(a, i);
+	}
+	fprintf(stderr, "}\n");
+}
+
diff --git a/Sources/libMultiMarkdown/aho-corasick.h b/Sources/libMultiMarkdown/aho-corasick.h
new file mode 100644
index 0000000..73414f2
--- /dev/null
+++ b/Sources/libMultiMarkdown/aho-corasick.h
@@ -0,0 +1,117 @@
+/**
+
+	C-Template -- Boilerplate c project with cmake support, CuTest unit testing, and more.
+
+	@file aho-corasick.h
+
+	@brief 
+
+
+	@author	Fletcher T. Penney
+	@bug	
+
+**/
+
+/*
+
+	Copyright Â© 2015-2017 Fletcher T. Penney.
+
+
+	The `c-template` project is released under the MIT License.
+	
+	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+	
+		https://github.com/fletcher/MultiMarkdown-4/
+	
+	MMD 4 is released under both the MIT License and GPL.
+	
+	
+	CuTest is released under the zlib/libpng license. See CuTest.c for the text
+	of the license.
+	
+	
+	## The MIT License ##
+	
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+	
+	The above copyright notice and this permission notice shall be included in
+	all copies or substantial portions of the Software.
+	
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+	THE SOFTWARE.
+
+*/
+
+
+#ifndef AC_TEMPLATE_H
+#define AC_TEMPLATE_H
+
+#include <string.h>
+
+struct trie_node {
+	char				c;					// Character for this node
+	unsigned short		match_type;			// 0 = no match, otherwise what have we matched?
+	unsigned short		len;				// Length of string matched
+	size_t				child[256];			// Where should we go next?
+	size_t				ac_fail;			// Where should we go if we fail?
+};
+
+typedef struct trie_node trie_node;
+
+
+struct trie {
+	size_t				size;				// How many nodes are in use?
+	size_t				capacity;			// How many nodes can we hold
+
+	trie_node *			node;				// Pointer to stack of nodes
+};
+
+typedef struct trie trie;
+
+
+struct match {
+	size_t				start;				// Starting offset for this match
+	size_t				len;				// Length for this match
+	unsigned short		match_type;			// Match type
+	struct match *		next;				// Pointer to next match in list
+	struct match *		prev;				// Pointer to previous match in list
+};
+
+typedef struct match match;
+
+
+trie * trie_new(size_t startingSize);
+
+bool trie_insert(trie * a, const char * key, unsigned short match_type);
+
+void ac_trie_prepare(trie * a);
+
+match * ac_trie_search(trie * a, const char * source, size_t len);
+
+match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t len);
+
+void trie_free(trie * a);
+
+void match_set_describe(match * m, const char * source);
+
+void match_set_filter_leftmost_longest(match * header);
+
+void match_free(match * m);
+
+
+#ifdef TEST
+#include "CuTest.h"
+#endif
+
+
+#endif
diff --git a/Sources/libMultiMarkdown/html.c b/Sources/libMultiMarkdown/html.c
index 327ab11..a1f836e 100644
--- a/Sources/libMultiMarkdown/html.c
+++ b/Sources/libMultiMarkdown/html.c
@@ -1094,6 +1094,7 @@ void mmd_export_token_html(DString * out, const char * source, token * t, scratc
 			mmd_export_token_tree_html(out, source, t->child, scratch);
 			break;
 		case PAIR_BRACKET_ABBREVIATION:
+			// Which might also be an "auto-tagged" abbreviation
 			if (scratch->extensions & EXT_NOTES) {
 				// Note-based syntax enabled
 
@@ -1113,8 +1114,10 @@ void mmd_export_token_html(DString * out, const char * source, token * t, scratc
 				// Get instance of the note used
 				temp_note = stack_peek_index(scratch->used_abbreviations, temp_short - 1);
 
-				t->child->type = TEXT_EMPTY;
-				t->child->mate->type = TEXT_EMPTY;
+				if (t->child) {
+					t->child->type = TEXT_EMPTY;
+					t->child->mate->type = TEXT_EMPTY;
+				}
 
 				if (temp_short3 == scratch->inline_abbreviations_to_free->size) {
 					// This is a reference definition
@@ -1124,7 +1127,10 @@ void mmd_export_token_html(DString * out, const char * source, token * t, scratc
 						print_const("<abbr title=\"");
 						mmd_print_string_html(out, temp_note->clean_text, false);
 						print_const("\">");
-						mmd_export_token_tree_html(out, source, t->child, scratch);
+						if (t->child)
+							mmd_export_token_tree_html(out, source, t->child, scratch);
+						else
+							print_token(t);
 						print_const("</abbr>");
 					} else {
 						// This is the first time this note was used
@@ -1132,7 +1138,10 @@ void mmd_export_token_html(DString * out, const char * source, token * t, scratc
 						print_const(" (<abbr title=\"");
 						mmd_print_string_html(out, temp_note->clean_text, false);
 						print_const("\">");
-						mmd_export_token_tree_html(out, source, t->child, scratch);
+						if (t->child)
+							mmd_export_token_tree_html(out, source, t->child, scratch);
+						else
+							print_token(t);
 						print_const("</abbr>)");
 					}
 				} else {
@@ -1274,6 +1283,7 @@ void mmd_export_token_html(DString * out, const char * source, token * t, scratc
 			}
 			break;
 		case PAIR_BRACKET_GLOSSARY:
+			// Which might also be an "auto-tagged" glossary
 			if (scratch->extensions & EXT_NOTES) {
 				// Note-based syntax enabled
 
diff --git a/Sources/libMultiMarkdown/include/token.h b/Sources/libMultiMarkdown/include/token.h
index 12de1bf..5f720b8 100644
--- a/Sources/libMultiMarkdown/include/token.h
+++ b/Sources/libMultiMarkdown/include/token.h
@@ -222,5 +222,7 @@ void token_skip_until_type_multiple(token ** t, int n, ...);
 
 void token_split_on_char(token * t, const char * source, const char c);
 
+void token_split(token * t, size_t start, size_t len, unsigned short new_type);
+
 #endif
 
diff --git a/Sources/libMultiMarkdown/latex.c b/Sources/libMultiMarkdown/latex.c
index 0a6d7e5..f59fdab 100644
--- a/Sources/libMultiMarkdown/latex.c
+++ b/Sources/libMultiMarkdown/latex.c
@@ -1013,6 +1013,7 @@ void mmd_export_token_latex(DString * out, const char * source, token * t, scrat
 			mmd_export_token_tree_latex(out, source, t->child, scratch);
 			break;
 		case PAIR_BRACKET_ABBREVIATION:
+			// Which might also be an "auto-tagged" abbreviation
 			if (scratch->extensions & EXT_NOTES) {
 				// Note-based syntax enabled
 
@@ -1206,6 +1207,7 @@ void mmd_export_token_latex(DString * out, const char * source, token * t, scrat
 			}
 			break;
 		case PAIR_BRACKET_GLOSSARY:
+			// Which might also be an "auto-tagged" glossary
 			if (scratch->extensions & EXT_NOTES) {
 				// Note-based syntax enabled
 
diff --git a/Sources/libMultiMarkdown/lexer.c b/Sources/libMultiMarkdown/lexer.c
index b52fed1..ed85c4c 100644
--- a/Sources/libMultiMarkdown/lexer.c
+++ b/Sources/libMultiMarkdown/lexer.c
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.14.3 on Sat Mar  4 20:43:39 2017 */
+/* Generated by re2c 0.14.3 on Thu Mar  9 19:02:17 2017 */
 /**
 
 	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
@@ -348,7 +348,6 @@ yy50:
 	++YYCURSOR;
 	{ return TEXT_PERCENT; }
 yy52:
-	YYCTXMARKER = YYCURSOR + 1;
 	yyaccept = 5;
 	yych = *(YYMARKER = ++YYCURSOR);
 	switch (yych) {
@@ -413,6 +412,7 @@ yy68:
 	default:	goto yy61;
 	}
 yy69:
+	YYCTXMARKER = YYCURSOR + 1;
 	yych = *++YYCURSOR;
 	switch (yych) {
 	case '\t':
@@ -437,7 +437,6 @@ yy70:
 	default:	goto yy264;
 	}
 yy71:
-	YYCTXMARKER = YYCURSOR + 1;
 	++YYCURSOR;
 	yych = *YYCURSOR;
 	switch (yych) {
diff --git a/Sources/libMultiMarkdown/lexer.re b/Sources/libMultiMarkdown/lexer.re
index 9729c19..7417f6e 100644
--- a/Sources/libMultiMarkdown/lexer.re
+++ b/Sources/libMultiMarkdown/lexer.re
@@ -218,7 +218,7 @@ int scan(Scanner * s, const char * stop) {
 		'}'								{ return TEXT_BRACE_RIGHT; }
 		'\\'							{ return TEXT_BACKSLASH; }
 
-		[0-9]+ / ('.' (SP|NL))			{ return TEXT_NUMBER_POSS_LIST; }
+		[0-9]+ '.' / (SP|NL)			{ return TEXT_NUMBER_POSS_LIST; }
 		'.' / (SP|NL)					{ return TEXT_PERIOD; }
 
 		TEXT_LINEBREAK					{ return TEXT_LINEBREAK; }
diff --git a/Sources/libMultiMarkdown/mmd.c b/Sources/libMultiMarkdown/mmd.c
index d53e759..b8e498c 100644
--- a/Sources/libMultiMarkdown/mmd.c
+++ b/Sources/libMultiMarkdown/mmd.c
@@ -418,39 +418,28 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
 			break;
 		case TEXT_NUMBER_POSS_LIST:
 			switch(source[line->child->next->start]) {
-				case '.':
-					switch(source[line->child->next->start + 1]) {
-						case ' ':
-						case '\t':
-							line->type = LINE_LIST_ENUMERATED;
-							line->child->type = MARKER_LIST_ENUMERATOR;
-
-							// Strip period
-							line->child->next->type = TEXT_EMPTY;
-
-							switch (line->child->next->next->type) {
-								case TEXT_PLAIN:
-									// Strip whitespace between bullet and text
-									while (char_is_whitespace(source[line->child->next->next->start])) {
-										line->child->next->next->start++;
-										line->child->next->next->len--;
-									}
-									break;
-								case INDENT_SPACE:
-								case INDENT_TAB:
-								case NON_INDENT_SPACE:
-									t = line->child->next;
-									while(t->next && ((t->next->type == INDENT_SPACE) ||
-										(t->next->type == INDENT_TAB) ||
-										(t->next->type == NON_INDENT_SPACE))) {
-										tokens_prune(t->next, t->next);
-									}
-									break;
+				case ' ':
+				case '\t':
+					line->type = LINE_LIST_ENUMERATED;
+					line->child->type = MARKER_LIST_ENUMERATOR;
+
+					switch (line->child->next->type) {
+						case TEXT_PLAIN:
+							// Strip whitespace between bullet and text
+							while (char_is_whitespace(source[line->child->next->start])) {
+								line->child->next->start++;
+								line->child->next->len--;
 							}
 							break;
-						default:
-							line->type = LINE_PLAIN;
-							line->child->type = TEXT_PLAIN;
+						case INDENT_SPACE:
+						case INDENT_TAB:
+						case NON_INDENT_SPACE:
+							t = line->child;
+							while(t->next && ((t->next->type == INDENT_SPACE) ||
+								(t->next->type == INDENT_TAB) ||
+								(t->next->type == NON_INDENT_SPACE))) {
+								tokens_prune(t->next, t->next);
+							}
 							break;
 					}
 					break;
diff --git a/Sources/libMultiMarkdown/odf.c b/Sources/libMultiMarkdown/odf.c
index 9d2505f..33146b8 100644
--- a/Sources/libMultiMarkdown/odf.c
+++ b/Sources/libMultiMarkdown/odf.c
@@ -1151,6 +1151,7 @@ void mmd_export_token_odf(DString * out, const char * source, token * t, scratch
 			}
 			break;
 		case PAIR_BRACKET_ABBREVIATION:
+			// Which might also be an "auto-tagged" abbreviation
 			if (scratch->extensions & EXT_NOTES) {
 				// Note-based syntax enabled
 
@@ -1170,18 +1171,22 @@ void mmd_export_token_odf(DString * out, const char * source, token * t, scratch
 				// Get instance of the note used
 				temp_note = stack_peek_index(scratch->used_abbreviations, temp_short - 1);
 
-				t->child->type = TEXT_EMPTY;
-				t->child->mate->type = TEXT_EMPTY;
+				if (t->child) {
+					t->child->type = TEXT_EMPTY;
+					t->child->mate->type = TEXT_EMPTY;
+				}
 
 				if (temp_short2 == scratch->used_abbreviations->size) {
 					// This is a re-use of a previously used note
 
 					if (temp_short3 == scratch->inline_abbreviations_to_free->size) {
 						// This is a reference definition
-						mmd_export_token_tree_odf(out, source, t->child, scratch);
+						mmd_print_string_odf(out, temp_note->clean_text);
+//						mmd_export_token_tree_odf(out, source, t->child, scratch);
 					} else {
 						// This is an inline definition
-						mmd_export_token_tree_odf(out, source, t->child, scratch);
+						mmd_print_string_odf(out, temp_note->clean_text);
+//						mmd_export_token_tree_odf(out, source, t->child, scratch);
 					}
 				} else {
 					// This is the first time this note was used
@@ -1210,6 +1215,7 @@ void mmd_export_token_odf(DString * out, const char * source, token * t, scratch
 			}
 			break;
 		case PAIR_BRACKET_GLOSSARY:
+			// Which might also be an "auto-tagged" glossary
 			if (scratch->extensions & EXT_NOTES) {
 				// Note-based syntax enabled
 
diff --git a/Sources/libMultiMarkdown/token.c b/Sources/libMultiMarkdown/token.c
index c853141..8a30ffe 100644
--- a/Sources/libMultiMarkdown/token.c
+++ b/Sources/libMultiMarkdown/token.c
@@ -631,3 +631,28 @@ void token_split_on_char(token * t, const char * source, const char c) {
 	}
 }
 
+
+// Split a token and create 
+void token_split(token * t, size_t start, size_t len, unsigned short new_type) {
+	if (!t)
+		return;
+
+	token * u = token_new(new_type, start, len);
+	size_t stop = start + len;
+
+	if (t->start + t->len > stop) {
+		token * v = token_new(t->type, stop, t->start + t->len - stop);
+
+		u->next = v;
+		v->prev = u;
+		v->next = t->next;
+	} else {
+		u->next = t->next;
+	}
+
+	t->next = u;
+	u->prev = t;
+
+	t->len = start - t->start;
+}
+
diff --git a/Sources/libMultiMarkdown/writer.c b/Sources/libMultiMarkdown/writer.c
index ae76e4d..9d1cbee 100644
--- a/Sources/libMultiMarkdown/writer.c
+++ b/Sources/libMultiMarkdown/writer.c
@@ -59,6 +59,7 @@
 
 #include "libMultiMarkdown.h"
 
+#include "aho-corasick.h"
 #include "beamer.h"
 #include "char.h"
 #include "d_string.h"
@@ -1473,108 +1474,40 @@ void process_metadata_stack(mmd_engine * e, scratch_pad * scratch) {
 		scratch->base_header_level = header_level;
 }
 
-/// kmp from http://stackoverflow.com/questions/8584644/strstr-for-a-string-that-is-not-null-terminated
-/// Search for a string within certain bounds (so we don't go past the end of the token)
-int *kmp_borders(char * needle, size_t nlen){
-    if (!needle) return NULL;
-    int i, j, *borders = malloc((nlen+1)*sizeof(*borders));
-    if (!borders) return NULL;
-    i = 0;
-    j = -1;
-    borders[i] = j;
-    while((size_t)i < nlen){
-        while(j >= 0 && needle[i] != needle[j]){
-            j = borders[j];
-        }
-        ++i;
-        ++j;
-        borders[i] = j;
-    }
-    return borders;
-}
-
-const char * kmp_search(const char * haystack, size_t haylen, char * needle, size_t nlen, int * borders){
-    size_t max_index = haylen-nlen, i = 0, j = 0;
-    while(i <= max_index){
-        while(j < nlen && *haystack && needle[j] == *haystack){
-            ++j;
-            ++haystack;
-        }
-        if (j == nlen){
-            return haystack-nlen;
-        }
-        if (!(*haystack)){
-            return NULL;
-        }
-        if (j == 0){
-            ++haystack;
-            ++i;
-        } else {
-            do{
-                i += j - (size_t)borders[j];
-                j = borders[j];
-            }while(j > 0 && needle[j] != *haystack);
-        }
-    }
-    return NULL;
-}
-
-const char * sstrnstr(const char * haystack, char * needle, size_t haylen){
-    if (!haystack || !needle){
-        return NULL;
-    }
-    size_t nlen = strlen(needle);
-    if (haylen < nlen){
-        return NULL;
-    }
-    int *borders = kmp_borders(needle, nlen);
-    if (!borders){
-        return NULL;
-    }
-    const char *match = kmp_search(haystack, haylen, needle, nlen, borders);
-    free(borders);
-    return match;
-}
-
-
-/// Search a text node for abbreviation matches
-/// TODO: This is an inefficient algorithm, searching
-/// each node once for *each* abbreviation.  A more
-/// advanced algorithm would search for all abbreviations
-/// simultaneously but require more setup (e.g. Aho-Corasick)
-void abbr_search_text(mmd_engine * e, token * t) {
-	const char * str = &e->dstr->str[t->start];
-
-	const char * match;
-	abbr * a;
 
-	for (int i = 0; i < e->abbreviation_stack->size; ++i)
-	{
-		a = stack_peek_index(e->abbreviation_stack, i);
+void automatic_search_text(mmd_engine * e, token * t, trie * ac) {
+	match * m = ac_trie_leftmost_longest_search(ac, &e->dstr->str[t->start], t->len);
+
+	match * walker;
+
+	token * tok = t;
+
+	if (m) {
+		walker = m->next;
 
-		match = sstrnstr(str, a->abbr, t->len);
+		while (walker) {
+			token_split(tok, walker->start + t->start, walker->len, walker->match_type);
+
+			// Advance token to section after the split (if present)
+			tok = tok->next->next;
 
-		if (match) {
-			fprintf(stderr, "Found match '%s' -> '%s' at %lu\n", a->abbr, a->expansion, (size_t) (match - e->dstr->str));
+			// Advance to next match (if present)
+			walker = walker->next;
 		}
 	}
+
+	match_free(m);
 }
 
 
 /// Determine which nodes to descend into to search for abbreviations
-void abbr_search(mmd_engine * e, token * t) {
+void automatic_search(mmd_engine * e, token * t, trie * ac) {
 	while (t) {
 		switch (t->type) {
 			case TEXT_PLAIN:
-				abbr_search_text(e, t);
+				automatic_search_text(e, t, ac);
 				break;
 			case DOC_START_TOKEN:
-			case BLOCK_LIST_BULLETED:
-			case BLOCK_LIST_BULLETED_LOOSE:
-			case BLOCK_LIST_ENUMERATED:
-			case BLOCK_LIST_ENUMERATED_LOOSE:
-				abbr_search(e, t->child);
-				break;
 			case BLOCK_PARA:
 			case BLOCK_H1:
 			case BLOCK_H2:
@@ -1582,6 +1515,10 @@ void abbr_search(mmd_engine * e, token * t) {
 			case BLOCK_H4:
 			case BLOCK_H5:
 			case BLOCK_H6:
+			case BLOCK_LIST_BULLETED:
+			case BLOCK_LIST_BULLETED_LOOSE:
+			case BLOCK_LIST_ENUMERATED:
+			case BLOCK_LIST_ENUMERATED_LOOSE:
 			case BLOCK_LIST_ITEM_TIGHT:
 			case BLOCK_LIST_ITEM:
 			case BLOCK_SETEXT_1:
@@ -1589,8 +1526,13 @@ void abbr_search(mmd_engine * e, token * t) {
 			case BLOCK_TABLE:
 			case BLOCK_TABLE_HEADER:
 			case BLOCK_TABLE_SECTION:
-				abbr_search_text(e, t);
+			case PAIR_QUOTE_DOUBLE:
+			case PAIR_QUOTE_SINGLE:
+			case PAIR_STAR:
+			case PAIR_UL:
+				automatic_search(e, t->child, ac);
 				break;			
+//			case PAIR_PAREN:
 			default:
 				break;
 		}
@@ -1600,16 +1542,34 @@ void abbr_search(mmd_engine * e, token * t) {
 }
 
 
-void process_abbreviation_stack(mmd_engine * e, scratch_pad * scratch) {
-	abbr * a;
+void identify_global_search_terms(mmd_engine * e, scratch_pad * scratch) {
+	// Only search if we have a target
+	int count = e->abbreviation_stack->size + e->glossary_stack->size;
 
-	// Describe which abbreviations we are searching for
+	if (count == 0) {
+		return;
+	}
+
+	trie * ac = trie_new(0);
+	footnote * f;
+
+	// Add abbreviations to search trie
 	for (int i = 0; i < e->abbreviation_stack->size; ++i)
 	{
-		a = stack_peek_index(e->abbreviation_stack, i);
+		f = stack_peek_index(e->abbreviation_stack, i);
+		trie_insert(ac, f->label_text, PAIR_BRACKET_ABBREVIATION);
+	}
+
+	// Add glossary to search trie
+	for (int i = 0; i < e->glossary_stack->size; ++i)
+	{
+		f = stack_peek_index(e->glossary_stack, i);
+		trie_insert(ac, f->clean_text, PAIR_BRACKET_GLOSSARY);
 	}
 
-	abbr_search(e, e->root);
+	ac_trie_prepare(ac);
+	automatic_search(e, e->root, ac);
+	trie_free(ac);
 }
 
 
@@ -1627,8 +1587,9 @@ void mmd_export_token_tree(DString * out, mmd_engine * e, short format) {
 	// Process metadata
 	process_metadata_stack(e, scratch);
 
-	// Process abbreviations
-	// process_abbreviation_stack(e, scratch);
+	// Process abbreviations, glossary, etc.
+	if (!(e->extensions & EXT_COMPATIBILITY)) 
+		identify_global_search_terms(e, scratch);
 
 
 	switch (scratch->output_format) {
@@ -2017,15 +1978,27 @@ void citation_from_bracket(const char * source, scratch_pad * scratch, token * t
 
 void glossary_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
 	// Get text inside bracket
-	char * text = text_inside_pair(source, t);
+	char * text;
+
+	if (t->child) {
+		text = text_inside_pair(source, t);
+	} else {
+		text = malloc(t->len + 2);
+		text[0] = '?';
+		memcpy(&text[1], &source[t->start], t->len);
+		text[t->len + 1] = '\0';
+	}
+
 	short glossary_id = extract_glossary_from_stack(scratch, text);
 	
 	free(text);
 
 	if (glossary_id == -1) {
 		// No match, this is an inline glossary -- create a new glossary entry
-		t->child->type = TEXT_EMPTY;
-		t->child->mate->type = TEXT_EMPTY;
+		if (t->child) {
+			t->child->type = TEXT_EMPTY;
+			t->child->mate->type = TEXT_EMPTY;
+		}
 
 		// Create glossary
 		token * label = t->child;
@@ -2056,16 +2029,28 @@ void glossary_from_bracket(const char * source, scratch_pad * scratch, token * t
 
 void abbreviation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
 	// Get text inside bracket
-	char * text = text_inside_pair(source, t);
+	char * text;
+
+	if (t->child) {
+		text = text_inside_pair(source, t);
+	} else {
+		text = malloc(t->len + 2);
+		text[0] = '>';
+		memcpy(&text[1], &source[t->start], t->len);
+		text[t->len + 1] = '\0';
+	}
+
 	short abbr_id = extract_abbreviation_from_stack(scratch, &text[1]);
 	
 	free(text);
 
 	if (abbr_id == -1) {
 		// No match, this is an inline glossary -- create a new glossary entry
-		t->child->type = TEXT_EMPTY;
-		t->child->mate->type = TEXT_EMPTY;
-
+		if (t->child) {
+			t->child->type = TEXT_EMPTY;
+			t->child->mate->type = TEXT_EMPTY;
+		}
+		
 		// Create glossary
 		token * label = t->child;
 		while (label && label->type != PAIR_PAREN)
diff --git a/tests/MMD6Tests/Abbreviations.fodt b/tests/MMD6Tests/Abbreviations.fodt
index f39d962..eed1c05 100644
--- a/tests/MMD6Tests/Abbreviations.fodt
+++ b/tests/MMD6Tests/Abbreviations.fodt
@@ -280,7 +280,7 @@
 
 <text:p text:style-name="Standard">BAR (bar)</text:p>
 
-<text:p text:style-name="Standard">foo bar</text:p>
+<text:p text:style-name="Standard">FOO BAR</text:p>
 
 <text:p text:style-name="Standard">FOOBAR (foobar)</text:p>
 
@@ -289,6 +289,14 @@
 <text:p text:style-name="Standard">5</text:p>
 
 <text:p text:style-name="Standard"><text:p text:style-name="Footnote">BAZ (BAT)</text:p> (baz)</text:p>
+
+<text:p text:style-name="Standard">FOO</text:p>
+
+<text:p text:style-name="Standard">BAR</text:p>
+
+<text:p text:style-name="Standard">FOOBAR</text:p>
+
+<text:p text:style-name="Standard">FOO BAR baz</text:p>
 </office:text>
 </office:body>
 </office:document>
diff --git a/tests/MMD6Tests/Abbreviations.html b/tests/MMD6Tests/Abbreviations.html
index f5e2b4e..539a78b 100644
--- a/tests/MMD6Tests/Abbreviations.html
+++ b/tests/MMD6Tests/Abbreviations.html
@@ -20,6 +20,14 @@
 
 <p>BAZ (BAT) (<abbr title="BAZ (BAT)">baz</abbr>)</p>
 
+<p><abbr title="FOO">foo</abbr></p>
+
+<p><abbr title="BAR">bar</abbr></p>
+
+<p><abbr title="FOOBAR">foobar</abbr></p>
+
+<p><abbr title="FOO BAR">foo bar</abbr> baz</p>
+
 </body>
 </html>
 
diff --git a/tests/MMD6Tests/Abbreviations.htmlc b/tests/MMD6Tests/Abbreviations.htmlc
index ff3286b..dabcbaf 100644
--- a/tests/MMD6Tests/Abbreviations.htmlc
+++ b/tests/MMD6Tests/Abbreviations.htmlc
@@ -14,3 +14,11 @@ latex config:	article</p>
 <p>5</p>
 
 <p>[>(baz) BAZ (BAT)]</p>
+
+<p>foo</p>
+
+<p>bar</p>
+
+<p>foobar</p>
+
+<p>foo bar baz</p>
diff --git a/tests/MMD6Tests/Abbreviations.tex b/tests/MMD6Tests/Abbreviations.tex
index 3e1c4b4..3042175 100644
--- a/tests/MMD6Tests/Abbreviations.tex
+++ b/tests/MMD6Tests/Abbreviations.tex
@@ -24,5 +24,13 @@
 
 \newacronym{baz}{baz}{BAZ (BAT)}\gls{baz}
 
+\gls{foo}
+
+\gls{bar}
+
+\gls{foobar}
+
+\gls{foo bar} baz
+
 \input{mmd6-article-footer}
 \end{document}
diff --git a/tests/MMD6Tests/Abbreviations.text b/tests/MMD6Tests/Abbreviations.text
index 9bd19fa..cac55dc 100644
--- a/tests/MMD6Tests/Abbreviations.text
+++ b/tests/MMD6Tests/Abbreviations.text
@@ -15,6 +15,14 @@ latex config:	article
 
 [>(baz) BAZ (BAT)]
 
+foo
+
+bar
+
+foobar
+
+foo bar baz
+
 
 [>foo]: FOO
 [>bar]: BAR
diff --git a/tests/MMD6Tests/Glossaries.fodt b/tests/MMD6Tests/Glossaries.fodt
index 92a66ff..4e50cd6 100644
--- a/tests/MMD6Tests/Glossaries.fodt
+++ b/tests/MMD6Tests/Glossaries.fodt
@@ -289,6 +289,10 @@
 <text:p text:style-name="Standard">[?bar]</text:p>
 
 <text:p text:style-name="Standard">5</text:p>
+
+<text:p text:style-name="Standard">foo</text:p>
+
+<text:p text:style-name="Standard">Foo Bar</text:p>
 </office:text>
 </office:body>
 </office:document>
diff --git a/tests/MMD6Tests/Glossaries.html b/tests/MMD6Tests/Glossaries.html
index 6acdac5..49eb2e7 100644
--- a/tests/MMD6Tests/Glossaries.html
+++ b/tests/MMD6Tests/Glossaries.html
@@ -18,6 +18,10 @@
 
 <p>5</p>
 
+<p><a href="#gn:1" title="see glossary" class="glossary">foo</a></p>
+
+<p><a href="#gn:3" title="see glossary" class="glossary">Foo Bar</a></p>
+
 <div class="glossary">
 <hr />
 <ol>
diff --git a/tests/MMD6Tests/Glossaries.htmlc b/tests/MMD6Tests/Glossaries.htmlc
index 1d44024..1355517 100644
--- a/tests/MMD6Tests/Glossaries.htmlc
+++ b/tests/MMD6Tests/Glossaries.htmlc
@@ -13,5 +13,9 @@ latex config:	article</p>
 
 <p>5</p>
 
+<p>foo</p>
+
+<p>Foo Bar</p>
+
 <pre><code>With second para.
 </code></pre>
diff --git a/tests/MMD6Tests/Glossaries.tex b/tests/MMD6Tests/Glossaries.tex
index a5a6a09..2001950 100644
--- a/tests/MMD6Tests/Glossaries.tex
+++ b/tests/MMD6Tests/Glossaries.tex
@@ -22,5 +22,9 @@ With second para.}
 
 5
 
+\gls{foo}
+
+\gls{foobar}
+
 \input{mmd6-article-footer}
 \end{document}
diff --git a/tests/MMD6Tests/Glossaries.text b/tests/MMD6Tests/Glossaries.text
index 3a3f860..5d529ea 100644
--- a/tests/MMD6Tests/Glossaries.text
+++ b/tests/MMD6Tests/Glossaries.text
@@ -13,6 +13,11 @@ latex config:	article
 
 5
 
+foo
+
+Foo Bar
+
+
 [?foo]: Reference
 
 [?Foo Bar]: Reference
-- 
2.40.0