From: Fletcher T. Penney <fletcher@fletcherpenney.net>
Date: Wed, 8 Feb 2017 03:38:32 +0000 (-0500)
Subject: FIXED: Fix compatibility mode handling of footnote and citation labels
X-Git-Tag: 0.1.2a^2~2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bc715ba3df5403e3525cb5b4908f8b49edd97c6e;p=multimarkdown

FIXED: Fix compatibility mode handling of footnote and citation labels
---

diff --git a/README.md b/README.md
index 6638a30..0b77979 100644
--- a/README.md
+++ b/README.md
@@ -398,14 +398,10 @@ most circumstances.
 
 		</div>
 
-5. I haven't worked a lot yet on the MMD-specific features, so there may be
-more changes to come.  One thing I do anticipate is that if fenced code blocks
-stay, they will work slightly differently.  Currently, an opening fence
-doesn't mean anything unless there is a closing fence that follows it.  Again,
-this requires backtracking in the parser.  I suspect that an opening fence
-will definitely open a code block.  If there is no closing fence, then the
-rest of the document will remain inside the code block.  This is the approach
-used by CommonMark and it's a reasonable one, IMO.
+5. "Malformed" reference link definitions are handled slightly differently.
+For example, `Reference Footnotes.text` is parsed differently in compatibility
+mode than MMD-5.  This started as a side-effect of the parsing algorithm, but
+I actually think it makes sense.  This may or may not change in the future.
 
 
 ## Where Does MultiMarkdown 6 Stand? ##
@@ -420,6 +416,8 @@ features have been implemented:
 * Automatic cross-reference targets
 * Basic Citation support
 * CriticMarkup support
+* Definition lists
+* Footnotes
 * Inline and reference footnotes
 * Image and Link attributes (attributes can now be used with inline links as
 	well as reference links)
@@ -440,9 +438,7 @@ Things that are partially completed:
 	* Locators required?
 * CriticMarkup -- need to decide:
 	* How to handle CM stretches that include blank lines
-* Definition lists
 * Fenced code blocks
-* Footnotes -- need support for multiple blocks inside
 * Headers -- need support for manual labels
 * Metadata
 * Full/Snippet modes
diff --git a/src/html.c b/src/html.c
index 2279175..698f89b 100644
--- a/src/html.c
+++ b/src/html.c
@@ -224,6 +224,12 @@ void mmd_export_link_html(DString * out, const char * source, token * text, link
 
 	print(">");
 
+	// If we're printing contents of bracket as text, then ensure we include it all
+	if (text && text->child && text->child->len > 1) {
+		text->child->next->start--;
+		text->child->next->len++;
+	}
+	
 	mmd_export_token_tree_html(out, source, text->child, offset, scratch);
 
 	print("</a>");
diff --git a/src/main.c b/src/main.c
index b5cd556..76b71d4 100644
--- a/src/main.c
+++ b/src/main.c
@@ -249,7 +249,7 @@ int main(int argc, char** argv) {
 	// Parse options
 	unsigned long extensions = EXT_SMART | EXT_NOTES | EXT_CRITIC;
 
-	if (a_compatibility->count > 0) {
+	if (a_compatibility->count > 0) {	// only when the option was given at least once
 		// Compatibility mode disables certain features
 		// Reset extensions
 		extensions = EXT_COMPATIBILITY | EXT_NO_LABELS | EXT_OBFUSCATE;
diff --git a/src/mmd.c b/src/mmd.c
index 3e31bdd..1258518 100644
--- a/src/mmd.c
+++ b/src/mmd.c
@@ -122,10 +122,18 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) {
 
 		// Brackets, Parentheses, Angles
 		token_pair_engine_add_pairing(e->pairings2, BRACKET_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-		token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-		token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-		token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+
+		if (extensions & EXT_NOTES) {
+			token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+			token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+		} else {
+			token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+			token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+		}
+		
 		token_pair_engine_add_pairing(e->pairings2, BRACKET_VARIABLE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_VARIABLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+		
+		token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
 		token_pair_engine_add_pairing(e->pairings2, PAREN_LEFT, PAREN_RIGHT, PAIR_PAREN, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
 		token_pair_engine_add_pairing(e->pairings2, ANGLE_LEFT, ANGLE_RIGHT, PAIR_ANGLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
 		token_pair_engine_add_pairing(e->pairings2, BRACE_DOUBLE_LEFT, BRACE_DOUBLE_RIGHT, PAIR_BRACES, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
@@ -533,7 +541,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
 				scan_len = scan_ref_citation(&source[line->start]);
 				line->type = (scan_len) ? LINE_DEF_CITATION : LINE_PLAIN;
 			} else {
-				line->type = LINE_PLAIN;
+				scan_len = scan_ref_link_no_attributes(&source[line->start]);
+				line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
 			}
 			break;
 		case BRACKET_FOOTNOTE_LEFT:
@@ -541,7 +550,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
 				scan_len = scan_ref_foot(&source[line->start]);
 				line->type = (scan_len) ? LINE_DEF_FOOTNOTE : LINE_PLAIN;
 			} else {
-				line->type = LINE_PLAIN;
+				scan_len = scan_ref_link_no_attributes(&source[line->start]);
+				line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
 			}
 			break;
 		case PIPE:
diff --git a/src/scanners.c b/src/scanners.c
index 8dae2a1..779d9dc 100644
--- a/src/scanners.c
+++ b/src/scanners.c
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.14.3 on Sat Feb  4 17:39:29 2017 */
+/* Generated by re2c 0.14.3 on Tue Feb  7 20:44:00 2017 */
 /**
 
 	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
@@ -9389,6 +9389,67 @@ yy717:
 }
 
 
+size_t scan_destination(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+	// re2c-generated matcher: returns the length of the leading run of characters
+	// at c other than NUL, tab, LF, CR and space, or 0 if c starts with one of those.
+{
+	char yych;
+	yych = *c;
+	switch (yych) {
+	case 0x00:
+	case '\t':
+	case '\r':
+	case ' ':	goto yy725;
+	case '\n':	goto yy721;
+	case '<':	goto yy722;
+	default:	goto yy724;
+	}
+yy721:	// no match
+	{ return 0; }
+yy722:	// saw a leading '<'
+	++c;
+	yych = *c;
+	goto yy729;
+yy723:	// match: length consumed so far
+	{ return (size_t)( c - start ); }
+yy724:
+	yych = *++c;
+	goto yy727;
+yy725:
+	yych = *++c;
+	goto yy721;
+yy726:	// consume one character of a plain run
+	++c;
+	yych = *c;
+yy727:
+	switch (yych) {
+	case 0x00:
+	case '\t':
+	case '\n':
+	case '\r':
+	case ' ':	goto yy723;
+	default:	goto yy726;
+	}
+yy728:	// consume one character after '<', before any '>'
+	++c;
+	yych = *c;
+yy729:
+	switch (yych) {
+	case 0x00:
+	case '\t':
+	case '\n':
+	case '\r':
+	case ' ':	goto yy723;
+	case '>':	goto yy726;	// after '>' continue as a plain run
+	default:	goto yy728;
+	}
+}
+	
+}
+
+
 #ifdef TEST
 void Test_scan_url(CuTest* tc) {
 	int url_len;
diff --git a/src/scanners.h b/src/scanners.h
index 6abd6bb..e285db4 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -73,6 +73,7 @@ size_t scan_alignment_string(const char * c);
 size_t scan_attr(const char * c);
 size_t scan_attributes(const char * c);
 size_t scan_definition(const char * c);
+size_t scan_destination(const char * c);
 size_t scan_email(const char * c);
 size_t scan_fence_start(const char * c);
 size_t scan_fence_end(const char * c);
diff --git a/src/scanners.re b/src/scanners.re
index e6e8040..f4cedae 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -387,6 +387,17 @@ size_t scan_alignment_string(const char * c) {
 }
 
 
+size_t scan_destination(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+	// Length of the 'destination' match at c, or 0 when it does not match
+/*!re2c
+	destination	{ return (size_t)( c - start ); }
+	.?			{ return 0; }
+*/	
+}
+
+
 #ifdef TEST
 void Test_scan_url(CuTest* tc) {
 	int url_len;
diff --git a/src/writer.c b/src/writer.c
index e511e2a..9810ec7 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -246,7 +246,12 @@ char * text_inside_pair(const char * source, token * pair) {
 	char * result = NULL;
 
 	if (source && pair) {
-		result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1));
+		if (pair->child->mate) {
+			// [foo], [^foo], [#foo] should give different strings -- use closer len
+			result = strndup(&source[pair->start + pair->child->mate->len], pair->len - (pair->child->mate->len * 2));
+		} else {
+			result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1));
+		}
 	}
 
 	return result;
@@ -644,6 +649,67 @@ bool validate_url(const char * url) {
 }
 
 
+// Read a link destination at *remainder: the text inside a leading <...> or
+// (...) pair, otherwise the characters matched by scan_destination().
+// Advances *remainder past what was consumed.  Returns the clean_string()
+// result, or NULL if no token is left or validation was requested and failed.
+char * destination_accept(const char * source, token ** remainder, bool validate) {
+	char * url = NULL;
+	char * clean = NULL;
+	token * t = NULL;
+	size_t start;
+	size_t scan_len;
+
+	// Nothing left in the chain -- no destination to read
+	if (*remainder == NULL)
+		return NULL;
+
+	switch ((*remainder)->type) {
+		case PAIR_PAREN:
+		case PAIR_ANGLE:
+		case PAIR_QUOTE_SINGLE:
+		case PAIR_QUOTE_DOUBLE:
+			t = token_chain_accept_multiple(remainder, 2, PAIR_ANGLE, PAIR_PAREN);
+			url = text_inside_pair(source, t);
+			break;
+		default:
+			// Scanning is character based, so the first token's type does not matter
+			start = (*remainder)->start;
+			// Skip any whitespace
+			while (char_is_whitespace(source[start]))
+				start++;
+			scan_len = scan_destination(&source[start]);
+			url = strndup(&source[start], scan_len);
+
+			// Advance remainder past the destination; 't' follows one token behind.
+			// Stop at the end of the chain instead of dereferencing a NULL token.
+			t = (*remainder)->prev;
+			while (*remainder && (*remainder)->start < start + scan_len) {
+				t = *remainder;
+				*remainder = t->next;
+			}
+
+			// Is there a space in a URL concatenated with a title or attribute?
+			// e.g. [foo]: http://foo.bar/ class="foo"
+			// Since only one space between URL and class, they are joined.
+			if (t && t->type == TEXT_PLAIN) {
+				token_trim_leading_whitespace(t, source);
+				token_split_on_char(t, source, ' ');
+				*remainder = t->next;
+			}
+			break;
+	}
+
+	clean = clean_string(url, false);
+	if (validate && !validate_url(clean)) {
+		free(clean);
+		clean = NULL;
+	}
+	free(url);
+	return clean;
+}
+
+
 char * url_accept(const char * source, token ** remainder, bool validate) {
 	char * url = NULL;
 	char * clean = NULL;
@@ -880,7 +946,35 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
 	
 	// Prepare for parsing
 
+	// Account for settings
+
 	switch (label->type) {
+		case PAIR_BRACKET_CITATION:
+			if (e->extensions & EXT_NOTES) {
+				if (!token_chain_accept(remainder, COLON))
+					return false;
+
+				title = *remainder;		// Track first token of content in 'title'
+				f = footnote_new(e->dstr->str, label, title);
+
+				// Store citation for later use
+				stack_push(e->citation_stack, f);
+				
+				break;
+			}	// EXT_NOTES off: fall through (the next case's check fails too)
+		case PAIR_BRACKET_FOOTNOTE:
+			if (e->extensions & EXT_NOTES) {
+				if (!token_chain_accept(remainder, COLON))
+					return false;
+
+				title = *remainder;		// Track first token of content in 'title'
+				f = footnote_new(e->dstr->str, label, title);
+
+				// Store footnote for later use
+				stack_push(e->footnote_stack, f);
+				
+				break;
+			}	// EXT_NOTES off: fall through and handle as a reference link definition
 		case PAIR_BRACKET:
 			// Reference Link Definition
 
@@ -890,8 +984,8 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
 			// Skip space
 			whitespace_accept(remainder);
 
-			// Grab URL
-			url_char = url_accept(e->dstr->str, remainder, false);
+			// Grab destination
+			url_char = destination_accept(e->dstr->str, remainder, false);
 
 			whitespace_accept(remainder);
 
@@ -941,28 +1035,6 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
 			if (l)
 				stack_push(e->link_stack, l);
 
-			break;
-		case PAIR_BRACKET_CITATION:
-			if (!token_chain_accept(remainder, COLON))
-				return false;
-
-			title = *remainder;		// Track first token of content in 'title'
-			f = footnote_new(e->dstr->str, label, title);
-
-			// Store citation for later use
-			stack_push(e->citation_stack, f);
-			
-			break;
-		case PAIR_BRACKET_FOOTNOTE:
-			if (!token_chain_accept(remainder, COLON))
-				return false;
-
-			title = *remainder;		// Track first token of content in 'title'
-			f = footnote_new(e->dstr->str, label, title);
-
-			// Store footnote for later use
-			stack_push(e->footnote_stack, f);
-			
 			break;
 		case PAIR_BRACKET_VARIABLE:
 			fprintf(stderr, "Process variable:\n");
@@ -990,6 +1062,7 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
 void process_definition_block(mmd_engine * e, token * block) {
 	footnote * f;
 
+
 	token * label = block->child;
 	if (label->type == BLOCK_PARA)
 		label = label->child;
@@ -1433,4 +1506,4 @@ void strip_leading_whitespace(token * chain, const char * source) {
 
 		chain = chain->next;
 	}
-}
\ No newline at end of file
+}
diff --git a/templates/README.md.in b/templates/README.md.in
index 4828810..9e8b908 100644
--- a/templates/README.md.in
+++ b/templates/README.md.in
@@ -398,14 +398,10 @@ most circumstances.
 
 		</div>
 
-5. I haven't worked a lot yet on the MMD-specific features, so there may be
-more changes to come.  One thing I do anticipate is that if fenced code blocks
-stay, they will work slightly differently.  Currently, an opening fence
-doesn't mean anything unless there is a closing fence that follows it.  Again,
-this requires backtracking in the parser.  I suspect that an opening fence
-will definitely open a code block.  If there is no closing fence, then the
-rest of the document will remain inside the code block.  This is the approach
-used by CommonMark and it's a reasonable one, IMO.
+5. "Malformed" reference link definitions are handled slightly differently.
+For example, `Reference Footnotes.text` is parsed differently in compatibility
+mode than MMD-5.  This started as a side-effect of the parsing algorithm, but
+I actually think it makes sense.  This may or may not change in the future.
 
 
 ## Where Does MultiMarkdown 6 Stand? ##
@@ -420,6 +416,8 @@ features have been implemented:
 * Automatic cross-reference targets
 * Basic Citation support
 * CriticMarkup support
+* Definition lists
+* Footnotes
 * Inline and reference footnotes
 * Image and Link attributes (attributes can now be used with inline links as
 	well as reference links)
@@ -440,9 +438,7 @@ Things that are partially completed:
 	* Locators required?
 * CriticMarkup -- need to decide:
 	* How to handle CM stretches that include blank lines
-* Definition lists
 * Fenced code blocks
-* Footnotes -- need support for multiple blocks inside
 * Headers -- need support for manual labels
 * Metadata
 * Full/Snippet modes
diff --git a/tests/MMD6Tests/Reference Footnotes.html b/tests/MMD6Tests/Reference Footnotes.html
index eeb3645..7ac7822 100644
--- a/tests/MMD6Tests/Reference Footnotes.html	
+++ b/tests/MMD6Tests/Reference Footnotes.html	
@@ -1,20 +1,31 @@
-<p>Reference.<a href="#fn:1" id="fnref:1" title="see footnote" class="footnote">[1]</a></p>
+<p>foo.<a href="#fn:1" id="fnref:1" title="see footnote" class="footnote">[1]</a></p>
 
-<p>Reference.<a href="#fn:2" id="fnref:2" title="see footnote" class="footnote">[2]</a></p>
+<p>foo.<a href="#fn:2" id="fnref:2" title="see footnote" class="footnote">[2]</a></p>
+
+<p>foo.<a href="#fn:3" id="fnref:3" title="see footnote" class="footnote">[3]</a></p>
 
 <div class="footnotes">
 <hr />
 <ol>
 
 <li id="fn:1">
-<p>This is a <em>short</em> footnote. <a href="#fnref:1" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+<p><em>foo</em> <a href="#fnref:1" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
 </li>
 
 <li id="fn:2">
-<p>This is a longer footnote.
-With two lines. <a href="#fnref:2" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+<p><em>foo
+bar</em> <a href="#fnref:2" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+</li>
+
+<li id="fn:3">
+<p>foo</p>
+
+<p><em>bar</em> <a href="#fnref:3" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+
+<ul>
+<li>bat</li>
+</ul>
 </li>
 
 </ol>
 </div>
-
diff --git a/tests/MMD6Tests/Reference Footnotes.htmlc b/tests/MMD6Tests/Reference Footnotes.htmlc
index e7a323c..df0aa3e 100644
--- a/tests/MMD6Tests/Reference Footnotes.htmlc	
+++ b/tests/MMD6Tests/Reference Footnotes.htmlc	
@@ -1,7 +1,10 @@
-<p>Reference.[^foo]</p>
+<p>foo.<a href="*foo*">^bar</a></p>
 
-<p>Reference.[^foo2]</p>
+<p>foo.<a href="*foo">^bar2</a></p>
 
-<p>[^foo]: This is a <em>short</em> footnote.
-[^foo2]: This is a longer footnote.
-With two lines.</p>
+<p>foo.<a href="foo">^bar3</a></p>
+
+<pre><code>*bar*
+
+* bat
+</code></pre>
diff --git a/tests/MMD6Tests/Reference Footnotes.text b/tests/MMD6Tests/Reference Footnotes.text
index c254336..2376ec1 100644
--- a/tests/MMD6Tests/Reference Footnotes.text	
+++ b/tests/MMD6Tests/Reference Footnotes.text	
@@ -1,8 +1,14 @@
-Reference.[^foo]
+foo.[^bar]
 
-Reference.[^foo2]
+foo.[^bar2]
 
+foo.[^bar3]
 
-[^foo]: This is a *short* footnote.
-[^foo2]: This is a longer footnote.
-With two lines.
+[^bar]: *foo*
+[^bar2]: *foo
+bar*
+[^bar3]: foo
+
+	*bar*
+
+	* bat