FIXED: Fix compatibility mode handling of footnote and citation labels

author Fletcher T. Penney <fletcher@fletcherpenney.net>

Wed, 8 Feb 2017 03:38:32 +0000 (22:38 -0500)

committer Fletcher T. Penney <fletcher@fletcherpenney.net>

Wed, 8 Feb 2017 03:38:32 +0000 (22:38 -0500)
author Fletcher T. Penney <fletcher@fletcherpenney.net>
Wed, 8 Feb 2017 03:38:32 +0000 (22:38 -0500)
committer Fletcher T. Penney <fletcher@fletcherpenney.net>
Wed, 8 Feb 2017 03:38:32 +0000 (22:38 -0500)
diff --git a/README.md b/README.md

index 6638a3099e1f217698a90a7fb2392378adbc85bf..0b779796da24ca374263a1b53da99f1e40591c8f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -398,14 +398,10 @@ most circumstances.
  
                 </div>
  
-5. I haven't worked a lot yet on the MMD-specific features, so there may be
-more changes to come.  One thing I do anticipate is that if fenced code blocks
-stay, they will work slightly differently.  Currently, an opening fence
-doesn't mean anything unless there is a closing fence that follows it.  Again,
-this requires backtracking in the parser.  I suspect that an opening fence
-will definitely open a code block.  If there is no closing fence, then the
-rest of the document will remain inside the code block.  This is the approach
-used by CommonMark and it's a reasonable one, IMO.
+5. "Malformed" reference link definitions are handled slightly differently.
+For example, `Reference Footnotes.text` is parsed differently in compatibility
+mode than MMD-5.  This started as a side-effect of the parsing algorithm, but
+I actually think it makes sense.  This may or may not change in the future.
  
  
  ## Where Does MultiMarkdown 6 Stand? ##
@@ -420,6 +416,8 @@ features have been implemented:
  * Automatic cross-reference targets
  * Basic Citation support
  * CriticMarkup support
+* Definition lists
+* Footnotes
  * Inline and reference footnotes
  * Image and Link attributes (attributes can now be used with inline links as
         well as reference links)
@@ -440,9 +438,7 @@ Things that are partially completed:
         * Locators required?
  * CriticMarkup -- need to decide:
         * How to handle CM stretches that include blank lines
-* Definition lists
  * Fenced code blocks
-* Footnotes -- need support for multiple blocks inside
  * Headers -- need support for manual labels
  * Metadata
  * Full/Snippet modes
diff --git a/src/html.c b/src/html.c

index 22791751d18bdb750c13b58362479cab4ee6471a..698f89bebb14395f0e0242558453fd3a32a11559 100644 (file)
--- a/src/html.c
+++ b/src/html.c
@@ -224,6 +224,12 @@ void mmd_export_link_html(DString * out, const char * source, token * text, link
  
         print(">");
  
+       // If we're printing contents of bracket as text, then ensure we include it all
+       if (text && text->child && text->child->len > 1) {
+               text->child->next->start--;
+               text->child->next->len++;
+       }
+       
         mmd_export_token_tree_html(out, source, text->child, offset, scratch);
  
         print("</a>");
diff --git a/src/main.c b/src/main.c

index b5cd556ab075838554c2ae5096c73f3b38577c8d..76b71d4fa79648dfcc07f14ed3c209e3c36c4267 100644 (file)
--- a/src/main.c
+++ b/src/main.c
@@ -249,7 +249,7 @@ int main(int argc, char** argv) {
         // Parse options
         unsigned long extensions = EXT_SMART | EXT_NOTES | EXT_CRITIC;
  
-       if (a_compatibility->count > 0) {
+       if (a_compatibility->count > 0) {       // only when the compatibility option was actually given
                 // Compatibility mode disables certain features
                 // Reset extensions
                 extensions = EXT_COMPATIBILITY | EXT_NO_LABELS | EXT_OBFUSCATE;
diff --git a/src/mmd.c b/src/mmd.c

index 3e31bdd61fb8da6e2595554d4de24d655a0460ea..1258518b1e327f7a6d5d576a991550b771413944 100644 (file)
--- a/src/mmd.c
+++ b/src/mmd.c
@@ -122,10 +122,18 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) {
  
                 // Brackets, Parentheses, Angles
                 token_pair_engine_add_pairing(e->pairings2, BRACKET_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-               token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-               token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-               token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+
+               if (extensions & EXT_NOTES) {
+                       token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+                       token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+               } else {
+                       token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+                       token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+               }
+               
                 token_pair_engine_add_pairing(e->pairings2, BRACKET_VARIABLE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_VARIABLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+               
+               token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
                 token_pair_engine_add_pairing(e->pairings2, PAREN_LEFT, PAREN_RIGHT, PAIR_PAREN, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
                 token_pair_engine_add_pairing(e->pairings2, ANGLE_LEFT, ANGLE_RIGHT, PAIR_ANGLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
                 token_pair_engine_add_pairing(e->pairings2, BRACE_DOUBLE_LEFT, BRACE_DOUBLE_RIGHT, PAIR_BRACES, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
@@ -533,7 +541,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
                                 scan_len = scan_ref_citation(&source[line->start]);
                                 line->type = (scan_len) ? LINE_DEF_CITATION : LINE_PLAIN;
                         } else {
-                               line->type = LINE_PLAIN;
+                               scan_len = scan_ref_link_no_attributes(&source[line->start]);
+                               line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
                         }
                         break;
                 case BRACKET_FOOTNOTE_LEFT:
@@ -541,7 +550,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
                                 scan_len = scan_ref_foot(&source[line->start]);
                                 line->type = (scan_len) ? LINE_DEF_FOOTNOTE : LINE_PLAIN;
                         } else {
-                               line->type = LINE_PLAIN;
+                               scan_len = scan_ref_link_no_attributes(&source[line->start]);
+                               line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
                         }
                         break;
                 case PIPE:
diff --git a/src/scanners.c b/src/scanners.c

index 8dae2a147b95df1894e584ae7d65b5c1f75490f7..779d9dce87423ae22bb3062bab6b4894fa27ec65 100644 (file)
--- a/src/scanners.c
+++ b/src/scanners.c
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.14.3 on Sat Feb  4 17:39:29 2017 */
+/* Generated by re2c 0.14.3 on Tue Feb  7 20:44:00 2017 */
  /**
  
         MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
@@ -9389,6 +9389,67 @@ yy717:
  }
  
  
+size_t scan_destination(const char * c) {
+       const char * marker = NULL;
+       const char * start = c;
+
+
+{
+       char yych;
+       yych = *c;
+       switch (yych) {
+       case 0x00:
+       case '\t':
+       case '\r':
+       case ' ':       goto yy725;
+       case '\n':      goto yy721;
+       case '<':       goto yy722;
+       default:        goto yy724;
+       }
+yy721:
+       { return 0; }
+yy722:
+       ++c;
+       yych = *c;
+       goto yy729;
+yy723:
+       { return (size_t)( c - start ); }
+yy724:
+       yych = *++c;
+       goto yy727;
+yy725:
+       yych = *++c;
+       goto yy721;
+yy726:
+       ++c;
+       yych = *c;
+yy727:
+       switch (yych) {
+       case 0x00:
+       case '\t':
+       case '\n':
+       case '\r':
+       case ' ':       goto yy723;
+       default:        goto yy726;
+       }
+yy728:
+       ++c;
+       yych = *c;
+yy729:
+       switch (yych) {
+       case 0x00:
+       case '\t':
+       case '\n':
+       case '\r':
+       case ' ':       goto yy723;
+       case '>':       goto yy726;
+       default:        goto yy728;
+       }
+}
+       
+}
+
+
  #ifdef TEST
  void Test_scan_url(CuTest* tc) {
         int url_len;
diff --git a/src/scanners.h b/src/scanners.h

index 6abd6bb7a377ba2dee66a565dfaa6cebaa1cd474..e285db4a47f5be1fb519205a75a858c553ff07a6 100644 (file)
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -73,6 +73,7 @@ size_t scan_alignment_string(const char * c);
  size_t scan_attr(const char * c);
  size_t scan_attributes(const char * c);
  size_t scan_definition(const char * c);
+size_t scan_destination(const char * c);
  size_t scan_email(const char * c);
  size_t scan_fence_start(const char * c);
  size_t scan_fence_end(const char * c);
diff --git a/src/scanners.re b/src/scanners.re

index e6e804038ae82ccd55461b0353cafcb6c0138fe9..f4cedae49eb94b318a89b5a7573e9b5b026893de 100644 (file)
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -387,6 +387,17 @@ size_t scan_alignment_string(const char * c) {
  }
  
  
+size_t scan_destination(const char * c) {
+       const char * marker = NULL;
+       const char * start = c;
+       // Returns the length of the `destination` match at the start of c, or 0 if there is none
+/*!re2c
+       destination     { return (size_t)( c - start ); }
+       .?                      { return 0; }
+*/     
+}
+
+
  #ifdef TEST
  void Test_scan_url(CuTest* tc) {
         int url_len;
diff --git a/src/writer.c b/src/writer.c

index e511e2adba7d4bba5f4a12983ddb4769e99864f1..9810ec755a74aa86efc0e60af4ba882b8633e1d2 100644 (file)
--- a/src/writer.c
+++ b/src/writer.c
@@ -246,7 +246,12 @@ char * text_inside_pair(const char * source, token * pair) {
         char * result = NULL;
  
         if (source && pair) {
-               result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1));
+               if (pair->child->mate) {
+                       // [foo], [^foo], [#foo] should give different strings -- use closer len
+                       result = strndup(&source[pair->start + pair->child->mate->len], pair->len - (pair->child->mate->len * 2));
+               } else {
+                       result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1));
+               }
         }
  
         return result;
@@ -644,6 +649,67 @@ bool validate_url(const char * url) {
  }
  
  
+// Read a link destination starting at *remainder and advance *remainder past it.
+// Returns the clean_string() result; NULL if *remainder is NULL or, with validate set, validate_url() rejects it.
+char * destination_accept(const char * source, token ** remainder, bool validate) {
+       char * url = NULL;
+       char * clean = NULL;
+       token * t = NULL;
+       size_t start;
+       size_t scan_len;
+
+       if (*remainder == NULL)
+               return NULL;
+
+       switch ((*remainder)->type) {
+               case PAIR_PAREN:
+               case PAIR_ANGLE:
+               case PAIR_QUOTE_SINGLE:
+               case PAIR_QUOTE_DOUBLE:
+                       t = token_chain_accept_multiple(remainder, 2, PAIR_ANGLE, PAIR_PAREN);
+                       url = text_inside_pair(source, t);
+                       break;
+               case TEXT_PLAIN:
+                       start = (*remainder)->start;
+                       // Skip any whitespace
+                       while (char_is_whitespace(source[start]))
+                               start++;
+                       // Grab destination string
+                       scan_len = scan_destination(&source[start]);
+                       url = strndup(&source[start], scan_len);
+
+                       // Advance remainder; t tracks the last token consumed in case the chain ends here
+                       while (*remainder && (*remainder)->start < start + scan_len) {
+                               t = *remainder;
+                               *remainder = (*remainder)->next;
+                       }
+                       if (*remainder)
+                               t = (*remainder)->prev;
+
+                       // Is there a space in a URL concatenated with a title or attribute?
+                       // e.g. [foo]: http://foo.bar/ class="foo"
+                       // Since only one space between URL and class, they are joined.
+                       if (t && t->type == TEXT_PLAIN) {
+                               // Trim leading whitespace
+                               token_trim_leading_whitespace(t, source);
+                               token_split_on_char(t, source, ' ');
+                               *remainder = t->next;
+                       }
+                       break;
+       }
+
+       // Is this a valid URL?
+       clean = clean_string(url, false);
+       if (validate && !validate_url(clean)) {
+               free(clean);
+               clean = NULL;
+       }
+
+       free(url);
+       return clean;
+}
+
+
  char * url_accept(const char * source, token ** remainder, bool validate) {
         char * url = NULL;
         char * clean = NULL;
@@ -880,7 +946,35 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
         
         // Prepare for parsing
  
+       // Account for settings
+
         switch (label->type) {
+               case PAIR_BRACKET_CITATION:
+                       if (e->extensions & EXT_NOTES) {
+                               if (!token_chain_accept(remainder, COLON))
+                                       return false;
+
+                               title = *remainder;             // Track first token of content in 'title'
+                               f = footnote_new(e->dstr->str, label, title);
+
+                               // Store citation for later use
+                               stack_push(e->citation_stack, f);
+                               
+                               break;
+                       }
+               case PAIR_BRACKET_FOOTNOTE:
+                       if (e->extensions & EXT_NOTES) {
+                               if (!token_chain_accept(remainder, COLON))
+                                       return false;
+
+                               title = *remainder;             // Track first token of content in 'title'
+                               f = footnote_new(e->dstr->str, label, title);
+
+                               // Store footnote for later use
+                               stack_push(e->footnote_stack, f);
+                               
+                               break;
+                       }
                 case PAIR_BRACKET:
                         // Reference Link Definition
  
@@ -890,8 +984,8 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
                         // Skip space
                         whitespace_accept(remainder);
  
-                       // Grab URL
-                       url_char = url_accept(e->dstr->str, remainder, false);
+                       // Grab destination
+                       url_char = destination_accept(e->dstr->str, remainder, false);
  
                         whitespace_accept(remainder);
  
@@ -941,28 +1035,6 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
                         if (l)
                                 stack_push(e->link_stack, l);
  
-                       break;
-               case PAIR_BRACKET_CITATION:
-                       if (!token_chain_accept(remainder, COLON))
-                               return false;
-
-                       title = *remainder;             // Track first token of content in 'title'
-                       f = footnote_new(e->dstr->str, label, title);
-
-                       // Store citation for later use
-                       stack_push(e->citation_stack, f);
-                       
-                       break;
-               case PAIR_BRACKET_FOOTNOTE:
-                       if (!token_chain_accept(remainder, COLON))
-                               return false;
-
-                       title = *remainder;             // Track first token of content in 'title'
-                       f = footnote_new(e->dstr->str, label, title);
-
-                       // Store footnote for later use
-                       stack_push(e->footnote_stack, f);
-                       
                         break;
                 case PAIR_BRACKET_VARIABLE:
                         fprintf(stderr, "Process variable:\n");
@@ -990,6 +1062,7 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
  void process_definition_block(mmd_engine * e, token * block) {
         footnote * f;
  
+
         token * label = block->child;
         if (label->type == BLOCK_PARA)
                 label = label->child;
@@ -1433,4 +1506,4 @@ void strip_leading_whitespace(token * chain, const char * source) {
  
                 chain = chain->next;
         }
-}
-\ No newline at end of file
+}
diff --git a/templates/README.md.in b/templates/README.md.in

index 482881058d9c3d46709102277557c7e70ca1aca2..9e8b908db5955e5387a9316a19c4ca4ddded1b30 100644 (file)
--- a/templates/README.md.in
+++ b/templates/README.md.in
@@ -398,14 +398,10 @@ most circumstances.
  
                 </div>
  
-5. I haven't worked a lot yet on the MMD-specific features, so there may be
-more changes to come.  One thing I do anticipate is that if fenced code blocks
-stay, they will work slightly differently.  Currently, an opening fence
-doesn't mean anything unless there is a closing fence that follows it.  Again,
-this requires backtracking in the parser.  I suspect that an opening fence
-will definitely open a code block.  If there is no closing fence, then the
-rest of the document will remain inside the code block.  This is the approach
-used by CommonMark and it's a reasonable one, IMO.
+5. "Malformed" reference link definitions are handled slightly differently.
+For example, `Reference Footnotes.text` is parsed differently in compatibility
+mode than MMD-5.  This started as a side-effect of the parsing algorithm, but
+I actually think it makes sense.  This may or may not change in the future.
  
  
  ## Where Does MultiMarkdown 6 Stand? ##
@@ -420,6 +416,8 @@ features have been implemented:
  * Automatic cross-reference targets
  * Basic Citation support
  * CriticMarkup support
+* Definition lists
+* Footnotes
  * Inline and reference footnotes
  * Image and Link attributes (attributes can now be used with inline links as
         well as reference links)
@@ -440,9 +438,7 @@ Things that are partially completed:
         * Locators required?
  * CriticMarkup -- need to decide:
         * How to handle CM stretches that include blank lines
-* Definition lists
  * Fenced code blocks
-* Footnotes -- need support for multiple blocks inside
  * Headers -- need support for manual labels
  * Metadata
  * Full/Snippet modes
diff --git a/tests/MMD6Tests/Reference Footnotes.html b/tests/MMD6Tests/Reference Footnotes.html

index eeb3645a1879891a12e15ae737eb239acf63310f..7ac782209e7871eefd4adc5b0659077bcb34eab3 100644 (file)
--- a/tests/MMD6Tests/Reference Footnotes.html
+++ b/tests/MMD6Tests/Reference Footnotes.html
@@ -1,20 +1,31 @@
-<p>Reference.<a href="#fn:1" id="fnref:1" title="see footnote" class="footnote">[1]</a></p>
+<p>foo.<a href="#fn:1" id="fnref:1" title="see footnote" class="footnote">[1]</a></p>
  
-<p>Reference.<a href="#fn:2" id="fnref:2" title="see footnote" class="footnote">[2]</a></p>
+<p>foo.<a href="#fn:2" id="fnref:2" title="see footnote" class="footnote">[2]</a></p>
+
+<p>foo.<a href="#fn:3" id="fnref:3" title="see footnote" class="footnote">[3]</a></p>
  
  <div class="footnotes">
  <hr />
  <ol>
  
  <li id="fn:1">
-<p>This is a <em>short</em> footnote. <a href="#fnref:1" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+<p><em>foo</em> <a href="#fnref:1" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
  </li>
  
  <li id="fn:2">
-<p>This is a longer footnote.
-With two lines. <a href="#fnref:2" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+<p><em>foo
+bar</em> <a href="#fnref:2" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+</li>
+
+<li id="fn:3">
+<p>foo</p>
+
+<p><em>bar</em> <a href="#fnref:3" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+
+<ul>
+<li>bat</li>
+</ul>
  </li>
  
  </ol>
  </div>
-
diff --git a/tests/MMD6Tests/Reference Footnotes.htmlc b/tests/MMD6Tests/Reference Footnotes.htmlc

index e7a323c3680ae934399e4322d8f81d9c4c6c8b51..df0aa3e0d8bcd1cee847bbcf6a1af8fc6efbb5ad 100644 (file)
--- a/tests/MMD6Tests/Reference Footnotes.htmlc
+++ b/tests/MMD6Tests/Reference Footnotes.htmlc
@@ -1,7 +1,10 @@
-<p>Reference.[^foo]</p>
+<p>foo.<a href="*foo*">^bar</a></p>
  
-<p>Reference.[^foo2]</p>
+<p>foo.<a href="*foo">^bar2</a></p>
  
-<p>[^foo]: This is a <em>short</em> footnote.
-[^foo2]: This is a longer footnote.
-With two lines.</p>
+<p>foo.<a href="foo">^bar3</a></p>
+
+<pre><code>*bar*
+
+* bat
+</code></pre>
diff --git a/tests/MMD6Tests/Reference Footnotes.text b/tests/MMD6Tests/Reference Footnotes.text

index c2543367c6ffac7c716124850b3ace58d9ef8169..2376ec13ed3530e36cd645126f28c84fd7ec94ca 100644 (file)
--- a/tests/MMD6Tests/Reference Footnotes.text
+++ b/tests/MMD6Tests/Reference Footnotes.text
@@ -1,8 +1,14 @@
-Reference.[^foo]
+foo.[^bar]
  
-Reference.[^foo2]
+foo.[^bar2]
  
+foo.[^bar3]
  
-[^foo]: This is a *short* footnote.
-[^foo2]: This is a longer footnote.
-With two lines.
+[^bar]: *foo*
+[^bar2]: *foo
+bar*
+[^bar3]: foo
+
+       *bar*
+
+       * bat
author	Fletcher T. Penney <fletcher@fletcherpenney.net>
	Wed, 8 Feb 2017 03:38:32 +0000 (22:38 -0500)
committer	Fletcher T. Penney <fletcher@fletcherpenney.net>
	Wed, 8 Feb 2017 03:38:32 +0000 (22:38 -0500)
README.md		patch \| blob \| history
src/html.c		patch \| blob \| history
src/main.c		patch \| blob \| history
src/mmd.c		patch \| blob \| history
src/scanners.c		patch \| blob \| history
src/scanners.h		patch \| blob \| history
src/scanners.re		patch \| blob \| history
src/writer.c		patch \| blob \| history
templates/README.md.in		patch \| blob \| history
tests/MMD6Tests/Reference Footnotes.html		patch \| blob \| history
tests/MMD6Tests/Reference Footnotes.htmlc		patch \| blob \| history
tests/MMD6Tests/Reference Footnotes.text		patch \| blob \| history