]> granicus.if.org Git - multimarkdown/commitdiff
FIXED: Fix compatibility mode handling of footnote and citation labels
authorFletcher T. Penney <fletcher@fletcherpenney.net>
Wed, 8 Feb 2017 03:38:32 +0000 (22:38 -0500)
committerFletcher T. Penney <fletcher@fletcherpenney.net>
Wed, 8 Feb 2017 03:38:32 +0000 (22:38 -0500)
12 files changed:
README.md
src/html.c
src/main.c
src/mmd.c
src/scanners.c
src/scanners.h
src/scanners.re
src/writer.c
templates/README.md.in
tests/MMD6Tests/Reference Footnotes.html
tests/MMD6Tests/Reference Footnotes.htmlc
tests/MMD6Tests/Reference Footnotes.text

index 6638a3099e1f217698a90a7fb2392378adbc85bf..0b779796da24ca374263a1b53da99f1e40591c8f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -398,14 +398,10 @@ most circumstances.
 
                </div>
 
-5. I haven't worked a lot yet on the MMD-specific features, so there may be
-more changes to come.  One thing I do anticipate is that if fenced code blocks
-stay, they will work slightly differently.  Currently, an opening fence
-doesn't mean anything unless there is a closing fence that follows it.  Again,
-this requires backtracking in the parser.  I suspect that an opening fence
-will definitely open a code block.  If there is no closing fence, then the
-rest of the document will remain inside the code block.  This is the approach
-used by CommonMark and it's a reasonable one, IMO.
+5. "Malformed" reference link definitions are handled slightly differently.
+For example, `Reference Footnotes.text` is parsed differently in compatibility
+mode than MMD-5.  This started as a side-effect of the parsing algorithm, but
+I actually think it makes sense.  This may or may not change in the future.
 
 
 ## Where Does MultiMarkdown 6 Stand? ##
@@ -420,6 +416,8 @@ features have been implemented:
 * Automatic cross-reference targets
 * Basic Citation support
 * CriticMarkup support
+* Definition lists
+* Footnotes
 * Inline and reference footnotes
 * Image and Link attributes (attributes can now be used with inline links as
        well as reference links)
@@ -440,9 +438,7 @@ Things that are partially completed:
        * Locators required?
 * CriticMarkup -- need to decide:
        * How to handle CM stretches that include blank lines
-* Definition lists
 * Fenced code blocks
-* Footnotes -- need support for multiple blocks inside
 * Headers -- need support for manual labels
 * Metadata
 * Full/Snippet modes
index 22791751d18bdb750c13b58362479cab4ee6471a..698f89bebb14395f0e0242558453fd3a32a11559 100644 (file)
@@ -224,6 +224,12 @@ void mmd_export_link_html(DString * out, const char * source, token * text, link
 
        print(">");
 
+       // If we're printing contents of bracket as text, then ensure we include it all
+       if (text && text->child && text->child->len > 1) {
+               text->child->next->start--;
+               text->child->next->len++;
+       }
+       
        mmd_export_token_tree_html(out, source, text->child, offset, scratch);
 
        print("</a>");
index b5cd556ab075838554c2ae5096c73f3b38577c8d..76b71d4fa79648dfcc07f14ed3c209e3c36c4267 100644 (file)
@@ -249,7 +249,7 @@ int main(int argc, char** argv) {
        // Parse options
        unsigned long extensions = EXT_SMART | EXT_NOTES | EXT_CRITIC;
 
-       if (a_compatibility->count > 0) {
+       if (a_compatibility->count > 0) { // only when the compatibility flag was actually given on the command line
                // Compatibility mode disables certain features
                // Reset extensions
                extensions = EXT_COMPATIBILITY | EXT_NO_LABELS | EXT_OBFUSCATE;
index 3e31bdd61fb8da6e2595554d4de24d655a0460ea..1258518b1e327f7a6d5d576a991550b771413944 100644 (file)
--- a/src/mmd.c
+++ b/src/mmd.c
@@ -122,10 +122,18 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) {
 
                // Brackets, Parentheses, Angles
                token_pair_engine_add_pairing(e->pairings2, BRACKET_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-               token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-               token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
-               token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+
+               if (extensions & EXT_NOTES) {
+                       token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+                       token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+               } else {
+                       token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+                       token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+               }
+               
                token_pair_engine_add_pairing(e->pairings2, BRACKET_VARIABLE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_VARIABLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+               
+               token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
                token_pair_engine_add_pairing(e->pairings2, PAREN_LEFT, PAREN_RIGHT, PAIR_PAREN, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
                token_pair_engine_add_pairing(e->pairings2, ANGLE_LEFT, ANGLE_RIGHT, PAIR_ANGLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
                token_pair_engine_add_pairing(e->pairings2, BRACE_DOUBLE_LEFT, BRACE_DOUBLE_RIGHT, PAIR_BRACES, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
@@ -533,7 +541,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
                                scan_len = scan_ref_citation(&source[line->start]);
                                line->type = (scan_len) ? LINE_DEF_CITATION : LINE_PLAIN;
                        } else {
-                               line->type = LINE_PLAIN;
+                               scan_len = scan_ref_link_no_attributes(&source[line->start]);
+                               line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
                        }
                        break;
                case BRACKET_FOOTNOTE_LEFT:
@@ -541,7 +550,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
                                scan_len = scan_ref_foot(&source[line->start]);
                                line->type = (scan_len) ? LINE_DEF_FOOTNOTE : LINE_PLAIN;
                        } else {
-                               line->type = LINE_PLAIN;
+                               scan_len = scan_ref_link_no_attributes(&source[line->start]);
+                               line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
                        }
                        break;
                case PIPE:
index 8dae2a147b95df1894e584ae7d65b5c1f75490f7..779d9dce87423ae22bb3062bab6b4894fa27ec65 100644 (file)
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.14.3 on Sat Feb  4 17:39:29 2017 */
+/* Generated by re2c 0.14.3 on Tue Feb  7 20:44:00 2017 */
 /**
 
        MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
@@ -9389,6 +9389,67 @@ yy717:
 }
 
 
+size_t scan_destination(const char * c) { // re2c-generated scanner: length of the destination matched at c, or 0 if none
+       const char * marker = NULL; // not referenced by the state machine below
+       const char * start = c; // origin of the match, used to compute the returned length
+
+
+{
+       char yych;
+       yych = *c;
+       switch (yych) { // dispatch on the first character
+       case 0x00:
+       case '\t':
+       case '\r':
+       case ' ':       goto yy725;
+       case '\n':      goto yy721;
+       case '<':       goto yy722;
+       default:        goto yy724;
+       }
+yy721: // no destination at this position
+       { return 0; }
+yy722: // saw '<': continue in the angle-bracket state
+       ++c;
+       yych = *c;
+       goto yy729;
+yy723: // end of match: report the number of bytes consumed
+       { return (size_t)( c - start ); }
+yy724: // ordinary first character: continue in the plain run
+       yych = *++c;
+       goto yy727;
+yy725: // NUL, tab, CR or space: consume it and report no match
+       yych = *++c;
+       goto yy721;
+yy726:
+       ++c;
+       yych = *c;
+yy727: // plain run: keep consuming until NUL or whitespace
+       switch (yych) {
+       case 0x00:
+       case '\t':
+       case '\n':
+       case '\r':
+       case ' ':       goto yy723;
+       default:        goto yy726;
+       }
+yy728:
+       ++c;
+       yych = *c;
+yy729: // after '<': NUL/whitespace ends the match, '>' rejoins the plain run
+       switch (yych) {
+       case 0x00:
+       case '\t':
+       case '\n':
+       case '\r':
+       case ' ':       goto yy723;
+       case '>':       goto yy726;
+       default:        goto yy728;
+       }
+}
+       
+}
+
+
 #ifdef TEST
 void Test_scan_url(CuTest* tc) {
        int url_len;
index 6abd6bb7a377ba2dee66a565dfaa6cebaa1cd474..e285db4a47f5be1fb519205a75a858c553ff07a6 100644 (file)
@@ -73,6 +73,7 @@ size_t scan_alignment_string(const char * c);
 size_t scan_attr(const char * c);
 size_t scan_attributes(const char * c);
 size_t scan_definition(const char * c);
+size_t scan_destination(const char * c);
 size_t scan_email(const char * c);
 size_t scan_fence_start(const char * c);
 size_t scan_fence_end(const char * c);
index e6e804038ae82ccd55461b0353cafcb6c0138fe9..f4cedae49eb94b318a89b5a7573e9b5b026893de 100644 (file)
@@ -387,6 +387,17 @@ size_t scan_alignment_string(const char * c) {
 }
 
 
+size_t scan_destination(const char * c) { // length of the `destination` pattern matched at c, or 0 when nothing matches
+       const char * marker = NULL; // NOTE(review): presumably the re2c backtrack marker (YYMARKER) -- confirm against the macro definitions
+       const char * start = c; // origin of the match, used to compute the returned length
+
+/*!re2c
+       destination     { return (size_t)( c - start ); }
+       .?                      { return 0; }
+*/     
+}
+
+
 #ifdef TEST
 void Test_scan_url(CuTest* tc) {
        int url_len;
index e511e2adba7d4bba5f4a12983ddb4769e99864f1..9810ec755a74aa86efc0e60af4ba882b8633e1d2 100644 (file)
@@ -246,7 +246,12 @@ char * text_inside_pair(const char * source, token * pair) {
        char * result = NULL;
 
        if (source && pair) {
-               result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1));
+               if (pair->child->mate) {
+                       // [foo], [^foo], [#foo] should give different strings -- use closer len
+                       result = strndup(&source[pair->start + pair->child->mate->len], pair->len - (pair->child->mate->len * 2));
+               } else {
+                       result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1));
+               }
        }
 
        return result;
@@ -644,6 +649,67 @@ bool validate_url(const char * url) {
 }
 
 
+// Extract a link destination from the token chain at *remainder.
+//
+//     source    - document text that the token offsets index into
+//     remainder - in/out cursor into the token chain; advanced past whatever
+//                 is consumed, and may be left NULL when the chain ends
+//     validate  - when true, a result rejected by validate_url() yields NULL
+//
+// Returns the result of clean_string() on the extracted text, or NULL when
+// there is no token to read or validation fails.  The intermediate copy is
+// freed here; the returned string is left for the caller to free.
+char * destination_accept(const char * source, token ** remainder, bool validate) {
+       char * url = NULL;
+       char * clean = NULL;
+       token * t = NULL;
+       token * last = NULL;
+       size_t start;
+       size_t scan_len;
+
+       // Nothing left in the chain to read
+       if (remainder == NULL || *remainder == NULL) {
+               return NULL;
+       }
+
+       switch ((*remainder)->type) {
+               case PAIR_PAREN:
+               case PAIR_ANGLE:
+               case PAIR_QUOTE_SINGLE:
+               case PAIR_QUOTE_DOUBLE:
+                       // NOTE(review): quote pairs reach this branch, but only
+                       // PAIR_ANGLE and PAIR_PAREN are offered for acceptance -- confirm intent
+                       t = token_chain_accept_multiple(remainder, 2, PAIR_ANGLE, PAIR_PAREN);
+                       url = text_inside_pair(source, t);
+                       break;
+               case TEXT_PLAIN:
+                       start = (*remainder)->start;
+
+                       // Skip any whitespace
+                       while (char_is_whitespace(source[start])) {
+                               start++;
+                       }
+
+                       scan_len = scan_destination(&source[start]);
+
+                       // Grab destination string
+                       url = strndup(&source[start], scan_len);
+
+                       // Advance remainder past every token that begins inside the
+                       // destination.  The destination may be the last thing in the
+                       // chain, so stop when we run off the end.
+                       while (*remainder && (*remainder)->start < start + scan_len) {
+                               last = *remainder;
+                               *remainder = (*remainder)->next;
+                       }
+
+                       // Token immediately before the new position; when the chain
+                       // ended, that is the last token we stepped over.
+                       t = (*remainder) ? (*remainder)->prev : last;
+
+                       // Is there a space in a URL concatenated with a title or attribute?
+                       // e.g. [foo]: http://foo.bar/ class="foo"
+                       // Since only one space between URL and class, they are joined.
+
+                       if (t && t->type == TEXT_PLAIN) {
+                               // Trim leading whitespace
+                               token_trim_leading_whitespace(t, source);
+                               token_split_on_char(t, source, ' ');
+                               *remainder = t->next;
+                       }
+
+                       break;
+               default:
+                       // Any other token type: nothing is consumed and url stays NULL
+                       break;
+       }
+
+       // Is this a valid URL?
+       clean = clean_string(url, false);
+
+       if (validate && !validate_url(clean)) {
+               free(clean);
+               clean = NULL;
+       }
+
+       free(url);
+       return clean;
+}
+
+
 char * url_accept(const char * source, token ** remainder, bool validate) {
        char * url = NULL;
        char * clean = NULL;
@@ -880,7 +946,35 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
        
        // Prepare for parsing
 
+       // Account for settings
+
        switch (label->type) {
+               case PAIR_BRACKET_CITATION:
+                       if (e->extensions & EXT_NOTES) {
+                               if (!token_chain_accept(remainder, COLON))
+                                       return false;
+
+                               title = *remainder;             // Track first token of content in 'title'
+                               f = footnote_new(e->dstr->str, label, title);
+
+                               // Store citation for later use
+                               stack_push(e->citation_stack, f);
+                               
+                               break;
+                       }
+               case PAIR_BRACKET_FOOTNOTE:
+                       if (e->extensions & EXT_NOTES) {
+                               if (!token_chain_accept(remainder, COLON))
+                                       return false;
+
+                               title = *remainder;             // Track first token of content in 'title'
+                               f = footnote_new(e->dstr->str, label, title);
+
+                               // Store footnote for later use
+                               stack_push(e->footnote_stack, f);
+                               
+                               break;
+                       }
                case PAIR_BRACKET:
                        // Reference Link Definition
 
@@ -890,8 +984,8 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
                        // Skip space
                        whitespace_accept(remainder);
 
-                       // Grab URL
-                       url_char = url_accept(e->dstr->str, remainder, false);
+                       // Grab destination
+                       url_char = destination_accept(e->dstr->str, remainder, false);
 
                        whitespace_accept(remainder);
 
@@ -941,28 +1035,6 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
                        if (l)
                                stack_push(e->link_stack, l);
 
-                       break;
-               case PAIR_BRACKET_CITATION:
-                       if (!token_chain_accept(remainder, COLON))
-                               return false;
-
-                       title = *remainder;             // Track first token of content in 'title'
-                       f = footnote_new(e->dstr->str, label, title);
-
-                       // Store citation for later use
-                       stack_push(e->citation_stack, f);
-                       
-                       break;
-               case PAIR_BRACKET_FOOTNOTE:
-                       if (!token_chain_accept(remainder, COLON))
-                               return false;
-
-                       title = *remainder;             // Track first token of content in 'title'
-                       f = footnote_new(e->dstr->str, label, title);
-
-                       // Store footnote for later use
-                       stack_push(e->footnote_stack, f);
-                       
                        break;
                case PAIR_BRACKET_VARIABLE:
                        fprintf(stderr, "Process variable:\n");
@@ -990,6 +1062,7 @@ bool definition_extract(mmd_engine * e, token ** remainder) {
 void process_definition_block(mmd_engine * e, token * block) {
        footnote * f;
 
+
        token * label = block->child;
        if (label->type == BLOCK_PARA)
                label = label->child;
@@ -1433,4 +1506,4 @@ void strip_leading_whitespace(token * chain, const char * source) {
 
                chain = chain->next;
        }
-}
\ No newline at end of file
+}
index 482881058d9c3d46709102277557c7e70ca1aca2..9e8b908db5955e5387a9316a19c4ca4ddded1b30 100644 (file)
@@ -398,14 +398,10 @@ most circumstances.
 
                </div>
 
-5. I haven't worked a lot yet on the MMD-specific features, so there may be
-more changes to come.  One thing I do anticipate is that if fenced code blocks
-stay, they will work slightly differently.  Currently, an opening fence
-doesn't mean anything unless there is a closing fence that follows it.  Again,
-this requires backtracking in the parser.  I suspect that an opening fence
-will definitely open a code block.  If there is no closing fence, then the
-rest of the document will remain inside the code block.  This is the approach
-used by CommonMark and it's a reasonable one, IMO.
+5. "Malformed" reference link definitions are handled slightly differently.
+For example, `Reference Footnotes.text` is parsed differently in compatibility
+mode than MMD-5.  This started as a side-effect of the parsing algorithm, but
+I actually think it makes sense.  This may or may not change in the future.
 
 
 ## Where Does MultiMarkdown 6 Stand? ##
@@ -420,6 +416,8 @@ features have been implemented:
 * Automatic cross-reference targets
 * Basic Citation support
 * CriticMarkup support
+* Definition lists
+* Footnotes
 * Inline and reference footnotes
 * Image and Link attributes (attributes can now be used with inline links as
        well as reference links)
@@ -440,9 +438,7 @@ Things that are partially completed:
        * Locators required?
 * CriticMarkup -- need to decide:
        * How to handle CM stretches that include blank lines
-* Definition lists
 * Fenced code blocks
-* Footnotes -- need support for multiple blocks inside
 * Headers -- need support for manual labels
 * Metadata
 * Full/Snippet modes
index eeb3645a1879891a12e15ae737eb239acf63310f..7ac782209e7871eefd4adc5b0659077bcb34eab3 100644 (file)
@@ -1,20 +1,31 @@
-<p>Reference.<a href="#fn:1" id="fnref:1" title="see footnote" class="footnote">[1]</a></p>
+<p>foo.<a href="#fn:1" id="fnref:1" title="see footnote" class="footnote">[1]</a></p>
 
-<p>Reference.<a href="#fn:2" id="fnref:2" title="see footnote" class="footnote">[2]</a></p>
+<p>foo.<a href="#fn:2" id="fnref:2" title="see footnote" class="footnote">[2]</a></p>
+
+<p>foo.<a href="#fn:3" id="fnref:3" title="see footnote" class="footnote">[3]</a></p>
 
 <div class="footnotes">
 <hr />
 <ol>
 
 <li id="fn:1">
-<p>This is a <em>short</em> footnote. <a href="#fnref:1" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+<p><em>foo</em> <a href="#fnref:1" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
 </li>
 
 <li id="fn:2">
-<p>This is a longer footnote.
-With two lines. <a href="#fnref:2" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+<p><em>foo
+bar</em> <a href="#fnref:2" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+</li>
+
+<li id="fn:3">
+<p>foo</p>
+
+<p><em>bar</em> <a href="#fnref:3" title="return to body" class="reversefootnote">&#160;&#8617;</a></p>
+
+<ul>
+<li>bat</li>
+</ul>
 </li>
 
 </ol>
 </div>
-
index e7a323c3680ae934399e4322d8f81d9c4c6c8b51..df0aa3e0d8bcd1cee847bbcf6a1af8fc6efbb5ad 100644 (file)
@@ -1,7 +1,10 @@
-<p>Reference.[^foo]</p>
+<p>foo.<a href="*foo*">^bar</a></p>
 
-<p>Reference.[^foo2]</p>
+<p>foo.<a href="*foo">^bar2</a></p>
 
-<p>[^foo]: This is a <em>short</em> footnote.
-[^foo2]: This is a longer footnote.
-With two lines.</p>
+<p>foo.<a href="foo">^bar3</a></p>
+
+<pre><code>*bar*
+
+* bat
+</code></pre>
index c2543367c6ffac7c716124850b3ace58d9ef8169..2376ec13ed3530e36cd645126f28c84fd7ec94ca 100644 (file)
@@ -1,8 +1,14 @@
-Reference.[^foo]
+foo.[^bar]
 
-Reference.[^foo2]
+foo.[^bar2]
 
+foo.[^bar3]
 
-[^foo]: This is a *short* footnote.
-[^foo2]: This is a longer footnote.
-With two lines.
+[^bar]: *foo*
+[^bar2]: *foo
+bar*
+[^bar3]: foo
+
+       *bar*
+
+       * bat