From: Fletcher T. Penney Date: Wed, 8 Feb 2017 03:38:32 +0000 (-0500) Subject: FIXED: Fix compatibiity mode handling of footnote and citation labels X-Git-Tag: 0.1.2a^2~2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bc715ba3df5403e3525cb5b4908f8b49edd97c6e;p=multimarkdown FIXED: Fix compatibiity mode handling of footnote and citation labels --- diff --git a/README.md b/README.md index 6638a30..0b77979 100644 --- a/README.md +++ b/README.md @@ -398,14 +398,10 @@ most circumstances. -5. I haven't worked a lot yet on the MMD-specific features, so there may be -more changes to come. One thing I do anticipate is that if fenced code blocks -stay, they will work slightly differently. Currently, an opening fence -doesn't mean anything unless there is a closing fence that follows it. Again, -this requires backtracking in the parser. I suspect that an opening fence -will definitely open a code block. If there is no closing fence, then the -rest of the document will remain inside the code block. This is the approach -used by CommonMark and it's a reasonable one, IMO. +5. "Malformed" reference link definitions are handled slightly differently. +For example, `Reference Footnotes.text` is parsed differently in compatibility +mode than MMD-5. This started as a side-effect of the parsing algorithm, but +I actually think it makes sense. This may or may not change in the future. ## Where Does MultiMarkdown 6 Stand? ## @@ -420,6 +416,8 @@ features have been implemented: * Automatic cross-reference targets * Basic Citation support * CriticMarkup support +* Definition lists +* Footnotes * Inline and reference footnotes * Image and Link attributes (attributes can now be used with inline links as well as reference links) @@ -440,9 +438,7 @@ Things that are partially completed: * Locators required? 
* CriticMarkup -- need to decide: * How to handle CM stretches that include blank lines -* Definition lists * Fenced code blocks -* Footnotes -- need support for multiple blocks inside * Headers -- need support for manual labels * Metadata * Full/Snippet modes diff --git a/src/html.c b/src/html.c index 2279175..698f89b 100644 --- a/src/html.c +++ b/src/html.c @@ -224,6 +224,12 @@ void mmd_export_link_html(DString * out, const char * source, token * text, link print(">"); + // If we're printing contents of bracket as text, then ensure we include it all + if (text && text->child && text->child->len > 1) { + text->child->next->start--; + text->child->next->len++; + } + mmd_export_token_tree_html(out, source, text->child, offset, scratch); print(""); diff --git a/src/main.c b/src/main.c index b5cd556..76b71d4 100644 --- a/src/main.c +++ b/src/main.c @@ -249,7 +249,7 @@ int main(int argc, char** argv) { // Parse options unsigned long extensions = EXT_SMART | EXT_NOTES | EXT_CRITIC; - if (a_compatibility->count > 0) { + if (a_compatibility->count > 0) { // Compatibility mode disables certain features // Reset extensions extensions = EXT_COMPATIBILITY | EXT_NO_LABELS | EXT_OBFUSCATE; diff --git a/src/mmd.c b/src/mmd.c index 3e31bdd..1258518 100644 --- a/src/mmd.c +++ b/src/mmd.c @@ -122,10 +122,18 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) { // Brackets, Parentheses, Angles token_pair_engine_add_pairing(e->pairings2, BRACKET_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); - token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); - token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); - token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); 
+ + if (extensions & EXT_NOTES) { + token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + } else { + token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + } + token_pair_engine_add_pairing(e->pairings2, BRACKET_VARIABLE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_VARIABLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + + token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); token_pair_engine_add_pairing(e->pairings2, PAREN_LEFT, PAREN_RIGHT, PAIR_PAREN, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); token_pair_engine_add_pairing(e->pairings2, ANGLE_LEFT, ANGLE_RIGHT, PAIR_ANGLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); token_pair_engine_add_pairing(e->pairings2, BRACE_DOUBLE_LEFT, BRACE_DOUBLE_RIGHT, PAIR_BRACES, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); @@ -533,7 +541,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) { scan_len = scan_ref_citation(&source[line->start]); line->type = (scan_len) ? LINE_DEF_CITATION : LINE_PLAIN; } else { - line->type = LINE_PLAIN; + scan_len = scan_ref_link_no_attributes(&source[line->start]); + line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN; } break; case BRACKET_FOOTNOTE_LEFT: @@ -541,7 +550,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) { scan_len = scan_ref_foot(&source[line->start]); line->type = (scan_len) ? 
LINE_DEF_FOOTNOTE : LINE_PLAIN; } else { - line->type = LINE_PLAIN; + scan_len = scan_ref_link_no_attributes(&source[line->start]); + line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN; } break; case PIPE: diff --git a/src/scanners.c b/src/scanners.c index 8dae2a1..779d9dc 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.14.3 on Sat Feb 4 17:39:29 2017 */ +/* Generated by re2c 0.14.3 on Tue Feb 7 20:44:00 2017 */ /** MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. @@ -9389,6 +9389,67 @@ yy717: } +size_t scan_destination(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case 0x00: + case '\t': + case '\r': + case ' ': goto yy725; + case '\n': goto yy721; + case '<': goto yy722; + default: goto yy724; + } +yy721: + { return 0; } +yy722: + ++c; + yych = *c; + goto yy729; +yy723: + { return (size_t)( c - start ); } +yy724: + yych = *++c; + goto yy727; +yy725: + yych = *++c; + goto yy721; +yy726: + ++c; + yych = *c; +yy727: + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': goto yy723; + default: goto yy726; + } +yy728: + ++c; + yych = *c; +yy729: + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': goto yy723; + case '>': goto yy726; + default: goto yy728; + } +} + +} + + #ifdef TEST void Test_scan_url(CuTest* tc) { int url_len; diff --git a/src/scanners.h b/src/scanners.h index 6abd6bb..e285db4 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -73,6 +73,7 @@ size_t scan_alignment_string(const char * c); size_t scan_attr(const char * c); size_t scan_attributes(const char * c); size_t scan_definition(const char * c); +size_t scan_destination(const char * c); size_t scan_email(const char * c); size_t scan_fence_start(const char * c); size_t scan_fence_end(const char * c); diff --git a/src/scanners.re b/src/scanners.re index e6e8040..f4cedae 100644 --- 
a/src/scanners.re +++ b/src/scanners.re @@ -387,6 +387,17 @@ size_t scan_alignment_string(const char * c) { } +size_t scan_destination(const char * c) { + const char * marker = NULL; + const char * start = c; + +/*!re2c + destination { return (size_t)( c - start ); } + .? { return 0; } +*/ +} + + #ifdef TEST void Test_scan_url(CuTest* tc) { int url_len; diff --git a/src/writer.c b/src/writer.c index e511e2a..9810ec7 100644 --- a/src/writer.c +++ b/src/writer.c @@ -246,7 +246,12 @@ char * text_inside_pair(const char * source, token * pair) { char * result = NULL; if (source && pair) { - result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1)); + if (pair->child->mate) { + // [foo], [^foo], [#foo] should give different strings -- use closer len + result = strndup(&source[pair->start + pair->child->mate->len], pair->len - (pair->child->mate->len * 2)); + } else { + result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1)); + } } return result; @@ -644,6 +649,67 @@ bool validate_url(const char * url) { } +char * destination_accept(const char * source, token ** remainder, bool validate) { + char * url = NULL; + char * clean = NULL; + token * t = NULL; + size_t start; + size_t scan_len; + + switch ((*remainder)->type) { + case PAIR_PAREN: + case PAIR_ANGLE: + case PAIR_QUOTE_SINGLE: + case PAIR_QUOTE_DOUBLE: + t = token_chain_accept_multiple(remainder, 2, PAIR_ANGLE, PAIR_PAREN); + url = text_inside_pair(source, t); + break; + case TEXT_PLAIN: + start = (*remainder)->start; + + // Skip any whitespace + while (char_is_whitespace(source[start])) + start++; + + scan_len = scan_destination(&source[start]); + + // Grab destination string + url = strndup(&source[start], scan_len); + + // Advance remainder + while ((*remainder)->start < start + scan_len) + *remainder = (*remainder)->next; + + + t = (*remainder)->prev; + + // Is there a space in a URL concatenated with a title or attribute? + // e.g. 
[foo]: http://foo.bar/ class="foo" + // Since only one space between URL and class, they are joined. + + if (t->type == TEXT_PLAIN) { + // Trim leading whitespace + token_trim_leading_whitespace(t, source); + token_split_on_char(t, source, ' '); + *remainder = t->next; + } + + break; + } + + // Is this a valid URL? + clean = clean_string(url, false); + + if (validate && !validate_url(clean)) { + free(clean); + clean = NULL; + } + + free(url); + return clean; +} + + char * url_accept(const char * source, token ** remainder, bool validate) { char * url = NULL; char * clean = NULL; @@ -880,7 +946,35 @@ bool definition_extract(mmd_engine * e, token ** remainder) { // Prepare for parsing + // Account for settings + switch (label->type) { + case PAIR_BRACKET_CITATION: + if (e->extensions & EXT_NOTES) { + if (!token_chain_accept(remainder, COLON)) + return false; + + title = *remainder; // Track first token of content in 'title' + f = footnote_new(e->dstr->str, label, title); + + // Store citation for later use + stack_push(e->citation_stack, f); + + break; + } + case PAIR_BRACKET_FOOTNOTE: + if (e->extensions & EXT_NOTES) { + if (!token_chain_accept(remainder, COLON)) + return false; + + title = *remainder; // Track first token of content in 'title' + f = footnote_new(e->dstr->str, label, title); + + // Store footnote for later use + stack_push(e->footnote_stack, f); + + break; + } case PAIR_BRACKET: // Reference Link Definition @@ -890,8 +984,8 @@ bool definition_extract(mmd_engine * e, token ** remainder) { // Skip space whitespace_accept(remainder); - // Grab URL - url_char = url_accept(e->dstr->str, remainder, false); + // Grab destination + url_char = destination_accept(e->dstr->str, remainder, false); whitespace_accept(remainder); @@ -941,28 +1035,6 @@ bool definition_extract(mmd_engine * e, token ** remainder) { if (l) stack_push(e->link_stack, l); - break; - case PAIR_BRACKET_CITATION: - if (!token_chain_accept(remainder, COLON)) - return false; - - title = 
*remainder; // Track first token of content in 'title' - f = footnote_new(e->dstr->str, label, title); - - // Store citation for later use - stack_push(e->citation_stack, f); - - break; - case PAIR_BRACKET_FOOTNOTE: - if (!token_chain_accept(remainder, COLON)) - return false; - - title = *remainder; // Track first token of content in 'title' - f = footnote_new(e->dstr->str, label, title); - - // Store footnote for later use - stack_push(e->footnote_stack, f); - break; case PAIR_BRACKET_VARIABLE: fprintf(stderr, "Process variable:\n"); @@ -990,6 +1062,7 @@ bool definition_extract(mmd_engine * e, token ** remainder) { void process_definition_block(mmd_engine * e, token * block) { footnote * f; + token * label = block->child; if (label->type == BLOCK_PARA) label = label->child; @@ -1433,4 +1506,4 @@ void strip_leading_whitespace(token * chain, const char * source) { chain = chain->next; } -} \ No newline at end of file +} diff --git a/templates/README.md.in b/templates/README.md.in index 4828810..9e8b908 100644 --- a/templates/README.md.in +++ b/templates/README.md.in @@ -398,14 +398,10 @@ most circumstances. -5. I haven't worked a lot yet on the MMD-specific features, so there may be -more changes to come. One thing I do anticipate is that if fenced code blocks -stay, they will work slightly differently. Currently, an opening fence -doesn't mean anything unless there is a closing fence that follows it. Again, -this requires backtracking in the parser. I suspect that an opening fence -will definitely open a code block. If there is no closing fence, then the -rest of the document will remain inside the code block. This is the approach -used by CommonMark and it's a reasonable one, IMO. +5. "Malformed" reference link definitions are handled slightly differently. +For example, `Reference Footnotes.text` is parsed differently in compatibility +mode than MMD-5. This started as a side-effect of the parsing algorithm, but +I actually think it makes sense. 
This may or may not change in the future. ## Where Does MultiMarkdown 6 Stand? ## @@ -420,6 +416,8 @@ features have been implemented: * Automatic cross-reference targets * Basic Citation support * CriticMarkup support +* Definition lists +* Footnotes * Inline and reference footnotes * Image and Link attributes (attributes can now be used with inline links as well as reference links) @@ -440,9 +438,7 @@ Things that are partially completed: * Locators required? * CriticMarkup -- need to decide: * How to handle CM stretches that include blank lines -* Definition lists * Fenced code blocks -* Footnotes -- need support for multiple blocks inside * Headers -- need support for manual labels * Metadata * Full/Snippet modes diff --git a/tests/MMD6Tests/Reference Footnotes.html b/tests/MMD6Tests/Reference Footnotes.html index eeb3645..7ac7822 100644 --- a/tests/MMD6Tests/Reference Footnotes.html +++ b/tests/MMD6Tests/Reference Footnotes.html @@ -1,20 +1,31 @@ -

Reference.[1]

+

foo.[1]

-

Reference.[2]

+

foo.[2]

+ +

foo.[3]


  1. -

    This is a short footnote.  ↩

    +

    foo  ↩

  2. -

    This is a longer footnote. -With two lines.  ↩

    +

    foo +bar  ↩

    +
  3. + +
  4. +

    foo

    + +

    bar  ↩

    + +
      +
    • bat
    • +
- diff --git a/tests/MMD6Tests/Reference Footnotes.htmlc b/tests/MMD6Tests/Reference Footnotes.htmlc index e7a323c..df0aa3e 100644 --- a/tests/MMD6Tests/Reference Footnotes.htmlc +++ b/tests/MMD6Tests/Reference Footnotes.htmlc @@ -1,7 +1,10 @@ -

Reference.[^foo]

+

foo.^bar

-

Reference.[^foo2]

+

foo.^bar2

-

[^foo]: This is a short footnote. -[^foo2]: This is a longer footnote. -With two lines.

+

foo.^bar3

+ +
*bar*
+
+* bat
+
diff --git a/tests/MMD6Tests/Reference Footnotes.text b/tests/MMD6Tests/Reference Footnotes.text index c254336..2376ec1 100644 --- a/tests/MMD6Tests/Reference Footnotes.text +++ b/tests/MMD6Tests/Reference Footnotes.text @@ -1,8 +1,14 @@ -Reference.[^foo] +foo.[^bar] -Reference.[^foo2] +foo.[^bar2] +foo.[^bar3] -[^foo]: This is a *short* footnote. -[^foo2]: This is a longer footnote. -With two lines. +[^bar]: *foo* +[^bar2]: *foo +bar* +[^bar3]: foo + + *bar* + + * bat