From: Fletcher T. Penney Date: Sun, 12 Feb 2017 19:56:03 +0000 (-0500) Subject: FIXED: Improve reliability of link scanner X-Git-Tag: 0.3.0a^2~19 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=141976a36a911975a4511700645bc08cc6ab0e76;p=multimarkdown FIXED: Improve reliability of link scanner --- diff --git a/src/html.c b/src/html.c index 6d08ee7..689ff8d 100644 --- a/src/html.c +++ b/src/html.c @@ -943,9 +943,7 @@ void mmd_export_token_html(DString * out, const char * source, token * t, size_t print(""); break; case PAIR_ANGLE: - temp_token = t; - - temp_char = url_accept(source, &temp_token, true); + temp_char = url_accept(source, t->start + 1, t->len - 2, NULL, true); if (temp_char) { if (scan_email(temp_char)) diff --git a/src/latex.c b/src/latex.c index 457e913..1d04073 100644 --- a/src/latex.c +++ b/src/latex.c @@ -660,9 +660,7 @@ void mmd_export_token_latex(DString * out, const char * source, token * t, scrat print_char(' '); break; case PAIR_ANGLE: - temp_token = t; - - temp_char = url_accept(source, &temp_token, true); + temp_char = url_accept(source, t->start + 1, t->len - 2, NULL, true); if (temp_char) { if (scan_email(temp_char)) { diff --git a/src/scanners.c b/src/scanners.c index 15ddd0c..9284b6c 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.14.3 on Sat Feb 11 13:20:22 2017 */ +/* Generated by re2c 0.14.3 on Sun Feb 12 13:52:25 2017 */ /** MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. 
@@ -9468,7 +9468,7 @@ yy730: } -size_t scan_setext(const char * c) { +size_t scan_title(const char * c) { const char * marker = NULL; const char * start = c; @@ -9478,9 +9478,9 @@ size_t scan_setext(const char * c) { yych = *c; switch (yych) { case '\n': goto yy733; - case ' ': goto yy734; - case '-': goto yy736; - case '=': goto yy735; + case '"': goto yy734; + case '\'': goto yy735; + case '(': goto yy736; default: goto yy737; } yy733: @@ -9488,22 +9488,26 @@ yy733: yy734: yych = *(marker = ++c); switch (yych) { - case ' ': goto yy749; - case '-': goto yy750; - case '=': goto yy751; - default: goto yy733; + case 0x00: + case '\n': + case '\r': goto yy733; + default: goto yy746; } yy735: yych = *(marker = ++c); switch (yych) { - case '=': goto yy744; - default: goto yy733; + case 0x00: + case '\n': + case '\r': goto yy733; + default: goto yy744; } yy736: yych = *(marker = ++c); switch (yych) { - case '-': goto yy738; - default: goto yy733; + case 0x00: + case '\n': + case '\r': goto yy733; + default: goto yy739; } yy737: yych = *++c; @@ -9511,72 +9515,155 @@ yy737: yy738: ++c; yych = *c; +yy739: switch (yych) { case 0x00: - case '\n': goto yy741; - case '\r': goto yy743; - case '-': goto yy738; - default: goto yy740; + case '\n': + case '\r': goto yy740; + case ')': goto yy741; + default: goto yy738; } yy740: c = marker; goto yy733; yy741: ++c; -yy742: { return (size_t)( c - start ); } yy743: + ++c; + yych = *c; +yy744: + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy740; + case '\'': goto yy741; + default: goto yy743; + } +yy745: + ++c; + yych = *c; +yy746: + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy740; + case '"': goto yy741; + default: goto yy745; + } +} + +} + +size_t scan_setext(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy749; + case ' ': goto yy750; + case '-': goto yy752; + case '=': goto yy751; + default: goto yy753; + 
} +yy749: + { return 0; } +yy750: + yych = *(marker = ++c); + switch (yych) { + case ' ': goto yy765; + case '-': goto yy766; + case '=': goto yy767; + default: goto yy749; + } +yy751: + yych = *(marker = ++c); + switch (yych) { + case '=': goto yy760; + default: goto yy749; + } +yy752: + yych = *(marker = ++c); + switch (yych) { + case '-': goto yy754; + default: goto yy749; + } +yy753: yych = *++c; + goto yy749; +yy754: + ++c; + yych = *c; switch (yych) { - case '\n': goto yy741; - default: goto yy742; + case 0x00: + case '\n': goto yy757; + case '\r': goto yy759; + case '-': goto yy754; + default: goto yy756; } -yy744: +yy756: + c = marker; + goto yy749; +yy757: + ++c; +yy758: + { return (size_t)( c - start ); } +yy759: + yych = *++c; + switch (yych) { + case '\n': goto yy757; + default: goto yy758; + } +yy760: ++c; yych = *c; switch (yych) { case 0x00: - case '\n': goto yy746; - case '\r': goto yy748; - case '=': goto yy744; - default: goto yy740; + case '\n': goto yy762; + case '\r': goto yy764; + case '=': goto yy760; + default: goto yy756; } -yy746: +yy762: ++c; -yy747: +yy763: { return (size_t)( c - start ); } -yy748: +yy764: yych = *++c; switch (yych) { - case '\n': goto yy746; - default: goto yy747; + case '\n': goto yy762; + default: goto yy763; } -yy749: +yy765: yych = *++c; switch (yych) { - case ' ': goto yy752; - case '-': goto yy750; - case '=': goto yy751; - default: goto yy740; + case ' ': goto yy768; + case '-': goto yy766; + case '=': goto yy767; + default: goto yy756; } -yy750: +yy766: yych = *++c; switch (yych) { - case '-': goto yy738; - default: goto yy740; + case '-': goto yy754; + default: goto yy756; } -yy751: +yy767: yych = *++c; switch (yych) { - case '=': goto yy744; - default: goto yy740; + case '=': goto yy760; + default: goto yy756; } -yy752: +yy768: ++c; switch ((yych = *c)) { - case '-': goto yy750; - case '=': goto yy751; - default: goto yy740; + case '-': goto yy766; + case '=': goto yy767; + default: goto yy756; } } diff --git 
a/src/scanners.h b/src/scanners.h index d952f72..c42ecb0 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -90,6 +90,7 @@ size_t scan_ref_link_no_attributes(const char * c); size_t scan_setext(const char * c); size_t scan_spnl(const char * c); size_t scan_table_separator(const char * c); +size_t scan_title(const char * c); size_t scan_url(const char * c); size_t scan_value(const char * c); diff --git a/src/scanners.re b/src/scanners.re index 9f1394c..8306b16 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -402,6 +402,16 @@ size_t scan_destination(const char * c) { } +size_t scan_title(const char * c) { + const char * marker = NULL; + const char * start = c; + +/*!re2c + title { return (size_t)( c - start ); } + .? { return 0; } +*/ +} + size_t scan_setext(const char * c) { const char * marker = NULL; const char * start = c; diff --git a/src/writer.c b/src/writer.c index 276e957..f39eaae 100644 --- a/src/writer.c +++ b/src/writer.c @@ -722,90 +722,80 @@ char * destination_accept(const char * source, token ** remainder, bool validate } -char * url_accept(const char * source, token ** remainder, bool validate) { +char * url_accept(const char * source, size_t start, size_t max_len, size_t * end_pos, bool validate) { char * url = NULL; char * clean = NULL; - token * t = NULL; - token * first = NULL; - token * last = NULL; + size_t scan_len; - switch ((*remainder)->type) { - case PAIR_PAREN: - case PAIR_ANGLE: - case PAIR_QUOTE_SINGLE: - case PAIR_QUOTE_DOUBLE: - t = token_chain_accept_multiple(remainder, 2, PAIR_ANGLE, PAIR_PAREN); - url = text_inside_pair(source, t); - break; - case SLASH: - case TEXT_PLAIN: - first = *remainder; - - // Grab parts for URL - while (token_chain_accept_multiple(remainder, 7, AMPERSAND, COLON, EQUAL, SLASH, TEXT_PERIOD, TEXT_PLAIN, UL)); + scan_len = scan_destination(&source[start]); - last = (*remainder)->prev; + if (scan_len) { + if (scan_len > max_len) + scan_len = max_len; - // Is there a space in a URL concatenated with a 
title or attribute? - // e.g. [foo]: http://foo.bar/ class="foo" - // Since only one space between URL and class, they are joined. + if (end_pos) + *end_pos = start + scan_len; - if (last->type == TEXT_PLAIN) { - // Trim leading whitespace - token_trim_leading_whitespace(last, source); - token_split_on_char(last, source, ' '); - *remainder = last->next; - } + // Is this <foo>? + if ((source[start] == '<') && + (source[start + scan_len - 1] == '>')) { + // Strip '<' and '>' + start++; + scan_len -= 2; + } - url = strndup(&source[first->start], last->start + last->len - first->start); - break; - } + url = strndup(&source[start], scan_len); - // Is this a valid URL? - clean = clean_string(url, false); - - if (validate && !validate_url(clean)) { - free(clean); - clean = NULL; + clean = clean_string(url, false); + + if (validate && !validate_url(clean)) { + free(clean); + clean = NULL; + } + + free(url); } - free(url); return clean; } /// Extract url string from `(foo)` or `(<foo>)` or `(foo "bar")` void extract_from_paren(token * paren, const char * source, char ** url, char ** title, char ** attributes) { - token * t; + size_t scan_len; + size_t pos = paren->child->next->start; + + size_t attr_len; - token * remainder = paren->child->next; + // Skip whitespace + while (char_is_whitespace(source[pos])) + pos++; - if (remainder) { - // Skip whitespace - whitespace_accept(&remainder); + // Grab URL + *url = url_accept(source, pos, paren->start + paren->len - 1 - pos, &pos, false); - // Grab URL - *url = url_accept(source, &remainder, false); + // Skip whitespace + while (char_is_whitespace(source[pos])) + pos++; - // Skip whitespace - whitespace_accept(&remainder); + // Grab title, if present + scan_len = scan_title(&source[pos]); - // Grab title, if present - t = token_chain_accept_multiple(&remainder, 3, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_SINGLE, PAIR_PAREN); + if (scan_len) { + *title = strndup(&source[pos + 1], scan_len - 2); + pos += scan_len; + } - if (t) { - *title = 
text_inside_pair(source, t); - } + // Skip whitespace + while (char_is_whitespace(source[pos])) + pos++; - // Grab attributes, if present - if (t) { - attr_len = scan_attributes(&source[t->start + t->len]); - - if (attr_len) { - *attributes = strndup(&source[t->start + t->len], attr_len); - } - } + // Grab attributes, if present + attr_len = scan_attributes(&source[pos]); + + if (attr_len) { + *attributes = strndup(&source[pos], attr_len); } } diff --git a/src/writer.h b/src/writer.h index 85b92d5..b20b1f3 100644 --- a/src/writer.h +++ b/src/writer.h @@ -189,7 +189,7 @@ void print_token_raw(DString * out, const char * source, token * t); void print_token_tree_raw(DString * out, const char * source, token * t); -char * url_accept(const char * source, token ** remainder, bool validate); +char * url_accept(const char * source, size_t start, size_t max_len, size_t * end_pos, bool validate); void footnote_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num); void citation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num); diff --git a/tests/MMD6Tests/Inline Links.html b/tests/MMD6Tests/Inline Links.html index 664c4b7..6f71ab4 100644 --- a/tests/MMD6Tests/Inline Links.html +++ b/tests/MMD6Tests/Inline Links.html @@ -30,3 +30,15 @@

[URL and title] (/url/file.txt “title”).

+ +

URL and title.

+ +

15

+ +

URL and title.

+ +

URL and title.

+ +

URL and title.

+ +

URL and title.

diff --git a/tests/MMD6Tests/Inline Links.htmlc b/tests/MMD6Tests/Inline Links.htmlc index 302544f..2394629 100644 --- a/tests/MMD6Tests/Inline Links.htmlc +++ b/tests/MMD6Tests/Inline Links.htmlc @@ -30,3 +30,15 @@

[URL and title] (/url/file.txt "title").

+ +

URL and title.

+ +

15

+ +

URL and title.

+ +

URL and title.

+ +

URL and title.

+ +

URL and title.

diff --git a/tests/MMD6Tests/Inline Links.text b/tests/MMD6Tests/Inline Links.text index a6a4095..16d4d38 100644 --- a/tests/MMD6Tests/Inline Links.text +++ b/tests/MMD6Tests/Inline Links.text @@ -30,3 +30,15 @@ Just a [URL](http://url/file.txt). [URL and title] (/url/file.txt "*title*"). + +[URL and title]( /url/file.txt "title"). + +15 + +[URL and title]( /url/file.txt "title"). + +[URL and title]( "title"). + +[URL and title]( "title"). + +[URL and title]( "title").