From: Fletcher T. Penney Date: Sun, 12 Mar 2017 22:16:53 +0000 (-0400) Subject: ADDED: Add CriticMarkup preprocessor that works across empty lines when accepting... X-Git-Tag: 6.0.0-b2~1^2~4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=842a9141aed7ed7dde0ce8c424e53e4eda97ac9f;p=multimarkdown ADDED: Add CriticMarkup preprocessor that works across empty lines when accepting/rejecting markup --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 22a988a..44002a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,6 +175,7 @@ set(src_files Sources/libMultiMarkdown/aho-corasick.c Sources/libMultiMarkdown/beamer.c Sources/libMultiMarkdown/char.c + Sources/libMultiMarkdown/critic_markup.c Sources/libMultiMarkdown/d_string.c Sources/libMultiMarkdown/html.c Sources/libMultiMarkdown/latex.c @@ -198,6 +199,7 @@ set(header_files Sources/libMultiMarkdown/aho-corasick.h Sources/libMultiMarkdown/beamer.h Sources/libMultiMarkdown/char.h + Sources/libMultiMarkdown/critic_markup.h Sources/libMultiMarkdown/include/d_string.h Sources/libMultiMarkdown/html.h Sources/libMultiMarkdown/latex.h @@ -569,6 +571,10 @@ ADD_MMD_TEST(mmd-6-latex "-t latex" MMD6Tests tex) ADD_MMD_TEST(mmd-6-odf "-t odf" MMD6Tests fodt) +ADD_MMD_TEST(mmd-6-critic-accept "-a" CriticMarkup htmla) + +ADD_MMD_TEST(mmd-6-critic-reject "-r" CriticMarkup htmlr) + ADD_MMD_TEST(pathologic-compat "-c" ../build html) ADD_MMD_TEST(pathologic "" ../build html) diff --git a/Sources/libMultiMarkdown/aho-corasick.c b/Sources/libMultiMarkdown/aho-corasick.c index bc1ee9f..ecd9ac4 100644 --- a/Sources/libMultiMarkdown/aho-corasick.c +++ b/Sources/libMultiMarkdown/aho-corasick.c @@ -361,7 +361,7 @@ match * match_add(match * last, size_t start, size_t len, unsigned short match_t } -match * ac_trie_search(trie * a, const char * source, size_t len) { +match * ac_trie_search(trie * a, const char * source, size_t start, size_t len) { // Store results in a linked list // match * result = match_new(0, 0, 0); @@ -374,9 +374,10 @@ match * ac_trie_search(trie * a, const char * source, size_t len) { // Character being compared int test_value; - size_t counter = 0; + size_t counter = start; + size_t stop = start + len; - while ((counter < len) && (source[counter] != '\0')) { + while ((counter < stop) && (source[counter] != '\0')) { // Read next character test_value = (int)source[counter++]; @@ -494,8 +495,8 @@ void match_set_filter_leftmost_longest(match * header) { } -match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t len) { - match * result = ac_trie_search(a, source, len); +match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t start, size_t len) { + match * result = ac_trie_search(a, source, start, len); if (result) match_set_filter_leftmost_longest(result); @@ -535,12 +536,12 @@ void Test_aho_trie_search(CuTest* tc) { ac_trie_prepare(a); - m = ac_trie_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 26); + m = ac_trie_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 0, 26); fprintf(stderr, "Finish with %d matches\n", match_count(m)); match_set_describe(m, "ABCDEFGGGAZABCABCDZABCABCZ"); match_free(m); - m = ac_trie_leftmost_longest_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 26); + m = ac_trie_leftmost_longest_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 0, 26); fprintf(stderr, "Finish with %d matches\n", match_count(m)); match_set_describe(m, "ABCDEFGGGAZABCABCDZABCABCZ"); match_free(m); diff --git a/Sources/libMultiMarkdown/aho-corasick.h b/Sources/libMultiMarkdown/aho-corasick.h index 73414f2..dc0bb71 100644 --- a/Sources/libMultiMarkdown/aho-corasick.h +++ b/Sources/libMultiMarkdown/aho-corasick.h @@ -96,9 +96,9 @@ bool trie_insert(trie * a, const char * key, unsigned short match_type); void ac_trie_prepare(trie * a); -match * ac_trie_search(trie * a, const char * source, size_t len); +match * ac_trie_search(trie * a, const char * source, size_t start, size_t len); -match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t len); +match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t start, size_t len); void trie_free(trie * a); @@ -109,6 +109,9 @@ void match_set_filter_leftmost_longest(match * header); void match_free(match * m); +void trie_to_graphviz(trie * a); + + #ifdef TEST #include "CuTest.h" #endif diff --git a/Sources/libMultiMarkdown/critic_markup.c b/Sources/libMultiMarkdown/critic_markup.c new file mode 100644 index 0000000..e18dbe5 --- /dev/null +++ b/Sources/libMultiMarkdown/critic_markup.c @@ -0,0 +1,288 @@ +/** + + MultiMarkdown -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file critic_markup.c + + @brief + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the + text of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +*/ + +#include +#include + + +#include "aho-corasick.h" +#include "critic_markup.h" +#include "stack.h" +#include "token_pairs.h" + + +token * critic_tokenize_string(const char * source, size_t start, size_t len) { + trie * ac = trie_new(0); + + trie_insert(ac, "{++", CM_ADD_OPEN); + trie_insert(ac, "++}", CM_ADD_CLOSE); + + trie_insert(ac, "{--", CM_DEL_OPEN); + trie_insert(ac, "--}", CM_DEL_CLOSE); + + trie_insert(ac, "{~~", CM_SUB_OPEN); + trie_insert(ac, "~>", CM_SUB_DIV); + trie_insert(ac, "~~}", CM_SUB_CLOSE); + + trie_insert(ac, "{==", CM_HI_OPEN); + trie_insert(ac, "==}", CM_HI_CLOSE); + + trie_insert(ac, "{>>", CM_COM_OPEN); + trie_insert(ac, "<<}", CM_COM_CLOSE); + + ac_trie_prepare(ac); + + match * m = ac_trie_leftmost_longest_search(ac, source, start, len); + + token * root = NULL; + + if (m) { + match * walker = m->next; + + root = token_new(0, 0, 0); + + size_t last = start; + + while (walker) { + if (walker->start > last) { + token_append_child(root, token_new(CM_PLAIN_TEXT, last, walker->start - last)); + last = walker->start; + } + + if (walker->start == last) { + token_append_child(root, token_new(walker->match_type, walker->start, walker->len)); + last = walker->start + walker->len; + } + + walker = walker->next; + } + + if (last < start + len) { + token_append_child(root, token_new(CM_PLAIN_TEXT, last, start + len)); + } + + match_free(m); + trie_free(ac); + } + + return root; +} + + + +token * critic_parse_substring(const char * source, size_t start, size_t len) { + token * chain = critic_tokenize_string(source, start, len); + + if (chain) { + token_pair_engine * e = token_pair_engine_new(); + + token_pair_engine_add_pairing(e, CM_ADD_OPEN, CM_ADD_CLOSE, CM_ADD_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e, CM_DEL_OPEN, CM_DEL_CLOSE, CM_DEL_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e, CM_SUB_OPEN, CM_SUB_CLOSE, CM_SUB_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e, CM_HI_OPEN, CM_HI_CLOSE, CM_HI_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e, CM_COM_OPEN, CM_COM_CLOSE, CM_COM_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + + stack * s = stack_new(0); + + token_pairs_match_pairs_inside_token(chain, e, s, 0); + + stack_free(s); + token_pair_engine_free(e); + } + + return chain; +} + + +void accept_token_tree(DString * d, token * t); +void accept_token(DString * d, token * t); + + +void accept_token_tree_sub(DString * d, token * t) { + while (t) { + if (t->type == CM_SUB_DIV) { + while (t) { + d_string_erase(d, t->start, t->len); + t = t->prev; + } + + return; + } + + accept_token(d, t); + + t = t->prev; + } +} + + +void accept_token(DString * d, token * t) { + switch (t->type) { + case CM_SUB_CLOSE: + if (t->mate) { + d_string_erase(d, t->start, t->len); + } + break; + case CM_SUB_OPEN: + case CM_ADD_OPEN: + case CM_ADD_CLOSE: + if (!t->mate) + break; + case CM_SUB_DIV: + case CM_DEL_PAIR: + case CM_COM_PAIR: + case CM_HI_PAIR: + // Erase these + d_string_erase(d, t->start, t->len); + break; + case CM_SUB_PAIR: + // Erase old version and markers + accept_token_tree_sub(d, t->child->mate); + break; + case CM_ADD_PAIR: + // Check children + accept_token_tree(d, t->child->mate); + break; + } +} + + +void accept_token_tree(DString * d, token * t) { + while (t) { + accept_token(d, t); + + // Iterate backwards so offsets are right + t = t->prev; + } +} + +void critic_markup_accept(DString * d) { + token * t = critic_parse_substring(d->str, 0, d->currentStringLength); + + accept_token_tree(d, t->child->tail); + + token_free(t); +} + + +void reject_token_tree(DString * d, token * t); +void reject_token(DString * d, token * t); + + +void reject_token_tree_sub(DString * d, token * t) { + while (t && t->type != CM_SUB_DIV) { + d_string_erase(d, t->start, t->len); + t = t->prev; + } + + while (t) { + + reject_token(d, t); + + t = t->prev; + } +} + + +void reject_token(DString * d, token * t) { + switch (t->type) { + case CM_SUB_CLOSE: + if (t->mate) { + d_string_erase(d, t->start, t->len); + } + break; + case CM_SUB_OPEN: + case CM_DEL_OPEN: + case CM_DEL_CLOSE: + if (!t->mate) + break; + case CM_SUB_DIV: + case CM_ADD_PAIR: + case CM_COM_PAIR: + case CM_HI_PAIR: + // Erase these + d_string_erase(d, t->start, t->len); + break; + case CM_SUB_PAIR: + // Erase new version and markers + reject_token_tree_sub(d, t->child->mate); + break; + case CM_DEL_PAIR: + // Check children + reject_token_tree(d, t->child->mate); + break; + } +} + + +void reject_token_tree(DString * d, token * t) { + while (t) { + reject_token(d, t); + + // Iterate backwards so offsets are right + t = t->prev; + } +} + +void critic_markup_reject(DString * d) { + token * t = critic_parse_substring(d->str, 0, d->currentStringLength); + + reject_token_tree(d, t->child->tail); + + token_free(t); + +} + diff --git a/Sources/libMultiMarkdown/critic_markup.h b/Sources/libMultiMarkdown/critic_markup.h new file mode 100644 index 0000000..61e2327 --- /dev/null +++ b/Sources/libMultiMarkdown/critic_markup.h @@ -0,0 +1,94 @@ +/** + + MultiMarkdown -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file critic_markup.h + + @brief + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the + text of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +*/ + + +#ifndef CRITIC_MARKUP_MULTIMARKDOWN_H +#define CRITIC_MARKUP_MULTIMARKDOWN_H + +#include "d_string.h" + +enum cm_types { + CM_ADD_OPEN = 1, // Can't use type 0 + CM_ADD_CLOSE, + + CM_DEL_OPEN, + CM_DEL_CLOSE, + + CM_SUB_OPEN, + CM_SUB_DIV, + CM_SUB_CLOSE, + + CM_HI_OPEN, + CM_HI_CLOSE, + + CM_COM_OPEN, + CM_COM_CLOSE, + + CM_ADD_PAIR, + CM_DEL_PAIR, + CM_SUB_PAIR, + CM_HI_PAIR, + CM_COM_PAIR, + + CM_PLAIN_TEXT +}; + + +void critic_markup_accept(DString * d); + +void critic_markup_reject(DString * d); + +#endif diff --git a/Sources/libMultiMarkdown/writer.c b/Sources/libMultiMarkdown/writer.c index a1788c2..cb73955 100644 --- a/Sources/libMultiMarkdown/writer.c +++ b/Sources/libMultiMarkdown/writer.c @@ -1486,7 +1486,7 @@ void process_metadata_stack(mmd_engine * e, scratch_pad * scratch) { void automatic_search_text(mmd_engine * e, token * t, trie * ac) { - match * m = ac_trie_leftmost_longest_search(ac, &e->dstr->str[t->start], t->len); + match * m = ac_trie_leftmost_longest_search(ac, e->dstr->str, t->start, t->len); match * walker; @@ -1496,7 +1496,7 @@ void automatic_search_text(mmd_engine * e, token * t, trie * ac) { walker = m->next; while (walker) { - token_split(tok, walker->start + t->start, walker->len, walker->match_type); + token_split(tok, walker->start, walker->len, walker->match_type); // Advance token to section after the split (if present) tok = tok->next->next; diff --git a/Sources/multimarkdown/main.c b/Sources/multimarkdown/main.c index 2e0fdf1..821b574 100644 --- a/Sources/multimarkdown/main.c +++ b/Sources/multimarkdown/main.c @@ -61,6 +61,7 @@ #include "argtable3.h" +#include "critic_markup.h" #include "d_string.h" #include "i18n.h" #include "libMultiMarkdown.h" @@ -73,7 +74,8 @@ #define kBUFFERSIZE 4096 // How many bytes to read at a time // argtable structs -struct arg_lit *a_help, *a_version, *a_compatibility, *a_nolabels, *a_batch, *a_accept, *a_reject, *a_full, *a_snippet; +struct arg_lit *a_help, *a_version, *a_compatibility, *a_nolabels, *a_batch, + *a_accept, *a_reject, *a_full, *a_snippet; struct arg_str *a_format, *a_lang; struct arg_file *a_file, *a_o; struct arg_end *a_end; @@ -349,6 +351,15 @@ int main(int argc, char** argv) { // Don't free folder -- owned by dirname } + // Perform block level CriticMarkup? + if (extensions & EXT_CRITIC_ACCEPT) { + critic_markup_accept(buffer); + } + + if (extensions & EXT_CRITIC_REJECT) { + critic_markup_reject(buffer); + } + // Increment counter and prepare token pool #ifdef kUseObjectPool token_pool_init(); @@ -412,6 +423,15 @@ int main(int argc, char** argv) { // Don't free folder -- owned by dirname } + // Perform block level CriticMarkup? + if (extensions & EXT_CRITIC_ACCEPT) { + critic_markup_accept(buffer); + } + + if (extensions & EXT_CRITIC_REJECT) { + critic_markup_reject(buffer); + } + if (FORMAT_MMD == format) { result = buffer->str; } else { diff --git a/tests/CriticMarkup/CriticMarkup.htmla b/tests/CriticMarkup/CriticMarkup.htmla new file mode 100644 index 0000000..591eb9d --- /dev/null +++ b/tests/CriticMarkup/CriticMarkup.htmla @@ -0,0 +1,23 @@ + + + + + Extended CriticMarkup + + + +

This is a single paragraph

+ +

that was split in two.

+ +

This is two paragraphs joined together.

+ +

This is two paragraphs

+ +

With a new paragraph inserted

+ +

between them.

+ + + + diff --git a/tests/CriticMarkup/CriticMarkup.htmlr b/tests/CriticMarkup/CriticMarkup.htmlr new file mode 100644 index 0000000..59a6ab3 --- /dev/null +++ b/tests/CriticMarkup/CriticMarkup.htmlr @@ -0,0 +1,21 @@ + + + + + Extended CriticMarkup + + + +

This is a single paragraph that was split in two.

+ +

This is two paragraphs

+ +

joined together.

+ +

This is two paragraphs

+ +

with nothing between them.

+ + + + diff --git a/tests/CriticMarkup/CriticMarkup.text b/tests/CriticMarkup/CriticMarkup.text new file mode 100644 index 0000000..e63017a --- /dev/null +++ b/tests/CriticMarkup/CriticMarkup.text @@ -0,0 +1,18 @@ +Title: Extended CriticMarkup +latex config: article + +This is a *single* paragraph {++ + +++}that was split in two. + +This is *two* paragraphs {-- + +--}joined together. + +This is two paragraphs{~~ + +with nothing ~> + +With a *new* paragraph inserted + +~~}between them.