]> granicus.if.org Git - multimarkdown/commitdiff
ADDED: Add CriticMarkup preprocessor that works across empty lines when accepting...
authorFletcher T. Penney <fletcher@fletcherpenney.net>
Sun, 12 Mar 2017 22:16:53 +0000 (18:16 -0400)
committerFletcher T. Penney <fletcher@fletcherpenney.net>
Sun, 12 Mar 2017 22:16:53 +0000 (18:16 -0400)
CMakeLists.txt
Sources/libMultiMarkdown/aho-corasick.c
Sources/libMultiMarkdown/aho-corasick.h
Sources/libMultiMarkdown/critic_markup.c [new file with mode: 0644]
Sources/libMultiMarkdown/critic_markup.h [new file with mode: 0644]
Sources/libMultiMarkdown/writer.c
Sources/multimarkdown/main.c
tests/CriticMarkup/CriticMarkup.htmla [new file with mode: 0644]
tests/CriticMarkup/CriticMarkup.htmlr [new file with mode: 0644]
tests/CriticMarkup/CriticMarkup.text [new file with mode: 0644]

index 22a988a66dc1c3c475cdec490c2f1a6041213957..44002a06934337dfe0a30aa9415248c3c55998e8 100644 (file)
@@ -175,6 +175,7 @@ set(src_files
        Sources/libMultiMarkdown/aho-corasick.c
        Sources/libMultiMarkdown/beamer.c
        Sources/libMultiMarkdown/char.c
+       Sources/libMultiMarkdown/critic_markup.c
        Sources/libMultiMarkdown/d_string.c
        Sources/libMultiMarkdown/html.c
        Sources/libMultiMarkdown/latex.c
@@ -198,6 +199,7 @@ set(header_files
        Sources/libMultiMarkdown/aho-corasick.h
        Sources/libMultiMarkdown/beamer.h
        Sources/libMultiMarkdown/char.h
+       Sources/libMultiMarkdown/critic_markup.h
        Sources/libMultiMarkdown/include/d_string.h
        Sources/libMultiMarkdown/html.h
        Sources/libMultiMarkdown/latex.h
@@ -569,6 +571,10 @@ ADD_MMD_TEST(mmd-6-latex "-t latex" MMD6Tests tex)
 
 ADD_MMD_TEST(mmd-6-odf "-t odf" MMD6Tests fodt)
 
+ADD_MMD_TEST(mmd-6-critic-accept "-a" CriticMarkup htmla)
+
+ADD_MMD_TEST(mmd-6-critic-reject "-r" CriticMarkup htmlr)
+
 ADD_MMD_TEST(pathologic-compat "-c" ../build html)
 
 ADD_MMD_TEST(pathologic "" ../build html)
index bc1ee9f44527feadaea4b3088d00e8d412012ed5..ecd9ac4f4c4531e1d7b3b7b0e076649720ab60ca 100644 (file)
@@ -361,7 +361,7 @@ match * match_add(match * last, size_t start, size_t len, unsigned short match_t
 }
 
 
-match * ac_trie_search(trie * a, const char * source, size_t len) {
+match * ac_trie_search(trie * a, const char * source, size_t start, size_t len) {
 
        // Store results in a linked list
 //     match * result = match_new(0, 0, 0);
@@ -374,9 +374,10 @@ match * ac_trie_search(trie * a, const char * source, size_t len) {
 
        // Character being compared
        int test_value;
-       size_t counter = 0;
+       size_t counter = start;
+       size_t stop = start + len;
 
-       while ((counter < len) && (source[counter] != '\0')) {
+       while ((counter < stop) && (source[counter] != '\0')) {
                // Read next character
                test_value = (int)source[counter++];
 
@@ -494,8 +495,8 @@ void match_set_filter_leftmost_longest(match * header) {
 }
 
 
-match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t len) {
-       match * result = ac_trie_search(a, source, len);
+match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t start, size_t len) {
+       match * result = ac_trie_search(a, source, start, len);
 
        if (result)
                match_set_filter_leftmost_longest(result);
@@ -535,12 +536,12 @@ void Test_aho_trie_search(CuTest* tc) {
 
        ac_trie_prepare(a);
 
-       m = ac_trie_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 26);
+       m = ac_trie_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 0, 26);
        fprintf(stderr, "Finish with %d matches\n", match_count(m));
        match_set_describe(m, "ABCDEFGGGAZABCABCDZABCABCZ");
        match_free(m);
 
-       m = ac_trie_leftmost_longest_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 26);
+       m = ac_trie_leftmost_longest_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 0, 26);
        fprintf(stderr, "Finish with %d matches\n", match_count(m));
        match_set_describe(m, "ABCDEFGGGAZABCABCDZABCABCZ");
        match_free(m);
index 73414f2e2f21f49a685793db6a498d38c465a96b..dc0bb714790d902857cafade0b22f4e2a7f74b54 100644 (file)
@@ -96,9 +96,9 @@ bool trie_insert(trie * a, const char * key, unsigned short match_type);
 
 void ac_trie_prepare(trie * a);
 
-match * ac_trie_search(trie * a, const char * source, size_t len);
+match * ac_trie_search(trie * a, const char * source, size_t start, size_t len);
 
-match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t len);
+match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t start, size_t len);
 
 void trie_free(trie * a);
 
@@ -109,6 +109,9 @@ void match_set_filter_leftmost_longest(match * header);
 void match_free(match * m);
 
 
+void trie_to_graphviz(trie * a);
+
+
 #ifdef TEST
 #include "CuTest.h"
 #endif
diff --git a/Sources/libMultiMarkdown/critic_markup.c b/Sources/libMultiMarkdown/critic_markup.c
new file mode 100644 (file)
index 0000000..e18dbe5
--- /dev/null
@@ -0,0 +1,288 @@
+/**
+
+       MultiMarkdown -- Lightweight markup processor to produce HTML, LaTeX, and more.
+
+       @file critic_markup.c
+
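+       @brief Preprocessor that accepts or rejects CriticMarkup changes directly in the source text, so that changes may span empty lines.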
+
+
+       @author Fletcher T. Penney
+       @bug    
+
+**/
+
+/*
+
+       Copyright © 2016 - 2017 Fletcher T. Penney.
+
+
+       The `MultiMarkdown 6` project is released under the MIT License.
+       
+       GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+       
+               https://github.com/fletcher/MultiMarkdown-4/
+       
+       MMD 4 is released under both the MIT License and GPL.
+       
+       
+       CuTest is released under the zlib/libpng license. See CuTest.c for the
+       text of the license.
+       
+       
+       ## The MIT License ##
+       
+       Permission is hereby granted, free of charge, to any person obtaining
+       a copy of this software and associated documentation files (the
+       "Software"), to deal in the Software without restriction, including
+       without limitation the rights to use, copy, modify, merge, publish,
+       distribute, sublicense, and/or sell copies of the Software, and to
+       permit persons to whom the Software is furnished to do so, subject to
+       the following conditions:
+       
+       The above copyright notice and this permission notice shall be
+       included in all copies or substantial portions of the Software.
+       
+       THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+       EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+       MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+       IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+       CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+       TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+       SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+       
+
+*/
+
+#include <stdbool.h>
+#include <string.h>
+
+
+#include "aho-corasick.h"
+#include "critic_markup.h"
+#include "stack.h"
+#include "token_pairs.h"
+
+
+// Scan source[start .. start + len) for CriticMarkup delimiters and build a
+// root token (type 0) whose children are the delimiter tokens plus
+// CM_PLAIN_TEXT tokens covering the text between and after them.
+//
+// Returns NULL if the search produced no match list.  Callers in this file
+// release the returned tree with token_free().
+token * critic_tokenize_string(const char * source, size_t start, size_t len) {
+       trie * ac = trie_new(0);
+
+       trie_insert(ac, "{++", CM_ADD_OPEN);
+       trie_insert(ac, "++}", CM_ADD_CLOSE);
+
+       trie_insert(ac, "{--", CM_DEL_OPEN);
+       trie_insert(ac, "--}", CM_DEL_CLOSE);
+
+       trie_insert(ac, "{~~", CM_SUB_OPEN);
+       trie_insert(ac, "~>", CM_SUB_DIV);
+       trie_insert(ac, "~~}", CM_SUB_CLOSE);
+
+       trie_insert(ac, "{==", CM_HI_OPEN);
+       trie_insert(ac, "==}", CM_HI_CLOSE);
+
+       trie_insert(ac, "{>>", CM_COM_OPEN);
+       trie_insert(ac, "<<}", CM_COM_CLOSE);
+
+       ac_trie_prepare(ac);
+
+       match * m = ac_trie_leftmost_longest_search(ac, source, start, len);
+
+       token * root = NULL;
+
+       if (m) {
+               // NOTE(review): m appears to be a list header -- real matches
+               // start at m->next.  Confirm against ac_trie_search().
+               match * walker = m->next;
+
+               root = token_new(0, 0, 0);
+
+               // Offset of the first byte not yet covered by a token
+               size_t last = start;
+
+               while (walker) {
+                       if (walker->start > last) {
+                               // Plain text between the previous token and this match
+                               token_append_child(root, token_new(CM_PLAIN_TEXT, last, walker->start - last));
+                               last = walker->start;
+                       }
+
+                       // A match that begins before `last` is skipped
+                       if (walker->start == last) {
+                               token_append_child(root, token_new(walker->match_type, walker->start, walker->len));
+                               last = walker->start + walker->len;
+                       }
+
+                       walker = walker->next;
+               }
+
+               if (last < start + len) {
+                       // Trailing text: the third argument is a length, not an
+                       // end offset, so subtract what has already been consumed.
+                       token_append_child(root, token_new(CM_PLAIN_TEXT, last, start + len - last));
+               }
+
+               match_free(m);
+       }
+
+       // Release the trie on every path, including when no match list came back
+       trie_free(ac);
+
+       return root;
+}
+
+
+
+// Tokenize the given range of `source`, then run a token pair engine over the
+// result with one pairing registered per CriticMarkup construct (add, delete,
+// substitute, highlight, comment).
+//
+// Returns the root produced by critic_tokenize_string(), or NULL if that
+// function returned NULL.
+token * critic_parse_substring(const char * source, size_t start, size_t len) {
+       token * root = critic_tokenize_string(source, start, len);
+
+       // Nothing to pair up
+       if (!root) {
+               return root;
+       }
+
+       token_pair_engine * engine = token_pair_engine_new();
+
+       // Each open/close delimiter type maps onto its own *_PAIR type
+       token_pair_engine_add_pairing(engine, CM_ADD_OPEN, CM_ADD_CLOSE, CM_ADD_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+       token_pair_engine_add_pairing(engine, CM_DEL_OPEN, CM_DEL_CLOSE, CM_DEL_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+       token_pair_engine_add_pairing(engine, CM_SUB_OPEN, CM_SUB_CLOSE, CM_SUB_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+       token_pair_engine_add_pairing(engine, CM_HI_OPEN,  CM_HI_CLOSE,  CM_HI_PAIR,  PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+       token_pair_engine_add_pairing(engine, CM_COM_OPEN, CM_COM_CLOSE, CM_COM_PAIR, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
+
+       // Scratch stack used by the pairing pass
+       stack * scratch = stack_new(0);
+
+       token_pairs_match_pairs_inside_token(root, engine, scratch, 0);
+
+       stack_free(scratch);
+       token_pair_engine_free(engine);
+
+       return root;
+}
+
+
+// Forward declarations: both functions are called below before their
+// definitions appear (accept_token() and the tree walkers call each other).
+void accept_token_tree(DString * d, token * t);
+void accept_token(DString * d, token * t);
+
+
+// Accept a substitution.  Walks backwards from `t` through its ->prev
+// siblings: tokens met before the CM_SUB_DIV divider are handed to
+// accept_token(); the divider and every token preceding it are erased from
+// `d`.
+void accept_token_tree_sub(DString * d, token * t) {
+       // Part after the divider (visited first, since we walk backwards)
+       for (; t && t->type != CM_SUB_DIV; t = t->prev) {
+               accept_token(d, t);
+       }
+
+       // Divider and everything before it
+       for (; t; t = t->prev) {
+               d_string_erase(d, t->start, t->len);
+       }
+}
+
+
+// Apply the "accept" rule for a single token by erasing text from `d`:
+//   - substitution/addition delimiters are erased only when they have a mate
+//   - the substitution divider and whole deletion, comment and highlight
+//     pairs are erased outright
+//   - substitution and addition pairs are processed child by child
+// Any other token type is left untouched.
+void accept_token(DString * d, token * t) {
+       switch (t->type) {
+               case CM_SUB_OPEN:
+               case CM_SUB_CLOSE:
+               case CM_ADD_OPEN:
+               case CM_ADD_CLOSE:
+                       // Unmatched delimiters stay in the text as-is
+                       if (t->mate) {
+                               d_string_erase(d, t->start, t->len);
+                       }
+                       break;
+
+               case CM_SUB_DIV:
+               case CM_DEL_PAIR:
+               case CM_COM_PAIR:
+               case CM_HI_PAIR:
+                       // Always removed
+                       d_string_erase(d, t->start, t->len);
+                       break;
+
+               case CM_SUB_PAIR:
+                       // Erase old version and markers, starting from the mate
+                       // of the first child and walking backwards
+                       accept_token_tree_sub(d, t->child->mate);
+                       break;
+
+               case CM_ADD_PAIR:
+                       // Process the children, last to first
+                       accept_token_tree(d, t->child->mate);
+                       break;
+
+               default:
+                       break;
+       }
+}
+
+
+// Run accept_token() on `t` and then on each of its ->prev siblings in turn.
+void accept_token_tree(DString * d, token * t) {
+       // Going last-to-first keeps the offsets of earlier tokens valid while
+       // later text is being erased.
+       for (token * cur = t; cur; cur = cur->prev) {
+               accept_token(d, cur);
+       }
+}
+
+// Parse the whole of `d` for CriticMarkup and edit `d` in place so that every
+// change is accepted.
+void critic_markup_accept(DString * d) {
+       token * t = critic_parse_substring(d->str, 0, d->currentStringLength);
+
+       // The parse can come back empty-handed
+       if (!t) {
+               return;
+       }
+
+       // An empty buffer yields a root without children; only walk the tree
+       // when there is at least one token in it.
+       if (t->child) {
+               accept_token_tree(d, t->child->tail);
+       }
+
+       token_free(t);
+}
+
+
+// Forward declarations: both functions are called below before their
+// definitions appear (reject_token() and the tree walkers call each other).
+void reject_token_tree(DString * d, token * t);
+void reject_token(DString * d, token * t);
+
+
+// Reject a substitution.  Walks backwards from `t` through its ->prev
+// siblings: tokens met before the CM_SUB_DIV divider are erased from `d`;
+// the divider and every token preceding it are handed to reject_token().
+void reject_token_tree_sub(DString * d, token * t) {
+       // Part after the divider (visited first, since we walk backwards)
+       for (; t && t->type != CM_SUB_DIV; t = t->prev) {
+               d_string_erase(d, t->start, t->len);
+       }
+
+       // Divider and everything before it
+       for (; t; t = t->prev) {
+               reject_token(d, t);
+       }
+}
+
+
+// Apply the "reject" rule for a single token by erasing text from `d`:
+//   - substitution/deletion delimiters are erased only when they have a mate
+//   - the substitution divider and whole addition, comment and highlight
+//     pairs are erased outright
+//   - substitution and deletion pairs are processed child by child
+// Any other token type is left untouched.
+void reject_token(DString * d, token * t) {
+       switch (t->type) {
+               case CM_SUB_OPEN:
+               case CM_SUB_CLOSE:
+               case CM_DEL_OPEN:
+               case CM_DEL_CLOSE:
+                       // Unmatched delimiters stay in the text as-is
+                       if (t->mate) {
+                               d_string_erase(d, t->start, t->len);
+                       }
+                       break;
+
+               case CM_SUB_DIV:
+               case CM_ADD_PAIR:
+               case CM_COM_PAIR:
+               case CM_HI_PAIR:
+                       // Always removed
+                       d_string_erase(d, t->start, t->len);
+                       break;
+
+               case CM_SUB_PAIR:
+                       // Erase new version and markers, starting from the mate
+                       // of the first child and walking backwards
+                       reject_token_tree_sub(d, t->child->mate);
+                       break;
+
+               case CM_DEL_PAIR:
+                       // Process the children, last to first
+                       reject_token_tree(d, t->child->mate);
+                       break;
+
+               default:
+                       break;
+       }
+}
+
+
+// Run reject_token() on `t` and then on each of its ->prev siblings in turn.
+void reject_token_tree(DString * d, token * t) {
+       // Going last-to-first keeps the offsets of earlier tokens valid while
+       // later text is being erased.
+       for (token * cur = t; cur; cur = cur->prev) {
+               reject_token(d, cur);
+       }
+}
+
+// Parse the whole of `d` for CriticMarkup and edit `d` in place so that every
+// change is rejected.
+void critic_markup_reject(DString * d) {
+       token * t = critic_parse_substring(d->str, 0, d->currentStringLength);
+
+       // The parse can come back empty-handed
+       if (!t) {
+               return;
+       }
+
+       // An empty buffer yields a root without children; only walk the tree
+       // when there is at least one token in it.
+       if (t->child) {
+               reject_token_tree(d, t->child->tail);
+       }
+
+       token_free(t);
+}
+
diff --git a/Sources/libMultiMarkdown/critic_markup.h b/Sources/libMultiMarkdown/critic_markup.h
new file mode 100644 (file)
index 0000000..61e2327
--- /dev/null
@@ -0,0 +1,94 @@
+/**
+
+       MultiMarkdown -- Lightweight markup processor to produce HTML, LaTeX, and more.
+
+       @file critic_markup.h
+
+       @brief Token types and entry points for accepting or rejecting CriticMarkup changes in the source text.
+
+
+       @author Fletcher T. Penney
+       @bug    
+
+**/
+
+/*
+
+       Copyright © 2016 - 2017 Fletcher T. Penney.
+
+
+       The `MultiMarkdown 6` project is released under the MIT License.
+       
+       GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+       
+               https://github.com/fletcher/MultiMarkdown-4/
+       
+       MMD 4 is released under both the MIT License and GPL.
+       
+       
+       CuTest is released under the zlib/libpng license. See CuTest.c for the
+       text of the license.
+       
+       
+       ## The MIT License ##
+       
+       Permission is hereby granted, free of charge, to any person obtaining
+       a copy of this software and associated documentation files (the
+       "Software"), to deal in the Software without restriction, including
+       without limitation the rights to use, copy, modify, merge, publish,
+       distribute, sublicense, and/or sell copies of the Software, and to
+       permit persons to whom the Software is furnished to do so, subject to
+       the following conditions:
+       
+       The above copyright notice and this permission notice shall be
+       included in all copies or substantial portions of the Software.
+       
+       THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+       EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+       MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+       IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+       CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+       TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+       SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+       
+
+*/
+
+
+#ifndef CRITIC_MARKUP_MULTIMARKDOWN_H
+#define CRITIC_MARKUP_MULTIMARKDOWN_H
+
+#include "d_string.h"
+
+// Token types used while processing CriticMarkup.  The *_OPEN, *_DIV and
+// *_CLOSE values are assigned to delimiter strings by the tokenizer in
+// critic_markup.c; the *_PAIR values are the pair types registered with the
+// token pair engine for each open/close combination.
+enum cm_types {
+       CM_ADD_OPEN = 1,                // Can't use type 0
+       CM_ADD_CLOSE,                   // "++}"  (CM_ADD_OPEN is "{++")
+
+       CM_DEL_OPEN,                    // "{--"
+       CM_DEL_CLOSE,                   // "--}"
+
+       CM_SUB_OPEN,                    // "{~~"
+       CM_SUB_DIV,                     // "~>"
+       CM_SUB_CLOSE,                   // "~~}"
+
+       CM_HI_OPEN,                     // "{=="
+       CM_HI_CLOSE,                    // "==}"
+
+       CM_COM_OPEN,                    // "{>>"
+       CM_COM_CLOSE,                   // "<<}"
+
+       CM_ADD_PAIR,                    // CM_ADD_OPEN + CM_ADD_CLOSE
+       CM_DEL_PAIR,                    // CM_DEL_OPEN + CM_DEL_CLOSE
+       CM_SUB_PAIR,                    // CM_SUB_OPEN + CM_SUB_CLOSE
+       CM_HI_PAIR,                     // CM_HI_OPEN  + CM_HI_CLOSE
+       CM_COM_PAIR,                    // CM_COM_OPEN + CM_COM_CLOSE
+
+       CM_PLAIN_TEXT                   // Text between (or after) delimiters
+};
+
+
+// Edit `d` in place so that all CriticMarkup changes it contains are accepted.
+void critic_markup_accept(DString * d);
+
+// Edit `d` in place so that all CriticMarkup changes it contains are rejected.
+void critic_markup_reject(DString * d);
+
+#endif
index a1788c202beb4e93dcad5eced058747e40874953..cb73955f77517b56f508a72add0182cc097f7f39 100644 (file)
@@ -1486,7 +1486,7 @@ void process_metadata_stack(mmd_engine * e, scratch_pad * scratch) {
 
 
 void automatic_search_text(mmd_engine * e, token * t, trie * ac) {
-       match * m = ac_trie_leftmost_longest_search(ac, &e->dstr->str[t->start], t->len);
+       match * m = ac_trie_leftmost_longest_search(ac, e->dstr->str, t->start, t->len);
 
        match * walker;
 
@@ -1496,7 +1496,7 @@ void automatic_search_text(mmd_engine * e, token * t, trie * ac) {
                walker = m->next;
 
                while (walker) {
-                       token_split(tok, walker->start + t->start, walker->len, walker->match_type);
+                       token_split(tok, walker->start, walker->len, walker->match_type);
 
                        // Advance token to section after the split (if present)
                        tok = tok->next->next;
index 2e0fdf1563e20fe7d83bba45d3124d80a79eb3f1..821b5742eb1d25ceafb03226fe88876a1dc27f1c 100644 (file)
@@ -61,6 +61,7 @@
 
 
 #include "argtable3.h"
+#include "critic_markup.h"
 #include "d_string.h"
 #include "i18n.h"
 #include "libMultiMarkdown.h"
@@ -73,7 +74,8 @@
 #define kBUFFERSIZE 4096       // How many bytes to read at a time
 
 // argtable structs
-struct arg_lit *a_help, *a_version, *a_compatibility, *a_nolabels, *a_batch, *a_accept, *a_reject, *a_full, *a_snippet;
+struct arg_lit *a_help, *a_version, *a_compatibility, *a_nolabels, *a_batch,
+               *a_accept, *a_reject, *a_full, *a_snippet;
 struct arg_str *a_format, *a_lang;
 struct arg_file *a_file, *a_o;
 struct arg_end *a_end;
@@ -349,6 +351,15 @@ int main(int argc, char** argv) {
                                // Don't free folder -- owned by dirname
                        }
 
+                       // Perform block level CriticMarkup?
+                       if (extensions & EXT_CRITIC_ACCEPT) {
+                               critic_markup_accept(buffer);
+                       }
+
+                       if (extensions & EXT_CRITIC_REJECT) {
+                               critic_markup_reject(buffer);
+                       }
+
                        // Increment counter and prepare token pool
 #ifdef kUseObjectPool
                        token_pool_init();
@@ -412,6 +423,15 @@ int main(int argc, char** argv) {
                        // Don't free folder -- owned by dirname
                }
 
+               // Perform block level CriticMarkup?
+               if (extensions & EXT_CRITIC_ACCEPT) {
+                       critic_markup_accept(buffer);
+               }
+
+               if (extensions & EXT_CRITIC_REJECT) {
+                       critic_markup_reject(buffer);
+               }
+
                if (FORMAT_MMD == format) {
                        result = buffer->str;
                } else {
diff --git a/tests/CriticMarkup/CriticMarkup.htmla b/tests/CriticMarkup/CriticMarkup.htmla
new file mode 100644 (file)
index 0000000..591eb9d
--- /dev/null
@@ -0,0 +1,23 @@
+<!DOCTYPE html>
+<html>
+<head>
+       <meta charset="utf-8"/>
+       <title>Extended CriticMarkup</title>
+</head>
+<body>
+
+<p>This is a <em>single</em> paragraph</p>
+
+<p>that was split in two.</p>
+
+<p>This is <em>two</em> paragraphs joined together.</p>
+
+<p>This is two paragraphs</p>
+
+<p>With a <em>new</em> paragraph inserted</p>
+
+<p>between them.</p>
+
+</body>
+</html>
+
diff --git a/tests/CriticMarkup/CriticMarkup.htmlr b/tests/CriticMarkup/CriticMarkup.htmlr
new file mode 100644 (file)
index 0000000..59a6ab3
--- /dev/null
@@ -0,0 +1,21 @@
+<!DOCTYPE html>
+<html>
+<head>
+       <meta charset="utf-8"/>
+       <title>Extended CriticMarkup</title>
+</head>
+<body>
+
+<p>This is a <em>single</em> paragraph that was split in two.</p>
+
+<p>This is <em>two</em> paragraphs</p>
+
+<p>joined together.</p>
+
+<p>This is two paragraphs</p>
+
+<p>with nothing between them.</p>
+
+</body>
+</html>
+
diff --git a/tests/CriticMarkup/CriticMarkup.text b/tests/CriticMarkup/CriticMarkup.text
new file mode 100644 (file)
index 0000000..e63017a
--- /dev/null
@@ -0,0 +1,18 @@
+Title: Extended CriticMarkup
+latex config:  article
+
+This is a *single* paragraph {++
+
+++}that was split in two.
+
+This is *two* paragraphs {--
+
+--}joined together.
+
+This is two paragraphs{~~
+
+with nothing ~>
+
+With a *new* paragraph inserted
+
+~~}between them.