From: Fletcher T. Penney Date: Sat, 28 Jan 2017 22:19:35 +0000 (-0500) Subject: ADDED: Basic metadata support X-Git-Tag: 0.1.1a^2~4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=832722e6b5602582b42d422ce4eabff38a384f8a;p=multimarkdown ADDED: Basic metadata support --- diff --git a/src/html.c b/src/html.c index 30c4283..e576b85 100644 --- a/src/html.c +++ b/src/html.c @@ -423,9 +423,6 @@ void mmd_export_token_html(DString * out, const char * source, token * t, size_t scratch->padded = 0; break; case BLOCK_META: - print("\n"); - //token_tree_describe(t, source); - print("\n"); break; case BLOCK_PARA: case BLOCK_DEF_CITATION: @@ -744,6 +741,9 @@ void mmd_export_token_html(DString * out, const char * source, token * t, size_t mmd_export_token_tree_html(out, source, t->child, offset, scratch); } break; + case PAIR_BRACKET_VARIABLE: + print_token(t); + break; case PAIR_CRITIC_ADD: // Ignore if we're rejecting if (scratch->extensions & EXT_CRITIC_REJECT) @@ -1003,6 +1003,54 @@ void mmd_export_token_html_raw(DString * out, const char * source, token * t, si } +void mmd_start_complete_html(DString * out, const char * source, scratch_pad * scratch) { + print("\n\n\n\t\n"); + + // Iterate over metadata keys + meta * m; + + for (m = scratch->meta_hash; m != NULL; m = m->hh.next) { + if (strcmp(m->key, "baseheaderlevel") == 0) { + } else if (strcmp(m->key, "bibtex") == 0) { + } else if (strcmp(m->key, "css") == 0) { + print("\tvalue, false); + print("\"/>\n"); + } else if (strcmp(m->key, "htmlfooter") == 0) { + } else if (strcmp(m->key, "htmlheader") == 0) { + } else if (strcmp(m->key, "htmlheaderlevel") == 0) { + } else if (strcmp(m->key, "lang") == 0) { + } else if (strcmp(m->key, "latexfooter") == 0) { + } else if (strcmp(m->key, "latexinput") == 0) { + } else if (strcmp(m->key, "latexmode") == 0) { + } else if (strcmp(m->key, "mmdfooter") == 0) { + } else if (strcmp(m->key, "mmdheader") == 0) { + } else if (strcmp(m->key, "quoteslanguage") == 0) { + } else if (strcmp(m->key, "title") == 0) { + print("\t"); + mmd_print_string_html(out, m->value, false); + print("\n"); + } else if (strcmp(m->key, "transcludebase") == 0) { + } else if (strcmp(m->key, "xhtmlheader") == 0) { + } else if (strcmp(m->key, "xhtmlheaderlevel") == 0) { + } else { + print("\tkey, false); + print("\" content=\""); + mmd_print_string_html(out, m->value, false); + print("\"/>\n"); + } + } + + print("\n\n\n"); +} + + +void mmd_end_complete_html(DString * out, const char * source, scratch_pad * scratch) { + print("\n\n\n\n"); +} + + void mmd_export_token_tree_html_raw(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch) { while (t != NULL) { if (scratch->skip_token) { diff --git a/src/html.h b/src/html.h index 7caf98e..281d7cd 100644 --- a/src/html.h +++ b/src/html.h @@ -70,5 +70,8 @@ void mmd_export_token_tree_html_raw(DString * out, const char * source, token * void mmd_export_citation_list_html(DString * out, const char * source, scratch_pad * scratch); void mmd_export_footnote_list_html(DString * out, const char * source, scratch_pad * scratch); +void mmd_start_complete_html(DString * out, const char * source, scratch_pad * scratch); +void mmd_end_complete_html(DString * out, const char * source, scratch_pad * scratch); + #endif diff --git a/src/main.c b/src/main.c index 9d4211f..b5cd556 100644 --- a/src/main.c +++ b/src/main.c @@ -71,7 +71,7 @@ #define kBUFFERSIZE 4096 // How many bytes to read at a time // argtable structs -struct arg_lit *a_help, *a_version, *a_compatibility, *a_nolabels, *a_batch, *a_accept, *a_reject; +struct arg_lit *a_help, *a_version, *a_compatibility, *a_nolabels, *a_batch, *a_accept, *a_reject, *a_full, *a_snippet; struct arg_str *a_format, *a_lang; struct arg_file *a_file, *a_o; struct arg_end *a_end; @@ -192,6 +192,8 @@ int main(int argc, char** argv) { a_batch = arg_lit0("b", "batch", "process each file separately"), a_compatibility = arg_lit0("c", "compatibility", "Markdown compatibility mode"), + a_full = arg_lit0("f", "full", "force a complete document"), + a_snippet = arg_lit0("s", "snippet", "force a snippet"), a_rem2 = arg_rem("", ""), @@ -273,6 +275,16 @@ int main(int argc, char** argv) { extensions &= ~(EXT_CRITIC_REJECT | EXT_CRITIC_ACCEPT); } + if (a_full->count > 0) { + // Force complete document + extensions |= EXT_COMPLETE; + } + + if (a_snippet->count > 0) { + // Force snippet + extensions |= EXT_SNIPPET; + } + if (a_format->count > 0) { if (strcmp(a_format->sval[0], "html") == 0) format = FORMAT_HTML; diff --git a/src/mmd.c b/src/mmd.c index 32e27d6..8f15053 100644 --- a/src/mmd.c +++ b/src/mmd.c @@ -101,6 +101,7 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) { e->footnote_stack = stack_new(0); e->header_stack = stack_new(0); e->link_stack = stack_new(0); + e->metadata_stack = stack_new(0); e->pairings1 = token_pair_engine_new(); e->pairings2 = token_pair_engine_new(); @@ -222,7 +223,13 @@ void mmd_engine_free(mmd_engine * e, bool freeDString) { footnote_free(stack_pop(e->footnote_stack)); } stack_free(e->footnote_stack); - + + // Metadata needs to be freed + while (e->metadata_stack->size) { + meta_free(stack_pop(e->metadata_stack)); + } + stack_free(e->metadata_stack); + free(e); } @@ -846,6 +853,13 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, cons while (t != NULL) { switch (t->type) { + case BLOCK_META: + // Do we treat this like metadata? + if (!(e->extensions & EXT_COMPATIBILITY) && + !(e->extensions & EXT_NO_METADATA)) + return; + // This is not metadata + t->type = BLOCK_PARA; case DOC_START_TOKEN: case BLOCK_BLOCKQUOTE: case BLOCK_H1: @@ -860,7 +874,6 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, cons case BLOCK_LIST_ENUMERATED_LOOSE: case BLOCK_LIST_ITEM: case BLOCK_LIST_ITEM_TIGHT: - case BLOCK_META: case BLOCK_PARA: case BLOCK_TABLE: // Assign child tokens of blocks @@ -1278,6 +1291,7 @@ void is_para_html(mmd_engine * e, token * block) { } } + void recursive_parse_blockquote(mmd_engine * e, token * block) { // Strip blockquote markers (if present) strip_quote_markers_from_block(e, block); @@ -1286,7 +1300,69 @@ void recursive_parse_blockquote(mmd_engine * e, token * block) { } -void strip_line_tokens_from_block(token * block) { +void metadata_stack_describe(mmd_engine * e) { + meta * m; + + for (int i = 0; i < e->metadata_stack->size; ++i) + { + m = stack_peek_index(e->metadata_stack, i); + fprintf(stderr, "'%s': '%s'\n", m->key, m->value); + } +} + + +void strip_line_tokens_from_metadata(mmd_engine * e, token * metadata) { + token * l = metadata->child; + char * source = e->dstr->str; + + meta * m = NULL; + size_t start, len; + + DString * d = d_string_new(""); + + while (l) { + switch (l->type) { + case LINE_META: + if (m) { + meta_set_value(m, d->str); + d_string_erase(d, 0, -1); + } + len = scan_meta_key(&source[l->start]); + m = meta_new(source, l->start, len); + start = l->start + len + 1; + len = l->start + l->len - start - 1; + d_string_append_c_array(d, &source[start], len); + stack_push(e->metadata_stack, m); + break; + case LINE_INDENTED_TAB: + case LINE_INDENTED_SPACE: + while (l->len && char_is_whitespace(source[l->start])) { + l->start++; + l->len--; + } + case LINE_PLAIN: + d_string_append_c(d, '\n'); + d_string_append_c_array(d, &source[l->start], l->len); + break; + default: + fprintf(stderr, "ERROR!\n"); + token_describe(l, NULL); + break; + } + + l = l->next; + } + + // Finish last line + if (m) { + meta_set_value(m, d->str); + } + + d_string_free(d, true); +} + + +void strip_line_tokens_from_block(mmd_engine * e, token * block) { if ((block == NULL) || (block->child == NULL)) return; @@ -1297,10 +1373,16 @@ void strip_line_tokens_from_block(token * block) { token * l = block->child; - // Strip trailing empty lines from indented code blocks - if (block->type == BLOCK_CODE_INDENTED) { - while (l->tail->type == LINE_EMPTY) - token_remove_last_child(block); + // Custom actions + switch (block->type) { + case BLOCK_META: + // Handle metadata differently + return strip_line_tokens_from_metadata(e, block); + case BLOCK_CODE_INDENTED: + // Strip trailing empty lines from indented code blocks + while (l->tail->type == LINE_EMPTY) + token_remove_last_child(block); + break; } token * children = NULL; @@ -1308,7 +1390,6 @@ void strip_line_tokens_from_block(token * block) { token * temp; - // Move contents of line directly into the parent block while (l != NULL) { switch (l->type) { diff --git a/src/mmd.h b/src/mmd.h index 1612a94..b99bb6e 100644 --- a/src/mmd.h +++ b/src/mmd.h @@ -78,6 +78,7 @@ struct mmd_engine { stack * header_stack; stack * footnote_stack; stack * link_stack; + stack * metadata_stack; short language; short quotes_lang; @@ -87,7 +88,7 @@ struct mmd_engine { /// Expose routines to lemon parser void recursive_parse_list_item(mmd_engine * e, token * block); void recursive_parse_blockquote(mmd_engine * e, token * block); -void strip_line_tokens_from_block(token * block); +void strip_line_tokens_from_block(mmd_engine * e, token * block); void is_para_html(mmd_engine * e, token * block); diff --git a/src/parser.c b/src/parser.c index 8f77bbf..5f25b72 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1002,7 +1002,7 @@ static void yy_reduce( break; case 1: /* blocks ::= blocks block */ { - strip_line_tokens_from_block(yymsp[0].minor.yy0); + strip_line_tokens_from_block(engine, yymsp[0].minor.yy0); if (yymsp[-1].minor.yy0 == NULL) { yymsp[-1].minor.yy0 = yymsp[0].minor.yy0; yymsp[0].minor.yy0 = NULL;} yylhsminor.yy0 = yymsp[-1].minor.yy0; token_chain_append(yylhsminor.yy0, yymsp[0].minor.yy0); @@ -1014,7 +1014,7 @@ static void yy_reduce( break; case 2: /* blocks ::= block */ { - strip_line_tokens_from_block(yymsp[0].minor.yy0); + strip_line_tokens_from_block(engine, yymsp[0].minor.yy0); #ifndef NDEBUG fprintf(stderr, "First block %d\n", yymsp[0].minor.yy0->type); #endif diff --git a/src/parser.y b/src/parser.y index 3793d6f..bffe618 100644 --- a/src/parser.y +++ b/src/parser.y @@ -70,7 +70,7 @@ doc ::= blocks(B). { engine->root = B; } blocks(A) ::= blocks(B) block(C). { - strip_line_tokens_from_block(C); + strip_line_tokens_from_block(engine, C); if (B == NULL) { B = C; C = NULL;} A = B; token_chain_append(A, C); @@ -80,7 +80,7 @@ blocks(A) ::= blocks(B) block(C). } blocks(A) ::= block(B). { - strip_line_tokens_from_block(B); + strip_line_tokens_from_block(engine, B); #ifndef NDEBUG fprintf(stderr, "First block %d\n", B->type); #endif diff --git a/src/scanners.c b/src/scanners.c index 565362c..221ec4b 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.14.3 on Tue Jan 24 17:14:02 2017 */ +/* Generated by re2c 0.14.3 on Sat Jan 28 15:02:52 2017 */ /** MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. @@ -8786,6 +8786,180 @@ yy633: } +size_t scan_meta_key(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy636; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy637; + default: goto yy639; + } +yy636: + { return 0; } +yy637: + ++c; + yych = *c; + goto yy641; +yy638: + { return (size_t)( c - start ); } +yy639: + yych = *++c; + goto yy636; +yy640: + ++c; + yych = *c; +yy641: + switch (yych) { + case ' ': + case '!': + case '"': + case '#': + case '$': + case '%': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy640; + default: goto yy638; + } +} + +} + #ifdef TEST void Test_scan_url(CuTest* tc) { diff --git a/src/scanners.h b/src/scanners.h index 67df9d9..68110fd 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -70,6 +70,7 @@ size_t scan_html(const char * c); size_t scan_html_block(const char * c); size_t scan_html_line(const char * c); size_t scan_key(const char * c); +size_t scan_meta_key(const char * c); size_t scan_meta_line(const char * c); size_t scan_ref_citation(const char * c); size_t scan_ref_foot(const char * c); diff --git a/src/scanners.re b/src/scanners.re index d22bd5c..2ae6207 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -324,6 +324,16 @@ size_t scan_meta_line(const char * c) { } +size_t scan_meta_key(const char * c) { + const char * marker = NULL; + const char * start = c; + +/*!re2c + meta_key { return (size_t)( c - start ); } + .? { return 0; } +*/ +} + #ifdef TEST void Test_scan_url(CuTest* tc) { diff --git a/src/writer.c b/src/writer.c index 4b229e2..0d11873 100644 --- a/src/writer.c +++ b/src/writer.c @@ -74,6 +74,8 @@ void store_footnote(scratch_pad * scratch, footnote * f); void store_link(scratch_pad * scratch, link * l); +void store_metadata(scratch_pad * scratch, meta * m); + /// Temporary storage while exporting parse tree to output format scratch_pad * scratch_pad_new(mmd_engine * e) { @@ -84,8 +86,12 @@ scratch_pad * scratch_pad_new(mmd_engine * e) { p->list_is_tight = false; // Tight vs Loose list p->skip_token = 0; // Skip over next n tokens - p->link_hash = NULL; // Store defined links in a hash + p->extensions = e->extensions; + p->quotes_lang = e->quotes_lang; + p->language = e->language; + // Store links in a hash for rapid retrieval when exporting + p->link_hash = NULL; link * l; for (int i = 0; i < e->link_stack->size; ++i) @@ -95,6 +101,7 @@ scratch_pad * scratch_pad_new(mmd_engine * e) { store_link(p, l); } + // Store footnotes in a hash for rapid retrieval when exporting p->used_footnotes = stack_new(0); // Store footnotes as we use them p->inline_footnotes_to_free = stack_new(0); // Inline footnotes need to be freed p->footnote_being_printed = 0; @@ -111,6 +118,7 @@ scratch_pad * scratch_pad_new(mmd_engine * e) { store_footnote(p, f); } + // Store citations in a hash for rapid retrieval when exporting p->used_citations = stack_new(0); p->inline_citations_to_free = stack_new(0); p->citation_being_printed = 0; @@ -124,10 +132,16 @@ scratch_pad * scratch_pad_new(mmd_engine * e) { store_citation(p, f); } + // Store links in a hash for rapid retrieval when exporting + p->meta_hash = NULL; + meta * m; - p->extensions = e->extensions; - p->quotes_lang = e->quotes_lang; - p->language = e->language; + for (int i = 0; i < e->metadata_stack->size; ++i) + { + m = stack_peek_index(e->metadata_stack, i); + + store_metadata(p, m); + } } return p; @@ -176,6 +190,14 @@ void scratch_pad_free(scratch_pad * scratch) { } stack_free(scratch->inline_citations_to_free); + // Free metadata hash + meta * m, * m_tmp; + + HASH_ITER(hh, scratch->meta_hash, m, m_tmp) { + HASH_DEL(scratch->meta_hash, m); // Remove item from hash + // Don't free meta pointer since it is freed with the mmd_engine + //meta_free(m); + } free(scratch); } @@ -549,6 +571,18 @@ void store_citation(scratch_pad * scratch, footnote * f) { } +void store_metadata(scratch_pad * scratch, meta * m) { + meta * temp; + + // Store by `key` + HASH_FIND_STR(scratch->meta_hash, m->key, temp); + + if (!temp) { + HASH_ADD_KEYPTR(hh, scratch->meta_hash, m->key, strlen(m->key), m); + } +} + + void link_free(link * l) { free(l->label_text); free(l->clean_text); @@ -763,6 +797,53 @@ void footnote_free(footnote * f) { } +meta * meta_new(const char * source, size_t key_start, size_t len) { + meta * m = malloc(sizeof(meta)); + char * key; + + if (m) { + key = strndup(&source[key_start], len); + m->key = label_from_string(key); + free(key); + m->value = NULL; + } + + return m; +} + + +void meta_set_value(meta * m, const char * value) { + if (value) { + if (m->value) + free(m->value); + + m->value = clean_string(value, false); + } +} + + +void meta_free(meta * m) { + free(m->key); + free(m->value); + + free(m); +} + + +/// Find metadata based on key +meta * extract_meta_from_stack(scratch_pad * scratch, const char * target) { + char * key = clean_string(target, true); + + meta * temp = NULL; + + HASH_FIND_STR(scratch->meta_hash, key, temp); + + free(key); + + return temp; +} + + bool definition_extract(mmd_engine * e, token ** remainder) { char * source = e->dstr->str; token * label = NULL; @@ -956,6 +1037,40 @@ void process_header_stack(mmd_engine * e) { } } + +/// Parse metadata +void process_metadata_stack(mmd_engine * e, scratch_pad * scratch) { + if ((scratch->extensions & EXT_NO_METADATA) || + (scratch->extensions & EXT_COMPATIBILITY)) + return; + + meta * m; + + for (int i = 0; i < e->metadata_stack->size; ++i) + { + // Check for certain metadata keys + m = stack_peek_index(e->metadata_stack, i); + + // Certain keys do not force complete documents + if (!(scratch->extensions & EXT_COMPLETE) && + !(scratch->extensions & EXT_SNIPPET)) { + if ((strcmp(m->key, "baseheaderlevel") != 0) && + (strcmp(m->key, "xhtmlheaderlevel") != 0) && + (strcmp(m->key, "htmlheaderlevel") != 0) && + (strcmp(m->key, "latexheaderlevel") != 0) && + (strcmp(m->key, "odfheaderlevel") != 0) && + (strcmp(m->key, "xhtmlheader") != 0) && + (strcmp(m->key, "htmlheader") != 0) && + (strcmp(m->key, "quoteslanguage") != 0)) { + // We found a key that is not in the list, so + // Force a complete document + scratch->extensions |= EXT_COMPLETE; + } + } + } +} + + void mmd_export_token_tree(DString * out, mmd_engine * e, short format) { // Process potential reference definitions @@ -967,11 +1082,22 @@ void mmd_export_token_tree(DString * out, mmd_engine * e, short format) { // Create scratch pad scratch_pad * scratch = scratch_pad_new(e); + // Process metadata + process_metadata_stack(e, scratch); + + switch (format) { case FORMAT_HTML: + if (scratch->extensions & EXT_COMPLETE) + mmd_start_complete_html(out, e->dstr->str, scratch); + mmd_export_token_tree_html(out, e->dstr->str, e->root, 0, scratch); mmd_export_footnote_list_html(out, e->dstr->str, scratch); mmd_export_citation_list_html(out, e->dstr->str, scratch); + + if (scratch->extensions & EXT_COMPLETE) + mmd_end_complete_html(out, e->dstr->str, scratch); + break; } diff --git a/src/writer.h b/src/writer.h index db288b3..f02c28c 100644 --- a/src/writer.h +++ b/src/writer.h @@ -69,6 +69,7 @@ typedef struct { struct link * link_hash; + struct meta * meta_hash; unsigned long extensions; short padded; //!< How many empty lines at end output buffer @@ -134,6 +135,14 @@ struct fn_holder { typedef struct fn_holder fn_holder; +struct meta { + char * key; + char * value; + UT_hash_handle hh; +}; + +typedef struct meta meta; + /// Temporary storage while exporting parse tree to output format scratch_pad * scratch_pad_new(mmd_engine * e); @@ -168,6 +177,9 @@ char * url_accept(const char * source, token ** remainder, bool validate); void footnote_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num); void citation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num); +meta * meta_new(const char * source, size_t start, size_t len); +void meta_set_value(meta * m, const char * value); +void meta_free(meta * m); #endif diff --git a/tests/MMD6Tests/Metadata.html b/tests/MMD6Tests/Metadata.html new file mode 100644 index 0000000..c61bbfe --- /dev/null +++ b/tests/MMD6Tests/Metadata.html @@ -0,0 +1,14 @@ + + + + + *foo* "bar" + + + + + +

foo: bar

+ + + diff --git a/tests/MMD6Tests/Metadata.htmlc b/tests/MMD6Tests/Metadata.htmlc new file mode 100644 index 0000000..7519afa --- /dev/null +++ b/tests/MMD6Tests/Metadata.htmlc @@ -0,0 +1,8 @@ +

title: foo "bar" +css: http://foo.com/bar.css +foo: bar +foo bar +foo bar +foo: bar

+ +

foo: bar

diff --git a/tests/MMD6Tests/Metadata.text b/tests/MMD6Tests/Metadata.text new file mode 100644 index 0000000..f7821fc --- /dev/null +++ b/tests/MMD6Tests/Metadata.text @@ -0,0 +1,8 @@ +title: *foo* "bar" +css: http://foo.com/bar.css +foo: bar +foo bar + foo bar +foo: *bar* + +foo: bar