From 0864bc4281af8afa363e905593fb30990885d41a Mon Sep 17 00:00:00 2001
From: "Fletcher T. Penney"
Date: Tue, 23 May 2017 16:30:27 -0400
Subject: [PATCH] CHANGED: Refactor code for more consistent API naming in libMultiMarkdown.h; Add functionality to list metadata keys and extract specific metadata values

---
Review notes (git-am ignores everything between "---" and the first "diff --git"):

 * mmd_engine_has_metadata(): `end` is now optional. mmd_engine_metadata_keys()
   and mmd_engine_metavalue_for_key() pass NULL, so the unguarded `*end = 0`
   was a NULL dereference whenever the first line is not metadata.
 * mmd_string_metavalue_for_key(): the lookup result is checked before
   strdup(); it is NULL when the text has no metadata.
 * Both fixes keep the added/removed line counts, so the hunk headers and the
   diffstat below are unchanged.
 * The one-diff-line-per-line layout was restored from the hunk headers; tabs
   and blank context lines are inferred -- run `git apply --check` first.

 .../include/libMultiMarkdown.h        |  60 ++++++--
 Sources/libMultiMarkdown/mmd.c        |  89 +++++++++--
 Sources/libMultiMarkdown/transclude.c |   6 +-
 Sources/libMultiMarkdown/writer.c     |   2 +-
 Sources/multimarkdown/main.c          | 140 ++++++++++++------
 5 files changed, 225 insertions(+), 72 deletions(-)

diff --git a/Sources/libMultiMarkdown/include/libMultiMarkdown.h b/Sources/libMultiMarkdown/include/libMultiMarkdown.h
index 2738796..41fb6c8 100644
--- a/Sources/libMultiMarkdown/include/libMultiMarkdown.h
+++ b/Sources/libMultiMarkdown/include/libMultiMarkdown.h
@@ -10,6 +10,20 @@
 
 	@author Fletcher T. Penney
 	@bug
+
+	******IMPORTANT******
+
+	If you are using libMultiMarkdown in your own project, you need to either:
+
+	1. Disable kUseObjectPool in `token.h`
+
+	2. Properly manage the `token_pool_init` and `token_pool_free` functions.
+
+
+	I recommend option #1, unless you absolutely need the best performance for
+	long documents.  Doing #2 properly is tricky in any program that can handle
+	multiple MMD text strings at overlapping times.
+
 **/
 
 /*
@@ -64,18 +78,33 @@
 #include "token.h"
 
 
-// Convert MMD text to specified format, with specified extensions, and language
-// Returned char * must be freed
-char * mmd_convert_string(const char * source, unsigned long extensions, short format, short language);
+/// Convert MMD text to specified format, with specified extensions, and language
+/// Returned char * must be freed
+char * mmd_string_convert(const char * source, unsigned long extensions, short format, short language);
+
+
+/// Does the text have metadata?
+bool mmd_string_has_metadata(char * source, size_t * end);
+
+
+/// Return metadata keys, one per line
+/// Returned char * must be freed; NULL if the text has no metadata
+char * mmd_string_metadata_keys(char * source);
+
+
+/// Extract desired metadata as string value
+/// Returned char * must be freed; NULL if the text has no metadata
+char * mmd_string_metavalue_for_key(char * source, const char * key);
+
+/// Convert MMD text to specified format, with specified extensions, and language
+/// Returned char * must be freed
+char * mmd_d_string_convert(DString * source, unsigned long extensions, short format, short language);
 
-// Convert MMD text to specified format, with specified extensions, and language
-// Returned char * must be freed
-char * mmd_convert_d_string(DString * source, unsigned long extensions, short format, short language);
 
 
-// Convert MMD text and write results to specified file -- used for "complex" output formats requiring
-// multiple documents (e.g. EPUB)
-void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath);
+/// Convert MMD text and write results to specified file -- used for "complex" output formats requiring
+/// multiple documents (e.g. EPUB)
+void mmd_d_string_convert_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath);
 
 
 /// MMD Engine is used for storing configuration information for MMD parser
@@ -117,23 +146,30 @@ void mmd_engine_parse_string(mmd_engine * e);
 
 
 /// Does the text have metadata?
-bool mmd_has_metadata(mmd_engine * e, size_t * end);
+bool mmd_engine_has_metadata(mmd_engine * e, size_t * end);
+
+
+/// Return metadata keys, one per line
+/// Returned char * must be freed; NULL if the text has no metadata
+char * mmd_engine_metadata_keys(mmd_engine * e);
 
 
 /// Extract desired metadata as string value
-char * metavalue_for_key(mmd_engine * e, const char * key);
+char * mmd_engine_metavalue_for_key(mmd_engine * e, const char * key);
 
 
-void mmd_export_token_tree(DString * out, mmd_engine * e, short format);
+void mmd_engine_export_token_tree(DString * out, mmd_engine * e, short format);
 
 
 /// Set language and smart quotes language
 void mmd_engine_set_language(mmd_engine * e, short language);
 
+
 /// Return the version string for this build of libMultiMarkdown
 /// The returned `char *` will need to be freed after it is no longer needed.
 char * mmd_version(void);
 
+
 /// Token types for parse tree
 enum token_types {
 	DOC_START_TOKEN = 0,	//!< DOC_START_TOKEN must be type 0
diff --git a/Sources/libMultiMarkdown/mmd.c b/Sources/libMultiMarkdown/mmd.c
index 0a321a6..afd497b 100644
--- a/Sources/libMultiMarkdown/mmd.c
+++ b/Sources/libMultiMarkdown/mmd.c
@@ -1911,14 +1911,27 @@ void mmd_engine_parse_string(mmd_engine * e) {
 }
 
 
-bool mmd_has_metadata(mmd_engine * e, size_t * end) {
+/// Does the text have metadata?
+bool mmd_string_has_metadata(char * source, size_t * end) {
+	bool result;
+
+	mmd_engine * e = mmd_engine_create_with_string(source, 0);
+	result = mmd_engine_has_metadata(e, end);
+
+	mmd_engine_free(e, true);
+
+	return result;
+}
+
+
+bool mmd_engine_has_metadata(mmd_engine * e, size_t * end) {
 	bool result = false;
 
 	if (!(scan_meta_line(&e->dstr->str[0]))) {
 		// First line is not metadata, so can't have metadata
 		// Saves the time of an unnecessary parse
 		//		TODO:  Need faster confirmation of actual metadata than full tokenizing
-
+		if (end) { *end = 0; }	// `end` is optional -- internal callers pass NULL
 		return false;
 	}
 
@@ -1947,12 +1960,68 @@ bool mmd_has_metadata(mmd_engine * e, size_t * end) {
 }
 
 
+/// Return metadata keys, one per line
+/// Returned char * must be freed; NULL if the text has no metadata
+char * mmd_string_metadata_keys(char * source) {
+	char * result;
+
+	mmd_engine * e = mmd_engine_create_with_string(source, 0);
+	result = mmd_engine_metadata_keys(e);
+
+	mmd_engine_free(e, true);
+
+	return result;
+}
+
+
+/// Return metadata keys, one per line
+/// Returned char * must be freed; NULL if the text has no metadata
+char * mmd_engine_metadata_keys(mmd_engine * e) {
+	if (e->metadata_stack->size == 0) {
+		// Ensure we have checked for metadata
+		if (!mmd_engine_has_metadata(e, NULL))
+			return NULL;
+	}
+
+	char * result = NULL;
+	DString * output = d_string_new("");
+
+	meta * m;
+
+	for (int i = 0; i < e->metadata_stack->size; ++i)
+	{
+		m = stack_peek_index(e->metadata_stack, i);
+
+		d_string_append_printf(output, "%s\n", m->key);
+	}
+
+	result = output->str;
+	d_string_free(output, false);
+
+	return result;
+}
+
+
+/// Extract desired metadata as string value
+/// Returned char * must be freed; NULL if the text has no metadata
+char * mmd_string_metavalue_for_key(char * source, const char * key) {
+	mmd_engine * e = mmd_engine_create_with_string(source, 0);
+
+	// The value belongs to the engine, so copy it before the engine is freed.
+	const char * value = mmd_engine_metavalue_for_key(e, key);
+	char * result = value ? strdup(value) : NULL;	// strdup(NULL) is undefined
+
+	mmd_engine_free(e, true);
+	return result;
+}
+
+
 /// Grab metadata without processing entire document
 /// Returned char * does not need to be freed
-char * metavalue_for_key(mmd_engine * e, const char * key) {
+char * mmd_engine_metavalue_for_key(mmd_engine * e, const char * key) {
 	if (e->metadata_stack->size == 0) {
 		// Ensure we have checked for metadata
-		if (!mmd_has_metadata(e, NULL))
+		if (!mmd_engine_has_metadata(e, NULL))
 			return NULL;
 	}
 
@@ -1979,7 +2048,7 @@ char * metavalue_for_key(mmd_engine * e, const char * key) {
 
 /// Convert MMD text to specified format, with specified extensions, and language
 /// Returned char * must be freed
-char * mmd_convert_string(const char * source, unsigned long extensions, short format, short language) {
+char * mmd_string_convert(const char * source, unsigned long extensions, short format, short language) {
 	char * result;
 
 	mmd_engine * e = mmd_engine_create_with_string(source, extensions);
@@ -1990,7 +2059,7 @@ char * mmd_convert_string(const char * source, unsigned long extensions, short f
 
 	DString * output = d_string_new("");
 
-	mmd_export_token_tree(output, e, format);
+	mmd_engine_export_token_tree(output, e, format);
 
 	result = output->str;
 
@@ -2003,7 +2072,7 @@ char * mmd_convert_string(const char * source, unsigned long extensions, short f
 
 /// Convert MMD text to specified format, with specified extensions, and language
 /// Returned char * must be freed
-char * mmd_convert_d_string(DString * source, unsigned long extensions, short format, short language) {
+char * mmd_d_string_convert(DString * source, unsigned long extensions, short format, short language) {
 	char * result;
 
 	mmd_engine * e = mmd_engine_create_with_dstring(source, extensions);
@@ -2014,7 +2083,7 @@ char * mmd_convert_d_string(DString * source, unsigned long extensions, short fo
 
 	DString * output = d_string_new("");
 
-	mmd_export_token_tree(output, e, format);
+	mmd_engine_export_token_tree(output, e, format);
 
 	result = output->str;
 
@@ -2025,7 +2094,7 @@ char * mmd_convert_d_string(DString * source, unsigned long extensions, short fo
 }
 
 
-void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath) {
+void mmd_d_string_convert_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath) {
 	FILE * output_stream;
 
 	mmd_engine * e = mmd_engine_create_with_dstring(source, extensions);
@@ -2036,7 +2105,7 @@ void mmd_write_to_file(DString * source, unsigned long extensions, short format,
 
 	DString * output = d_string_new("");
 
-	mmd_export_token_tree(output, e, format);
+	mmd_engine_export_token_tree(output, e, format);
 
 	// Now we have the input source string, the output string, the (modified) parse tree, and engine stacks
 
diff --git a/Sources/libMultiMarkdown/transclude.c b/Sources/libMultiMarkdown/transclude.c
index e647241..b7badb9 100644
--- a/Sources/libMultiMarkdown/transclude.c
+++ b/Sources/libMultiMarkdown/transclude.c
@@ -290,9 +290,9 @@ void transclude_source(DString * source, const char * search_path, const char *
 	size_t last_match;
 
 	mmd_engine * e = mmd_engine_create_with_dstring(source, EXT_TRANSCLUDE);
-	if (mmd_has_metadata(e, &offset)) {
+	if (mmd_engine_has_metadata(e, &offset)) {
 
-		temp = metavalue_for_key(e, "transclude base");
+		temp = mmd_engine_metavalue_for_key(e, "transclude base");
 
 		if (temp) {
 			// The new file overrides the search path
@@ -437,7 +437,7 @@ void transclude_source(DString * source, const char * search_path, const char *
 
 			// Strip metadata from buffer now that we have parsed it
 			e = mmd_engine_create_with_dstring(buffer, EXT_TRANSCLUDE);
-			if (mmd_has_metadata(e, &offset)) {
+			if (mmd_engine_has_metadata(e, &offset)) {
 				d_string_erase(buffer, 0, offset);
 			} else {
 				// Do we need to strip BOM?
diff --git a/Sources/libMultiMarkdown/writer.c b/Sources/libMultiMarkdown/writer.c
index f112d38..d57eecf 100644
--- a/Sources/libMultiMarkdown/writer.c
+++ b/Sources/libMultiMarkdown/writer.c
@@ -1691,7 +1691,7 @@ void identify_global_search_terms(mmd_engine * e, scratch_pad * scratch) {
 }
 
 
-void mmd_export_token_tree(DString * out, mmd_engine * e, short format) {
+void mmd_engine_export_token_tree(DString * out, mmd_engine * e, short format) {
 
 	// Process potential reference definitions
 	process_definition_stack(e);
diff --git a/Sources/multimarkdown/main.c b/Sources/multimarkdown/main.c
index 82a263e..376a468 100644
--- a/Sources/multimarkdown/main.c
+++ b/Sources/multimarkdown/main.c
@@ -77,11 +77,11 @@
 
 // argtable structs
 struct arg_lit *a_help, *a_version, *a_compatibility, *a_nolabels, *a_batch,
-		*a_accept, *a_reject, *a_full, *a_snippet, *a_random;
-struct arg_str *a_format, *a_lang;
+		*a_accept, *a_reject, *a_full, *a_snippet, *a_random, *a_meta;
+struct arg_str *a_format, *a_lang, *a_extract;
 struct arg_file *a_file, *a_o;
 struct arg_end *a_end;
-struct arg_rem *a_rem1, *a_rem2, *a_rem3, *a_rem4, *a_rem5;
+struct arg_rem *a_rem1, *a_rem2, *a_rem3, *a_rem4, *a_rem5, *a_rem6;
 
 
 DString * stdin_buffer() {
@@ -169,6 +169,11 @@ int main(int argc, char** argv) {
 
 		a_rem5 = arg_rem("", ""),
 
+		a_meta = arg_lit0("m", "metadata-keys", "list all metadata keys"),
+		a_extract = arg_str0("e", "extract", "KEY", "extract specified metadata key"),
+
+		a_rem6 = arg_rem("", ""),
+
 		a_file = arg_filen(NULL, NULL, "", 0, argc+2, "read input from file(s) -- use stdin if no files given"),
 		a_end = arg_end(20),
 
@@ -287,7 +292,7 @@ int main(int argc, char** argv) {
 	}
 
 	DString * buffer = NULL;
-	char * result;
+	char * result = NULL;
 	FILE * output_stream;
 	char * output_filename;
 
@@ -358,32 +363,53 @@ int main(int argc, char** argv) {
 #ifdef kUseObjectPool
 			token_pool_init();
 #endif
-
-			if (FORMAT_EPUB == format) {
-				mmd_write_to_file(buffer, extensions, format, language, folder, output_filename);
-				result = NULL;
-			} else if (FORMAT_MMD == format) {
-				result = buffer->str;
+			if (a_meta->count > 0) {
+				// List metadata keys
+				result = mmd_string_metadata_keys(buffer->str);
+
+				if (result)	// NULL when the text has no metadata
+					fputs(result, stdout);
+				free(result);
+			} else if (a_extract->count > 0) {
+				// Extract metadata key
+				const char * query = a_extract->sval[0];
+
+				result = mmd_string_metavalue_for_key(buffer->str, query);
+
+				if (result)	// nothing to print if no value came back
+					puts(result);	// value followed by a newline
+
+				free(result);
 			} else {
-				result = mmd_convert_d_string(buffer, extensions, format, language);
-			}
+				// Regular processing
 
-			if (result) {
-				if (!(output_stream = fopen(output_filename, "w"))) {
-					// Failed to open file
-					perror(output_filename);
+				if (FORMAT_EPUB == format) {
+					mmd_d_string_convert_to_file(buffer, extensions, format, language, folder, output_filename);
+					result = NULL;
+				} else if (FORMAT_MMD == format) {
+					result = buffer->str;
 				} else {
-					fputs(result, output_stream);
-					fputc('\n', output_stream);
-					fclose(output_stream);
+					result = mmd_d_string_convert(buffer, extensions, format, language);
+				}
+
+				if (result) {
+					if (!(output_stream = fopen(output_filename, "w"))) {
+						// Failed to open file
+						perror(output_filename);
+					} else {
+						fputs(result, output_stream);
+						fputc('\n', output_stream);
+						fclose(output_stream);
+					}
+				}
+
+				if (FORMAT_MMD != format) {
+					free(result);
 				}
 			}
 
 			d_string_free(buffer, true);
 			free(output_filename);
-			if (FORMAT_MMD != format) {
-				free(result);
-			}
 
 			// Decrement counter and drain
 			token_pool_drain();
@@ -431,35 +457,57 @@ int main(int argc, char** argv) {
 			mmd_critic_markup_reject(buffer);
 		}
 
-		if (FORMAT_MMD == format) {
-			result = buffer->str;
-		} else {
-			result = mmd_convert_d_string(buffer, extensions, format, language);
-		}
+		if (a_meta->count > 0) {
+			// List metadata keys
+			result = mmd_string_metadata_keys(buffer->str);
+			if (result)	// NULL when the text has no metadata
+				fputs(result, stdout);
 
-		// Where does output go?
-		if (strcmp(a_o->filename[0], "-") == 0) {
-			// direct to stdout
-			output_stream = stdout;
-		} else if (!(output_stream = fopen(a_o->filename[0], "w"))) {
-			perror(a_o->filename[0]);
 			free(result);
-			d_string_free(buffer, true);
-
-			exitcode = 1;
-			goto exit;
-		}
+		} else if (a_extract->count > 0) {
+			// Extract metadata key
+			const char * query = a_extract->sval[0];
+
+			result = mmd_string_metavalue_for_key(buffer->str, query);
+			if (result)	// nothing to print if no value came back
+				puts(result);	// value followed by a newline
 
-		fputs(result, output_stream);
-		fputc('\n', output_stream);
-
-		if (output_stream != stdout)
-			fclose(output_stream);
-
-		d_string_free(buffer, true);
-		if (FORMAT_MMD != format) {
 			free(result);
+		} else {
+			// Regular processing
+
+			if (FORMAT_MMD == format) {
+				result = buffer->str;
+			} else {
+				result = mmd_d_string_convert(buffer, extensions, format, language);
+			}
+
+			// Where does output go?
+			if (strcmp(a_o->filename[0], "-") == 0) {
+				// direct to stdout
+				output_stream = stdout;
+			} else if (!(output_stream = fopen(a_o->filename[0], "w"))) {
+				perror(a_o->filename[0]);
+				free(result);
+				d_string_free(buffer, true);
+
+				exitcode = 1;
+				goto exit;
+			}
+
+			fputs(result, output_stream);
+			fputc('\n', output_stream);
+
+			if (output_stream != stdout)
+				fclose(output_stream);
+
+			if (FORMAT_MMD != format) {
+				free(result);
+			}
 		}
+
+		d_string_free(buffer, true);
 	}
 
 
-- 
2.40.0