From: Fletcher T. Penney Date: Sat, 18 Mar 2017 20:53:09 +0000 (-0400) Subject: ADDED: Add image assets when creating EPUB X-Git-Tag: 6.0.0-rc2^2~12 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b0150463b2f721a11400509b86890fa00bfd51d3;p=multimarkdown ADDED: Add image assets when creating EPUB --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 2dfb004..989d1d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -386,6 +386,12 @@ endif (WIN32) # Define targets # ============== +# Is libcurl available? +find_package(curl) +if (CURL_FOUND) + add_definitions(-DUSE_CURL) +endif (CURL_FOUND) + # Create a library? if (NOT DEFINED TEST) add_library(libMultiMarkdown STATIC @@ -404,6 +410,10 @@ else () ) endif () +if (CURL_FOUND) + target_link_libraries(libMultiMarkdown curl) +endif (CURL_FOUND) + # Create a shared library if defined if (DEFINED SHAREDBUILD) @@ -423,6 +433,10 @@ if (DEFINED SHAREDBUILD) # Remove "Shared" from library name SET_TARGET_PROPERTIES(libMultiMarkdownShared PROPERTIES OUTPUT_NAME libMultiMarkdown) + if (CURL_FOUND) + target_link_libraries(libMultiMarkdownShared curl) + endif (CURL_FOUND) + endif (DEFINED SHAREDBUILD) diff --git a/Sources/libMultiMarkdown/epub.c b/Sources/libMultiMarkdown/epub.c index 7c84774..6f260cb 100644 --- a/Sources/libMultiMarkdown/epub.c +++ b/Sources/libMultiMarkdown/epub.c @@ -53,18 +53,20 @@ */ - #include #include #include -#include +#ifdef USE_CURL +#include +#endif #include "d_string.h" #include "epub.h" #include "html.h" #include "miniz.h" #include "mmd.h" +#include "transclude.h" #include "uuid.h" #include "writer.h" @@ -88,7 +90,7 @@ char * epub_container_xml(void) { d_string_append(container, "\n"); d_string_append(container, "\n"); d_string_append(container, "\n"); - d_string_append(container, "\n"); + d_string_append(container, "\n"); d_string_append(container, "\n"); d_string_append(container, "\n"); @@ -98,22 +100,6 @@ char * epub_container_xml(void) { } -// http://stackoverflow.com/questions/322938/recommended-way-to-initialize-srand -// http://www.concentric.net/~Ttwang/tech/inthash.htm -unsigned long mix(unsigned long a, unsigned long b, unsigned long c) -{ - a=a-b; a=a-c; a=a^(c >> 13); - b=b-c; b=b-a; b=b^(a << 8); - c=c-a; c=c-b; c=c^(b >> 13); - a=a-b; a=a-c; a=a^(c >> 12); - b=b-c; b=b-a; b=b^(a << 16); - c=c-a; c=c-b; c=c^(b >> 5); - a=a-b; a=a-c; a=a^(c >> 3); - b=b-c; b=b-a; b=b^(a << 10); - c=c-a; c=c-b; c=c^(b >> 15); - return c; -} - char * epub_package_document(scratch_pad * scratch) { DString * out = d_string_new(""); @@ -134,11 +120,6 @@ char * epub_package_document(scratch_pad * scratch) { print_const("\n"); } else { print_const("urn:uuid:"); - // Seed random number generator - // This is not a "cryptographically secure" random seed, - // but good enough for an EPUB id.... - unsigned long seed = mix(clock(), time(NULL), clock()); - srand(seed); char * id = uuid_new(); print(id); @@ -295,8 +276,125 @@ char * epub_nav(mmd_engine * e, scratch_pad * scratch) { } +bool add_asset_from_file(const char * filepath, asset * a, const char * destination, const char * directory) { + char * path = path_from_dir_base(directory, a->url); + mz_bool status; + bool result = false; + + DString * buffer = scan_file(path); + + if (buffer && buffer->currentStringLength > 0) { + status = mz_zip_add_mem_to_archive_file_in_place(filepath, destination, buffer->str, buffer->currentStringLength, NULL, 0, MZ_BEST_COMPRESSION); + + d_string_free(buffer, true); + result = true; + } + + free(path); + + return result; +} + + +#ifdef USE_CURL +// Dynamic buffer for downloading files in memory +// Based on https://curl.haxx.se/libcurl/c/getinmemory.html + +struct MemoryStruct { + char * memory; + size_t size; +}; + + +static size_t write_memory(void * contents, size_t size, size_t nmemb, void * userp) { + size_t realsize = size * nmemb; + struct MemoryStruct * mem = (struct MemoryStruct *)userp; + + mem->memory = realloc(mem->memory, mem->size + realsize + 1); + if (mem->memory == NULL) { + // Out of memory + fprintf(stderr, "Out of memory\n"); + return 0; + } + + memcpy(&(mem->memory[mem->size]), contents, realsize); + mem->size += realsize; + mem->memory[mem->size] = 0; + + return realsize; +} + +// Add assets to zipfile using libcurl +void add_assets(const char * filepath, mmd_engine * e, const char * directory) { + asset * a, * a_tmp; + + if (e->asset_hash){ + CURL * curl; + CURLcode res; + struct MemoryStruct chunk; + + char destination[100] = "OEBPS/assets/"; + destination[49] = '\0'; + + mz_bool status; + + curl_global_init(CURL_GLOBAL_ALL); + curl = curl_easy_init(); + + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_memory); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&chunk); + curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0"); + + HASH_ITER(hh, e->asset_hash, a, a_tmp) { + chunk.memory = malloc(1); + chunk.size = 0; + + curl_easy_setopt(curl, CURLOPT_URL, a->url); + res = curl_easy_perform(curl); + + memcpy(&destination[13], a->asset_path, 36); + + if (res != CURLE_OK) { + // Attempt to add asset from local file + if (!add_asset_from_file(filepath, a, destination, directory)) { + fprintf(stderr, "Unable to store '%s' in EPUB\n", a->url); + } + } else { + // Store downloaded file in zip + status = mz_zip_add_mem_to_archive_file_in_place(filepath, destination, chunk.memory, chunk.size, NULL, 0, MZ_BEST_COMPRESSION); + } + } + } +} + +#else +// Add local assets only (libcurl not available) +void add_assets(const char * filepath, mmd_engine * e, const char * directory) { + asset * a, * a_tmp; + + if (e->asset_hash){ + + char destination[100] = "OEBPS/assets/"; + destination[49] = '\0'; + + mz_bool status; + + HASH_ITER(hh, e->asset_hash, a, a_tmp) { + + memcpy(&destination[13], a->asset_path, 36); + + // Attempt to add asset from local file + if (!add_asset_from_file(filepath, a, destination, directory)) { + fprintf(stderr, "Unable to store '%s' in EPUB\n", a->url); + } + } + } +} +#endif + + // Use the miniz library to create a zip archive for the EPUB document -void epub_write_wrapper(const char * filepath, const char * body, mmd_engine * e) { +void epub_write_wrapper(const char * filepath, const char * body, mmd_engine * e, const char * directory) { scratch_pad * scratch = scratch_pad_new(e, FORMAT_EPUB); mz_bool status; char * data; @@ -312,7 +410,7 @@ void epub_write_wrapper(const char * filepath, const char * body, mmd_engine * e free(data); // Create directories - status = mz_zip_add_mem_to_archive_file_in_place(filepath, "Content/", NULL, 0, NULL, 0, MZ_BEST_COMPRESSION); + status = mz_zip_add_mem_to_archive_file_in_place(filepath, "OEBPS/", NULL, 0, NULL, 0, MZ_BEST_COMPRESSION); status = mz_zip_add_mem_to_archive_file_in_place(filepath, "META-INF/", NULL, 0, NULL, 0, MZ_BEST_COMPRESSION); // Add container @@ -324,18 +422,22 @@ void epub_write_wrapper(const char * filepath, const char * body, mmd_engine * e // Add package data = epub_package_document(scratch); len = strlen(data); - status = mz_zip_add_mem_to_archive_file_in_place(filepath, "Content/main.opf", data, len, NULL, 0, MZ_BEST_COMPRESSION); + status = mz_zip_add_mem_to_archive_file_in_place(filepath, "OEBPS/main.opf", data, len, NULL, 0, MZ_BEST_COMPRESSION); free(data); // Add nav data = epub_nav(e, scratch); len = strlen(data); - status = mz_zip_add_mem_to_archive_file_in_place(filepath, "Content/nav.xhtml", data, len, NULL, 0, MZ_BEST_COMPRESSION); + status = mz_zip_add_mem_to_archive_file_in_place(filepath, "OEBPS/nav.xhtml", data, len, NULL, 0, MZ_BEST_COMPRESSION); free(data); - // Add document + // Add main document len = strlen(body); - status = mz_zip_add_mem_to_archive_file_in_place(filepath, "Content/main.xhtml", body, len, NULL, 0, MZ_BEST_COMPRESSION); + status = mz_zip_add_mem_to_archive_file_in_place(filepath, "OEBPS/main.xhtml", body, len, NULL, 0, MZ_BEST_COMPRESSION); + + + // Add assets + add_assets(filepath, e, directory); scratch_pad_free(scratch); } diff --git a/Sources/libMultiMarkdown/epub.h b/Sources/libMultiMarkdown/epub.h index cb384de..e778297 100644 --- a/Sources/libMultiMarkdown/epub.h +++ b/Sources/libMultiMarkdown/epub.h @@ -60,7 +60,7 @@ #include "mmd.h" -void epub_write_wrapper(const char * root_path, const char * body, mmd_engine * e); +void epub_write_wrapper(const char * root_path, const char * body, mmd_engine * e, const char * directory); #endif diff --git a/Sources/libMultiMarkdown/html.c b/Sources/libMultiMarkdown/html.c index 5b9d0e2..085d574 100644 --- a/Sources/libMultiMarkdown/html.c +++ b/Sources/libMultiMarkdown/html.c @@ -253,9 +253,16 @@ void mmd_export_image_html(DString * out, const char * source, token * text, lin scratch->close_para = false; } - if (link->url) - printf("url); - else + if (link->url) { + if (scratch->store_assets) { + store_asset(scratch, link->url); + asset * a = extract_asset(scratch, link->url); + + printf("asset_path); + } else { + printf("url); + } + } else print_const("", temp_char); + printf("", temp_char); free(temp_char); t->next->child->child->type = TEXT_EMPTY; diff --git a/Sources/libMultiMarkdown/include/libMultiMarkdown.h b/Sources/libMultiMarkdown/include/libMultiMarkdown.h index 4a071b6..7c93e6f 100644 --- a/Sources/libMultiMarkdown/include/libMultiMarkdown.h +++ b/Sources/libMultiMarkdown/include/libMultiMarkdown.h @@ -75,7 +75,7 @@ char * mmd_convert_d_string(DString * source, unsigned long extensions, short fo // Convert MMD text and write results to specified file -- used for "complex" output formats requiring // multiple documents (e.g. EPUB) -void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * filepath); +void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath); /// MMD Engine is used for storing configuration information for MMD parser diff --git a/Sources/libMultiMarkdown/mmd.c b/Sources/libMultiMarkdown/mmd.c index 76cfbbb..a4b206c 100644 --- a/Sources/libMultiMarkdown/mmd.c +++ b/Sources/libMultiMarkdown/mmd.c @@ -108,6 +108,7 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) { e->header_stack = stack_new(0); e->link_stack = stack_new(0); e->metadata_stack = stack_new(0); + e->asset_hash = NULL; e->pairings1 = token_pair_engine_new(); e->pairings2 = token_pair_engine_new(); @@ -1979,7 +1980,7 @@ char * mmd_convert_d_string(DString * source, unsigned long extensions, short fo } -void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * filepath) { +void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath) { FILE * output_stream; mmd_engine * e = mmd_engine_create_with_dstring(source, extensions); @@ -1996,7 +1997,7 @@ void mmd_write_to_file(DString * source, unsigned long extensions, short format, switch (format) { case FORMAT_EPUB: - epub_write_wrapper(filepath, output->str, e); + epub_write_wrapper(filepath, output->str, e, directory); break; default: // Basic formats just write to file diff --git a/Sources/libMultiMarkdown/mmd.h b/Sources/libMultiMarkdown/mmd.h index c32e5fc..3e545aa 100644 --- a/Sources/libMultiMarkdown/mmd.h +++ b/Sources/libMultiMarkdown/mmd.h @@ -61,6 +61,8 @@ #include "stack.h" #include "token.h" #include "token_pairs.h" +#include "uthash.h" + #define kMaxParseRecursiveDepth 1000 //!< Maximum recursion depth when parsing -- to prevent stack overflow with "pathologic" input @@ -88,6 +90,8 @@ struct mmd_engine { short language; short quotes_lang; + + struct asset * asset_hash; }; @@ -101,4 +105,14 @@ void is_para_html(mmd_engine * e, token * block); void is_list_loose(token * list); + +struct asset { + char * url; + char * asset_path; + UT_hash_handle hh; +}; + +typedef struct asset asset; + + #endif diff --git a/Sources/libMultiMarkdown/transclude.h b/Sources/libMultiMarkdown/transclude.h index 58f50e5..612cb3e 100644 --- a/Sources/libMultiMarkdown/transclude.h +++ b/Sources/libMultiMarkdown/transclude.h @@ -66,6 +66,11 @@ /// Combine directory and base filename to create a full path */ char * path_from_dir_base(const char * dir, const char * base); + +// Read file into memory +DString * scan_file(const char * fname); + + /// Recursively transclude source text, given a search directory. /// Track files to prevent infinite recursive loops void transclude_source(DString * source, const char * dir, short format, stack * parsed, stack * manifest); diff --git a/Sources/libMultiMarkdown/uuid.c b/Sources/libMultiMarkdown/uuid.c index 0088653..5f7aa72 100644 --- a/Sources/libMultiMarkdown/uuid.c +++ b/Sources/libMultiMarkdown/uuid.c @@ -69,6 +69,7 @@ #include #include #include +#include #include "uuid.h" @@ -107,3 +108,31 @@ char * uuid_string_from_bits(unsigned char * raw) { return result; } + + + +// http://stackoverflow.com/questions/322938/recommended-way-to-initialize-srand +// http://www.concentric.net/~Ttwang/tech/inthash.htm +unsigned long mix(unsigned long a, unsigned long b, unsigned long c) +{ + a=a-b; a=a-c; a=a^(c >> 13); + b=b-c; b=b-a; b=b^(a << 8); + c=c-a; c=c-b; c=c^(b >> 13); + a=a-b; a=a-c; a=a^(c >> 12); + b=b-c; b=b-a; b=b^(a << 16); + c=c-a; c=c-b; c=c^(b >> 5); + a=a-b; a=a-c; a=a^(c >> 3); + b=b-c; b=b-a; b=b^(a << 10); + c=c-a; c=c-b; c=c^(b >> 15); + return c; +} + + +void custom_seed_rand(void) { + // Seed random number generator + // This is not a "cryptographically secure" random seed, + // but good enough for an EPUB id.... + unsigned long seed = mix(clock(), time(NULL), clock()); + srand(seed); +} + diff --git a/Sources/libMultiMarkdown/uuid.h b/Sources/libMultiMarkdown/uuid.h index 32bcacd..174f59b 100644 --- a/Sources/libMultiMarkdown/uuid.h +++ b/Sources/libMultiMarkdown/uuid.h @@ -72,4 +72,6 @@ char * uuid_new(void); char * uuid_string_from_bits(unsigned char * raw); +void custom_seed_rand(void); + #endif diff --git a/Sources/libMultiMarkdown/writer.c b/Sources/libMultiMarkdown/writer.c index 7bcd604..752de3c 100644 --- a/Sources/libMultiMarkdown/writer.c +++ b/Sources/libMultiMarkdown/writer.c @@ -71,6 +71,7 @@ #include "odf.h" #include "scanners.h" #include "token.h" +#include "uuid.h" #include "writer.h" @@ -216,6 +217,11 @@ scratch_pad * scratch_pad_new(mmd_engine * e, short format) { store_metadata(p, m); } + + + // Store used assets in a hash + p->asset_hash = NULL; + p->store_assets = 0; } return p; @@ -1662,6 +1668,7 @@ void mmd_export_token_tree(DString * out, mmd_engine * e, short format) { break; case FORMAT_EPUB: mmd_start_complete_html(out, e->dstr->str, scratch); + scratch->store_assets = true; mmd_export_token_tree_html(out, e->dstr->str, e->root, scratch); mmd_export_footnote_list_html(out, e->dstr->str, scratch); @@ -1715,6 +1722,9 @@ void mmd_export_token_tree(DString * out, mmd_engine * e, short format) { break; } + // Preserve asset_hash for possible use in export + e->asset_hash = scratch->asset_hash; + scratch_pad_free(scratch); } @@ -2296,3 +2306,37 @@ short raw_level_for_header(token * header) { return 0; } + +asset * asset_new(char * url, scratch_pad * scratch) { + asset * a = malloc(sizeof(asset)); + + if (a) { + a->url = strdup(url); + + // Create a unique local asset path + a->asset_path = uuid_new(); + } + + return a; +} + + +asset * extract_asset(scratch_pad * scratch, char * url) { + asset * a; + + HASH_FIND_STR(scratch->asset_hash, url, a); + + return a; +} + +void store_asset(scratch_pad * scratch, char * url) { + asset * a = extract_asset(scratch, url); + + // Only store if this url has not already been stored + if (!a) { + // Asset not found - create new one + a = asset_new(url, scratch); + HASH_ADD_KEYPTR(hh, scratch->asset_hash, url, strlen(url), a); + } +} + diff --git a/Sources/libMultiMarkdown/writer.h b/Sources/libMultiMarkdown/writer.h index 3ff9441..2ac2681 100644 --- a/Sources/libMultiMarkdown/writer.h +++ b/Sources/libMultiMarkdown/writer.h @@ -120,6 +120,8 @@ typedef struct { short odf_para_type; + struct asset * asset_hash; + short store_assets; } scratch_pad; @@ -241,5 +243,8 @@ char * clean_string(const char * str, bool lowercase); short raw_level_for_header(token * header); +void store_asset(scratch_pad * scratch_pad, char * url); +asset * extract_asset(scratch_pad * scratch, char * url); + #endif diff --git a/Sources/multimarkdown/main.c b/Sources/multimarkdown/main.c index 7e9445b..d595e5f 100644 --- a/Sources/multimarkdown/main.c +++ b/Sources/multimarkdown/main.c @@ -70,6 +70,7 @@ #include "mmd.h" #include "token.h" #include "transclude.h" +#include "uuid.h" #include "version.h" #define kBUFFERSIZE 4096 // How many bytes to read at a time @@ -102,31 +103,6 @@ DString * stdin_buffer() { } -static DString * scan_file(const char * fname) { - /* Read from a file and return a GString * - `buffer` will need to be freed elsewhere */ - - char chunk[kBUFFERSIZE]; - size_t bytes; - - FILE * file; - - if ((file = fopen(fname, "r")) == NULL ) { - return NULL; - } - - DString * buffer = d_string_new(""); - - while ((bytes = fread(chunk, 1, kBUFFERSIZE, file)) > 0) { - d_string_append_c_array(buffer, chunk, bytes); - } - - fclose(file); - - return buffer; -} - - /// Given a filename, remove the extension and replace it with a new one. /// The next extension must include the leading '.', e.g. '.html' char * filename_with_extension(const char * original, const char * new_extension) { @@ -312,6 +288,9 @@ int main(int argc, char** argv) { token_pool_init(); #endif + // Seed random numbers + custom_seed_rand(); + // Determine processing mode -- batch/stdin/files?? if ((a_batch->count) && (a_file->count)) { @@ -349,8 +328,9 @@ int main(int argc, char** argv) { } // Perform transclusion(s) + char * folder = dirname((char *) a_file->filename[i]); + if (extensions & EXT_TRANSCLUDE) { - char * folder = dirname((char *) a_file->filename[i]); transclude_source(buffer, folder, format, NULL, NULL); @@ -372,7 +352,7 @@ int main(int argc, char** argv) { #endif if (FORMAT_EPUB == format) { - mmd_write_to_file(buffer, extensions, format, language, output_filename); + mmd_write_to_file(buffer, extensions, format, language, folder, output_filename); result = NULL; } else if (FORMAT_MMD == format) { result = buffer->str;