granicus.if.org Git - multimarkdown/commitdiff
ADDED: Add image assets when creating EPUB
author Fletcher T. Penney <fletcher@fletcherpenney.net>
Sat, 18 Mar 2017 20:53:09 +0000 (16:53 -0400)
committer Fletcher T. Penney <fletcher@fletcherpenney.net>
Sat, 18 Mar 2017 20:53:09 +0000 (16:53 -0400)
13 files changed:
CMakeLists.txt
Sources/libMultiMarkdown/epub.c
Sources/libMultiMarkdown/epub.h
Sources/libMultiMarkdown/html.c
Sources/libMultiMarkdown/include/libMultiMarkdown.h
Sources/libMultiMarkdown/mmd.c
Sources/libMultiMarkdown/mmd.h
Sources/libMultiMarkdown/transclude.h
Sources/libMultiMarkdown/uuid.c
Sources/libMultiMarkdown/uuid.h
Sources/libMultiMarkdown/writer.c
Sources/libMultiMarkdown/writer.h
Sources/multimarkdown/main.c

index 2dfb004fa1a0511d97244efeefa5c278bfeba2a8..989d1d9f330a06a12b584e25e614181c34253225 100644 (file)
@@ -386,6 +386,12 @@ endif (WIN32)
 # Define targets
 # ==============
 
+# Is libcurl available?
+# CMake's find module is FindCURL.cmake and it reports CURL_FOUND; the package
+# name must be spelled in upper case or the lookup fails on case-sensitive
+# filesystems and libcurl support is silently left out.
+find_package(CURL)
+if (CURL_FOUND)
+       add_definitions(-DUSE_CURL)
+endif (CURL_FOUND)
+
 # Create a library?
 if (NOT DEFINED TEST)
        add_library(libMultiMarkdown STATIC
@@ -404,6 +410,10 @@ else ()
        )
 endif ()
 
+if (CURL_FOUND) 
+       target_link_libraries(libMultiMarkdown curl)
+endif (CURL_FOUND)     
+
 # Create a shared library if defined
 if (DEFINED SHAREDBUILD)
 
@@ -423,6 +433,10 @@ if (DEFINED SHAREDBUILD)
        # Remove "Shared" from library name
        SET_TARGET_PROPERTIES(libMultiMarkdownShared PROPERTIES OUTPUT_NAME libMultiMarkdown)
 
+       if (CURL_FOUND) 
+               target_link_libraries(libMultiMarkdownShared curl)
+       endif (CURL_FOUND)      
+
 endif (DEFINED SHAREDBUILD)
 
 
index 7c8477487b516fc59b0314c91e861bea342b8f5b..6f260cbf03d42da4eec79046c4c25e68e28f903d 100644 (file)
        
 
 */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/stat.h>
-#include <time.h>
 
+#ifdef USE_CURL
+#include <curl/curl.h>
+#endif
 
 #include "d_string.h"
 #include "epub.h"
 #include "html.h"
 #include "miniz.h"
 #include "mmd.h"
+#include "transclude.h"
 #include "uuid.h"
 #include "writer.h"
 
@@ -88,7 +90,7 @@ char * epub_container_xml(void) {
        d_string_append(container, "<?xml version=\"1.0\"?>\n");
        d_string_append(container, "<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n");
        d_string_append(container, "<rootfiles>\n");
-       d_string_append(container, "<rootfile full-path=\"Content/main.opf\" media-type=\"application/oebps-package+xml\" />\n");
+       d_string_append(container, "<rootfile full-path=\"OEBPS/main.opf\" media-type=\"application/oebps-package+xml\" />\n");
        d_string_append(container, "</rootfiles>\n");
        d_string_append(container, "</container>\n");
 
@@ -98,22 +100,6 @@ char * epub_container_xml(void) {
 }
 
 
-// http://stackoverflow.com/questions/322938/recommended-way-to-initialize-srand
-// http://www.concentric.net/~Ttwang/tech/inthash.htm
-unsigned long mix(unsigned long a, unsigned long b, unsigned long c)
-{
-    a=a-b;  a=a-c;  a=a^(c >> 13);
-    b=b-c;  b=b-a;  b=b^(a << 8);
-    c=c-a;  c=c-b;  c=c^(b >> 13);
-    a=a-b;  a=a-c;  a=a^(c >> 12);
-    b=b-c;  b=b-a;  b=b^(a << 16);
-    c=c-a;  c=c-b;  c=c^(b >> 5);
-    a=a-b;  a=a-c;  a=a^(c >> 3);
-    b=b-c;  b=b-a;  b=b^(a << 10);
-    c=c-a;  c=c-b;  c=c^(b >> 15);
-    return c;
-}
-
 char * epub_package_document(scratch_pad * scratch) {
        DString * out = d_string_new("");
 
@@ -134,11 +120,6 @@ char * epub_package_document(scratch_pad * scratch) {
                print_const("</dc:identifier>\n");
        } else {
                print_const("<dc:identifier id=\"pub-id\">urn:uuid:");
-               // Seed random number generator
-               // This is not a "cryptographically secure" random seed,
-               // but good enough for an EPUB id....
-               unsigned long seed = mix(clock(), time(NULL), clock());
-               srand(seed);
 
                char * id = uuid_new();
                print(id);
@@ -295,8 +276,125 @@ char * epub_nav(mmd_engine * e, scratch_pad * scratch) {
 }
 
 
+/// Read a local asset from disk and add it to the zip archive.
+///
+/// filepath    - path of the archive being written
+/// a           - asset to store; a->url is combined with `directory` to
+///               build the path of the file to read
+/// destination - entry name to use inside the archive
+/// directory   - base directory used to resolve a->url
+///
+/// Returns true only when the file could be read, was not empty, and miniz
+/// reported that the entry was added to the archive.
+bool add_asset_from_file(const char * filepath, asset * a, const char * destination, const char * directory) {
+       bool result = false;
+       char * path = path_from_dir_base(directory, a->url);
+       DString * buffer = NULL;
+
+       if (path) {
+               buffer = scan_file(path);
+       }
+
+       if (buffer) {
+               if (buffer->currentStringLength > 0) {
+                       mz_bool status = mz_zip_add_mem_to_archive_file_in_place(filepath, destination, buffer->str, buffer->currentStringLength, NULL, 0, MZ_BEST_COMPRESSION);
+
+                       // Only report success if the archive write itself succeeded
+                       if (status) {
+                               result = true;
+                       }
+               }
+
+               // Release the buffer even when the file turned out to be empty
+               d_string_free(buffer, true);
+       }
+
+       free(path);
+
+       return result;
+}
+
+
+#ifdef USE_CURL
+// Dynamic buffer for downloading files in memory
+// Based on https://curl.haxx.se/libcurl/c/getinmemory.html
+
+struct MemoryStruct {
+       char * memory;          // Heap buffer holding the bytes received so far (grown with realloc in write_memory)
+       size_t size;            // Number of payload bytes currently stored in `memory` (excludes the trailing 0 byte)
+};
+
+
+// Write callback handed to libcurl (CURLOPT_WRITEFUNCTION): append the
+// `size * nmemb` bytes at `contents` to the struct MemoryStruct passed as
+// `userp`, keeping the buffer terminated with a 0 byte.
+//
+// Returns the number of bytes consumed, or 0 if the buffer could not be
+// grown.  On failure the previously received data is left untouched in
+// mem->memory so the owner can still free it.
+static size_t write_memory(void * contents, size_t size, size_t nmemb, void * userp) {
+       size_t realsize = size * nmemb;
+       struct MemoryStruct * mem = (struct MemoryStruct *)userp;
+
+       // Grow through a temporary so a failed realloc() does not lose
+       // (and leak) the existing buffer
+       char * grown = realloc(mem->memory, mem->size + realsize + 1);
+
+       if (grown == NULL) {
+               // Out of memory
+               fprintf(stderr, "Out of memory\n");
+               return 0;
+       }
+
+       mem->memory = grown;
+
+       memcpy(&(mem->memory[mem->size]), contents, realsize);
+       mem->size += realsize;
+       mem->memory[mem->size] = 0;
+
+       return realsize;
+}
+
+// Add assets to zipfile using libcurl
+//
+// For every asset recorded in e->asset_hash, try to download a->url; if the
+// transfer fails, fall back to reading it as a local file resolved against
+// `directory`.  Each asset is stored as "OEBPS/assets/<asset_path>", which
+// matches the "assets/<asset_path>" reference written into the HTML.
+// A message is printed to stderr for each asset that could not be stored.
+void add_assets(const char * filepath, mmd_engine * e, const char * directory) {
+       asset * a, * a_tmp;
+
+       if (e->asset_hash) {
+               CURL * curl;
+               struct MemoryStruct chunk;
+               char destination[100];
+               int len;
+               bool downloaded;
+               bool stored;
+
+               curl_global_init(CURL_GLOBAL_ALL);
+               curl = curl_easy_init();
+
+               // Without a handle we can still add local files below
+               if (curl) {
+                       curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_memory);
+                       curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&chunk);
+                       curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
+
+                       // Treat HTTP error responses as failed transfers so that an
+                       // error page is not stored in place of the asset
+                       curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L);
+               }
+
+               HASH_ITER(hh, e->asset_hash, a, a_tmp) {
+                       if (a->url == NULL || a->asset_path == NULL) {
+                               fprintf(stderr, "Unable to store asset in EPUB\n");
+                               continue;
+                       }
+
+                       // Build the archive entry name, refusing to truncate it
+                       len = snprintf(destination, sizeof(destination), "OEBPS/assets/%s", a->asset_path);
+
+                       if (len < 0 || (size_t) len >= sizeof(destination)) {
+                               fprintf(stderr, "Unable to store '%s' in EPUB\n", a->url);
+                               continue;
+                       }
+
+                       // write_memory() grows this buffer with realloc(), so NULL is a valid start
+                       chunk.memory = NULL;
+                       chunk.size = 0;
+
+                       downloaded = false;
+                       stored = false;
+
+                       if (curl) {
+                               curl_easy_setopt(curl, CURLOPT_URL, a->url);
+                               downloaded = (curl_easy_perform(curl) == CURLE_OK);
+                       }
+
+                       if (downloaded) {
+                               // Store downloaded file in zip
+                               if (mz_zip_add_mem_to_archive_file_in_place(filepath, destination, chunk.memory, chunk.size, NULL, 0, MZ_BEST_COMPRESSION)) {
+                                       stored = true;
+                               }
+                       } else {
+                               // Attempt to add asset from local file
+                               stored = add_asset_from_file(filepath, a, destination, directory);
+                       }
+
+                       if (!stored) {
+                               fprintf(stderr, "Unable to store '%s' in EPUB\n", a->url);
+                       }
+
+                       // The download buffer belongs to this iteration only
+                       free(chunk.memory);
+               }
+
+               if (curl) {
+                       curl_easy_cleanup(curl);
+               }
+
+               curl_global_cleanup();
+       }
+}
+
+#else
+// Add local assets only (libcurl not available)
+//
+// For every asset recorded in e->asset_hash, read a->url as a file resolved
+// against `directory` and store it as "OEBPS/assets/<asset_path>", which
+// matches the "assets/<asset_path>" reference written into the HTML.
+// A message is printed to stderr for each asset that could not be stored.
+void add_assets(const char * filepath, mmd_engine * e, const char * directory) {
+       asset * a, * a_tmp;
+
+       if (e->asset_hash) {
+               char destination[100];
+               int len;
+
+               HASH_ITER(hh, e->asset_hash, a, a_tmp) {
+                       if (a->url == NULL || a->asset_path == NULL) {
+                               fprintf(stderr, "Unable to store asset in EPUB\n");
+                               continue;
+                       }
+
+                       // Build the archive entry name, refusing to truncate it
+                       len = snprintf(destination, sizeof(destination), "OEBPS/assets/%s", a->asset_path);
+
+                       // Attempt to add asset from local file
+                       if (len < 0 || (size_t) len >= sizeof(destination) ||
+                               !add_asset_from_file(filepath, a, destination, directory)) {
+                               fprintf(stderr, "Unable to store '%s' in EPUB\n", a->url);
+                       }
+               }
+       }
+}
+#endif
+
+
 // Use the miniz library to create a zip archive for the EPUB document
-void epub_write_wrapper(const char * filepath, const char * body, mmd_engine * e) {
+void epub_write_wrapper(const char * filepath, const char * body, mmd_engine * e, const char * directory) {
        scratch_pad * scratch = scratch_pad_new(e, FORMAT_EPUB);
        mz_bool status;
        char * data;
@@ -312,7 +410,7 @@ void epub_write_wrapper(const char * filepath, const char * body, mmd_engine * e
        free(data);
 
        // Create directories
-       status = mz_zip_add_mem_to_archive_file_in_place(filepath, "Content/", NULL, 0, NULL, 0, MZ_BEST_COMPRESSION);
+       status = mz_zip_add_mem_to_archive_file_in_place(filepath, "OEBPS/", NULL, 0, NULL, 0, MZ_BEST_COMPRESSION);
        status = mz_zip_add_mem_to_archive_file_in_place(filepath, "META-INF/", NULL, 0, NULL, 0, MZ_BEST_COMPRESSION);
 
        // Add container
@@ -324,18 +422,22 @@ void epub_write_wrapper(const char * filepath, const char * body, mmd_engine * e
        // Add package
        data = epub_package_document(scratch);
        len = strlen(data);
-       status = mz_zip_add_mem_to_archive_file_in_place(filepath, "Content/main.opf", data, len, NULL, 0, MZ_BEST_COMPRESSION);
+       status = mz_zip_add_mem_to_archive_file_in_place(filepath, "OEBPS/main.opf", data, len, NULL, 0, MZ_BEST_COMPRESSION);
        free(data);
 
        // Add nav
        data = epub_nav(e, scratch);
        len = strlen(data);
-       status = mz_zip_add_mem_to_archive_file_in_place(filepath, "Content/nav.xhtml", data, len, NULL, 0, MZ_BEST_COMPRESSION);
+       status = mz_zip_add_mem_to_archive_file_in_place(filepath, "OEBPS/nav.xhtml", data, len, NULL, 0, MZ_BEST_COMPRESSION);
        free(data);
 
-       // Add document
+       // Add main document
        len = strlen(body);
-       status = mz_zip_add_mem_to_archive_file_in_place(filepath, "Content/main.xhtml", body, len, NULL, 0, MZ_BEST_COMPRESSION);
+       status = mz_zip_add_mem_to_archive_file_in_place(filepath, "OEBPS/main.xhtml", body, len, NULL, 0, MZ_BEST_COMPRESSION);
+
+
+       // Add assets
+       add_assets(filepath, e, directory);
 
        scratch_pad_free(scratch);
 }
index cb384de27946a7eb7a0ba6f9af9e38653424a15d..e778297126da543b5c0c5e15333348102c8b835b 100644 (file)
@@ -60,7 +60,7 @@
 
 #include "mmd.h"
 
-void epub_write_wrapper(const char * root_path, const char * body, mmd_engine * e);
+void epub_write_wrapper(const char * root_path, const char * body, mmd_engine * e, const char * directory);
 
 
 #endif
index 5b9d0e2069ff61566c12e9587a27c2395881f5c6..085d5742a8808769364d0ca6bc8dbead3b1fb894 100644 (file)
@@ -253,9 +253,16 @@ void mmd_export_image_html(DString * out, const char * source, token * text, lin
                scratch->close_para = false;
        }
 
-       if (link->url)
-               printf("<img src=\"%s\"", link->url);
-       else
+       if (link->url) {
+               if (scratch->store_assets) {
+                       store_asset(scratch, link->url);
+                       asset * a = extract_asset(scratch, link->url);
+
+                       printf("<img src=\"assets/%s\"", a->asset_path);
+               } else {
+                       printf("<img src=\"%s\"", link->url);
+               }
+       } else
                print_const("<img src=\"\"");
 
        if (text) {
@@ -660,7 +667,7 @@ void mmd_export_token_html(DString * out, const char * source, token * t, scratc
                                }
 
                                temp_char = label_from_token(source, temp_token);
-                               printf("<caption id=\"%s\">", temp_char);
+                               printf("<caption align=\"bottom\" id=\"%s\">", temp_char);
                                free(temp_char);
 
                                t->next->child->child->type = TEXT_EMPTY;
index 4a071b6fdad62530fe9c05f439461e15e0d508fa..7c93e6f96d5f48f856d4188cfa83c9a663dd5d8d 100644 (file)
@@ -75,7 +75,7 @@ char * mmd_convert_d_string(DString * source, unsigned long extensions, short fo
 
 // Convert MMD text and write results to specified file -- used for "complex" output formats requiring
 // multiple documents (e.g. EPUB)
-void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * filepath);
+void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath);
 
 
 /// MMD Engine is used for storing configuration information for MMD parser
index 76cfbbbba5a64a0f4387f406a0ca03b94620f33e..a4b206c2b7fd20d01611f8db6cbf645005bba59c 100644 (file)
@@ -108,6 +108,7 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) {
                e->header_stack = stack_new(0);
                e->link_stack = stack_new(0);
                e->metadata_stack = stack_new(0);
+               e->asset_hash = NULL;
 
                e->pairings1 = token_pair_engine_new();
                e->pairings2 = token_pair_engine_new();
@@ -1979,7 +1980,7 @@ char * mmd_convert_d_string(DString * source, unsigned long extensions, short fo
 }
 
 
-void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * filepath) {
+void mmd_write_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath) {
        FILE * output_stream;
 
        mmd_engine * e = mmd_engine_create_with_dstring(source, extensions);
@@ -1996,7 +1997,7 @@ void mmd_write_to_file(DString * source, unsigned long extensions, short format,
 
        switch (format) {
                case FORMAT_EPUB:
-                       epub_write_wrapper(filepath, output->str, e);
+                       epub_write_wrapper(filepath, output->str, e, directory);
                        break;
                default:
                        // Basic formats just write to file
index c32e5fc2f97aa9f4c247935f19490ef4089a9e5d..3e545aaee6c1dcaa4da8c8b0c542811d12d7e2a2 100644 (file)
@@ -61,6 +61,8 @@
 #include "stack.h"
 #include "token.h"
 #include "token_pairs.h"
+#include "uthash.h"
+
 
 #define kMaxParseRecursiveDepth 1000           //!< Maximum recursion depth when parsing -- to prevent stack overflow with "pathologic" input
 
@@ -88,6 +90,8 @@ struct mmd_engine {
 
        short                                   language;
        short                                   quotes_lang;
+
+       struct asset *                  asset_hash;
 };
 
 
@@ -101,4 +105,14 @@ void is_para_html(mmd_engine * e, token * block);
 
 void is_list_loose(token * list);
 
+
+struct asset {
+       char *                          url;                    //!< Copy of the image url as written in the source document
+       char *                          asset_path;             //!< Unique name (from uuid_new()) used for the file under "assets/"
+       UT_hash_handle          hh;                             //!< uthash bookkeeping; lets this struct be stored in a hash
+};
+
+typedef struct asset asset;
+
+
 #endif
index 58f50e5932a5bd6d58c824a276f1bdd6e9fe747d..612cb3ee70dcc28029e196c257148446bb94010a 100644 (file)
 /// Combine directory and base filename to create a full path */
 char * path_from_dir_base(const char * dir, const char * base);
 
+
+// Read file into memory
+DString * scan_file(const char * fname);
+
+
 /// Recursively transclude source text, given a search directory.
 /// Track files to prevent infinite recursive loops
 void transclude_source(DString * source, const char * dir, short format, stack * parsed, stack * manifest);
index 0088653df41ad86c67dc101e6b27d5ab7e97bd9a..5f7aa72baaedae1fb4a86d284e4b8e2f6e8f379c 100644 (file)
@@ -69,6 +69,7 @@
 #include <limits.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <time.h>
 
 #include "uuid.h"
 
@@ -107,3 +108,31 @@ char * uuid_string_from_bits(unsigned char * raw) {
 
        return result;
 }
+
+
+
+// http://stackoverflow.com/questions/322938/recommended-way-to-initialize-srand
+// http://www.concentric.net/~Ttwang/tech/inthash.htm
+unsigned long mix(unsigned long a, unsigned long b, unsigned long c)   // Scramble a, b and c together into one value; used to build the srand() seed
+{
+    a=a-b;  a=a-c;  a=a^(c >> 13);     // each row subtracts the other two values from one of a/b/c, then xors in a shifted copy of the previous one
+    b=b-c;  b=b-a;  b=b^(a << 8);
+    c=c-a;  c=c-b;  c=c^(b >> 13);
+    a=a-b;  a=a-c;  a=a^(c >> 12);
+    b=b-c;  b=b-a;  b=b^(a << 16);
+    c=c-a;  c=c-b;  c=c^(b >> 5);
+    a=a-b;  a=a-c;  a=a^(c >> 3);
+    b=b-c;  b=b-a;  b=b^(a << 10);
+    c=c-a;  c=c-b;  c=c^(b >> 15);
+    return c;                          // c is updated last, so it reflects all nine rows
+}
+
+
+/// Seed rand() with a value mixed from the processor clock and the
+/// current calendar time.
+void custom_seed_rand(void) {
+       // Not a "cryptographically secure" seed, but good enough for
+       // generating an EPUB id....
+       srand(mix(clock(), time(NULL), clock()));
+}
+
index 32bcacd5dc487e79afb77e0bdccc1978a3d4b5e9..174f59be8671ebd1d1c84a3a7dd62bd8af2c9004 100644 (file)
@@ -72,4 +72,6 @@
 char * uuid_new(void);
 char * uuid_string_from_bits(unsigned char * raw);
 
+void custom_seed_rand(void);
+
 #endif
index 7bcd604d87eae370cfa0c47f22d3f7ef7a51ba54..752de3c371b95a310137028ea67e7e5b49315e6a 100644 (file)
@@ -71,6 +71,7 @@
 #include "odf.h"
 #include "scanners.h"
 #include "token.h"
+#include "uuid.h"
 #include "writer.h"
 
 
@@ -216,6 +217,11 @@ scratch_pad * scratch_pad_new(mmd_engine * e, short format) {
 
                        store_metadata(p, m);
                }
+
+
+               // Store used assets in a hash 
+               p->asset_hash = NULL;
+               p->store_assets = 0;
        }
 
        return p;
@@ -1662,6 +1668,7 @@ void mmd_export_token_tree(DString * out, mmd_engine * e, short format) {
                        break;
                case FORMAT_EPUB:
                        mmd_start_complete_html(out, e->dstr->str, scratch);
+                       scratch->store_assets = true;
 
                        mmd_export_token_tree_html(out, e->dstr->str, e->root, scratch);
                        mmd_export_footnote_list_html(out, e->dstr->str, scratch);
@@ -1715,6 +1722,9 @@ void mmd_export_token_tree(DString * out, mmd_engine * e, short format) {
                        break;
        }
 
+       // Preserve asset_hash for possible use in export
+       e->asset_hash = scratch->asset_hash;
+
        scratch_pad_free(scratch);
 }
 
@@ -2296,3 +2306,37 @@ short raw_level_for_header(token * header) {
        return 0;
 }
 
+
+/// Create a new asset for `url`, with its own copy of the url and a freshly
+/// generated unique asset path.
+///
+/// `scratch` is currently unused.
+///
+/// Returns NULL if `url` is NULL or if any allocation fails, so callers never
+/// receive a partially initialised asset.
+asset * asset_new(char * url, scratch_pad * scratch) {
+       asset * a;
+
+       (void) scratch;
+
+       if (url == NULL) {
+               return NULL;
+       }
+
+       a = malloc(sizeof(asset));
+
+       if (a) {
+               a->url = strdup(url);
+
+               // Create a unique local asset path
+               a->asset_path = uuid_new();
+
+               if (a->url == NULL || a->asset_path == NULL) {
+                       // NOTE(review): assumes uuid_new() returns malloc'd storage
+                       // that may be released with free() -- confirm in uuid.c
+                       free(a->url);
+                       free(a->asset_path);
+                       free(a);
+                       a = NULL;
+               }
+       }
+
+       return a;
+}
+
+
+/// Look up the asset stored for `url` in the scratch pad's asset hash.
+///
+/// Returns the matching asset, or NULL if none has been stored (or if
+/// `scratch` or `url` is NULL).
+asset * extract_asset(scratch_pad * scratch, char * url) {
+       asset * a = NULL;
+
+       // HASH_FIND_STR measures the key with strlen(), so a NULL url must
+       // never reach it
+       if (scratch && url) {
+               HASH_FIND_STR(scratch->asset_hash, url, a);
+       }
+
+       return a;
+}
+
+/// Record `url` as an asset used by the document.  Each distinct url is
+/// stored once; repeated calls with the same url are no-ops.
+void store_asset(scratch_pad * scratch, char * url) {
+       asset * a;
+
+       if (url == NULL) {
+               return;
+       }
+
+       a = extract_asset(scratch, url);
+
+       // Only store if this url has not already been stored
+       if (!a) {
+               // Asset not found - create new one
+               a = asset_new(url, scratch);
+
+               if (a && a->url) {
+                       // Key on the asset's own copy of the url: uthash keeps the
+                       // key pointer rather than copying the key, and this hash is
+                       // later handed to the engine, so it must not depend on the
+                       // lifetime of the caller's string
+                       HASH_ADD_KEYPTR(hh, scratch->asset_hash, a->url, strlen(a->url), a);
+               } else if (a) {
+                       // Could not copy the url -- discard the unusable asset
+                       // NOTE(review): assumes asset_path may be released with free()
+                       free(a->asset_path);
+                       free(a);
+               }
+       }
+}
+
index 3ff94411c9cda635564e77d0c8dd66a0e5499649..2ac2681e97a5ec779ab8a72bd2457f9b4c815cca 100644 (file)
@@ -120,6 +120,8 @@ typedef struct {
 
        short                           odf_para_type;
 
+       struct asset *          asset_hash;
+       short                           store_assets;
 } scratch_pad;
 
 
@@ -241,5 +243,8 @@ char * clean_string(const char * str, bool lowercase);
 
 short raw_level_for_header(token * header);
 
+void store_asset(scratch_pad * scratch_pad, char * url);
+asset * extract_asset(scratch_pad * scratch, char * url);
+
 #endif
 
index 7e9445b95aac87352c6dc9509c032a5940afa984..d595e5fe54008bc937eed7a4cf99fa0a760cac97 100644 (file)
@@ -70,6 +70,7 @@
 #include "mmd.h"
 #include "token.h"
 #include "transclude.h"
+#include "uuid.h"
 #include "version.h"
 
 #define kBUFFERSIZE 4096       // How many bytes to read at a time
@@ -102,31 +103,6 @@ DString * stdin_buffer() {
 }
 
 
-static DString * scan_file(const char * fname) {
-       /* Read from a file and return a GString *
-               `buffer` will need to be freed elsewhere */
-
-       char chunk[kBUFFERSIZE];
-       size_t bytes;
-
-       FILE * file;
-
-       if ((file = fopen(fname, "r")) == NULL ) {
-               return NULL;
-       }
-
-       DString * buffer = d_string_new("");
-
-       while ((bytes = fread(chunk, 1, kBUFFERSIZE, file)) > 0) {
-               d_string_append_c_array(buffer, chunk, bytes);
-       }
-
-       fclose(file);
-
-       return buffer;
-}
-
-
 /// Given a filename, remove the extension and replace it with a new one.
 /// The next extension must include the leading '.', e.g. '.html'
 char * filename_with_extension(const char * original, const char * new_extension) {
@@ -312,6 +288,9 @@ int main(int argc, char** argv) {
        token_pool_init();
 #endif
 
+       // Seed random numbers
+       custom_seed_rand();
+
        // Determine processing mode -- batch/stdin/files??
 
        if ((a_batch->count) && (a_file->count)) {
@@ -349,8 +328,9 @@ int main(int argc, char** argv) {
                        }
 
                        // Perform transclusion(s)
+                       char * folder = dirname((char *) a_file->filename[i]);
+
                        if (extensions & EXT_TRANSCLUDE) {
-                               char * folder = dirname((char *) a_file->filename[i]);
 
                                transclude_source(buffer, folder, format, NULL, NULL);
        
@@ -372,7 +352,7 @@ int main(int argc, char** argv) {
 #endif
        
                        if (FORMAT_EPUB == format) {
-                               mmd_write_to_file(buffer, extensions, format, language, output_filename);
+                               mmd_write_to_file(buffer, extensions, format, language, folder, output_filename);
                                result = NULL;
                        } else if (FORMAT_MMD == format) {
                                result = buffer->str;