granicus.if.org Git - multimarkdown/commitdiff
FIXED: Improve BOM stripping
author Fletcher T. Penney <fletcher@fletcherpenney.net>
Sat, 9 Mar 2019 14:59:54 +0000 (09:59 -0500)
committer Fletcher T. Penney <fletcher@fletcherpenney.net>
Sat, 9 Mar 2019 14:59:54 +0000 (09:59 -0500)
Sources/libMultiMarkdown/file.c

index 65de2d1e46711884e2003b842bb5494c80123c79..0feffa17c4c9b640d5cb109c15e89df702d8833d 100644 (file)
 /// Scan file into a DString
 DString * scan_file(const char * fname) {
        /* Read from stdin and return a DString *
-               `buffer` will need to be freed elsewhere */
+        `buffer` will need to be freed elsewhere */
 
        char chunk[kBUFFERSIZE];
        size_t bytes;
@@ -132,25 +132,35 @@ DString * scan_file(const char * fname) {
        MultiByteToWideChar(CP_UTF8, 0, fname, -1, wstr, wchars_num);
 
        if ((file = _wfopen(wstr, L"rb")) == NULL) {
+               return NULL;
+       }
+
 #else
 
        if ((file = fopen(fname, "r")) == NULL ) {
-#endif
-
                return NULL;
        }
 
+#endif
+
        DString * buffer = d_string_new("");
 
        while ((bytes = fread(chunk, 1, kBUFFERSIZE, file)) > 0) {
                d_string_append_c_array(buffer, chunk, bytes);
+       }
 
-               if (buffer->currentStringLength <= kBUFFERSIZE) {
-                       // Strip BOM
-                       if (strncmp(buffer->str, "\xef\xbb\xbf", 3) == 0) {
-                               d_string_erase(buffer, 0, 3);
-                       }
-               }
+       // Strip UTF-8 BOM
+       if (strncmp(buffer->str, "\xef\xbb\xbf", 3) == 0) {
+               d_string_erase(buffer, 0, 3);
+       }
+
+       // Strip UTF-16 BOMs
+       if (strncmp(buffer->str, "\xef\xff", 2) == 0) {
+               d_string_erase(buffer, 0, 2);
+       }
+
+       if (strncmp(buffer->str, "\xff\xfe", 2) == 0) {
+               d_string_erase(buffer, 0, 2);
        }
 
        fclose(file);