From b9bb87b67e9197ead02eb4cb89793d3645497adf Mon Sep 17 00:00:00 2001 From: Christos Zoulas <christos@zoulas.com> Date: Wed, 17 Oct 2007 19:33:31 +0000 Subject: [PATCH] - separate mime encoding from mime type - fix printing -\012 --- src/ascmagic.c | 27 ++++++++----- src/compress.c | 20 ++++++---- src/file.c | 25 +++++++++--- src/fsmagic.c | 15 ++++--- src/funcs.c | 102 +++++++++++++++++++++++++++--------------------- src/is_tar.c | 38 +++++++++--------- src/magic.c | 20 +++------- src/magic.h | 24 ++++++------ src/softmagic.c | 7 ++-- 9 files changed, 158 insertions(+), 120 deletions(-) diff --git a/src/ascmagic.c b/src/ascmagic.c index 3d1ee2a6..b507f775 100644 --- a/src/ascmagic.c +++ b/src/ascmagic.c @@ -49,7 +49,7 @@ #include "names.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.50 2007/03/15 14:51:00 christos Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.51 2007/08/19 03:45:07 christos Exp $") #endif /* lint */ typedef unsigned long unichar; @@ -76,6 +76,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) size_t ulen; struct names *p; int rv = -1; + int mime = ms->flags & MAGIC_MIME; const char *code = NULL; const char *code_mime = NULL; @@ -271,21 +272,27 @@ subtype_identified: if (seen_cr && nbytes < HOWMANY) n_cr++; - if ((ms->flags & MAGIC_MIME)) { - if (subtype_mime) { - if (file_printf(ms, subtype_mime) == -1) - goto done; - } else { - if (file_printf(ms, "text/plain") == -1) - goto done; + if (mime) { + if (mime & MAGIC_MIME_TYPE) { + if (subtype_mime) { + if (file_printf(ms, subtype_mime) == -1) + goto done; + } else { + if (file_printf(ms, "text/plain") == -1) + goto done; + } } - if (code_mime) { - if (file_printf(ms, " charset=") == -1) + if ((mime == 0 || mime == MAGIC_MIME) && code_mime) { + if ((mime & MAGIC_MIME_TYPE) && + file_printf(ms, " charset=") == -1) goto done; if (file_printf(ms, code_mime) == -1) goto done; } + + if (mime == MAGIC_MIME_ENCODING) + file_printf(ms, "binary"); } else { if (file_printf(ms, code) == -1) goto done; diff --git a/src/compress.c b/src/compress.c index 35451f98..5fc741f6 100644 --- a/src/compress.c +++ b/src/compress.c @@ -55,7 +55,7 @@ #ifndef lint -FILE_RCSID("@(#)$File: compress.c,v 1.51 2007/03/05 02:41:29 christos Exp $") +FILE_RCSID("@(#)$File: compress.c,v 1.52 2007/08/19 03:45:08 christos Exp $") #endif private struct { @@ -98,6 +98,7 @@ file_zmagic(struct magic_set *ms, int fd, const char *name, unsigned char *newbuf = NULL; size_t i, nsz; int rv = 0; + int mime = ms->flags & MAGIC_MIME; if ((ms->flags & MAGIC_COMPRESS) == 0) return 0; @@ -112,13 +113,18 @@ file_zmagic(struct magic_set *ms, int fd, const char *name, rv = -1; if (file_buffer(ms, -1, name, newbuf, nsz) == -1) goto error; - if (file_printf(ms, (ms->flags & MAGIC_MIME) ? - " compressed-encoding=" : " (") == -1) - goto error; - if (file_buffer(ms, -1, NULL, buf, nbytes) == -1) + + if (mime == MAGIC_MIME || mime == 0) { + if (file_printf(ms, mime ? + " compressed-encoding=" : " (") == -1) + goto error; + } + + if ((mime == 0 || mime & MAGIC_MIME_ENCODING) && + file_buffer(ms, -1, NULL, buf, nbytes) == -1) goto error; - if (!(ms->flags & MAGIC_MIME) && - file_printf(ms, ")") == -1) + + if (!mime && file_printf(ms, ")") == -1) goto error; rv = 1; break; diff --git a/src/file.c b/src/file.c index df1d9cb5..e6dc6eb4 100644 --- a/src/file.c +++ b/src/file.c @@ -71,7 +71,7 @@ #include "patchlevel.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.109 2007/03/15 14:50:34 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.111 2007/05/08 14:44:18 christos Exp $") #endif /* lint */ @@ -133,8 +133,12 @@ main(int argc, char *argv[]) int longindex; static const struct option long_options[] = { - {"version", 0, 0, 'v'}, + /* Put long-only options first */ {"help", 0, 0, 0}, + {"mime-type", 0, 0, 0}, + {"mime-encoding", 0, 0, 0}, + + {"version", 0, 0, 'v'}, {"brief", 0, 0, 'b'}, {"checking-printout", 0, 0, 'c'}, {"debug", 0, 0, 'd'}, @@ -220,9 +224,17 @@ main(int argc, char *argv[]) switch (c) { #ifdef HAVE_GETOPT_LONG case 0 : - if (longindex == 1) + switch (longindex) { + case 0: help(); - break; + break; + case 1: + flags |= MAGIC_MIME_TYPE; + break; + case 2: + flags |= MAGIC_MIME_ENCODING; + break; + } #endif case '0': nulsep = 1; @@ -567,7 +579,10 @@ help(void) " ascii, apptype, elf, compress, soft, tar\n" " -f, --files-from FILE read the filenames to be examined from FILE\n" " -F, --separator string use string as separator instead of `:'\n" -" -i, --mime output mime type strings\n" +" -i, --mime output MIME type strings (--mime-type and\n" +" --mime-encoding)\n" +" --mime-type output the MIME type\n" +" --mime-encoding output the MIME encoding\n" " -k, --keep-going don't stop at the first match\n" " -L, --dereference causes symlinks to be followed\n" " -n, --no-buffer do not buffer output\n" diff --git a/src/fsmagic.c b/src/fsmagic.c index cd8f13e2..ec7d0c9c 100644 --- a/src/fsmagic.c +++ b/src/fsmagic.c @@ -57,13 +57,14 @@ #undef HAVE_MAJOR #ifndef lint -FILE_RCSID("@(#)$File: fsmagic.c,v 1.46 2005/06/25 15:52:14 christos Exp $") +FILE_RCSID("@(#)$File: fsmagic.c,v 1.47 2007/01/12 17:38:28 christos Exp $") #endif /* lint */ protected int file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) { int ret = 0; + int mime = ms->flags & MAGIC_MIME; #ifdef S_IFLNK char buf[BUFSIZ+4]; int nch; @@ -95,11 +96,12 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) return 1; } - if ((ms->flags & MAGIC_MIME) != 0) { + if (mime) { if ((sb->st_mode & S_IFMT) != S_IFREG) { - if (file_printf(ms, "application/x-not-regular-file") + if ((mime & MAGIC_MIME_TYPE) && + file_printf(ms, "application/x-not-regular-file") == -1) - return -1; + return -1; return 1; } } @@ -303,8 +305,9 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) * when we read the file.) */ if ((ms->flags & MAGIC_DEVICES) == 0 && sb->st_size == 0) { - if (file_printf(ms, (ms->flags & MAGIC_MIME) ? - "application/x-empty" : "empty") == -1) + if ((!mime || (mime & MAGIC_MIME_TYPE)) && + file_printf(ms, mime ? "application/x-empty" : + "empty") == -1) return -1; return 1; } diff --git a/src/funcs.c b/src/funcs.c index 3ffcaa8a..5885ff39 100644 --- a/src/funcs.c +++ b/src/funcs.c @@ -48,7 +48,7 @@ #endif #ifndef lint -FILE_RCSID("@(#)$File: funcs.c,v 1.32 2007/05/24 17:22:27 christos Exp $") +FILE_RCSID("@(#)$File: funcs.c,v 1.33 2007/06/15 00:01:15 christos Exp $") #endif /* lint */ #ifndef HAVE_VSNPRINTF @@ -164,59 +164,73 @@ protected int file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf, size_t nb) { - int m; + int m; + int mime = ms->flags & MAGIC_MIME; + + if (nb == 0) { + if ((!mime || (mime & MAGIC_MIME_TYPE)) && + file_printf(ms, mime ? "application/x-empty" : + "empty") == -1) + return -1; + return 1; + } else if (nb == 1) { + if ((!mime || (mime & MAGIC_MIME_TYPE)) && + file_printf(ms, mime ? "application/octet-stream" : + "very short file (no magic)") == -1) + return -1; + return 1; + } #ifdef __EMX__ - if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { - switch (file_os2_apptype(ms, inname, buf, nb)) { - case -1: - return -1; - case 0: - break; - default: - return 1; + if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { + switch (file_os2_apptype(ms, inname, buf, nb)) { + case -1: + return -1; + case 0: + break; + default: + return 1; + } } - } #endif - /* try compression stuff */ - if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) != 0 || - (m = file_zmagic(ms, fd, inname, buf, nb)) == 0) { - /* Check if we have a tar file */ - if ((ms->flags & MAGIC_NO_CHECK_TAR) != 0 || - (m = file_is_tar(ms, buf, nb)) == 0) { - /* try tests in /etc/magic (or surrogate magic file) */ - if ((ms->flags & MAGIC_NO_CHECK_SOFT) != 0 || - (m = file_softmagic(ms, buf, nb)) == 0) { - /* try known keywords, check whether it is ASCII */ - if ((ms->flags & MAGIC_NO_CHECK_ASCII) != 0 || - (m = file_ascmagic(ms, buf, nb)) == 0) { - /* abandon hope, all ye who remain here */ - if (file_printf(ms, ms->flags & MAGIC_MIME ? - (nb ? "application/octet-stream" : - "application/empty") : - (nb ? "data" : - "empty")) == -1) - return -1; - m = 1; + /* try compression stuff */ + if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) != 0 || + (m = file_zmagic(ms, fd, inname, buf, nb)) == 0) { + /* Check if we have a tar file */ + if ((ms->flags & MAGIC_NO_CHECK_TAR) != 0 || + (m = file_is_tar(ms, buf, nb)) == 0) { + /* try tests in /etc/magic (or surrogate magic file) */ + if ((ms->flags & MAGIC_NO_CHECK_SOFT) != 0 || + (m = file_softmagic(ms, buf, nb)) == 0) { + /* try known keywords, check whether it is ASCII */ + if ((ms->flags & MAGIC_NO_CHECK_ASCII) != 0 || + (m = file_ascmagic(ms, buf, nb)) == 0) { + /* abandon hope, all ye who remain here */ + if ((!mime || (mime & MAGIC_MIME_TYPE)) && + file_printf(ms, mime ? "application/octet-stream" : + "data") == -1) + return -1; + m = 1; + } } } } - } #ifdef BUILTIN_ELF - if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && nb > 5 && fd != -1) { - /* - * We matched something in the file, so this *might* - * be an ELF file, and the file is at least 5 bytes - * long, so if it's an ELF file it has at least one - * byte past the ELF magic number - try extracting - * information from the ELF headers that cannot easily - * be extracted with rules in the magic file. - */ - (void)file_tryelf(ms, fd, buf, nb); - } + if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && + nb > 5 && fd != -1) { + /* + * We matched something in the file, so this *might* + * be an ELF file, and the file is at least 5 bytes + * long, so if it's an ELF file it has at least one + * byte past the ELF magic number - try extracting + * information from the ELF headers that cannot easily + * be extracted with rules in the magic file. + */ + (void)file_tryelf(ms, fd, buf, nb); + } #endif - return m; + return m; } #endif diff --git a/src/is_tar.c b/src/is_tar.c index 10c8b05a..cbaba63c 100644 --- a/src/is_tar.c +++ b/src/is_tar.c @@ -45,7 +45,7 @@ #include "tar.h" #ifndef lint -FILE_RCSID("@(#)$File: is_tar.c,v 1.27 2007/01/12 17:38:28 christos Exp $") +FILE_RCSID("@(#)$File: is_tar.c,v 1.28 2007/08/19 03:45:08 christos Exp $") #endif #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) @@ -53,6 +53,12 @@ FILE_RCSID("@(#)$File: is_tar.c,v 1.27 2007/01/12 17:38:28 christos Exp $") private int is_tar(const unsigned char *, size_t); private int from_oct(int, const char *); /* Decode octal number */ +static const char *tartype[] = { + "tar archive", + "POSIX tar archive", + "POSIX tar archive (GNU)", +}; + protected int file_is_tar(struct magic_set *ms, const unsigned char *buf, size_t nbytes) { @@ -60,25 +66,19 @@ file_is_tar(struct magic_set *ms, const unsigned char *buf, size_t nbytes) * Do the tar test first, because if the first file in the tar * archive starts with a dot, we can confuse it with an nroff file. */ - switch (is_tar(buf, nbytes)) { - case 1: - if (file_printf(ms, (ms->flags & MAGIC_MIME) ? - "application/x-tar" : "tar archive") == -1) - return -1; - return 1; - case 2: - if (file_printf(ms, (ms->flags & MAGIC_MIME) ? - "application/x-tar" : "POSIX tar archive") == -1) - return -1; - return 1; - case 3: - if (file_printf(ms, (ms->flags & MAGIC_MIME) ? - "application/x-tar" : "POSIX tar archive (GNU)") == -1) - return -1; - return 1; - default: + int tar = is_tar(buf, nbytes); + int mime = ms->flags & MAGIC_MIME; + + if (tar < 1 || tar > 3) return 0; - } + + if (mime == MAGIC_MIME_ENCODING) + return 0; + + if (file_printf(ms, mime ? "application/x-tar" : + tartype[tar - 1]) == -1) + return -1; + return 1; } /* diff --git a/src/magic.c b/src/magic.c index c6fd9ad8..11cfb20e 100644 --- a/src/magic.c +++ b/src/magic.c @@ -63,7 +63,7 @@ #include "patchlevel.h" #ifndef lint -FILE_RCSID("@(#)$File: magic.c,v 1.42 2007/08/19 03:45:08 christos Exp $") +FILE_RCSID("@(#)$File: magic.c,v 1.43 2007/09/26 20:45:26 christos Exp $") #endif /* lint */ #ifdef __EMX__ @@ -260,6 +260,7 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd) struct stat sb; ssize_t nbytes = 0; /* number of bytes read from a datafile */ int ispipe = 0; + int mime = ms->flags & MAGIC_MIME; /* * one extra for terminating '\0', and @@ -343,20 +344,9 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd) } } - if (nbytes == 0) { - if (file_printf(ms, (ms->flags & MAGIC_MIME) ? - "application/x-empty" : "empty") == -1) - goto done; - } else if (nbytes == 1) { - if (file_printf(ms, (ms->flags & MAGIC_MIME) ? - "application/octet-stream" : "very short file (no magic)") - == -1) - goto done; - } else { - (void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */ - if (file_buffer(ms, fd, inname, buf, (size_t)nbytes) == -1) - goto done; - } + (void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */ + if (file_buffer(ms, fd, inname, buf, (size_t)nbytes) == -1) + goto done; rv = 0; done: free(buf); diff --git a/src/magic.h b/src/magic.h index 483cac3f..96aa2f68 100644 --- a/src/magic.h +++ b/src/magic.h @@ -34,21 +34,23 @@ #define MAGIC_SYMLINK 0x000002 /* Follow symlinks */ #define MAGIC_COMPRESS 0x000004 /* Check inside compressed files */ #define MAGIC_DEVICES 0x000008 /* Look at the contents of devices */ -#define MAGIC_MIME 0x000010 /* Return a mime string */ +#define MAGIC_MIME_TYPE 0x000010 /* Return only the MIME type */ #define MAGIC_CONTINUE 0x000020 /* Return all matches */ #define MAGIC_CHECK 0x000040 /* Print warnings to stderr */ #define MAGIC_PRESERVE_ATIME 0x000080 /* Restore access time on exit */ -#define MAGIC_RAW 0x000100 /* Don't translate unprintable chars */ +#define MAGIC_RAW 0x000100 /* Don't translate unprint chars */ #define MAGIC_ERROR 0x000200 /* Handle ENOENT etc as real errors */ -#define MAGIC_NO_CHECK_COMPRESS 0x001000 /* Don't check for compressed files */ -#define MAGIC_NO_CHECK_TAR 0x002000 /* Don't check for tar files */ -#define MAGIC_NO_CHECK_SOFT 0x004000 /* Don't check magic entries */ -#define MAGIC_NO_CHECK_APPTYPE 0x008000 /* Don't check application type */ -#define MAGIC_NO_CHECK_ELF 0x010000 /* Don't check for elf details */ -#define MAGIC_NO_CHECK_ASCII 0x020000 /* Don't check for ascii files */ -#define MAGIC_NO_CHECK_TROFF 0x040000 /* Don't check ascii/troff */ -#define MAGIC_NO_CHECK_FORTRAN 0x080000 /* Don't check ascii/fortran */ -#define MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check ascii/tokens */ +#define MAGIC_MIME_ENCODING 0x000400 /* Return only the MIME encoding */ +#define MAGIC_MIME (MAGIC_MIME_TYPE|MAGIC_MIME_ENCODING) +#define MAGIC_NO_CHECK_COMPRESS 0x001000 /* Don't check for compressed files */ +#define MAGIC_NO_CHECK_TAR 0x002000 /* Don't check for tar files */ +#define MAGIC_NO_CHECK_SOFT 0x004000 /* Don't check magic entries */ +#define MAGIC_NO_CHECK_APPTYPE 0x008000 /* Don't check application type */ +#define MAGIC_NO_CHECK_ELF 0x010000 /* Don't check for elf details */ +#define MAGIC_NO_CHECK_ASCII 0x020000 /* Don't check for ascii files */ +#define MAGIC_NO_CHECK_TROFF 0x040000 /* Don't check ascii/troff */ +#define MAGIC_NO_CHECK_FORTRAN 0x080000 /* Don't check ascii/fortran */ +#define MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check ascii/tokens */ #ifdef __cplusplus extern "C" { diff --git a/src/softmagic.c b/src/softmagic.c index a0e709f3..a0548209 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -38,7 +38,7 @@ #ifndef lint -FILE_RCSID("@(#)$File: softmagic.c,v 1.99 2007/05/08 14:44:18 christos Exp $") +FILE_RCSID("@(#)$File: softmagic.c,v 1.100 2007/09/26 20:19:05 christos Exp $") #endif /* lint */ private int match(struct magic_set *, struct magic *, uint32_t, @@ -254,9 +254,10 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, break; } } - firstline = 0; - if (printed_something) + if (printed_something) { + firstline = 0; returnval = 1; + } if ((ms->flags & MAGIC_CONTINUE) == 0 && printed_something) { return 1; /* don't keep searching */ } -- 2.40.0