From: Christos Zoulas Date: Thu, 16 Oct 2008 16:31:16 +0000 (+0000) Subject: better handling of mime encodings. X-Git-Tag: FILE5_05~306 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9bc91297bd672975b7e0844bd59c31c7e6740fd3;p=file better handling of mime encodings. --- diff --git a/src/ascmagic.c b/src/ascmagic.c index 4c51b108..841dea00 100644 --- a/src/ascmagic.c +++ b/src/ascmagic.c @@ -49,7 +49,7 @@ #include "names.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.65 2008/08/31 07:58:00 christos Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.66 2008/10/16 16:31:16 christos Exp $") #endif /* lint */ #define MAXLINELEN 300 /* longest sane line length */ @@ -75,6 +75,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) unichar *ubuf = NULL; size_t ulen, mlen; const struct names *p; + const char *encoding = "binary"; int rv = -1; int mime = ms->flags & MAGIC_MIME; @@ -103,12 +104,16 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) while (nbytes > 1 && buf[nbytes - 1] == '\0') nbytes--; - if ((nbuf = CAST(unsigned char *, calloc((size_t)1, - (nbytes + 1) * sizeof(nbuf[0])))) == NULL) + mlen = (nbytes + 1) * sizeof(nbuf[0]); + if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) { + file_oomem(ms, mlen); goto done; - if ((ubuf = CAST(unichar *, calloc((size_t)1, - (nbytes + 1) * sizeof(ubuf[0])))) == NULL) + } + mlen = (nbytes + 1) * sizeof(ubuf[0]); + if ((ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) { + file_oomem(ms, mlen); goto done; + } /* * Then try to determine whether it's any character code we can @@ -120,6 +125,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) code = "ASCII"; code_mime = "us-ascii"; type = "text"; + encoding = "7bit"; } else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) { code = "UTF-8 Unicode (with BOM)"; code_mime = "utf-8"; @@ -156,6 +162,9 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) type = "character data"; code_mime = "ebcdic"; } else { + if (mime == MAGIC_MIME_ENCODING) + if (file_printf(ms, "%s", encoding) == -1) + goto done; rv = 0; goto done; /* doesn't look like text at all */ } @@ -179,11 +188,11 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) } if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL) goto done; - if (file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf), - TEXTTEST) != 0) { - rv = 1; + if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf), + TEXTTEST)) != 0) goto done; - } + else + rv = -1; /* look for tokens from names.h - this is expensive! */ if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0) @@ -276,7 +285,8 @@ subtype_identified: } if (mime == MAGIC_MIME_ENCODING) - file_printf(ms, "binary"); + if (file_printf(ms, "%s", encoding) == -1) + goto done; } else { if (file_printf(ms, code) == -1) goto done; diff --git a/src/fsmagic.c b/src/fsmagic.c index f3b23725..0d055493 100644 --- a/src/fsmagic.c +++ b/src/fsmagic.c @@ -57,7 +57,7 @@ #undef HAVE_MAJOR #ifndef lint -FILE_RCSID("@(#)$File: fsmagic.c,v 1.52 2008/07/25 23:59:01 rrt Exp $") +FILE_RCSID("@(#)$File: fsmagic.c,v 1.53 2008/10/16 16:31:16 christos Exp $") #endif /* lint */ private int @@ -84,6 +84,17 @@ bad_link(struct magic_set *ms, int err, char *buf) return 1; } +private int +handle_mime(struct magic_set *ms, int mime, const char *str) +{ + if ((mime & MAGIC_MIME_TYPE) && file_printf(ms, "application/%s", str) + == -1) + return -1; + if ((mime & MAGIC_MIME_ENCODING) && file_printf(ms, "binary") == -1) + return -1; + return 0; +} + protected int file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) { @@ -140,11 +151,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) switch (sb->st_mode & S_IFMT) { case S_IFDIR: - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-directory") - == -1) - return -1; - if (!mime && file_printf(ms, "directory") == -1) + if (mime) { + if (handle_mime(ms, mime, "x-directory") == -1) + return -1; + } else if (file_printf(ms, "directory") == -1) return -1; return 1; #ifdef S_IFCHR @@ -156,20 +166,20 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) */ if ((ms->flags & MAGIC_DEVICES) != 0) break; - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-character-device") - == -1) - return -1; - if (!mime) { + if (mime) { + if (handle_mime(ms, mime, "x-character-device") == -1) + return -1; + } else { #ifdef HAVE_STAT_ST_RDEV # ifdef dv_unit if (file_printf(ms, "character special (%d/%d/%d)", - major(sb->st_rdev), dv_unit(sb->st_rdev), + major(sb->st_rdev), dv_unit(sb->st_rdev), dv_subunit(sb->st_rdev)) == -1) return -1; # else if (file_printf(ms, "character special (%ld/%ld)", - (long) major(sb->st_rdev), (long) minor(sb->st_rdev)) == -1) + (long)major(sb->st_rdev), (long)minor(sb->st_rdev)) + == -1) return -1; # endif #else @@ -188,11 +198,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) */ if ((ms->flags & MAGIC_DEVICES) != 0) break; - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-block-device") - == -1) - return -1; - if (!mime) { + if (mime) { + if (handle_mime(ms, mime, "x-block-device") == -1) + return -1; + } else { #ifdef HAVE_STAT_ST_RDEV # ifdef dv_unit if (file_printf(ms, "block special (%d/%d/%d)", @@ -216,21 +225,19 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) case S_IFIFO: if((ms->flags & MAGIC_DEVICES) != 0) break; - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-fifo") - == -1) - return -1; - if (!mime && file_printf(ms, "fifo (named pipe)") == -1) + if (mime) { + if (handle_mime(ms, mime, "x-fifo") == -1) + return -1; + } else if (file_printf(ms, "fifo (named pipe)") == -1) return -1; return 1; #endif #ifdef S_IFDOOR case S_IFDOOR: - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-door") - == -1) - return -1; - if (!mime && file_printf(ms, "door") == -1) + if (mime) { + if (handle_mime(ms, mime, "x-door") == -1) + return -1; + } else if (file_printf(ms, "door") == -1) return -1; return 1; #endif @@ -242,11 +249,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) fn); return -1; } - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-symlink") - == -1) - return -1; - if (!mime && file_printf(ms, + if (mime) { + if (handle_mime(ms, mime, "x-symlink") == -1) + return -1; + } else if (file_printf(ms, "unreadable symlink `%s' (%s)", fn, strerror(errno)) == -1) return -1; @@ -271,11 +277,11 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) "path too long: `%s'", buf); return -1; } - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-path-too-long") - == -1) - return -1; - if (!mime && file_printf(ms, + if (mime) { + if (handle_mime(ms, mime, + "x-path-too-long") == -1) + return -1; + } else if (file_printf(ms, "path too long: `%s'", fn) == -1) return -1; return 1; @@ -297,11 +303,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) ms->flags |= MAGIC_SYMLINK; return p != NULL ? 1 : -1; } else { /* just print what it points to */ - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-symlink") - == -1) - return -1; - if (!mime && file_printf(ms, "symbolic link to `%s'", + if (mime) { + if (handle_mime(ms, mime, "x-symlink") == -1) + return -1; + } else if (file_printf(ms, "symbolic link to `%s'", buf) == -1) return -1; } @@ -310,11 +315,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) #ifdef S_IFSOCK #ifndef __COHERENT__ case S_IFSOCK: - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-socket") - == -1) - return -1; - if (!mime && file_printf(ms, "socket") == -1) + if (mime) { + if (handle_mime(ms, mime, "x-socket") == -1) + return -1; + } else if (file_printf(ms, "socket") == -1) return -1; return 1; #endif @@ -340,9 +344,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) * when we read the file.) */ if ((ms->flags & MAGIC_DEVICES) == 0 && sb->st_size == 0) { - if ((!mime || (mime & MAGIC_MIME_TYPE)) && - file_printf(ms, mime ? "application/x-empty" : - "empty") == -1) + if (mime) { + if (handle_mime(ms, mime, "x-empty") == -1) + return -1; + } else if (file_printf(ms, "empty") == -1) return -1; return 1; } diff --git a/src/softmagic.c b/src/softmagic.c index 79c287e8..ac2b8bef 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -38,7 +38,7 @@ #ifndef lint -FILE_RCSID("@(#)$File: softmagic.c,v 1.122 2008/09/23 14:34:25 christos Exp $") +FILE_RCSID("@(#)$File: softmagic.c,v 1.123 2008/10/16 16:31:16 christos Exp $") #endif /* lint */ private int match(struct magic_set *, struct magic *, uint32_t, @@ -52,17 +52,12 @@ private int mcopy(struct magic_set *, union VALUETYPE *, int, int, const unsigned char *, uint32_t, size_t, size_t); private int mconvert(struct magic_set *, struct magic *); private int print_sep(struct magic_set *, int); +private int handle_mime(struct magic_set *, struct magic *); private void cvt_8(union VALUETYPE *, const struct magic *); private void cvt_16(union VALUETYPE *, const struct magic *); private void cvt_32(union VALUETYPE *, const struct magic *); private void cvt_64(union VALUETYPE *, const struct magic *); -/* - * Macro to give description string according to whether we want plain - * text or MIME type - */ -#define MAGIC_DESC ((ms->flags & MAGIC_MIME) ? m->mimetype : m->desc) - /* * softmagic - lookup one file in parsed, in-memory copy of database * Passed the name and FILE * of one file to be typed. @@ -73,6 +68,9 @@ file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes, in { struct mlist *ml; int rv; + if (ms->flags & MAGIC_MIME_ENCODING) + /* Let ascmagic do the work */ + return 0; for (ml = ms->mlist->next; ml != ms->mlist; ml = ml->next) if ((rv = match(ms, ml->magic, ml->nmagic, buf, nbytes, mode)) != 0) return rv; @@ -114,7 +112,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, uint32_t magindex = 0; unsigned int cont_level = 0; int need_separator = 0; - int returnval = 0; /* if a match is found it is set to 1*/ + int returnval = 0, e; /* if a match is found it is set to 1*/ int firstline = 1; /* a flag to print X\n X\n- X */ int printed_something = 0; @@ -167,9 +165,11 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, * If we are going to print something, we'll need to print * a blank before we print something else. */ - if (*MAGIC_DESC) { + if (*m->desc) { need_separator = 1; printed_something = 1; + if ((e = handle_mime(ms, m)) != 0) + return e; if (print_sep(ms, firstline) == -1) return -1; } @@ -234,8 +234,10 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, * If we are going to print something, * make sure that we have a separator first. */ - if (*MAGIC_DESC) { + if (*m->desc) { printed_something = 1; + if ((e = handle_mime(ms, m)) != 0) + return e; if (print_sep(ms, firstline) == -1) return -1; } @@ -248,14 +250,14 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, /* space if previous printed */ if (need_separator && ((m->flag & NOSPACE) == 0) - && *MAGIC_DESC) { + && *m->desc) { if (file_printf(ms, " ") == -1) return -1; need_separator = 0; } if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1) return -1; - if (*MAGIC_DESC) + if (*m->desc) need_separator = 1; /* @@ -285,7 +287,7 @@ check_fmt(struct magic_set *ms, struct magic *m) regex_t rx; int rc; - if (strchr(MAGIC_DESC, '%') == NULL) + if (strchr(m->desc, '%') == NULL) return 0; rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); @@ -295,7 +297,7 @@ check_fmt(struct magic_set *ms, struct magic *m) file_magerror(ms, "regex error %d, (%s)", rc, errmsg); return -1; } else { - rc = regexec(&rx, MAGIC_DESC, 0, 0, 0); + rc = regexec(&rx, m->desc, 0, 0, 0); regfree(&rx); return !rc; } @@ -328,7 +330,7 @@ mprint(struct magic_set *ms, struct magic *m) float vf; double vd; int64_t t = 0; - char *buf; + char buf[128]; union VALUETYPE *p = &ms->ms_value; switch (m->type) { @@ -338,13 +340,13 @@ mprint(struct magic_set *ms, struct magic *m) case -1: return -1; case 1: - if (asprintf(&buf, "%c", (unsigned char)v) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) + (void)snprintf(buf, sizeof(buf), "%c", + (unsigned char)v); + if (file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, MAGIC_DESC, (unsigned char) v) == -1) + if (file_printf(ms, m->desc, (unsigned char) v) == -1) return -1; break; } @@ -359,13 +361,13 @@ mprint(struct magic_set *ms, struct magic *m) case -1: return -1; case 1: - if (asprintf(&buf, "%hu", (unsigned short)v) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) + (void)snprintf(buf, sizeof(buf), "%hu", + (unsigned short)v); + if (file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, MAGIC_DESC, (unsigned short) v) == -1) + if (file_printf(ms, m->desc, (unsigned short) v) == -1) return -1; break; } @@ -381,13 +383,12 @@ mprint(struct magic_set *ms, struct magic *m) case -1: return -1; case 1: - if (asprintf(&buf, "%u", (uint32_t)v) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) + (void)snprintf(buf, sizeof(buf), "%u", (uint32_t)v); + if (file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, MAGIC_DESC, (uint32_t) v) == -1) + if (file_printf(ms, m->desc, (uint32_t) v) == -1) return -1; break; } @@ -398,7 +399,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEQUAD: case FILE_LEQUAD: v = file_signextend(ms, m, p->q); - if (file_printf(ms, MAGIC_DESC, (uint64_t) v) == -1) + if (file_printf(ms, m->desc, (uint64_t) v) == -1) return -1; t = ms->offset + sizeof(int64_t); break; @@ -408,14 +409,14 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BESTRING16: case FILE_LESTRING16: if (m->reln == '=' || m->reln == '!') { - if (file_printf(ms, MAGIC_DESC, m->value.s) == -1) + if (file_printf(ms, m->desc, m->value.s) == -1) return -1; t = ms->offset + m->vallen; } else { if (*m->value.s == '\0') p->s[strcspn(p->s, "\n")] = '\0'; - if (file_printf(ms, MAGIC_DESC, p->s) == -1) + if (file_printf(ms, m->desc, p->s) == -1) return -1; t = ms->offset + strlen(p->s); if (m->type == FILE_PSTRING) @@ -427,7 +428,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEDATE: case FILE_LEDATE: case FILE_MEDATE: - if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 1)) == -1) + if (file_printf(ms, m->desc, file_fmttime(p->l, 1)) == -1) return -1; t = ms->offset + sizeof(time_t); break; @@ -436,7 +437,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BELDATE: case FILE_LELDATE: case FILE_MELDATE: - if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 0)) == -1) + if (file_printf(ms, m->desc, file_fmttime(p->l, 0)) == -1) return -1; t = ms->offset + sizeof(time_t); break; @@ -444,7 +445,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_QDATE: case FILE_BEQDATE: case FILE_LEQDATE: - if (file_printf(ms, MAGIC_DESC, file_fmttime((uint32_t)p->q, 1)) + if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q, 1)) == -1) return -1; t = ms->offset + sizeof(uint64_t); @@ -453,7 +454,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_QLDATE: case FILE_BEQLDATE: case FILE_LEQLDATE: - if (file_printf(ms, MAGIC_DESC, file_fmttime((uint32_t)p->q, 0)) + if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q, 0)) == -1) return -1; t = ms->offset + sizeof(uint64_t); @@ -467,13 +468,12 @@ mprint(struct magic_set *ms, struct magic *m) case -1: return -1; case 1: - if (asprintf(&buf, "%g", vf) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) + (void)snprintf(buf, sizeof(buf), "%g", vf); + if (file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, MAGIC_DESC, vf) == -1) + if (file_printf(ms, m->desc, vf) == -1) return -1; break; } @@ -488,13 +488,12 @@ mprint(struct magic_set *ms, struct magic *m) case -1: return -1; case 1: - if (asprintf(&buf, "%g", vd) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) + (void)snprintf(buf, sizeof(buf), "%g", vd); + if (file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, MAGIC_DESC, vd) == -1) + if (file_printf(ms, m->desc, vd) == -1) return -1; break; } @@ -510,7 +509,7 @@ mprint(struct magic_set *ms, struct magic *m) file_oomem(ms, ms->search.rm_len); return -1; } - rval = file_printf(ms, MAGIC_DESC, cp); + rval = file_printf(ms, m->desc, cp); free(cp); if (rval == -1) @@ -524,7 +523,7 @@ mprint(struct magic_set *ms, struct magic *m) } case FILE_SEARCH: - if (file_printf(ms, MAGIC_DESC, m->value.s) == -1) + if (file_printf(ms, m->desc, m->value.s) == -1) return -1; if ((m->str_flags & REGEX_OFFSET_START)) t = ms->search.offset; @@ -533,7 +532,7 @@ mprint(struct magic_set *ms, struct magic *m) break; case FILE_DEFAULT: - if (file_printf(ms, MAGIC_DESC, m->value.s) == -1) + if (file_printf(ms, m->desc, m->value.s) == -1) return -1; t = ms->offset; break; @@ -1820,6 +1819,14 @@ magiccheck(struct magic_set *ms, struct magic *m) return matched; } +private int +handle_mime(struct magic_set *ms, struct magic *m) +{ + if (ms->flags & MAGIC_MIME_TYPE) + return file_printf(ms, "%s", m->mimetype); + return 0; +} + private int print_sep(struct magic_set *ms, int firstline) {