From: Christos Zoulas Date: Fri, 7 Nov 2008 18:57:28 +0000 (+0000) Subject: handle encoding better. X-Git-Tag: FILE5_00~30 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fe3c433f57adb5f684eda1ec62f0e387aa03a503;p=file handle encoding better. --- diff --git a/src/ascmagic.c b/src/ascmagic.c index 1a36829e..b58f1e53 100644 --- a/src/ascmagic.c +++ b/src/ascmagic.c @@ -36,7 +36,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.71 2008/11/06 21:36:44 rrt Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.72 2008/11/07 17:26:17 christos Exp $") #endif /* lint */ #include "magic.h" @@ -88,12 +88,14 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) nbytes = trim_nuls(buf, nbytes); /* If file doesn't look like any sort of text, give up. */ - if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime, &type) == 0) { + if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime, + &type) == 0) { rv = 0; goto done; } - rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, code_mime, type); + rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, + type); done: if (ubuf) @@ -103,7 +105,9 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) } protected int -file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t ulen, const char *code, const char *code_mime, const char *type) +file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf, + size_t nbytes, unichar *ubuf, size_t ulen, const char *code, + const char *type) { unsigned char *utf8_buf = NULL, *utf8_end; size_t mlen, i; @@ -225,8 +229,7 @@ subtype_identified: n_cr++; if (mime) { - if ((mime & MAGIC_MIME_TYPE) && - !(ms->event_flags & EVENT_WROTE_MIME_TYPE)) { + if ((mime & MAGIC_MIME_TYPE) != 0) { if (subtype_mime) { if (file_printf(ms, "%s", subtype_mime) == -1) goto done; @@ -235,18 +238,6 @@ subtype_identified: goto done; } } - - if ((mime == 0 || mime == MAGIC_MIME) && code_mime) { - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "; charset=") == -1) - goto done; - if (file_printf(ms, "%s", code_mime) == -1) - goto done; - } - - if (mime == MAGIC_MIME_ENCODING) - if (file_printf(ms, "%s", code_mime) == -1) - goto done; } else { if (file_printf(ms, "%s", code) == -1) goto done; diff --git a/src/file.h b/src/file.h index 911c453c..897ed569 100644 --- a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.114 2008/11/06 21:17:45 rrt Exp $ + * @(#)$File: file.h,v 1.115 2008/11/06 23:22:54 christos Exp $ */ #ifndef __file_h__ @@ -323,7 +323,6 @@ struct magic_set { int flags; /* Control magic tests. */ int event_flags; /* Note things that happened. */ #define EVENT_HAD_ERR 0x01 -#define EVENT_WROTE_MIME_TYPE 0x02 const char *file; size_t line; /* current magic line number */ @@ -360,10 +359,14 @@ protected int file_trycdf(struct magic_set *, int, const unsigned char *, protected int file_zmagic(struct magic_set *, int, const char *, const unsigned char *, size_t); protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t); -protected int file_ascmagic_with_encoding(struct magic_set *, const unsigned char *, size_t, unichar *, size_t, const char *, const char *, const char *); -protected int file_encoding(struct magic_set *, const unsigned char *, size_t, unichar **, size_t *, const char **, const char **, const char **); +protected int file_ascmagic_with_encoding(struct magic_set *, + const unsigned char *, size_t, unichar *, size_t, const char *, + const char *); +protected int file_encoding(struct magic_set *, const unsigned char *, size_t, + unichar **, size_t *, const char **, const char **, const char **); protected int file_is_tar(struct magic_set *, const unsigned char *, size_t); -protected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int); +protected int file_softmagic(struct magic_set *, const unsigned char *, size_t, + int); protected struct mlist *file_apprentice(struct magic_set *, const char *, int); protected uint64_t file_signextend(struct magic_set *, struct magic *, uint64_t); diff --git a/src/fsmagic.c b/src/fsmagic.c index e52496c3..30dbf5bd 100644 --- a/src/fsmagic.c +++ b/src/fsmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: fsmagic.c,v 1.55 2008/11/04 16:38:28 christos Exp $") +FILE_RCSID("@(#)$File: fsmagic.c,v 1.56 2008/11/07 17:25:59 christos Exp $") #endif /* lint */ #include "magic.h" @@ -92,7 +92,7 @@ handle_mime(struct magic_set *ms, int mime, const char *str) if (file_printf(ms, "application/%s", str) == -1) return -1; if ((mime & MAGIC_MIME_ENCODING) && file_printf(ms, - "; encoding=") == -1) + "; charset=") == -1) return -1; } if ((mime & MAGIC_MIME_ENCODING) && file_printf(ms, "binary") == -1) diff --git a/src/funcs.c b/src/funcs.c index 5a8706bb..b9a06cad 100644 --- a/src/funcs.c +++ b/src/funcs.c @@ -27,7 +27,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: funcs.c,v 1.49 2008/11/06 22:49:08 rrt Exp $") +FILE_RCSID("@(#)$File: funcs.c,v 1.50 2008/11/07 17:27:22 christos Exp $") #endif /* lint */ #include "magic.h" @@ -158,11 +158,15 @@ protected int file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf, size_t nb) { - int m = 0, rv = 0; + int m = 0, rv = 0, looks_text = 0; int mime = ms->flags & MAGIC_MIME; const unsigned char *ubuf = CAST(const unsigned char *, buf); unichar *u8buf = NULL; size_t ulen; + const char *code = NULL; + const char *code_mime = "binary"; + const char *type = NULL; + if (nb == 0) { @@ -179,6 +183,11 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf, return 1; } + if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { + looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, + &code, &code_mime, &type); + } + #ifdef __EMX__ if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { switch (file_os2_apptype(ms, inname, buf, nb)) { @@ -253,17 +262,9 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf, /* try to discover text encoding */ if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { - const char *code = NULL; - const char *code_mime = NULL; - const char *type = NULL; - int looks_text = 0; - - looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, - &code, &code_mime, &type); if (looks_text == 0) if ((m = file_ascmagic_with_encoding( ms, ubuf, - nb, u8buf, ulen, code, code_mime, type)) - != 0) { + nb, u8buf, ulen, code, type)) != 0) { if ((ms->flags & MAGIC_DEBUG) != 0) (void)fprintf(stderr, "ascmagic/enc %d\n", m); @@ -279,6 +280,13 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf, rv = -1; } done: + if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { + if (ms->flags & MAGIC_MIME_TYPE) + if (file_printf(ms, "; charset=") == -1) + rv = -1; + if (file_printf(ms, "%s", code_mime) == -1) + rv = -1; + } if (u8buf) free(u8buf); if (rv) diff --git a/src/is_tar.c b/src/is_tar.c index 907a265a..0da0308f 100644 --- a/src/is_tar.c +++ b/src/is_tar.c @@ -40,7 +40,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: is_tar.c,v 1.33 2008/11/03 06:18:34 rrt Exp $") +FILE_RCSID("@(#)$File: is_tar.c,v 1.34 2008/11/04 16:38:28 christos Exp $") #endif #include "magic.h" @@ -70,7 +70,7 @@ file_is_tar(struct magic_set *ms, const unsigned char *buf, size_t nbytes) int tar; int mime = ms->flags & MAGIC_MIME; - if ((ms->flags & MAGIC_APPLE) != 0 || mime == MAGIC_MIME_ENCODING) + if ((ms->flags & MAGIC_APPLE) != 0) return 0; tar = is_tar(buf, nbytes); diff --git a/src/softmagic.c b/src/softmagic.c index a32b7afa..ee8661ec 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: softmagic.c,v 1.130 2008/11/06 23:22:54 christos Exp $") +FILE_RCSID("@(#)$File: softmagic.c,v 1.131 2008/11/07 17:26:44 christos Exp $") #endif /* lint */ #include "magic.h" @@ -166,7 +166,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, if (*m->desc) { need_separator = 1; printed_something = 1; - if ((e = handle_annotation(ms, m)) != -2) + if ((e = handle_annotation(ms, m)) != 0) return e; if (print_sep(ms, firstline) == -1) return -1; @@ -234,7 +234,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, */ if (*m->desc) { printed_something = 1; - if ((e = handle_annotation(ms, m)) != -2) + if ((e = handle_annotation(ms, m)) != 0) return e; if (print_sep(ms, firstline) == -1) return -1; @@ -249,7 +249,9 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, if (need_separator && ((m->flag & NOSPACE) == 0) && *m->desc) { - if (file_printf(ms, " ") == -1) + if ((ms->flags & + (MAGIC_MIME|MAGIC_APPLE)) == 0 && + file_printf(ms, " ") == -1) return -1; need_separator = 0; } @@ -273,9 +275,13 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, returnval = 1; } if ((ms->flags & MAGIC_CONTINUE) == 0 && printed_something) { + if (ms->flags & MAGIC_MIME) + return 0; return 1; /* don't keep searching */ } } + if (ms->flags & MAGIC_MIME) + return 0; return returnval; /* This is hit if -k is set or there is no match */ } @@ -330,6 +336,7 @@ mprint(struct magic_set *ms, struct magic *m) int64_t t = 0; char buf[128]; union VALUETYPE *p = &ms->ms_value; + int np = (ms->flags & MAGIC_MIME) == 0; switch (m->type) { case FILE_BYTE: @@ -340,11 +347,12 @@ mprint(struct magic_set *ms, struct magic *m) case 1: (void)snprintf(buf, sizeof(buf), "%c", (unsigned char)v); - if (file_printf(ms, m->desc, buf) == -1) + if (np && file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, m->desc, (unsigned char) v) == -1) + if (np && + file_printf(ms, m->desc, (unsigned char) v) == -1) return -1; break; } @@ -361,11 +369,12 @@ mprint(struct magic_set *ms, struct magic *m) case 1: (void)snprintf(buf, sizeof(buf), "%hu", (unsigned short)v); - if (file_printf(ms, m->desc, buf) == -1) + if (np && file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, m->desc, (unsigned short) v) == -1) + if (np && + file_printf(ms, m->desc, (unsigned short) v) == -1) return -1; break; } @@ -382,11 +391,11 @@ mprint(struct magic_set *ms, struct magic *m) return -1; case 1: (void)snprintf(buf, sizeof(buf), "%u", (uint32_t)v); - if (file_printf(ms, m->desc, buf) == -1) + if (np && file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, m->desc, (uint32_t) v) == -1) + if (np && file_printf(ms, m->desc, (uint32_t) v) == -1) return -1; break; } @@ -397,7 +406,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEQUAD: case FILE_LEQUAD: v = file_signextend(ms, m, p->q); - if (file_printf(ms, m->desc, (uint64_t) v) == -1) + if (np && file_printf(ms, m->desc, (uint64_t) v) == -1) return -1; t = ms->offset + sizeof(int64_t); break; @@ -407,14 +416,14 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BESTRING16: case FILE_LESTRING16: if (m->reln == '=' || m->reln == '!') { - if (file_printf(ms, m->desc, m->value.s) == -1) + if (np && file_printf(ms, m->desc, m->value.s) == -1) return -1; t = ms->offset + m->vallen; } else { if (*m->value.s == '\0') p->s[strcspn(p->s, "\n")] = '\0'; - if (file_printf(ms, m->desc, p->s) == -1) + if (np && file_printf(ms, m->desc, p->s) == -1) return -1; t = ms->offset + strlen(p->s); if (m->type == FILE_PSTRING) @@ -426,7 +435,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEDATE: case FILE_LEDATE: case FILE_MEDATE: - if (file_printf(ms, m->desc, file_fmttime(p->l, 1)) == -1) + if (np && file_printf(ms, m->desc, file_fmttime(p->l, 1)) == -1) return -1; t = ms->offset + sizeof(time_t); break; @@ -435,7 +444,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BELDATE: case FILE_LELDATE: case FILE_MELDATE: - if (file_printf(ms, m->desc, file_fmttime(p->l, 0)) == -1) + if (np && file_printf(ms, m->desc, file_fmttime(p->l, 0)) == -1) return -1; t = ms->offset + sizeof(time_t); break; @@ -443,8 +452,8 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_QDATE: case FILE_BEQDATE: case FILE_LEQDATE: - if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q, 1)) - == -1) + if (np && file_printf(ms, m->desc, file_fmttime((uint32_t)p->q, + 1)) == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -452,8 +461,8 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_QLDATE: case FILE_BEQLDATE: case FILE_LEQLDATE: - if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q, 0)) - == -1) + if (np && file_printf(ms, m->desc, file_fmttime((uint32_t)p->q, + 0)) == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -467,11 +476,11 @@ mprint(struct magic_set *ms, struct magic *m) return -1; case 1: (void)snprintf(buf, sizeof(buf), "%g", vf); - if (file_printf(ms, m->desc, buf) == -1) + if (np && file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, m->desc, vf) == -1) + if (np && file_printf(ms, m->desc, vf) == -1) return -1; break; } @@ -487,11 +496,11 @@ mprint(struct magic_set *ms, struct magic *m) return -1; case 1: (void)snprintf(buf, sizeof(buf), "%g", vd); - if (file_printf(ms, m->desc, buf) == -1) + if (np && file_printf(ms, m->desc, buf) == -1) return -1; break; default: - if (file_printf(ms, m->desc, vd) == -1) + if (np && file_printf(ms, m->desc, vd) == -1) return -1; break; } @@ -507,7 +516,10 @@ mprint(struct magic_set *ms, struct magic *m) file_oomem(ms, ms->search.rm_len); return -1; } - rval = file_printf(ms, m->desc, cp); + if (np) + rval = file_printf(ms, m->desc, cp); + else + rval = 0; free(cp); if (rval == -1) @@ -521,7 +533,7 @@ mprint(struct magic_set *ms, struct magic *m) } case FILE_SEARCH: - if (file_printf(ms, m->desc, m->value.s) == -1) + if (np && file_printf(ms, m->desc, m->value.s) == -1) return -1; if ((m->str_flags & REGEX_OFFSET_START)) t = ms->search.offset; @@ -530,7 +542,7 @@ mprint(struct magic_set *ms, struct magic *m) break; case FILE_DEFAULT: - if (file_printf(ms, m->desc, m->value.s) == -1) + if (np && file_printf(ms, m->desc, m->value.s) == -1) return -1; t = ms->offset; break; @@ -1454,7 +1466,8 @@ mget(struct magic_set *ms, const unsigned char *s, break; case FILE_INDIRECT: - if (file_printf(ms, m->desc) == -1) + if ((ms->flags & (MAGIC_MIME|MAGIC_APPLE)) == 0 && + file_printf(ms, m->desc) == -1) return -1; if (nbytes < offset) return 0; @@ -1858,25 +1871,19 @@ handle_annotation(struct magic_set *ms, struct magic *m) return -1; return 1; } - if (ms->flags & MAGIC_MIME) { - if ((ms->flags & MAGIC_MIME_TYPE) && m->mimetype[0]) { - ms->event_flags |= EVENT_WROTE_MIME_TYPE; - if (file_printf(ms, "%s", m->mimetype) == -1) - return -1; - } - /* If we want an encoding, let ascmagic find it. */ - if ((ms->flags & MAGIC_MIME_ENCODING)) - return 0; - /* If we didn't write a MIME type, and we want one, - allow ascmagic to run.*/ - return m->mimetype[0] != '\0'; + if ((ms->flags & MAGIC_MIME_TYPE) && m->mimetype[0]) { + if (file_printf(ms, "%s", m->mimetype) == -1) + return -1; + return 1; } - return -2; + return 0; } private int print_sep(struct magic_set *ms, int firstline) { + if (ms->flags & MAGIC_MIME) + return 0; if (firstline) return 0; /*