From d04de269e0b06ccd0a7d1bf4974fed1d75be7d9e Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Thu, 31 Mar 2016 17:51:12 +0000 Subject: [PATCH] Allow altering how many bytes to read from analyzed files from the command line. Leave the default the same (1M). --- ChangeLog | 4 ++++ doc/file.man | 3 ++- doc/libmagic.man | 3 ++- src/apprentice.c | 5 +++-- src/ascmagic.c | 6 +++--- src/compress.c | 41 +++++++++++++++++++++-------------------- src/file.c | 3 ++- src/file.h | 7 ++++--- src/magic.c | 18 ++++++++++++------ 9 files changed, 53 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index 75730b1d..76678196 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2016-03-31 13:50 Christos Zoulas + + * make the number of bytes read from files configurable. + 2016-03-21 13:40 Christos Zoulas * Add bounds checks for DER code (discovered by Thomas Jarosch) diff --git a/doc/file.man b/doc/file.man index 8203ceb6..75292b70 100644 --- a/doc/file.man +++ b/doc/file.man @@ -1,4 +1,4 @@ -.\" $File: file.man,v 1.118 2015/09/11 17:24:09 christos Exp $ +.\" $File: file.man,v 1.119 2016/03/14 02:39:46 christos Exp $ .Dd March 13, 2016 .Dt FILE __CSECTION__ .Os @@ -317,6 +317,7 @@ Set various parameter limits. .It Li elf_phnum Ta 128 Ta max ELF program sections processed .It Li elf_shnum Ta 32768 Ta max ELF sections processed .It Li regex Ta 8192 Ta length limit for regex searches +.It Li bytes Ta 1048576 Ta max number of bytes to read from file .El .It Fl r , Fl Fl raw Don't translate unprintable characters to \eooo. diff --git a/doc/libmagic.man b/doc/libmagic.man index 228b345c..638890af 100644 --- a/doc/libmagic.man +++ b/doc/libmagic.man @@ -1,4 +1,4 @@ -.\" $File: libmagic.man,v 1.38 2015/09/11 17:24:09 christos Exp $ +.\" $File: libmagic.man,v 1.39 2015/10/07 02:36:50 christos Exp $ .\" .\" Copyright (c) Christos Zoulas 2003. .\" All Rights Reserved. @@ -292,6 +292,7 @@ library. .It Li MAGIC_PARAM_ELF_PHNUM_MAX Ta size_t Ta 128 .It Li MAGIC_PARAM_ELF_SHNUM_MAX Ta size_t Ta 32768 .It Li MAGIC_PARAM_REGEX_MAX Ta size_t Ta 8192 +.It Li MAGIC_PARAM_BYTES_MAX Ta size_t Ta 1048576 .El .Pp The diff --git a/src/apprentice.c b/src/apprentice.c index ce6234a0..86eb8d80 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.246 2016/01/19 15:09:21 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.247 2016/01/19 15:18:02 christos Exp $") #endif /* lint */ #include "magic.h" @@ -534,6 +534,7 @@ file_ms_alloc(int flags) ms->elf_phnum_max = FILE_ELF_PHNUM_MAX; ms->elf_notes_max = FILE_ELF_NOTES_MAX; ms->regex_max = FILE_REGEX_MAX; + ms->bytes_max = FILE_BYTES_MAX; return ms; free: free(ms); @@ -2115,7 +2116,7 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line, /* * TODO finish this macro and start using it! - * #define offsetcheck {if (offset > HOWMANY-1) + * #define offsetcheck {if (offset > ms->bytes_max -1) * magwarn("offset too big"); } */ diff --git a/src/ascmagic.c b/src/ascmagic.c index 4a53ed04..4b379fb7 100644 --- a/src/ascmagic.c +++ b/src/ascmagic.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.92 2015/04/09 20:01:41 christos Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.93 2016/03/21 15:56:53 christos Exp $") #endif /* lint */ #include "magic.h" @@ -183,10 +183,10 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf, } /* Beware, if the data has been truncated, the final CR could have - been followed by a LF. If we have HOWMANY bytes, it indicates + been followed by a LF. If we have ms->bytes_max bytes, it indicates that the data might have been truncated, probably even before this function was called. */ - if (seen_cr && nbytes < HOWMANY) + if (seen_cr && nbytes < ms->bytes_max) n_cr++; if (strcmp(type, "binary") == 0) { diff --git a/src/compress.c b/src/compress.c index bf9897be..f3224451 100644 --- a/src/compress.c +++ b/src/compress.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: compress.c,v 1.91 2015/11/13 15:42:18 christos Exp $") +FILE_RCSID("@(#)$File: compress.c,v 1.92 2016/02/08 01:29:49 christos Exp $") #endif #include "magic.h" @@ -167,12 +167,12 @@ private const struct { private ssize_t swrite(int, const void *, size_t); #if HAVE_FORK private size_t ncompr = sizeof(compr) / sizeof(compr[0]); -private int uncompressbuf(int, size_t, const unsigned char *, unsigned char **, - size_t *); +private int uncompressbuf(int, size_t, size_t, const unsigned char *, + unsigned char **, size_t *); #ifdef BUILTIN_DECOMPRESS -private int uncompresszlib(const unsigned char *, unsigned char **, size_t *, - int); -private int uncompressgzipped(const unsigned char *, unsigned char **, +private int uncompresszlib(const unsigned char *, unsigned char **, size_t, + size_t *, int); +private int uncompressgzipped(const unsigned char *, unsigned char **, size_t, size_t *); #endif static int makeerror(unsigned char **, size_t *, const char *, ...) @@ -214,7 +214,7 @@ file_zmagic(struct magic_set *ms, int fd, const char *name, if (!zm) continue; nsz = nbytes; - rv = uncompressbuf(fd, i, buf, &newbuf, &nsz); + rv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz); DPRINTF("uncompressbuf = %d, %s, %zu\n", rv, (char *)newbuf, nsz); switch (rv) { @@ -439,7 +439,8 @@ file_pipe2file(struct magic_set *ms, int fd, const void *startbuf, private int -uncompressgzipped(const unsigned char *old, unsigned char **newch, size_t *n) +uncompressgzipped(const unsigned char *old, unsigned char **newch, + size_t bytes_max, size_t *n) { unsigned char flg = old[3]; size_t data_start = 10; @@ -467,25 +468,25 @@ uncompressgzipped(const unsigned char *old, unsigned char **newch, size_t *n) *n -= data_start; old += data_start; - return uncompresszlib(old, newch, n, 0); + return uncompresszlib(old, newch, bytes_max, n, 0); err: return makeerror(newch, n, "File too short"); } private int -uncompresszlib(const unsigned char *old, unsigned char **newch, size_t *n, - int zlib) +uncompresszlib(const unsigned char *old, unsigned char **newch, + size_t bytes_max, size_t *n, int zlib) { int rc; z_stream z; - if ((*newch = CAST(unsigned char *, malloc(HOWMANY + 1))) == NULL) + if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) return makeerror(newch, n, "No buffer, %s", strerror(errno)); z.next_in = CCAST(Bytef *, old); z.avail_in = CAST(uint32_t, *n); z.next_out = *newch; - z.avail_out = HOWMANY; + z.avail_out = bytes_max; z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL; @@ -509,7 +510,7 @@ uncompresszlib(const unsigned char *old, unsigned char **newch, size_t *n, return OKDATA; err: - strlcpy((char *)*newch, z.msg, HOWMANY); + strlcpy((char *)*newch, z.msg, bytes_max); *n = strlen((char *)*newch); return ERRDATA; } @@ -640,7 +641,7 @@ methodname(size_t method) } private int -uncompressbuf(int fd, size_t method, const unsigned char *old, +uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old, unsigned char **newch, size_t* n) { int fdp[3][2]; @@ -651,9 +652,9 @@ uncompressbuf(int fd, size_t method, const unsigned char *old, #ifdef BUILTIN_DECOMPRESS /* FIXME: This doesn't cope with bzip2 */ if (method == 2) - return uncompressgzipped(old, newch, n); + return uncompressgzipped(old, newch, bytes_max, n); if (compr[method].maglen == 0) - return uncompresszlib(old, newch, n, 1); + return uncompresszlib(old, newch, bytes_max, n, 1); #endif (void)fflush(stdout); (void)fflush(stderr); @@ -696,21 +697,21 @@ uncompressbuf(int fd, size_t method, const unsigned char *old, if (fd == -1) writechild(fdp, old, *n); - *newch = CAST(unsigned char *, malloc(HOWMANY + 1)); + *newch = CAST(unsigned char *, malloc(bytes_max + 1)); if (*newch == NULL) { rv = makeerror(newch, n, "No buffer, %s", strerror(errno)); goto err; } rv = OKDATA; - if ((r = sread(fdp[STDOUT_FILENO][0], *newch, HOWMANY, 0)) > 0) + if ((r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0)) > 0) break; DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0], r != -1 ? strerror(errno) : "no data"); rv = ERRDATA; if (r == 0 && - (r = sread(fdp[STDERR_FILENO][0], *newch, HOWMANY, 0)) > 0) + (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) { r = filter_error(*newch, r); break; diff --git a/src/file.c b/src/file.c index 84632950..f7aa6efe 100644 --- a/src/file.c +++ b/src/file.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.168 2015/09/30 14:02:06 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.169 2016/03/14 02:30:22 christos Exp $") #endif /* lint */ #include "magic.h" @@ -132,6 +132,7 @@ private struct { { "elf_shnum", MAGIC_PARAM_ELF_SHNUM_MAX, 0 }, { "elf_notes", MAGIC_PARAM_ELF_NOTES_MAX, 0 }, { "regex", MAGIC_PARAM_REGEX_MAX, 0 }, + { "bytes", MAGIC_PARAM_BYTES_MAX, 0 }, }; private char *progname; /* used throughout */ diff --git a/src/file.h b/src/file.h index 96bba007..c54d39e9 100644 --- a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.176 2016/02/10 15:57:40 christos Exp $ + * @(#)$File: file.h,v 1.177 2016/03/21 15:56:53 christos Exp $ */ #ifndef __file_h__ @@ -127,8 +127,8 @@ #define MAX(a,b) (((a) > (b)) ? (a) : (b)) #endif -#ifndef HOWMANY -# define HOWMANY (1024 * 1024) /* how much of the file to look at */ +#ifndef FILE_BYTES_MAX +# define FILE_BYTES_MAX (1024 * 1024) /* how much of the file to look at */ #endif #define MAXMAGIS 8192 /* max entries in any one magic file or directory */ @@ -419,6 +419,7 @@ struct magic_set { uint16_t elf_phnum_max; uint16_t elf_notes_max; uint16_t regex_max; + size_t bytes_max; /* number of bytes to read from file */ #define FILE_INDIR_MAX 50 #define FILE_NAME_MAX 30 #define FILE_ELF_SHNUM_MAX 32768 diff --git a/src/magic.c b/src/magic.c index c3ceb50b..46e46784 100644 --- a/src/magic.c +++ b/src/magic.c @@ -33,7 +33,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: magic.c,v 1.95 2015/09/11 17:24:09 christos Exp $") +FILE_RCSID("@(#)$File: magic.c,v 1.96 2016/02/08 02:20:36 christos Exp $") #endif /* lint */ #include "magic.h" @@ -417,7 +417,7 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd) * some overlapping space for matches near EOF */ #define SLOP (1 + sizeof(union VALUETYPE)) - if ((buf = CAST(unsigned char *, malloc(HOWMANY + SLOP))) == NULL) + if ((buf = CAST(unsigned char *, malloc(ms->bytes_max + SLOP))) == NULL) return NULL; switch (file_fsmagic(ms, inname, &sb)) { @@ -481,13 +481,13 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd) } /* - * try looking at the first HOWMANY bytes + * try looking at the first ms->bytes_max bytes */ if (ispipe) { ssize_t r = 0; while ((r = sread(fd, (void *)&buf[nbytes], - (size_t)(HOWMANY - nbytes), 1)) > 0) { + (size_t)(ms->bytes_max - nbytes), 1)) > 0) { nbytes += r; if (r < PIPE_BUF) break; } @@ -503,10 +503,10 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd) } else { /* Windows refuses to read from a big console buffer. */ size_t howmany = -#if defined(WIN32) && HOWMANY > 8 * 1024 +#if defined(WIN32) _isatty(fd) ? 8 * 1024 : #endif - HOWMANY; + ms->bytes_max; if ((nbytes = read(fd, (char *)buf, howmany)) == -1) { if (inname == NULL && fd != STDIN_FILENO) file_error(ms, errno, "cannot read fd %d", fd); @@ -606,6 +606,9 @@ magic_setparam(struct magic_set *ms, int param, const void *val) case MAGIC_PARAM_REGEX_MAX: ms->elf_notes_max = (uint16_t)*(const size_t *)val; return 0; + case MAGIC_PARAM_BYTES_MAX: + ms->bytes_max = *(const size_t *)val; + return 0; default: errno = EINVAL; return -1; @@ -634,6 +637,9 @@ magic_getparam(struct magic_set *ms, int param, void *val) case MAGIC_PARAM_REGEX_MAX: *(size_t *)val = ms->regex_max; return 0; + case MAGIC_PARAM_BYTES_MAX: + *(size_t *)val = ms->bytes_max; + return 0; default: errno = EINVAL; return -1; -- 2.40.0