From: Christos Zoulas Date: Sat, 22 Jun 1996 22:04:22 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: FILE3_27~87 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=719cd1bf0b7b3d3869b99d6b775ce654eaaea912;p=file *** empty log message *** --- diff --git a/doc/file.man b/doc/file.man index a1d33202..32a1c391 100644 --- a/doc/file.man +++ b/doc/file.man @@ -1,5 +1,5 @@ .TH FILE __CSECTION__ "Copyright but distributable" -.\" $Id: file.man,v 1.26 1995/10/27 23:12:01 christos Exp $ +.\" $Id: file.man,v 1.27 1996/06/22 22:05:07 christos Exp $ .SH NAME file \- determine file type @@ -16,7 +16,10 @@ namefile ] magicfiles ] file ... .SH DESCRIPTION -.I File +This manual page documents version __VERSION__ of the +.B file +command. +.B File tests each argument in an attempt to classify it. There are three sets of tests, performed in this order: filesystem tests, magic number tests, and language tests. @@ -26,8 +29,11 @@ test that succeeds causes the file type to be printed. .PP The type printed will usually contain one of the words .B text -(the file contains only ASCII characters and is -probably safe to read on an ASCII terminal), +(the file contains only +.SM ASCII +characters and is probably safe to read on an +.SM ASCII +terminal), .B executable (the file contains the result of compiling a program in a form understandable to some \s-1UNIX\s0 kernel or another), @@ -46,7 +52,7 @@ Don't do as Berkeley did \- change ``shell commands text'' to ``shell script''. .PP The filesystem tests are based on examining the return from a -.IR stat (2) +.BR stat (2) system call. The program checks to see if the file is empty, or if it's some sort of special file. @@ -55,16 +61,16 @@ Any known file types appropriate to the system you are running on implement them) are intuited if they are defined in the system header file -.BR sys/stat.h . +.IR sys/stat.h . .PP The magic number tests are used to check for files with data in particular fixed formats. The canonical example of this is a binary executable (compiled program) -.B a.out +.I a.out file, whose format is defined in -.B a.out.h +.I a.out.h and possibly -.B exec.h +.I exec.h in the standard include directory. These files have a `magic number' stored in a particular place near the beginning of the file that tells the \s-1UNIX\s0 operating system @@ -78,21 +84,23 @@ The information in these files is read from the magic file If an argument appears to be an .SM ASCII file, -.I file +.B file attempts to guess its language. -The language tests look for particular strings (cf \fInames.h\fP) +The language tests look for particular strings (cf +.IR names.h ) that can appear anywhere in the first few blocks of a file. For example, the keyword .B .br -indicates that the file is most likely a troff input file, -just as the keyword +indicates that the file is most likely a +.BR troff (1) +input file, just as the keyword .B struct indicates a C program. These tests are less reliable than the previous two groups, so they are performed last. The language test routines also test for some miscellany (such as -.I tar +.BR tar (1) archives) and determine whether an unknown file should be labelled as `ascii text' or `data'. .SH OPTIONS @@ -125,7 +133,7 @@ to test the standard input, use ``-'' as a filename argument. .TP 8 .B \-L option causes symlinks to be followed, as the like-named option in -.IR ls (1). +.BR ls (1). (on systems that support symbolic links). .SH FILES .I __MAGIC__ @@ -135,10 +143,10 @@ The environment variable .B MAGIC can be used to set the default magic number files. .SH SEE ALSO -.IR magic (__FSECTION__) +.BR magic (__FSECTION__) \- description of magic file format. .br -.IR Strings (1), " od" (1) +.BR strings (1), " od" (1) \- tools for examining non-textfiles. .SH STANDARDS CONFORMANCE This program is believed to exceed the System V Interface Definition @@ -171,7 +179,7 @@ in an existing magic file would have to be changed to .br .PP SunOS releases 3.2 and later from Sun Microsystems include a -.IR file (1) +.BR file (1) command derived from the System V one, but with some extensions. My version differs from Sun's only in minor ways. It includes the extension of the `&' operator, used as, @@ -190,15 +198,15 @@ The order of entries in the magic file is significant. Depending on what system you are using, the order that they are put together may be incorrect. If your old -.I file +.B file command uses a magic file, keep the old magic file around for comparison purposes (rename it to .IR __MAGIC__.orig ). .SH HISTORY There has been a -.I file -command in every UNIX since at least Research Version 6 +.B file +command in every \s-1UNIX\s0 since at least Research Version 6 (man page dated January, 1975). The System V version introduced one significant major change: the external list of magic number types. @@ -227,7 +235,7 @@ put the ``old-style'' `&' operator back the way it was, because 1) Rob McMahon's change broke the previous style of usage, 2) the SunOS ``new-style'' `&' operator, which this version of -.I file +.B file supports, also handles `x&y op z', and 3) Rob's change wasn't documented in any case; .PP @@ -236,7 +244,7 @@ put in multiple levels of `>'; put in ``beshort'', ``leshort'', etc. keywords to look at numbers in the file in a specific byte order, rather than in the native byte order of the process running -.IR file . +.BR file . .RE .PP Changes by Ian Darwin and various authors including @@ -284,33 +292,42 @@ The files and .I is_tar.c were written by John Gilmore from his public-domain -.I tar +.B tar program, and are not covered by the above restrictions. .SH BUGS There must be a better way to automate the construction of the Magic file from all the glop in Magdir. What is it? Better yet, the magic file should be compiled into binary (say, -.IR ndbm (3) -or, better yet, fixed-length ASCII strings -for use in heterogenous network environments) for faster startup. +.BR ndbm (3) +or, better yet, fixed-length +.SM ASCII +strings for use in heterogenous network environments) for faster startup. Then the program would run as fast as the Version 7 program of the same name, with the flexibility of the System V version. .PP -.I File +.B File uses several algorithms that favor speed over accuracy, -thus it can be misled about the contents of ASCII files. +thus it can be misled about the contents of +.SM ASCII +files. .PP -The support for ASCII files (primarily for programming languages) +The support for +.SM ASCII +files (primarily for programming languages) is simplistic, inefficient and requires recompilation to update. .PP There should be an ``else'' clause to follow a series of continuation lines. .PP The magic file and keywords should have regular expression support. -Their use of ASCII TAB as a field delimiter is ugly and makes +Their use of +.SM "ASCII TAB" +as a field delimiter is ugly and makes it hard to edit the files, but is entrenched. .PP It might be advisable to allow upper-case letters in keywords -for e.g., troff commands vs man page macros. +for e.g., +.BR troff (1) +commands vs man page macros. Regular expression support would make this easy. .PP The program doesn't grok \s-2FORTRAN\s0. @@ -345,4 +362,4 @@ You can obtain the original author's latest version by anonymous FTP on .B tesla.ee.cornell.edu in the directory -.BR /pub/file-X.YY.tar.gz +.I /pub/file-X.YY.tar.gz diff --git a/doc/magic.man b/doc/magic.man index fb59feb6..c8e85c44 100644 --- a/doc/magic.man +++ b/doc/magic.man @@ -3,14 +3,17 @@ .SH NAME magic \- file command's magic number file .SH DESCRIPTION -The -.IR file (__CSECTION__) +This manual page documents the format of the magic file as +used by the +.BR file (__CSECTION__) +command, version __VERSION__. The +.B file command identifies the type of a file using, among other tests, a test for whether the file begins with a certain .IR "magic number" . The file -.B __MAGIC__ +.I __MAGIC__ specifies what magic numbers are to be tested for, what message to print if a particular magic number is found, and additional information to extract from the file. @@ -113,7 +116,7 @@ then presumably print that string, by doing .IP message The message to be printed if the comparison succeeds. If the string contains a -.IR printf (3S) +.BR printf (3S) format specification, the value from the file (with any specified masking performed) is printed using the message as the format string. .PP @@ -185,7 +188,7 @@ a system on which the lengths are invariant. There is (currently) no support for specified-endian data to be used in indirect offsets. .SH SEE ALSO -.IR file (__CSECTION__) +.BR file (__CSECTION__) \- the command that reads this file. .\" .\" From: guy@sun.uucp (Guy Harris) @@ -200,4 +203,4 @@ indirect offsets. .\" the changes I posted to the S5R2 version. .\" .\" Modified for Ian Darwin's version of the file command. -.\" @(#)$Id: magic.man,v 1.13 1996/06/22 22:04:22 christos Exp $ +.\" @(#)$Id: magic.man,v 1.14 1996/06/22 22:05:07 christos Exp $ diff --git a/src/Makefile b/src/Makefile index 65c329b4..ebfcd91e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,6 @@ # Makefile for file(1) cmd. # Copyright (c) Ian F. Darwin 86/09/01 - see LEGAL.NOTICE. -# @(#)$Id: Makefile,v 1.50 1996/06/22 22:04:22 christos Exp $ +# @(#)$Id: Makefile,v 1.51 1996/06/22 22:05:07 christos Exp $ # # This software is not subject to any license of the American Telephone # and Telegraph Company or of the Regents of the University of California. @@ -121,12 +121,14 @@ magic.${MANFEXT} : Makefile magic.man @rm -f $@ sed -e s@__CSECTION__@${MANCEXT}@g \ -e s@__FSECTION__@${MANFEXT}@g \ + -e s@__VERSION__@${VERSION}@g \ -e s@__MAGIC__@${MAGIC}@g magic.man > $@ file.${MANCEXT} : Makefile file.man @rm -f $@ sed -e s@__CSECTION__@${MANCEXT}@g \ -e s@__FSECTION__@${MANFEXT}@g \ + -e s@__VERSION__@${VERSION}@g \ -e s@__MAGIC__@${MAGIC}@g file.man > $@ send: dist diff --git a/src/apprentice.c b/src/apprentice.c index d637399d..6bdfffef 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -34,7 +34,7 @@ #ifndef lint static char *moduleid = - "@(#)$Id: apprentice.c,v 1.23 1995/10/27 23:12:01 christos Exp $"; + "@(#)$Id: apprentice.c,v 1.24 1996/06/22 22:04:22 christos Exp $"; #endif /* lint */ #define EATAB {while (isascii((unsigned char) *l) && \ @@ -210,6 +210,10 @@ int *ndx, check; ++l; /* step over */ m->flag |= INDIR; } + if (m->cont_level != 0 && *l == '&') { + ++l; /* step over */ + m->flag |= ADD; + } /* get offset, then skip over it */ m->offset = (int) strtoul(l,&t,0); diff --git a/src/file.c b/src/file.c index 0b617e16..37c63964 100644 --- a/src/file.c +++ b/src/file.c @@ -26,7 +26,7 @@ */ #ifndef lint static char *moduleid = - "@(#)$Id: file.c,v 1.34 1995/10/27 23:12:01 christos Exp $"; + "@(#)$Id: file.c,v 1.35 1996/06/22 22:04:22 christos Exp $"; #endif /* lint */ #include @@ -43,9 +43,8 @@ static char *moduleid = #endif #include /* for read() */ -#ifdef __ELF__ -#include -#endif +#include "readelf.h" +#include /* for byte swapping */ #include "patchlevel.h" #include "file.h" @@ -76,7 +75,9 @@ char *progname; /* used throughout */ int lineno; /* line number in the magic file */ -static void unwrap __P((char *fn)); +static void unwrap __P((char *fn)); +static int byteconv4 __P((int, int, int)); +static short byteconv2 __P((int, int, int)); /* * main - parse arguments and handle options @@ -180,18 +181,23 @@ char *fn; FILE *f; int wid = 0, cwid; - if ((f = fopen(fn, "r")) == NULL) { - error("Cannot open `%s' (%s).\n", fn, strerror(errno)); - /*NOTREACHED*/ - } + if (strcmp("-", fn) == 0) { + f = stdin; + wid = 1; + } else { + if ((f = fopen(fn, "r")) == NULL) { + error("Cannot open `%s' (%s).\n", fn, strerror(errno)); + /*NOTREACHED*/ + } - while (fgets(buf, MAXPATHLEN, f) != NULL) { - cwid = strlen(buf) - 1; - if (cwid > wid) - wid = cwid; - } + while (fgets(buf, MAXPATHLEN, f) != NULL) { + cwid = strlen(buf) - 1; + if (cwid > wid) + wid = cwid; + } - rewind(f); + rewind(f); + } while (fgets(buf, MAXPATHLEN, f) != NULL) { buf[strlen(buf)-1] = '\0'; @@ -202,6 +208,69 @@ char *fn; } +/* + * byteconv4 + * Input: + * from 4 byte quantity to convert + * same whether to perform byte swapping + * big_endian whether we are a big endian host + */ +static int +byteconv4(from, same, big_endian) + int from; + int same; + int big_endian; +{ + if (same) + return from; + else if (big_endian) /* lsb -> msb conversion on msb */ + { + union { + int i; + char c[4]; + } retval, tmpval; + + tmpval.i = from; + retval.c[0] = tmpval.c[3]; + retval.c[1] = tmpval.c[2]; + retval.c[2] = tmpval.c[1]; + retval.c[3] = tmpval.c[0]; + + return retval.i; + } + else + return ntohl(from); /* msb -> lsb conversion on lsb */ +} + +/* + * byteconv2 + * Same as byteconv4, but for shorts + */ +static short +byteconv2(from, same, big_endian) + int from; + int same; + int big_endian; +{ + if (same) + return from; + else if (big_endian) /* lsb -> msb conversion on msb */ + { + union { + short s; + char c[2]; + } retval, tmpval; + + tmpval.s = (short) from; + retval.c[0] = tmpval.c[1]; + retval.c[1] = tmpval.c[0]; + + return retval.s; + } + else + return ntohs(from); /* msb -> lsb conversion on lsb */ +} + /* * process - process input file */ @@ -265,14 +334,12 @@ int wid; buf[nbytes++] = '\0'; /* null-terminate it */ match = tryit(buf, nbytes, zflag); } -#ifdef __ELF__ + /* * ELF executables have multiple section headers in arbitrary * file locations and thus file(1) cannot determine it from easily. * Instead we traverse thru all section headers until a symbol table * one is found or else the binary is stripped. - * XXX: This will not work for binaries of a different byteorder. - * Should come up with a better fix. */ if (match == 's' && nbytes > sizeof (Elf32_Ehdr) && @@ -282,36 +349,45 @@ int wid; buf[EI_MAG3] == ELFMAG3) { union { - long l; - char c[sizeof (long)]; + int l; + char c[sizeof (int)]; } u; + Elf32_Ehdr elfhdr; int stripped = 1; + int be,same; + short shnum; u.l = 1; (void) memcpy(&elfhdr, buf, sizeof elfhdr); /* * If the system byteorder does not equal the object byteorder - * then don't test. + * then need to do byte-swapping. */ - if ((u.c[sizeof(long) - 1] + 1) == elfhdr.e_ident[5]) { - if (lseek(fd, elfhdr.e_shoff, SEEK_SET)<0) + be = u.c[sizeof(int) - 1] == 1; /* are we big endian? */ + same = (u.c[sizeof(int) - 1] + 1) == elfhdr.e_ident[5]; + /* are we the same endianness? */; + + if (lseek(fd, byteconv4(elfhdr.e_shoff,same,be), SEEK_SET)<0) error("lseek failed (%s).\n", strerror(errno)); - for ( ; elfhdr.e_shnum ; elfhdr.e_shnum--) { - if (read(fd, buf, elfhdr.e_shentsize)<0) - error("read failed (%s).\n", strerror(errno)); - if (((Elf32_Shdr *)&buf)->sh_type == SHT_SYMTAB) { - stripped = 0; - break; - } + for (shnum = byteconv2(elfhdr.e_shnum,same,be); + shnum; shnum--) { + if (read(fd, buf, + byteconv2(elfhdr.e_shentsize,same,be))<0) + error("read failed (%s).\n", strerror(errno)); + if (byteconv4(((Elf32_Shdr *)buf)->sh_type,same,be) + == SHT_SYMTAB) { + stripped = 0; + break; } - if (stripped) - (void) printf (", stripped"); } + if (stripped) + (void) printf (", stripped"); + else + (void) printf (", not stripped"); } -#endif if (inname != stdname) { /* diff --git a/src/file.h b/src/file.h index c0bbf0e0..3c4363cc 100644 --- a/src/file.h +++ b/src/file.h @@ -1,6 +1,6 @@ /* * file.h - definitions for file(1) program - * @(#)$Id: file.h,v 1.22 1995/05/20 22:09:21 christos Exp $ + * @(#)$Id: file.h,v 1.23 1996/06/22 22:04:22 christos Exp $ * * Copyright (c) Ian F. Darwin, 1987. * Written by Ian F. Darwin. @@ -37,6 +37,7 @@ struct magic { short flag; #define INDIR 1 /* if '>(...)' appears, */ #define UNSIGNED 2 /* comparison is unsigned */ +#define ADD 4 /* if '>&' appears, */ short cont_level; /* level of ">" */ struct { char type; /* byte short long */ diff --git a/src/softmagic.c b/src/softmagic.c index 76fd6d81..4a8afa7f 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -34,14 +34,14 @@ #ifndef lint static char *moduleid = - "@(#)$Id: softmagic.c,v 1.30 1995/05/20 22:09:21 christos Exp $"; + "@(#)$Id: softmagic.c,v 1.31 1996/06/22 22:04:22 christos Exp $"; #endif /* lint */ static int match __P((unsigned char *, int)); static int mget __P((union VALUETYPE *, unsigned char *, struct magic *, int)); static int mcheck __P((union VALUETYPE *, struct magic *)); -static void mprint __P((union VALUETYPE *, struct magic *)); +static long mprint __P((union VALUETYPE *, struct magic *)); static void mdebug __P((long, char *, int)); static int mconvert __P((union VALUETYPE *, struct magic *)); @@ -98,6 +98,13 @@ int nbytes; int cont_level = 0; int need_separator = 0; union VALUETYPE p; + static long *tmpoff = NULL; + static size_t tmplen = 0; + long oldoff = 0; + + if (tmpoff == NULL) + if ((tmpoff = (long *) malloc(tmplen = 20)) == NULL) + error("out of memory\n"); for (magindex = 0; magindex < nmagic; magindex++) { /* if main entry matches, print it... */ @@ -113,7 +120,7 @@ int nbytes; continue; } - mprint(&p, &magic[magindex]); + tmpoff[cont_level] = mprint(&p, &magic[magindex]); /* * If we printed something, we'll need to print * a blank before we print something else. @@ -121,7 +128,10 @@ int nbytes; if (magic[magindex].desc[0]) need_separator = 1; /* and any continuations that match */ - cont_level++; + if (++cont_level >= tmplen) + if ((tmpoff = (long *) realloc(tmpoff, + tmplen += 20)) == NULL) + error("out of memory\n"); while (magic[magindex+1].cont_level != 0 && ++magindex < nmagic) { if (cont_level >= magic[magindex].cont_level) { @@ -132,6 +142,10 @@ int nbytes; */ cont_level = magic[magindex].cont_level; } + if (magic[magindex].flag & ADD) { + oldoff=magic[magindex].offset; + magic[magindex].offset += tmpoff[cont_level-1]; + } if (mget(&p, s, &magic[magindex], nbytes) && mcheck(&p, &magic[magindex])) { /* @@ -149,7 +163,7 @@ int nbytes; (void) putchar(' '); need_separator = 0; } - mprint(&p, &magic[magindex]); + tmpoff[cont_level] = mprint(&p, &magic[magindex]); if (magic[magindex].desc[0]) need_separator = 1; @@ -158,7 +172,14 @@ int nbytes; * at a higher level, * process them. */ - cont_level++; + if (++cont_level >= tmplen) + if ((tmpoff = + (long *) realloc(tmpoff, + tmplen += 20)) == NULL) + error("out of memory\n"); + } + if (magic[magindex].flag & ADD) { + magic[magindex].offset = oldoff; } } } @@ -167,13 +188,14 @@ int nbytes; return 0; /* no match at all */ } -static void +static long mprint(p, m) union VALUETYPE *p; struct magic *m; { char *pp, *rt; unsigned long v; + long t=0 ; switch (m->type) { @@ -181,6 +203,7 @@ struct magic *m; v = p->b; v = signextend(m, v) & m->mask; (void) printf(m->desc, (unsigned char) v); + t = m->offset + sizeof(char); break; case SHORT: @@ -189,6 +212,7 @@ struct magic *m; v = p->h; v = signextend(m, v) & m->mask; (void) printf(m->desc, (unsigned short) v); + t = m->offset + sizeof(short); break; case LONG: @@ -197,16 +221,19 @@ struct magic *m; v = p->l; v = signextend(m, v) & m->mask; (void) printf(m->desc, (unsigned long) v); + t = m->offset + sizeof(long); break; case STRING: if (m->reln == '=') { (void) printf(m->desc, m->value.s); + t = m->offset + strlen(m->value.s); } else { (void) printf(m->desc, p->s); + t = m->offset + strlen(p->s); } - return; + break; case DATE: case BEDATE: @@ -215,11 +242,13 @@ struct magic *m; if ((rt = strchr(pp, '\n')) != NULL) *rt = '\0'; (void) printf(m->desc, pp); + t = m->offset + sizeof(time_t); return; default: error("invalid m->type (%d) in mprint().\n", m->type); /*NOTREACHED*/ } + return(t); } /* @@ -230,8 +259,6 @@ mconvert(p, m) union VALUETYPE *p; struct magic *m; { - char *rt; - switch (m->type) { case BYTE: case SHORT: @@ -239,11 +266,16 @@ struct magic *m; case DATE: return 1; case STRING: - /* Null terminate and eat the return */ - p->s[sizeof(p->s) - 1] = '\0'; - if ((rt = strchr(p->s, '\n')) != NULL) - *rt = '\0'; - return 1; + { + size_t len; + + /* Null terminate and eat the return */ + p->s[sizeof(p->s) - 1] = '\0'; + len = strlen(p->s); + if (len > 0 && p->s[len - 1] == '\n') + p->s[len - 1] = '\0'; + return 1; + } case BESHORT: p->h = (short)((p->hs[0]<<8)|(p->hs[1])); return 1;