.TH FILE __CSECTION__ "Copyright but distributable"
-.\" $Id: file.man,v 1.26 1995/10/27 23:12:01 christos Exp $
+.\" $Id: file.man,v 1.27 1996/06/22 22:05:07 christos Exp $
.SH NAME
file
\- determine file type
magicfiles ]
file ...
.SH DESCRIPTION
-.I File
+This manual page documents version __VERSION__ of the
+.B file
+command.
+.B File
tests each argument in an attempt to classify it.
There are three sets of tests, performed in this order:
filesystem tests, magic number tests, and language tests.
.PP
The type printed will usually contain one of the words
.B text
-(the file contains only ASCII characters and is
-probably safe to read on an ASCII terminal),
+(the file contains only
+.SM ASCII
+characters and is probably safe to read on an
+.SM ASCII
+terminal),
.B executable
(the file contains the result of compiling a program
in a form understandable to some \s-1UNIX\s0 kernel or another),
to ``shell script''.
.PP
The filesystem tests are based on examining the return from a
-.IR stat (2)
+.BR stat (2)
system call.
The program checks to see if the file is empty,
or if it's some sort of special file.
implement them)
are intuited if they are defined in
the system header file
-.BR sys/stat.h .
+.IR sys/stat.h .
.PP
The magic number tests are used to check for files with data in
particular fixed formats.
The canonical example of this is a binary executable (compiled program)
-.B a.out
+.I a.out
file, whose format is defined in
-.B a.out.h
+.I a.out.h
and possibly
-.B exec.h
+.I exec.h
in the standard include directory.
These files have a `magic number' stored in a particular place
near the beginning of the file that tells the \s-1UNIX\s0 operating system
If an argument appears to be an
.SM ASCII
file,
-.I file
+.B file
attempts to guess its language.
-The language tests look for particular strings (cf \fInames.h\fP)
+The language tests look for particular strings (cf
+.IR names.h )
that can appear anywhere in the first few blocks of a file.
For example, the keyword
.B .br
-indicates that the file is most likely a troff input file,
-just as the keyword
+indicates that the file is most likely a
+.BR troff (1)
+input file, just as the keyword
.B struct
indicates a C program.
These tests are less reliable than the previous
two groups, so they are performed last.
The language test routines also test for some miscellany
(such as
-.I tar
+.BR tar (1)
archives) and determine whether an unknown file should be
labelled as `ascii text' or `data'.
.SH OPTIONS
.TP 8
.B \-L
option causes symlinks to be followed, as the like-named option in
-.IR ls (1).
+.BR ls (1).
(on systems that support symbolic links).
.SH FILES
.I __MAGIC__
.B MAGIC
can be used to set the default magic number files.
.SH SEE ALSO
-.IR magic (__FSECTION__)
+.BR magic (__FSECTION__)
\- description of magic file format.
.br
-.IR Strings (1), " od" (1)
+.BR strings (1), " od" (1)
\- tools for examining non-textfiles.
.SH STANDARDS CONFORMANCE
This program is believed to exceed the System V Interface Definition
.br
.PP
SunOS releases 3.2 and later from Sun Microsystems include a
-.IR file (1)
+.BR file (1)
command derived from the System V one, but with some extensions.
My version differs from Sun's only in minor ways.
It includes the extension of the `&' operator, used as,
Depending on what system you are using, the order that
they are put together may be incorrect.
If your old
-.I file
+.B file
command uses a magic file,
keep the old magic file around for comparison purposes
(rename it to
.IR __MAGIC__.orig ).
.SH HISTORY
There has been a
-.I file
-command in every UNIX since at least Research Version 6
+.B file
+command in every \s-1UNIX\s0 since at least Research Version 6
(man page dated January, 1975).
The System V version introduced one significant major change:
the external list of magic number types.
operator back the way it was, because 1) Rob McMahon's change broke the
previous style of usage, 2) the SunOS ``new-style'' `&' operator,
which this version of
-.I file
+.B file
supports, also handles `x&y op z', and 3) Rob's change wasn't documented
in any case;
.PP
put in ``beshort'', ``leshort'', etc. keywords to look at numbers in the
file in a specific byte order, rather than in the native byte order of
the process running
-.IR file .
+.BR file .
.RE
.PP
Changes by Ian Darwin and various authors including
and
.I is_tar.c
were written by John Gilmore from his public-domain
-.I tar
+.B tar
program, and are not covered by the above restrictions.
.SH BUGS
There must be a better way to automate the construction of the Magic
file from all the glop in Magdir. What is it?
Better yet, the magic file should be compiled into binary (say,
-.IR ndbm (3)
-or, better yet, fixed-length ASCII strings
-for use in heterogenous network environments) for faster startup.
+.BR ndbm (3)
+or, better yet, fixed-length
+.SM ASCII
+strings for use in heterogenous network environments) for faster startup.
Then the program would run as fast as the Version 7 program of the same name,
with the flexibility of the System V version.
.PP
-.I File
+.B File
uses several algorithms that favor speed over accuracy,
-thus it can be misled about the contents of ASCII files.
+thus it can be misled about the contents of
+.SM ASCII
+files.
.PP
-The support for ASCII files (primarily for programming languages)
+The support for
+.SM ASCII
+files (primarily for programming languages)
is simplistic, inefficient and requires recompilation to update.
.PP
There should be an ``else'' clause to follow a series of continuation lines.
.PP
The magic file and keywords should have regular expression support.
-Their use of ASCII TAB as a field delimiter is ugly and makes
+Their use of
+.SM "ASCII TAB"
+as a field delimiter is ugly and makes
it hard to edit the files, but is entrenched.
.PP
It might be advisable to allow upper-case letters in keywords
-for e.g., troff commands vs man page macros.
+for e.g.,
+.BR troff (1)
+commands vs man page macros.
Regular expression support would make this easy.
.PP
The program doesn't grok \s-2FORTRAN\s0.
on
.B tesla.ee.cornell.edu
in the directory
-.BR /pub/file-X.YY.tar.gz
+.I /pub/file-X.YY.tar.gz
.SH NAME
magic \- file command's magic number file
.SH DESCRIPTION
-The
-.IR file (__CSECTION__)
+This manual page documents the format of the magic file as
+used by the
+.BR file (__CSECTION__)
+command, version __VERSION__. The
+.B file
command identifies the type of a file using,
among other tests,
a test for whether the file begins with a certain
.IR "magic number" .
The file
-.B __MAGIC__
+.I __MAGIC__
specifies what magic numbers are to be tested for,
what message to print if a particular magic number is found,
and additional information to extract from the file.
.IP message
The message to be printed if the comparison succeeds. If the string
contains a
-.IR printf (3S)
+.BR printf (3S)
format specification, the value from the file (with any specified masking
performed) is printed using the message as the format string.
.PP
There is (currently) no support for specified-endian data to be used in
indirect offsets.
.SH SEE ALSO
-.IR file (__CSECTION__)
+.BR file (__CSECTION__)
\- the command that reads this file.
.\"
.\" From: guy@sun.uucp (Guy Harris)
.\" the changes I posted to the S5R2 version.
.\"
.\" Modified for Ian Darwin's version of the file command.
-.\" @(#)$Id: magic.man,v 1.13 1996/06/22 22:04:22 christos Exp $
+.\" @(#)$Id: magic.man,v 1.14 1996/06/22 22:05:07 christos Exp $
# Makefile for file(1) cmd.
# Copyright (c) Ian F. Darwin 86/09/01 - see LEGAL.NOTICE.
-# @(#)$Id: Makefile,v 1.50 1996/06/22 22:04:22 christos Exp $
+# @(#)$Id: Makefile,v 1.51 1996/06/22 22:05:07 christos Exp $
#
# This software is not subject to any license of the American Telephone
# and Telegraph Company or of the Regents of the University of California.
@rm -f $@
sed -e s@__CSECTION__@${MANCEXT}@g \
-e s@__FSECTION__@${MANFEXT}@g \
+ -e s@__VERSION__@${VERSION}@g \
-e s@__MAGIC__@${MAGIC}@g magic.man > $@
file.${MANCEXT} : Makefile file.man
@rm -f $@
sed -e s@__CSECTION__@${MANCEXT}@g \
-e s@__FSECTION__@${MANFEXT}@g \
+ -e s@__VERSION__@${VERSION}@g \
-e s@__MAGIC__@${MAGIC}@g file.man > $@
send: dist
#ifndef lint
static char *moduleid =
- "@(#)$Id: apprentice.c,v 1.23 1995/10/27 23:12:01 christos Exp $";
+ "@(#)$Id: apprentice.c,v 1.24 1996/06/22 22:04:22 christos Exp $";
#endif /* lint */
#define EATAB {while (isascii((unsigned char) *l) && \
++l; /* step over */
m->flag |= INDIR;
}
+ if (m->cont_level != 0 && *l == '&') {
+ ++l; /* step over */
+ m->flag |= ADD;
+ }
/* get offset, then skip over it */
m->offset = (int) strtoul(l,&t,0);
*/
#ifndef lint
static char *moduleid =
- "@(#)$Id: file.c,v 1.34 1995/10/27 23:12:01 christos Exp $";
+ "@(#)$Id: file.c,v 1.35 1996/06/22 22:04:22 christos Exp $";
#endif /* lint */
#include <stdio.h>
#endif
#include <unistd.h> /* for read() */
-#ifdef __ELF__
-#include <elf.h>
-#endif
+#include "readelf.h"
+#include <netinet/in.h> /* for byte swapping */
#include "patchlevel.h"
#include "file.h"
int lineno; /* line number in the magic file */
-static void unwrap __P((char *fn));
+static void unwrap __P((char *fn));
+static int byteconv4 __P((int, int, int));
+static short byteconv2 __P((int, int, int));
/*
* main - parse arguments and handle options
FILE *f;
int wid = 0, cwid;
- if ((f = fopen(fn, "r")) == NULL) {
- error("Cannot open `%s' (%s).\n", fn, strerror(errno));
- /*NOTREACHED*/
- }
+ if (strcmp("-", fn) == 0) {
+ f = stdin;
+ wid = 1;
+ } else {
+ if ((f = fopen(fn, "r")) == NULL) {
+ error("Cannot open `%s' (%s).\n", fn, strerror(errno));
+ /*NOTREACHED*/
+ }
- while (fgets(buf, MAXPATHLEN, f) != NULL) {
- cwid = strlen(buf) - 1;
- if (cwid > wid)
- wid = cwid;
- }
+ while (fgets(buf, MAXPATHLEN, f) != NULL) {
+ cwid = strlen(buf) - 1;
+ if (cwid > wid)
+ wid = cwid;
+ }
- rewind(f);
+ rewind(f);
+ }
while (fgets(buf, MAXPATHLEN, f) != NULL) {
buf[strlen(buf)-1] = '\0';
}
+/*
+ * byteconv4
+ * Input:
+ * from 4 byte quantity to convert
+ * same whether to perform byte swapping
+ * big_endian whether we are a big endian host
+ */
+static int
+byteconv4(from, same, big_endian)
+ int from;
+ int same;
+ int big_endian;
+{
+ if (same)
+ return from;
+ else if (big_endian) /* lsb -> msb conversion on msb */
+ {
+ union {
+ int i;
+ char c[4];
+ } retval, tmpval;
+
+ tmpval.i = from;
+ retval.c[0] = tmpval.c[3];
+ retval.c[1] = tmpval.c[2];
+ retval.c[2] = tmpval.c[1];
+ retval.c[3] = tmpval.c[0];
+
+ return retval.i;
+ }
+ else
+ return ntohl(from); /* msb -> lsb conversion on lsb */
+}
+
+/*
+ * byteconv2
+ * Same as byteconv4, but for shorts
+ */
+static short
+byteconv2(from, same, big_endian)
+ int from;
+ int same;
+ int big_endian;
+{
+ if (same)
+ return from;
+ else if (big_endian) /* lsb -> msb conversion on msb */
+ {
+ union {
+ short s;
+ char c[2];
+ } retval, tmpval;
+
+ tmpval.s = (short) from;
+ retval.c[0] = tmpval.c[1];
+ retval.c[1] = tmpval.c[0];
+
+ return retval.s;
+ }
+ else
+ return ntohs(from); /* msb -> lsb conversion on lsb */
+}
+
/*
* process - process input file
*/
buf[nbytes++] = '\0'; /* null-terminate it */
match = tryit(buf, nbytes, zflag);
}
-#ifdef __ELF__
+
/*
* ELF executables have multiple section headers in arbitrary
* file locations and thus file(1) cannot determine it from easily.
* Instead we traverse thru all section headers until a symbol table
* one is found or else the binary is stripped.
- * XXX: This will not work for binaries of a different byteorder.
- * Should come up with a better fix.
*/
if (match == 's' && nbytes > sizeof (Elf32_Ehdr) &&
buf[EI_MAG3] == ELFMAG3) {
union {
- long l;
- char c[sizeof (long)];
+ int l;
+ char c[sizeof (int)];
} u;
+
Elf32_Ehdr elfhdr;
int stripped = 1;
+ int be,same;
+ short shnum;
u.l = 1;
(void) memcpy(&elfhdr, buf, sizeof elfhdr);
/*
* If the system byteorder does not equal the object byteorder
- * then don't test.
+ * then need to do byte-swapping.
*/
- if ((u.c[sizeof(long) - 1] + 1) == elfhdr.e_ident[5]) {
- if (lseek(fd, elfhdr.e_shoff, SEEK_SET)<0)
+ be = u.c[sizeof(int) - 1] == 1; /* are we big endian? */
+ same = (u.c[sizeof(int) - 1] + 1) == elfhdr.e_ident[5];
+ /* are we the same endianness? */;
+
+ if (lseek(fd, byteconv4(elfhdr.e_shoff,same,be), SEEK_SET)<0)
error("lseek failed (%s).\n", strerror(errno));
- for ( ; elfhdr.e_shnum ; elfhdr.e_shnum--) {
- if (read(fd, buf, elfhdr.e_shentsize)<0)
- error("read failed (%s).\n", strerror(errno));
- if (((Elf32_Shdr *)&buf)->sh_type == SHT_SYMTAB) {
- stripped = 0;
- break;
- }
+ for (shnum = byteconv2(elfhdr.e_shnum,same,be);
+ shnum; shnum--) {
+ if (read(fd, buf,
+ byteconv2(elfhdr.e_shentsize,same,be))<0)
+ error("read failed (%s).\n", strerror(errno));
+ if (byteconv4(((Elf32_Shdr *)buf)->sh_type,same,be)
+ == SHT_SYMTAB) {
+ stripped = 0;
+ break;
}
- if (stripped)
- (void) printf (", stripped");
}
+ if (stripped)
+ (void) printf (", stripped");
+ else
+ (void) printf (", not stripped");
}
-#endif
if (inname != stdname) {
/*
/*
* file.h - definitions for file(1) program
- * @(#)$Id: file.h,v 1.22 1995/05/20 22:09:21 christos Exp $
+ * @(#)$Id: file.h,v 1.23 1996/06/22 22:04:22 christos Exp $
*
* Copyright (c) Ian F. Darwin, 1987.
* Written by Ian F. Darwin.
short flag;
#define INDIR 1 /* if '>(...)' appears, */
#define UNSIGNED 2 /* comparison is unsigned */
+#define ADD 4 /* if '>&' appears, */
short cont_level; /* level of ">" */
struct {
char type; /* byte short long */
#ifndef lint
static char *moduleid =
- "@(#)$Id: softmagic.c,v 1.30 1995/05/20 22:09:21 christos Exp $";
+ "@(#)$Id: softmagic.c,v 1.31 1996/06/22 22:04:22 christos Exp $";
#endif /* lint */
static int match __P((unsigned char *, int));
static int mget __P((union VALUETYPE *,
unsigned char *, struct magic *, int));
static int mcheck __P((union VALUETYPE *, struct magic *));
-static void mprint __P((union VALUETYPE *, struct magic *));
+static long mprint __P((union VALUETYPE *, struct magic *));
static void mdebug __P((long, char *, int));
static int mconvert __P((union VALUETYPE *, struct magic *));
int cont_level = 0;
int need_separator = 0;
union VALUETYPE p;
+ static long *tmpoff = NULL;
+ static size_t tmplen = 0;
+ long oldoff = 0;
+
+ if (tmpoff == NULL)
+ if ((tmpoff = (long *) malloc(tmplen = 20)) == NULL)
+ error("out of memory\n");
for (magindex = 0; magindex < nmagic; magindex++) {
/* if main entry matches, print it... */
continue;
}
- mprint(&p, &magic[magindex]);
+ tmpoff[cont_level] = mprint(&p, &magic[magindex]);
/*
* If we printed something, we'll need to print
* a blank before we print something else.
if (magic[magindex].desc[0])
need_separator = 1;
/* and any continuations that match */
- cont_level++;
+ if (++cont_level >= tmplen)
+ if ((tmpoff = (long *) realloc(tmpoff,
+ tmplen += 20)) == NULL)
+ error("out of memory\n");
while (magic[magindex+1].cont_level != 0 &&
++magindex < nmagic) {
if (cont_level >= magic[magindex].cont_level) {
*/
cont_level = magic[magindex].cont_level;
}
+ if (magic[magindex].flag & ADD) {
+ oldoff=magic[magindex].offset;
+ magic[magindex].offset += tmpoff[cont_level-1];
+ }
if (mget(&p, s, &magic[magindex], nbytes) &&
mcheck(&p, &magic[magindex])) {
/*
(void) putchar(' ');
need_separator = 0;
}
- mprint(&p, &magic[magindex]);
+ tmpoff[cont_level] = mprint(&p, &magic[magindex]);
if (magic[magindex].desc[0])
need_separator = 1;
* at a higher level,
* process them.
*/
- cont_level++;
+ if (++cont_level >= tmplen)
+ if ((tmpoff =
+ (long *) realloc(tmpoff,
+ tmplen += 20)) == NULL)
+ error("out of memory\n");
+ }
+ if (magic[magindex].flag & ADD) {
+ magic[magindex].offset = oldoff;
}
}
}
return 0; /* no match at all */
}
-static void
+static long
mprint(p, m)
union VALUETYPE *p;
struct magic *m;
{
char *pp, *rt;
unsigned long v;
+ long t=0 ;
switch (m->type) {
v = p->b;
v = signextend(m, v) & m->mask;
(void) printf(m->desc, (unsigned char) v);
+ t = m->offset + sizeof(char);
break;
case SHORT:
v = p->h;
v = signextend(m, v) & m->mask;
(void) printf(m->desc, (unsigned short) v);
+ t = m->offset + sizeof(short);
break;
case LONG:
v = p->l;
v = signextend(m, v) & m->mask;
(void) printf(m->desc, (unsigned long) v);
+ t = m->offset + sizeof(long);
break;
case STRING:
if (m->reln == '=') {
(void) printf(m->desc, m->value.s);
+ t = m->offset + strlen(m->value.s);
}
else {
(void) printf(m->desc, p->s);
+ t = m->offset + strlen(p->s);
}
- return;
+ break;
case DATE:
case BEDATE:
if ((rt = strchr(pp, '\n')) != NULL)
*rt = '\0';
(void) printf(m->desc, pp);
+ t = m->offset + sizeof(time_t);
return;
default:
error("invalid m->type (%d) in mprint().\n", m->type);
/*NOTREACHED*/
}
+ return(t);
}
/*
union VALUETYPE *p;
struct magic *m;
{
- char *rt;
-
switch (m->type) {
case BYTE:
case SHORT:
case DATE:
return 1;
case STRING:
- /* Null terminate and eat the return */
- p->s[sizeof(p->s) - 1] = '\0';
- if ((rt = strchr(p->s, '\n')) != NULL)
- *rt = '\0';
- return 1;
+ {
+ size_t len;
+
+ /* Null terminate and eat the return */
+ p->s[sizeof(p->s) - 1] = '\0';
+ len = strlen(p->s);
+ if (len > 0 && p->s[len - 1] == '\n')
+ p->s[len - 1] = '\0';
+ return 1;
+ }
case BESHORT:
p->h = (short)((p->hs[0]<<8)|(p->hs[1]));
return 1;