From: Christos Zoulas Date: Fri, 28 Apr 1995 17:29:13 +0000 (+0000) Subject: - Incorrect nroff detection fix from der Mouse X-Git-Tag: FILE3_27~99 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=db8adae0ba79701386ad09809d289779eab3b1ba;p=file - Incorrect nroff detection fix from der Mouse - Lost and incorrect magic entries. - Added ELF stripped binary detection [in C; ugh] - Look for $MAGIC to find the magic file. - Eat trailing size specifications from numbers i.e. ignore 10L - More fixes for very short files --- diff --git a/doc/file.man b/doc/file.man index 51aa6435..2d2aa1a9 100644 --- a/doc/file.man +++ b/doc/file.man @@ -1,5 +1,5 @@ .TH FILE __CSECTION__ "Copyright but distributable" -.\" $Id: file.man,v 1.24 1995/01/21 21:03:35 christos Exp $ +.\" $Id: file.man,v 1.25 1995/04/28 17:29:13 christos Exp $ .SH NAME file \- determine file type @@ -129,6 +129,10 @@ option causes symlinks to be followed, as the like-named option in .SH FILES .I __MAGIC__ \- default list of magic numbers +.SH ENVIRONMENT +The environment variable +.B MAGIC +can be used to set the default magic number file. .SH SEE ALSO .IR magic (__FSECTION__) \- description of magic file format. diff --git a/src/apprentice.c b/src/apprentice.c index a82d7249..76e90d46 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -33,20 +33,24 @@ #ifndef lint static char *moduleid = - "@(#)$Id: apprentice.c,v 1.20 1995/03/25 22:08:07 christos Exp $"; + "@(#)$Id: apprentice.c,v 1.21 1995/04/28 17:29:13 christos Exp $"; #endif /* lint */ #define EATAB {while (isascii((unsigned char) *l) && \ isspace((unsigned char) *l)) ++l;} +#define LOWCASE(l) (isupper((unsigned char) (l)) ? \ + tolower((unsigned char) (l)) : (l)) static int getvalue __P((struct magic *, char **)); static int hextoint __P((int)); static char *getstr __P((char *, char *, int, int *)); static int parse __P((char *, int *, int)); +static void eatsize __P((char **)); static int maxmagic = 0; + int apprentice(fn, check) char *fn; /* name of magic file */ @@ -186,13 +190,16 @@ int *ndx, check; * read [.lbs][+-]nnnnn) */ if (*l == '.') { - switch (*++l) { + l++; + switch (LOWCASE(*l)) { case 'l': m->in.type = LONG; break; + case 'h': case 's': m->in.type = SHORT; break; + case 'c': case 'b': m->in.type = BYTE; break; @@ -279,6 +286,7 @@ int *ndx, check; if (*l == '&') { ++l; m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); } else m->mask = ~0L; EATAB; @@ -360,8 +368,10 @@ char **p; *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); m->vallen = slen; } else - if (m->reln != 'x') + if (m->reln != 'x') { m->value.l = signextend(m, strtoul(*p, p, 0)); + eatsize(&p); + } return 0; } @@ -549,3 +559,30 @@ int len; } } } + +/* + * eatsize(): Eat the size spec from a number [eg. 10UL] + */ +static void +eatsize(p) +char **p; +{ + char *l = *p; + + if (LOWCASE(*l) == 'u') + l++; + + switch (LOWCASE(*l)) { + case 'l': /* long */ + case 's': /* short */ + case 'h': /* short */ + case 'b': /* char/byte */ + case 'c': /* char/byte */ + l++; + /*FALLTHROUGH*/ + default: + break; + } + + *p = l; +} diff --git a/src/ascmagic.c b/src/ascmagic.c index 9f95402a..9f41a76a 100644 --- a/src/ascmagic.c +++ b/src/ascmagic.c @@ -36,7 +36,7 @@ #ifndef lint static char *moduleid = - "@(#)$Id: ascmagic.c,v 1.18 1995/03/25 22:08:07 christos Exp $"; + "@(#)$Id: ascmagic.c,v 1.19 1995/04/28 17:29:13 christos Exp $"; #endif /* lint */ /* an optimisation over plain strcmp() */ @@ -77,13 +77,13 @@ int nbytes; /* size actually read */ while (isascii(*tp) && isspace(*tp)) ++tp; /* skip leading whitespace */ if ((isascii(*tp) && (isalnum(*tp) || *tp=='\\') && - isascii(*(tp+1)) && (isalnum(*(tp+1)) || *tp=='"'))) { + isascii(tp[1]) && (isalnum(tp[1]) || tp[1] == '"'))) { ckfputs("troff or preprocessor input text", stdout); return 1; } } if ((*buf == 'c' || *buf == 'C') && - isascii(*(buf + 1)) && isspace(*(buf + 1))) { + isascii(buf[1]) && isspace(buf[1])) { ckfputs("fortran program text", stdout); return 1; } @@ -93,7 +93,7 @@ int nbytes; /* size actually read */ s = (unsigned char*) memcpy(nbuf, buf, nbytes); s[nbytes] = '\0'; has_escapes = (memchr(s, '\033', nbytes) != NULL); - while ((token = strtok((char*)s, " \t\n\r\f")) != NULL) { + while ((token = strtok((char *) s, " \t\n\r\f")) != NULL) { s = NULL; /* make strtok() keep on tokin' */ for (p = names; p < names + NNAMES; p++) { if (STREQ(p->name, token)) { @@ -108,7 +108,7 @@ int nbytes; /* size actually read */ for (i = 0; i < nbytes; i++) { - if (!isascii(*(buf+i))) + if (!isascii(buf[i])) return 0; /* not all ascii */ } diff --git a/src/file.c b/src/file.c index 5078b8b4..409959cd 100644 --- a/src/file.c +++ b/src/file.c @@ -26,7 +26,7 @@ */ #ifndef lint static char *moduleid = - "@(#)$Id: file.c,v 1.31 1995/03/25 22:08:07 christos Exp $"; + "@(#)$Id: file.c,v 1.32 1995/04/28 17:29:13 christos Exp $"; #endif /* lint */ #include @@ -43,6 +43,10 @@ static char *moduleid = #endif #include /* for read() */ +#ifdef HAVE_ELF +#include +#endif + #include "patchlevel.h" #include "file.h" @@ -66,7 +70,7 @@ int /* Misc globals */ struct magic *magic; /* array of magic entries */ -char *magicfile = MAGIC;/* where magic be found */ +char *magicfile; /* where magic be found */ char *progname; /* used throughout */ int lineno; /* line number in the magic file */ @@ -90,6 +94,9 @@ char *argv[]; else progname = argv[0]; + if (!(magicfile = getenv("MAGIC"))) + magicfile = MAGIC; + while ((c = getopt(argc, argv, "vcdf:Lm:z")) != EOF) switch (c) { case 'v': @@ -254,15 +261,47 @@ int wid; if (nbytes == 0) ckfputs("empty", stdout); else { - buf[nbytes++] = '\0'; /* NULL terminate */ - if (nbytes < sizeof(union VALUETYPE)) { - /* The following is to handle *very* short files */ - memset(buf + nbytes, 0, sizeof(union VALUETYPE) - nbytes); - nbytes = sizeof(union VALUETYPE); - } + buf[nbytes++] = '\0'; /* NULL terminate */ tryit(buf, nbytes, zflag); } +#ifdef HAVE_ELF + /* + * ELF executables have multiple section headers in arbitrary + * file locations and thus file(1) cannot determine it from easily. + * Instead we traverse thru all section headers until a symbol table + * one is found or else the binary is stripped. + * XXX: This will not work for binaries of a different byteorder. + * Should come up with a better fix. + */ + + if (nbytes > sizeof (Elf32_Ehdr) && + buf[EI_MAG0] == ELFMAG0 && + buf[EI_MAG1] == ELFMAG1 && + buf[EI_MAG2] == ELFMAG2 && + buf[EI_MAG3] == ELFMAG3 ) { + + Elf32_Ehdr elfhdr; + int stripped = 1; + + (void) memcpy(&elfhdr, buf, sizeof elfhdr); + + if (lseek(fd, elfhdr.e_shoff, SEEK_SET)<0) + error("lseek failed (%s).\n", strerror(errno)); + + for ( ; elfhdr.e_shnum ; elfhdr.e_shnum--) { + if (read(fd, buf, elfhdr.e_shentsize)<0) + error("read failed (%s).\n", strerror(errno)); + if (((Elf32_Shdr *)&buf)->sh_type == SHT_SYMTAB) { + stripped = 0; + break; + } + } + if (stripped) + (void) printf (" - stripped"); + } +#endif + if (inname != stdname) { /* * Try to restore access, modification times if read it. diff --git a/src/file.h b/src/file.h index 721d0b97..772bd8b4 100644 --- a/src/file.h +++ b/src/file.h @@ -1,6 +1,6 @@ /* * file.h - definitions for file(1) program - * @(#)$Id: file.h,v 1.20 1995/03/25 22:08:07 christos Exp $ + * @(#)$Id: file.h,v 1.21 1995/04/28 17:29:13 christos Exp $ * * Copyright (c) Ian F. Darwin, 1987. * Written by Ian F. Darwin. @@ -26,7 +26,9 @@ * 4. This notice may not be removed or altered. */ -#define HOWMANY 8192 /* how much of the file to look at */ +#ifndef HOWMANY +# define HOWMANY 8192 /* how much of the file to look at */ +#endif #define MAXMAGIS 1000 /* max entries in /etc/magic */ #define MAXDESC 50 /* max leng of text description */ #define MAXstring 32 /* max leng of "string" types */ diff --git a/src/names.h b/src/names.h index 5d989ee5..86633dd0 100644 --- a/src/names.h +++ b/src/names.h @@ -10,7 +10,7 @@ * * See LEGAL.NOTICE * - * $Id: names.h,v 1.11 1993/09/16 21:14:20 christos Exp $ + * $Id: names.h,v 1.12 1995/04/28 17:29:13 christos Exp $ */ /* these types are used to index the table 'types': keep em in sync! */ @@ -25,13 +25,13 @@ #define L_NEWS 8 /* Usenet Netnews */ static char *types[] = { - "c program text", - "fortran program text", + "C program text", + "FORTRAN program text", "make commands text" , - "pl/1 program text", + "PL/1 program text", "assembler program text", "English text", - "pascal program text", + "Pascal program text", "mail text", "news text", "can't happen error on names.h/types", @@ -43,7 +43,7 @@ static struct names { } names[] = { /* These must be sorted by eye for optimal hit rate */ /* Add to this list only after substantial meditation */ - {"/*", L_C}, /* must preced "The", "the", etc. */ + {"/*", L_C}, /* must precede "The", "the", etc. */ {"#include", L_C}, {"char", L_C}, {"The", L_ENG}, @@ -75,6 +75,7 @@ static struct names { {".byte", L_MACH}, {".even", L_MACH}, {".globl", L_MACH}, + {".text", L_MACH}, {"clr", L_MACH}, {"(input,", L_PAS}, {"dcl", L_PLI}, diff --git a/src/patchlevel.h b/src/patchlevel.h index 0c225cd2..48454540 100644 --- a/src/patchlevel.h +++ b/src/patchlevel.h @@ -1,12 +1,20 @@ #define FILE_VERSION_MAJOR 3 -#define patchlevel 16 +#define patchlevel 17 /* * Patchlevel file for Ian Darwin's MAGIC command. - * $Id: patchlevel.h,v 1.16 1995/03/25 22:06:45 christos Exp $ + * $Id: patchlevel.h,v 1.17 1995/04/28 17:29:13 christos Exp $ * * $Log: patchlevel.h,v $ - * Revision 1.16 1995/03/25 22:06:45 christos + * Revision 1.17 1995/04/28 17:29:13 christos + * - Incorrect nroff detection fix from der Mouse + * - Lost and incorrect magic entries. + * - Added ELF stripped binary detection [in C; ugh] + * - Look for $MAGIC to find the magic file. + * - Eat trailing size specifications from numbers i.e. ignore 10L + * - More fixes for very short files + * + * Revision 1.16 1995/03/25 22:06:45 christos * - use strtoul() where it exists. * - fix sign-extend bug * - try to detect tar archives before nroff files, otherwise diff --git a/src/softmagic.c b/src/softmagic.c index d469b14a..1efc367e 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -34,7 +34,7 @@ #ifndef lint static char *moduleid = - "@(#)$Id: softmagic.c,v 1.27 1995/03/25 22:08:07 christos Exp $"; + "@(#)$Id: softmagic.c,v 1.28 1995/04/28 17:29:13 christos Exp $"; #endif /* lint */ static int match __P((unsigned char *, int)); @@ -287,11 +287,20 @@ struct magic *m; int nbytes; { long offset = m->offset; - if (offset + sizeof(union VALUETYPE) > nbytes) - return 0; - + long diff = (offset + sizeof(union VALUETYPE)) - nbytes; + if (diff >= 0) + memcpy(p, s + offset, sizeof(union VALUETYPE)); + else { + /* Not enough space; zeropad */ + long have = sizeof(union VALUETYPE) + diff; + if (have > 0) + memcpy(p, s + offset, have); + else + have = 0; + + memset(p + have, 0, sizeof(union VALUETYPE) - have); + } - memcpy(p, s + offset, sizeof(union VALUETYPE)); if (debug) { mdebug(offset, (char *) p, sizeof(union VALUETYPE));