From d5cc18076e1af99f9b02925b5069cea487f29ec8 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Sat, 5 Aug 2000 17:36:46 +0000 Subject: [PATCH] Correct indianness detection in elf (Charles Hannum) FreeBSD elf core support (Guy Harris) Use gzip in systems that don't have uncompress (Anthon van der Neut) Internationalization/EBCDIC support (Eric Fisher) Many many magic changes --- Makefile.am | 53 ++-- Makefile.in | 21 +- aclocal.m4 | 4 +- configure | 177 ++++++------- configure.in | 2 +- doc/file.man | 92 ++++--- magic/Header | 2 +- src/Makefile.std | 4 +- src/acconfig.h | 2 +- src/apprentice.c | 89 +++---- src/ascmagic.c | 642 ++++++++++++++++++++++++++++++++++++++++++++--- src/compress.c | 75 +++--- src/file.c | 135 +++++----- src/fsmagic.c | 9 +- src/internat.c | 86 ------- src/is_tar.c | 24 +- src/names.h | 31 +-- src/print.c | 68 ++--- src/readelf.c | 188 +++++++++----- src/softmagic.c | 46 ++-- 20 files changed, 1165 insertions(+), 585 deletions(-) delete mode 100644 src/internat.c diff --git a/Makefile.am b/Makefile.am index bf501157..85c09bed 100644 --- a/Makefile.am +++ b/Makefile.am @@ -11,7 +11,7 @@ CPPFLAGS = -DMAGIC='"$(MAGIC)"' man_MANS = file.1 magic.4 file_SOURCES = file.c apprentice.c fsmagic.c softmagic.c ascmagic.c \ - compress.c is_tar.c readelf.c internat.c print.c \ + compress.c is_tar.c readelf.c print.c \ file.h names.h patchlevel.h readelf.h tar.h EXTRA_DIST = LEGAL.NOTICE MAINT PORTING Makefile.std magic2mime \ @@ -44,26 +44,31 @@ magic.4: Makefile magic.man -e s@__VERSION__@${VERSION}@g \ -e s@__MAGIC__@${MAGIC}@g $(srcdir)/magic.man > $@ -magic_FRAGMENTS = Magdir/adventure Magdir/allegro Magdir/alliant Magdir/alpha Magdir/amanda \ -Magdir/amigaos Magdir/animation Magdir/apl Magdir/apple Magdir/applix \ -Magdir/archive Magdir/asterix Magdir/att3b Magdir/audio Magdir/blit \ -Magdir/bsdi Magdir/c-lang Magdir/chi Magdir/cisco Magdir/claris Magdir/clipper \ -Magdir/commands Magdir/compress Magdir/console Magdir/convex Magdir/database \ 
-Magdir/diamond Magdir/diff Magdir/digital Magdir/dump Magdir/elf \ -Magdir/encore Magdir/epoc Magdir/filesystems Magdir/flash Magdir/fonts Magdir/frame \ -Magdir/freebsd Magdir/fsav Magdir/gimp Magdir/gnu Magdir/grace Magdir/hp Magdir/ibm370 \ -Magdir/ibm6000 Magdir/iff Magdir/images Magdir/intel Magdir/interleaf \ -Magdir/island Magdir/ispell Magdir/java Magdir/jpeg Magdir/karma Magdir/lecter \ -Magdir/lex Magdir/lif Magdir/linux Magdir/lisp Magdir/mach Magdir/macintosh \ -Magdir/mathematica Magdir/magic Magdir/mail.news Magdir/maple Magdir/mcrypt \ -Magdir/mime Magdir/mirage Magdir/mkid \ -Magdir/mmdf Magdir/modem Magdir/motorola Magdir/msdos Magdir/msvc Magdir/ncr \ -Magdir/netbsd Magdir/netscape Magdir/news Magdir/octave Magdir/olf Magdir/os2 Magdir/os9 \ -Magdir/osf1 Magdir/palm Magdir/pbm Magdir/pdf Magdir/pdp Magdir/pgp Magdir/pkgadd \ -Magdir/project Magdir/plus5 Magdir/printer Magdir/psdbms Magdir/pyramid Magdir/riff \ -Magdir/rpm Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent \ -Magdir/sgi Magdir/sgml Magdir/sniffer Magdir/softquad Magdir/spectrum Magdir/sun \ -Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x Magdir/timezone \ -Magdir/troff Magdir/typeset Magdir/unknown Magdir/uuencode \ -Magdir/varied.out Magdir/vax Magdir/vicar Magdir/visx Magdir/vms \ -Magdir/wordperfect Magdir/xdelta Magdir/xenix Magdir/zilog Magdir/zyxel +magic_FRAGMENTS = Magdir/adventure Magdir/allegro Magdir/alliant \ + Magdir/alpha Magdir/amanda Magdir/amigaos Magdir/animation \ + Magdir/apl Magdir/apple Magdir/applix Magdir/archive Magdir/asterix \ + Magdir/att3b Magdir/audio Magdir/blender Magdir/blit Magdir/bsdi \ + Magdir/c-lang Magdir/chi Magdir/cisco Magdir/claris Magdir/clipper \ + Magdir/commands Magdir/compress Magdir/console Magdir/convex \ + Magdir/database Magdir/diamond Magdir/diff Magdir/digital \ + Magdir/dump Magdir/elf Magdir/encore Magdir/epoc Magdir/filesystems \ + Magdir/flash Magdir/fonts Magdir/frame Magdir/freebsd Magdir/fsav \ + 
Magdir/gimp Magdir/gnu Magdir/grace Magdir/hp Magdir/ibm370 \ + Magdir/ibm6000 Magdir/iff Magdir/images Magdir/intel \ + Magdir/interleaf Magdir/island Magdir/ispell Magdir/java \ + Magdir/jpeg Magdir/karma Magdir/lecter Magdir/lex Magdir/lif \ + Magdir/linux Magdir/lisp Magdir/mach Magdir/macintosh Magdir/magic \ + Magdir/mail.news Magdir/maple Magdir/mathematica Magdir/mcrypt \ + Magdir/mime Magdir/mirage Magdir/mkid Magdir/mmdf Magdir/modem \ + Magdir/motorola Magdir/msdos Magdir/msvc Magdir/ncr Magdir/netbsd \ + Magdir/netscape Magdir/news Magdir/octave Magdir/olf Magdir/os2 \ + Magdir/os9 Magdir/osf1 Magdir/palm Magdir/pbm Magdir/pdf Magdir/pdp \ + Magdir/pgp Magdir/pkgadd Magdir/plus5 Magdir/printer Magdir/project \ + Magdir/psdbms Magdir/pyramid Magdir/python Magdir/riff Magdir/rpm \ + Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent \ + Magdir/sgi Magdir/sgml Magdir/sniffer Magdir/softquad Magdir/spectrum \ + Magdir/sun Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x \ + Magdir/timezone Magdir/troff Magdir/typeset Magdir/unknown \ + Magdir/uuencode Magdir/varied.out Magdir/vax Magdir/vicar Magdir/visx \ + Magdir/vms Magdir/vmware Magdir/wordperfect Magdir/xdelta Magdir/xenix \ + Magdir/zilog Magdir/zyxel diff --git a/Makefile.in b/Makefile.in index df4513e9..b0444b24 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am +# Makefile.in generated automatically by automake 1.4a from Makefile.am # Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. 
# This Makefile.in is free software; the Free Software Foundation @@ -48,9 +48,10 @@ AUTOMAKE = @AUTOMAKE@ AUTOHEADER = @AUTOHEADER@ INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_FLAG = transform = @program_transform_name@ NORMAL_INSTALL = : @@ -76,7 +77,7 @@ CPPFLAGS = -DMAGIC='"$(MAGIC)"' man_MANS = file.1 magic.4 -file_SOURCES = file.c apprentice.c fsmagic.c softmagic.c ascmagic.c compress.c is_tar.c readelf.c internat.c print.c file.h names.h patchlevel.h readelf.h tar.h +file_SOURCES = file.c apprentice.c fsmagic.c softmagic.c ascmagic.c compress.c is_tar.c readelf.c print.c file.h names.h patchlevel.h readelf.h tar.h EXTRA_DIST = LEGAL.NOTICE MAINT PORTING Makefile.std magic2mime Localstuff Header $(magic_FRAGMENTS) file.man magic.man @@ -84,7 +85,7 @@ EXTRA_DIST = LEGAL.NOTICE MAINT PORTING Makefile.std magic2mime Localstuff Head CLEANFILES = $(man_MANS) magic -magic_FRAGMENTS = Magdir/adventure Magdir/allegro Magdir/alliant Magdir/alpha Magdir/amanda Magdir/amigaos Magdir/animation Magdir/apl Magdir/apple Magdir/applix Magdir/archive Magdir/asterix Magdir/att3b Magdir/audio Magdir/blit Magdir/bsdi Magdir/c-lang Magdir/chi Magdir/cisco Magdir/claris Magdir/clipper Magdir/commands Magdir/compress Magdir/console Magdir/convex Magdir/database Magdir/diamond Magdir/diff Magdir/digital Magdir/dump Magdir/elf Magdir/encore Magdir/epoc Magdir/filesystems Magdir/flash Magdir/fonts Magdir/frame Magdir/freebsd Magdir/fsav Magdir/gimp Magdir/gnu Magdir/grace Magdir/hp Magdir/ibm370 Magdir/ibm6000 Magdir/iff Magdir/images Magdir/intel Magdir/interleaf Magdir/island Magdir/ispell Magdir/java Magdir/jpeg Magdir/karma Magdir/lecter Magdir/lex Magdir/lif Magdir/linux Magdir/lisp Magdir/mach Magdir/macintosh Magdir/mathematica Magdir/magic Magdir/mail.news Magdir/maple Magdir/mcrypt Magdir/mime Magdir/mirage Magdir/mkid 
Magdir/mmdf Magdir/modem Magdir/motorola Magdir/msdos Magdir/msvc Magdir/ncr Magdir/netbsd Magdir/netscape Magdir/news Magdir/octave Magdir/olf Magdir/os2 Magdir/os9 Magdir/osf1 Magdir/palm Magdir/pbm Magdir/pdf Magdir/pdp Magdir/pgp Magdir/pkgadd Magdir/project Magdir/plus5 Magdir/printer Magdir/psdbms Magdir/pyramid Magdir/riff Magdir/rpm Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent Magdir/sgi Magdir/sgml Magdir/sniffer Magdir/softquad Magdir/spectrum Magdir/sun Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x Magdir/timezone Magdir/troff Magdir/typeset Magdir/unknown Magdir/uuencode Magdir/varied.out Magdir/vax Magdir/vicar Magdir/visx Magdir/vms Magdir/wordperfect Magdir/xdelta Magdir/xenix Magdir/zilog Magdir/zyxel +magic_FRAGMENTS = Magdir/adventure Magdir/allegro Magdir/alliant Magdir/alpha Magdir/amanda Magdir/amigaos Magdir/animation Magdir/apl Magdir/apple Magdir/applix Magdir/archive Magdir/asterix Magdir/att3b Magdir/audio Magdir/blender Magdir/blit Magdir/bsdi Magdir/c-lang Magdir/chi Magdir/cisco Magdir/claris Magdir/clipper Magdir/commands Magdir/compress Magdir/console Magdir/convex Magdir/database Magdir/diamond Magdir/diff Magdir/digital Magdir/dump Magdir/elf Magdir/encore Magdir/epoc Magdir/filesystems Magdir/flash Magdir/fonts Magdir/frame Magdir/freebsd Magdir/fsav Magdir/gimp Magdir/gnu Magdir/grace Magdir/hp Magdir/ibm370 Magdir/ibm6000 Magdir/iff Magdir/images Magdir/intel Magdir/interleaf Magdir/island Magdir/ispell Magdir/java Magdir/jpeg Magdir/karma Magdir/lecter Magdir/lex Magdir/lif Magdir/linux Magdir/lisp Magdir/mach Magdir/macintosh Magdir/magic Magdir/mail.news Magdir/maple Magdir/mathematica Magdir/mcrypt Magdir/mime Magdir/mirage Magdir/mkid Magdir/mmdf Magdir/modem Magdir/motorola Magdir/msdos Magdir/msvc Magdir/ncr Magdir/netbsd Magdir/netscape Magdir/news Magdir/octave Magdir/olf Magdir/os2 Magdir/os9 Magdir/osf1 Magdir/palm Magdir/pbm Magdir/pdf Magdir/pdp Magdir/pgp Magdir/pkgadd Magdir/plus5 
Magdir/printer Magdir/project Magdir/psdbms Magdir/pyramid Magdir/python Magdir/riff Magdir/rpm Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent Magdir/sgi Magdir/sgml Magdir/sniffer Magdir/softquad Magdir/spectrum Magdir/sun Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x Magdir/timezone Magdir/troff Magdir/typeset Magdir/unknown Magdir/uuencode Magdir/varied.out Magdir/vax Magdir/vicar Magdir/visx Magdir/vms Magdir/vmware Magdir/wordperfect Magdir/xdelta Magdir/xenix Magdir/zilog Magdir/zyxel ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs @@ -97,7 +98,7 @@ DEFS = @DEFS@ -I. -I$(srcdir) -I. LDFLAGS = @LDFLAGS@ LIBS = @LIBS@ file_OBJECTS = file.o apprentice.o fsmagic.o softmagic.o ascmagic.o \ -compress.o is_tar.o readelf.o internat.o print.o +compress.o is_tar.o readelf.o print.o file_LDADD = $(LDADD) file_DEPENDENCIES = file_LDFLAGS = @@ -119,7 +120,7 @@ missing mkinstalldirs DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) -TAR = tar +TAR = gtar GZIP_ENV = --best SOURCES = $(file_SOURCES) OBJECTS = $(file_OBJECTS) @@ -184,8 +185,8 @@ install-binPROGRAMS: $(bin_PROGRAMS) $(mkinstalldirs) $(DESTDIR)$(bindir) @list='$(bin_PROGRAMS)'; for p in $$list; do \ if test -f $$p; then \ - echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \ - $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ + echo " $(INSTALL_PROGRAM) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \ + $(INSTALL_PROGRAM) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ else :; fi; \ done @@ -380,7 +381,7 @@ distdir: $(DISTFILES) @for file in $(DISTFILES); do \ d=$(srcdir); \ if test -d $$d/$$file; then \ - cp -pr $$/$$file 
$(distdir)/$$file; \ + cp -pr $$d/$$file $(distdir)/$$file; \ else \ test -f $(distdir)/$$file \ || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ @@ -412,7 +413,7 @@ uninstall: uninstall-am all-am: Makefile $(PROGRAMS) $(MANS) $(DATA) config.h all-redirect: all-am install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install + $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install installdirs: $(mkinstalldirs) $(DESTDIR)$(bindir) $(DESTDIR)$(mandir)/man1 \ $(DESTDIR)$(mandir)/man4 $(DESTDIR)$(datadir) diff --git a/aclocal.m4 b/aclocal.m4 index b00f3cb6..c62b0c88 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1,4 +1,4 @@ -dnl aclocal.m4 generated automatically by aclocal 1.4 +dnl aclocal.m4 generated automatically by aclocal 1.4a dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation @@ -101,6 +101,8 @@ dnl AM_INIT_AUTOMAKE(package,version, [no-define]) AC_DEFUN(AM_INIT_AUTOMAKE, [AC_REQUIRE([AC_PROG_INSTALL]) +dnl We require 2.13 because we rely on SHELL being computed by configure. +AC_PREREQ([2.13]) PACKAGE=[$1] AC_SUBST(PACKAGE) VERSION=[$2] diff --git a/configure b/configure index b084a7f7..65590e5b 100755 --- a/configure +++ b/configure @@ -693,9 +693,10 @@ else fi + PACKAGE=file -VERSION=3.31 +VERSION=3.32 if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then { echo "configure: error: source directory already configured; run "make distclean" there first" 1>&2; exit 1; } @@ -712,7 +713,7 @@ EOF missing_dir=`cd $ac_aux_dir && pwd` echo $ac_n "checking for working aclocal""... $ac_c" 1>&6 -echo "configure:716: checking for working aclocal" >&5 +echo "configure:717: checking for working aclocal" >&5 # Run test in a subshell; some versions of sh will print an error if # an executable is not found, even if stderr is redirected. # Redirect stdin to placate older versions of autoconf. Sigh. 
@@ -725,7 +726,7 @@ else fi echo $ac_n "checking for working autoconf""... $ac_c" 1>&6 -echo "configure:729: checking for working autoconf" >&5 +echo "configure:730: checking for working autoconf" >&5 # Run test in a subshell; some versions of sh will print an error if # an executable is not found, even if stderr is redirected. # Redirect stdin to placate older versions of autoconf. Sigh. @@ -738,7 +739,7 @@ else fi echo $ac_n "checking for working automake""... $ac_c" 1>&6 -echo "configure:742: checking for working automake" >&5 +echo "configure:743: checking for working automake" >&5 # Run test in a subshell; some versions of sh will print an error if # an executable is not found, even if stderr is redirected. # Redirect stdin to placate older versions of autoconf. Sigh. @@ -751,7 +752,7 @@ else fi echo $ac_n "checking for working autoheader""... $ac_c" 1>&6 -echo "configure:755: checking for working autoheader" >&5 +echo "configure:756: checking for working autoheader" >&5 # Run test in a subshell; some versions of sh will print an error if # an executable is not found, even if stderr is redirected. # Redirect stdin to placate older versions of autoconf. Sigh. @@ -764,7 +765,7 @@ else fi echo $ac_n "checking for working makeinfo""... $ac_c" 1>&6 -echo "configure:768: checking for working makeinfo" >&5 +echo "configure:769: checking for working makeinfo" >&5 # Run test in a subshell; some versions of sh will print an error if # an executable is not found, even if stderr is redirected. # Redirect stdin to placate older versions of autoconf. Sigh. @@ -782,7 +783,7 @@ fi echo $ac_n "checking for builtin ELF support""... $ac_c" 1>&6 -echo "configure:786: checking for builtin ELF support" >&5 +echo "configure:787: checking for builtin ELF support" >&5 # Check whether --enable-elf or --disable-elf was given. if test "${enable_elf+set}" = set; then enableval="$enable_elf" @@ -808,7 +809,7 @@ fi echo $ac_n "checking for ELF core file support""... 
$ac_c" 1>&6 -echo "configure:812: checking for ELF core file support" >&5 +echo "configure:813: checking for ELF core file support" >&5 # Check whether --enable-elf-core or --disable-elf-core was given. if test "${enable_elf_core+set}" = set; then enableval="$enable_elf_core" @@ -836,7 +837,7 @@ fi # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:840: checking for $ac_word" >&5 +echo "configure:841: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -866,7 +867,7 @@ if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:870: checking for $ac_word" >&5 +echo "configure:871: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -917,7 +918,7 @@ fi # Extract the first word of "cl", so it can be a program name with args. set dummy cl; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:921: checking for $ac_word" >&5 +echo "configure:922: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -949,7 +950,7 @@ fi fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:953: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 +echo "configure:954: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
@@ -960,12 +961,12 @@ cross_compiling=$ac_cv_prog_cc_cross cat > conftest.$ac_ext << EOF -#line 964 "configure" +#line 965 "configure" #include "confdefs.h" main(){return(0);} EOF -if { (eval echo configure:969: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:970: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then ac_cv_prog_cc_works=yes # If we can't run a trivial program, we are probably using a cross compiler. if (./conftest; exit) 2>/dev/null; then @@ -991,12 +992,12 @@ if test $ac_cv_prog_cc_works = no; then { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 -echo "configure:995: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 +echo "configure:996: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 cross_compiling=$ac_cv_prog_cc_cross echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:1000: checking whether we are using GNU C" >&5 +echo "configure:1001: checking whether we are using GNU C" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1005,7 +1006,7 @@ else yes; #endif EOF -if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:1009: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then +if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:1010: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then ac_cv_prog_gcc=yes else ac_cv_prog_gcc=no @@ -1024,7 +1025,7 @@ ac_test_CFLAGS="${CFLAGS+set}" ac_save_CFLAGS="$CFLAGS" CFLAGS= echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 -echo "configure:1028: checking whether ${CC-cc} accepts -g" >&5 +echo "configure:1029: checking whether ${CC-cc} accepts -g" >&5 if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1067,7 +1068,7 @@ fi # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # ./install, which can be erroneously created by make from ./install.sh. echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 -echo "configure:1071: checking for a BSD compatible install" >&5 +echo "configure:1072: checking for a BSD compatible install" >&5 if test -z "$INSTALL"; then if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1120,7 +1121,7 @@ test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' echo $ac_n "checking whether ln -s works""... $ac_c" 1>&6 -echo "configure:1124: checking whether ln -s works" >&5 +echo "configure:1125: checking whether ln -s works" >&5 if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1142,7 +1143,7 @@ fi echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:1146: checking how to run the C preprocessor" >&5 +echo "configure:1147: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= @@ -1157,13 +1158,13 @@ else # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. 
cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1167: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1168: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1174,13 +1175,13 @@ else rm -rf conftest* CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1184: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1185: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1191,13 +1192,13 @@ else rm -rf conftest* CPP="${CC-cc} -nologo -E" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1201: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1202: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1222,12 +1223,12 @@ fi echo "$ac_t""$CPP" 1>&6 echo $ac_n "checking for ANSI C header files""... 
$ac_c" 1>&6 -echo "configure:1226: checking for ANSI C header files" >&5 +echo "configure:1227: checking for ANSI C header files" >&5 if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #include @@ -1235,7 +1236,7 @@ else #include EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1239: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1240: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* @@ -1252,7 +1253,7 @@ rm -f conftest* if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat > conftest.$ac_ext < EOF @@ -1270,7 +1271,7 @@ fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat > conftest.$ac_ext < EOF @@ -1291,7 +1292,7 @@ if test "$cross_compiling" = yes; then : else cat > conftest.$ac_ext < #define ISLOWER(c) ('a' <= (c) && (c) <= 'z') @@ -1302,7 +1303,7 @@ if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); exit (0); } EOF -if { (eval echo configure:1306: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1307: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then : else @@ -1326,19 +1327,19 @@ EOF fi echo $ac_n "checking whether sys/types.h defines makedev""... 
$ac_c" 1>&6 -echo "configure:1330: checking whether sys/types.h defines makedev" >&5 +echo "configure:1331: checking whether sys/types.h defines makedev" >&5 if eval "test \"`echo '$''{'ac_cv_header_sys_types_h_makedev'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < int main() { return makedev(0, 0); ; return 0; } EOF -if { (eval echo configure:1342: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1343: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_header_sys_types_h_makedev=yes else @@ -1356,17 +1357,17 @@ echo "$ac_t""$ac_cv_header_sys_types_h_makedev" 1>&6 if test $ac_cv_header_sys_types_h_makedev = no; then ac_safe=`echo "sys/mkdev.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for sys/mkdev.h""... $ac_c" 1>&6 -echo "configure:1360: checking for sys/mkdev.h" >&5 +echo "configure:1361: checking for sys/mkdev.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1370: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1371: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* @@ -1394,17 +1395,17 @@ fi if test $ac_cv_header_sys_mkdev_h = no; then ac_safe=`echo "sys/sysmacros.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for sys/sysmacros.h""... 
$ac_c" 1>&6 -echo "configure:1398: checking for sys/sysmacros.h" >&5 +echo "configure:1399: checking for sys/sysmacros.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1408: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1409: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* @@ -1432,12 +1433,12 @@ fi fi echo $ac_n "checking for sys/wait.h that is POSIX.1 compatible""... $ac_c" 1>&6 -echo "configure:1436: checking for sys/wait.h that is POSIX.1 compatible" >&5 +echo "configure:1437: checking for sys/wait.h that is POSIX.1 compatible" >&5 if eval "test \"`echo '$''{'ac_cv_header_sys_wait_h'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #include @@ -1453,7 +1454,7 @@ wait (&s); s = WIFEXITED (s) ? WEXITSTATUS (s) : 1; ; return 0; } EOF -if { (eval echo configure:1457: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1458: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_header_sys_wait_h=yes else @@ -1477,17 +1478,17 @@ for ac_hdr in unistd.h do ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 -echo "configure:1481: checking for $ac_hdr" >&5 +echo "configure:1482: checking for $ac_hdr" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1491: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1492: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* @@ -1517,17 +1518,17 @@ for ac_hdr in locale.h do ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:1521: checking for $ac_hdr" >&5 +echo "configure:1522: checking for $ac_hdr" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1531: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1532: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* @@ -1555,12 +1556,12 @@ done echo $ac_n "checking for working const""... $ac_c" 1>&6 -echo "configure:1559: checking for working const" >&5 +echo "configure:1560: checking for working const" >&5 if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1614: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_c_const=yes else @@ -1630,12 +1631,12 @@ EOF fi echo $ac_n "checking for off_t""... 
$ac_c" 1>&6 -echo "configure:1634: checking for off_t" >&5 +echo "configure:1635: checking for off_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_off_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #if STDC_HEADERS @@ -1663,12 +1664,12 @@ EOF fi echo $ac_n "checking for size_t""... $ac_c" 1>&6 -echo "configure:1667: checking for size_t" >&5 +echo "configure:1668: checking for size_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #if STDC_HEADERS @@ -1696,12 +1697,12 @@ EOF fi echo $ac_n "checking for st_rdev in struct stat""... $ac_c" 1>&6 -echo "configure:1700: checking for st_rdev in struct stat" >&5 +echo "configure:1701: checking for st_rdev in struct stat" >&5 if eval "test \"`echo '$''{'ac_cv_struct_st_rdev'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #include @@ -1709,7 +1710,7 @@ int main() { struct stat s; s.st_rdev; ; return 0; } EOF -if { (eval echo configure:1713: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1714: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_struct_st_rdev=yes else @@ -1731,12 +1732,12 @@ fi echo $ac_n "checking for uint8_t""... $ac_c" 1>&6 -echo "configure:1735: checking for uint8_t" >&5 +echo "configure:1736: checking for uint8_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_uint8_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #if STDC_HEADERS @@ -1764,12 +1765,12 @@ EOF fi echo $ac_n "checking for uint16_t""... $ac_c" 1>&6 -echo "configure:1768: checking for uint16_t" >&5 +echo "configure:1769: checking for uint16_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_uint16_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #if STDC_HEADERS @@ -1797,12 +1798,12 @@ EOF fi echo $ac_n "checking for uint32_t""... 
$ac_c" 1>&6 -echo "configure:1801: checking for uint32_t" >&5 +echo "configure:1802: checking for uint32_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_uint32_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #if STDC_HEADERS @@ -1831,7 +1832,7 @@ fi echo $ac_n "checking for long long""... $ac_c" 1>&6 -echo "configure:1835: checking for long long" >&5 +echo "configure:1836: checking for long long" >&5 if eval "test \"`echo '$''{'ac_cv_c_long_long'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1842,13 +1843,13 @@ if test "$cross_compiling" = yes; then { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1853: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_c_long_long=yes else @@ -1877,12 +1878,12 @@ else long64='unsigned long'; fi echo $ac_n "checking for uint64_t""... $ac_c" 1>&6 -echo "configure:1881: checking for uint64_t" >&5 +echo "configure:1882: checking for uint64_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_uint64_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #if STDC_HEADERS @@ -1913,7 +1914,7 @@ fi echo $ac_n "checking size of uint8_t""... 
$ac_c" 1>&6 -echo "configure:1917: checking size of uint8_t" >&5 +echo "configure:1918: checking size of uint8_t" >&5 if eval "test \"`echo '$''{'ac_cv_sizeof_uint8_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1921,7 +1922,7 @@ else ac_cv_sizeof_uint8_t=0 else cat > conftest.$ac_ext < #ifdef STDC_HEADERS @@ -1937,7 +1938,7 @@ main() exit(0); } EOF -if { (eval echo configure:1941: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1942: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_sizeof_uint8_t=`cat conftestval` else @@ -1958,7 +1959,7 @@ EOF echo $ac_n "checking size of uint16_t""... $ac_c" 1>&6 -echo "configure:1962: checking size of uint16_t" >&5 +echo "configure:1963: checking size of uint16_t" >&5 if eval "test \"`echo '$''{'ac_cv_sizeof_uint16_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1966,7 +1967,7 @@ else ac_cv_sizeof_uint16_t=0 else cat > conftest.$ac_ext < #ifdef STDC_HEADERS @@ -1982,7 +1983,7 @@ main() exit(0); } EOF -if { (eval echo configure:1986: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1987: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_sizeof_uint16_t=`cat conftestval` else @@ -2003,7 +2004,7 @@ EOF echo $ac_n "checking size of uint32_t""... 
$ac_c" 1>&6 -echo "configure:2007: checking size of uint32_t" >&5 +echo "configure:2008: checking size of uint32_t" >&5 if eval "test \"`echo '$''{'ac_cv_sizeof_uint32_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -2011,7 +2012,7 @@ else ac_cv_sizeof_uint32_t=0 else cat > conftest.$ac_ext < #ifdef STDC_HEADERS @@ -2027,7 +2028,7 @@ main() exit(0); } EOF -if { (eval echo configure:2031: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:2032: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_sizeof_uint32_t=`cat conftestval` else @@ -2048,7 +2049,7 @@ EOF echo $ac_n "checking size of uint64_t""... $ac_c" 1>&6 -echo "configure:2052: checking size of uint64_t" >&5 +echo "configure:2053: checking size of uint64_t" >&5 if eval "test \"`echo '$''{'ac_cv_sizeof_uint64_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -2056,7 +2057,7 @@ else ac_cv_sizeof_uint64_t=0 else cat > conftest.$ac_ext < #ifdef STDC_HEADERS @@ -2072,7 +2073,7 @@ main() exit(0); } EOF -if { (eval echo configure:2076: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:2077: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_sizeof_uint64_t=`cat conftestval` else @@ -2096,12 +2097,12 @@ EOF for ac_func in strerror strtoul do echo $ac_n "checking for $ac_func""... 
$ac_c" 1>&6 -echo "configure:2100: checking for $ac_func" >&5 +echo "configure:2101: checking for $ac_func" >&5 if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2129: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func_$ac_func=yes" else diff --git a/configure.in b/configure.in index 85b65597..22d90974 100644 --- a/configure.in +++ b/configure.in @@ -1,6 +1,6 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT(file.c) -AM_INIT_AUTOMAKE(file, 3.31) +AM_INIT_AUTOMAKE(file, 3.32) AM_CONFIG_HEADER(config.h) AC_MSG_CHECKING(for builtin ELF support) diff --git a/doc/file.man b/doc/file.man index 3d876f2e..198a14c2 100644 --- a/doc/file.man +++ b/doc/file.man @@ -1,5 +1,5 @@ .TH FILE __CSECTION__ "Copyright but distributable" -.\" $Id: file.man,v 1.35 2000/05/14 17:58:36 christos Exp $ +.\" $Id: file.man,v 1.36 2000/08/05 17:36:48 christos Exp $ .SH NAME file \- determine file type @@ -31,7 +31,7 @@ test that succeeds causes the file type to be printed. The type printed will usually contain one of the words .B text (the file contains only -.SM ASCII +printing characters and a few common control characters and is probably safe to read on an .SM ASCII terminal), @@ -49,7 +49,7 @@ or the program itself, .B "preserve these keywords" . People depend on knowing that all the readable files in a directory have the word ``text'' printed. -Don't do as Berkeley did \- change ``shell commands text'' +Don't do as Berkeley did and change ``shell commands text'' to ``shell script''. .PP The filesystem tests are based on examining the return from a @@ -62,7 +62,7 @@ Any known file types appropriate to the system you are running on implement them) are intuited if they are defined in the system header file -.IR sys/stat.h . 
+.IR <sys/stat.h> . .PP The magic number tests are used to check for files with data in particular fixed formats. @@ -79,14 +79,36 @@ that the file is a binary executable, and which of several types thereof. The concept of `magic number' has been applied by extension to data files. Any file with some invariant identifier at a small fixed offset into the file can usually be described in this way. -The information in these files is read from the magic file +The information identifying these files is read from the magic file .I __MAGIC__. .PP -If an argument appears to be an -.SM ASCII -file, +If a file does not match any of the entries in the magic file, +it is examined to see if it seems to be a text file. +ASCII, ISO-8859-x, non-ISO 8-bit extended-ASCII character sets +(such as those used on Macintosh and IBM PC systems), +UTF-8-encoded Unicode, UTF-16-encoded Unicode, and EBCDIC +character sets can be distinguished by the different +ranges and sequences of bytes that constitute printable text +in each set. +If a file passes any of these tests, its character set is reported. +ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified +as ``text'' because they will be mostly readable on nearly any terminal; +UTF-16 and EBCDIC are only ``character data'' because, while +they contain text, it is text that will require translation +before it can be read. +In addition, +.B file +will attempt to determine other characteristics of text-type files. +If the lines of a file are terminated by CR, CRLF, or NEL, instead +of the Unix-standard LF, this will be reported. +Files that contain embedded escape sequences or overstriking +will also be identified. +.PP +Once .B file -attempts to guess its language. +has determined the character set used in a text-type file, +it will +attempt to determine in what language the file is written. The language tests look for particular strings (cf .IR names.h ) that can appear anywhere in the first few blocks of a file. 
@@ -102,8 +124,10 @@ two groups, so they are performed last. The language test routines also test for some miscellany (such as .BR tar (1) -archives) and determine whether an unknown file should be -labelled as `ascii text' or `data'. +archives). +.PP +Any file that cannot be identified as having been written +in any of the character sets listed above is simply said to be ``data''. .SH OPTIONS .TP 8 .B \-b @@ -123,15 +147,18 @@ before the argument list. Either .I namefile or at least one filename argument must be present; -to test the standard input, use ``-'' as a filename argument. +to test the standard input, use ``\-'' as a filename argument. .TP 8 .B \-i Causes the file command to output mime type strings rather than the more -traditional human readable ones. Thus it may say "text/plain, ASCII" rather -than "ASCII text". In order for this option to work, file changes the way +traditional human readable ones. Thus it may say +``text/plain; charset=us-ascii'' +rather +than ``ASCII text''. In order for this option to work, file changes the way it handles files recognised by the command it's self (such as many of the -text file types, directories etc), and makes use of an alternative "magic" file. -(See "FILES" section, below). +text file types, directories etc), and makes use of an alternative +``magic'' file. +(See ``FILES'' section, below). .TP 8 .B \-k Don't stop at the first match, keep going. @@ -280,7 +307,7 @@ file: application/x-executable, dynamically linked (uses shared libs), no There has been a .B file command in every \s-1UNIX\s0 since at least Research Version 6 -(man page dated January, 1975). +(man page dated January 16, 1975). The System V version introduced one significant major change: the external list of magic number types. This slowed the program down slightly but made it a lot more flexible. @@ -324,8 +351,12 @@ Changes by Ian Darwin and various authors including Christos Zoulas (christos@astron.com), 1990-1999. 
.PP Altered by Chris Lowth, chris@lowth.com, 2000: -Handle the "-i" option to output mime type strings and using an alternative +Handle the ``-i'' option to output mime type strings and using an alternative magic file and internal logic. +.PP +Altered by Eric Fischer (enf@pobox.com), July, 2000, +to identify character codes and attempt to identify the languages +of non-ASCII files. .SH LEGAL NOTICE Copyright (c) Ian F. Darwin, Toronto, Canada, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993. @@ -355,14 +386,16 @@ misrepresented as being the original software. Since few users ever read sources, credits must appear in the documentation. .PP 4. This notice may not be removed or altered. -.PP -A few support files (\fIgetopt\fP, \fIstrtok\fP) -distributed with this package -are by Henry Spencer and are subject to the same terms as above. -.PP -A few simple support files (\fIstrtol\fP, \fIstrchr\fP) -distributed with this package -are in the public domain; they are so marked. +.\" .PP +.\" A few support files (\fIgetopt\fP, \fIstrtok\fP) +.\" distributed with this package +.\" are by Henry Spencer and are subject to the same terms as above. +.\" .PP +.\" A few simple support files (\fIstrtol\fP, \fIstrchr\fP) +.\" distributed with this package +.\" are in the public domain; they are so marked. +.\" +.\" enf: commented out because these support files don't seem to be included .PP The files .I tar.h @@ -385,11 +418,11 @@ with the flexibility of the System V version. .B File uses several algorithms that favor speed over accuracy, thus it can be misled about the contents of -.SM ASCII +text files. .PP The support for -.SM ASCII +text files (primarily for programming languages) is simplistic, inefficient and requires recompilation to update. .PP @@ -428,10 +461,11 @@ The program should provide a way to give an estimate of ``how good'' a guess is. We end up removing guesses (e.g. ``From '' as first 5 chars of file) because they are not as good as other guesses (e.g. 
``Newsgroups:'' versus -"Return-Path:"). Still, if the others don't pan out, it should be +``Return-Path:''). Still, if the others don't pan out, it should be possible to use the first guess. .PP This program is slower than some vendors' file commands. +The new support for multiple character codes makes it even slower. .PP This manual page, and particularly this section, is too long. .SH AVAILABILITY diff --git a/magic/Header b/magic/Header index 861b77f6..3ca9b0eb 100644 --- a/magic/Header +++ b/magic/Header @@ -1,4 +1,4 @@ -#! file +# Magic # Magic data for file(1) command. # Machine-generated from src/cmd/file/magdir/*; edit there only! # Format is described in magic(files), where: diff --git a/src/Makefile.std b/src/Makefile.std index 85e7a9fc..8df91797 100644 --- a/src/Makefile.std +++ b/src/Makefile.std @@ -1,6 +1,6 @@ # Makefile for file(1) cmd. # Copyright (c) Ian F. Darwin 86/09/01 - see LEGAL.NOTICE. -# @(#)$Id: Makefile.std,v 1.8 2000/05/14 17:58:36 christos Exp $ +# @(#)$Id: Makefile.std,v 1.9 2000/08/05 17:36:47 christos Exp $ # # This software is not subject to any license of the American Telephone # and Telegraph Company or of the Regents of the University of California. @@ -22,7 +22,7 @@ # # 4. This notice may not be removed or altered. # -VERSION = 3.31 +VERSION = 3.32 SHELL = /bin/sh #MAGIC = /etc/magic MAGIC = /usr/local/etc/magic diff --git a/src/acconfig.h b/src/acconfig.h index 4cd46b7f..1f4f5d2c 100644 --- a/src/acconfig.h +++ b/src/acconfig.h @@ -2,7 +2,7 @@ #define PACKAGE "file" /* Autoheader needs me */ -#define VERSION "3.29" +#define VERSION "3.32" /* Define if builtin ELF support is enabled. 
*/ #undef BUILTIN_ELF diff --git a/src/apprentice.c b/src/apprentice.c index d714be99..1c4664b5 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -33,7 +33,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$Id: apprentice.c,v 1.32 2000/04/23 04:32:19 christos Exp $") +FILE_RCSID("@(#)$Id: apprentice.c,v 1.33 2000/08/05 17:36:47 christos Exp $") #endif /* lint */ #define EATAB {while (isascii((unsigned char) *l) && \ @@ -61,8 +61,8 @@ static int apprentice_1 __P((const char *, int)); int apprentice(fn, check) -const char *fn; /* list of magic files */ -int check; /* non-zero? checking-only run. */ + const char *fn; /* list of magic files */ + int check; /* non-zero? checking-only run. */ { char *p, *mfn; int file_err, errs = -1; @@ -100,8 +100,8 @@ int check; /* non-zero? checking-only run. */ static int apprentice_1(fn, check) -const char *fn; /* name of magic file */ -int check; /* non-zero? checking-only run. */ + const char *fn; /* name of magic file */ + int check; /* non-zero? checking-only run. */ { static const char hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; @@ -113,8 +113,8 @@ int check; /* non-zero? checking-only run. */ if (f==NULL) { if (errno != ENOENT) (void) fprintf(stderr, - "%s: can't read magic file %s (%s)\n", - progname, fn, strerror(errno)); + "%s: can't read magic file %s (%s)\n", + progname, fn, strerror(errno)); return -1; } @@ -141,8 +141,8 @@ int check; /* non-zero? checking-only run. 
*/ */ uint32 signextend(m, v) -struct magic *m; -uint32 v; + struct magic *m; + uint32 v; { if (!(m->flag & UNSIGNED)) switch(m->type) { @@ -182,8 +182,8 @@ uint32 v; */ static int parse(l, ndx, check) -char *l; -int *ndx, check; + char *l; + int *ndx, check; { int i = 0, nd = *ndx; struct magic *m; @@ -191,19 +191,20 @@ int *ndx, check; #define ALLOC_INCR 200 if (nd+1 >= maxmagic){ - maxmagic += ALLOC_INCR; - if ((m = (struct magic *) realloc(magic, sizeof(struct magic) * - maxmagic)) == NULL) { - (void) fprintf(stderr, "%s: Out of memory.\n", progname); - if (magic) - free(magic); - if (check) - return -1; - else - exit(1); - } - magic = m; - memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR); + maxmagic += ALLOC_INCR; + if ((m = (struct magic *) realloc(magic, sizeof(struct magic) * + maxmagic)) == NULL) { + (void) fprintf(stderr, "%s: Out of memory.\n", + progname); + if (magic) + free(magic); + if (check) + return -1; + else + exit(1); + } + magic = m; + memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR); } m = &magic[*ndx]; m->flag = 0; @@ -443,8 +444,8 @@ GetDesc: */ static int getvalue(m, p) -struct magic *m; -char **p; + struct magic *m; + char **p; { int slen; @@ -467,14 +468,14 @@ char **p; */ static char * getstr(s, p, plen, slen) -register char *s; -register char *p; -int plen, *slen; + char *s; + char *p; + int plen, *slen; { char *origs = s, *origp = p; char *pmax = p + plen - 1; - register int c; - register int val; + int c; + int val; while ((c = *s++) != '\0') { if (isspace((unsigned char) c)) @@ -570,13 +571,17 @@ out: /* Single hex char to int; -1 if not a hex char. 
*/ static int hextoint(c) -int c; + int c; { - if (!isascii((unsigned char) c)) return -1; - if (isdigit((unsigned char) c)) return c - '0'; - if ((c>='a')&&(c<='f')) return c + 10 - 'a'; - if ((c>='A')&&(c<='F')) return c + 10 - 'A'; - return -1; + if (!isascii((unsigned char) c)) + return -1; + if (isdigit((unsigned char) c)) + return c - '0'; + if ((c >= 'a')&&(c <= 'f')) + return c + 10 - 'a'; + if (( c>= 'A')&&(c <= 'F')) + return c + 10 - 'A'; + return -1; } @@ -585,11 +590,11 @@ int c; */ void showstr(fp, s, len) -FILE *fp; -const char *s; -int len; + FILE *fp; + const char *s; + int len; { - register char c; + char c; for (;;) { c = *s++; @@ -644,7 +649,7 @@ int len; */ static void eatsize(p) -char **p; + char **p; { char *l = *p; diff --git a/src/ascmagic.c b/src/ascmagic.c index a126cbfa..2a76a633 100644 --- a/src/ascmagic.c +++ b/src/ascmagic.c @@ -5,6 +5,14 @@ * Copyright (c) Ian F. Darwin, 1987. * Written by Ian F. Darwin. * + * Extensively modified by Eric Fischer in July, 2000, + * to handle character codes other than ASCII on a unified basis. + * + * Joerg Wunsch wrote the original support for 8-bit + * international characters, now subsumed into this file. + */ + +/* * This software is not subject to any license of the American Telephone * and Telegraph Company or of the Regents of the University of California. 
* @@ -38,22 +46,52 @@ #include "names.h" #ifndef lint -FILE_RCSID("@(#)$Id: ascmagic.c,v 1.27 2000/04/23 04:28:19 christos Exp $") +FILE_RCSID("@(#)$Id: ascmagic.c,v 1.28 2000/08/05 17:36:47 christos Exp $") #endif /* lint */ - /* an optimisation over plain strcmp() */ -#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) +typedef unsigned long unichar; + +#define MAXLINELEN 300 /* longest sane line length */ +#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \ + || (x) == 0x85 || (x) == '\f') + +static int looks_ascii __P((const unsigned char *, int, unichar *, int *)); +static int looks_utf8 __P((const unsigned char *, int, unichar *, int *)); +static int looks_unicode __P((const unsigned char *, int, unichar *, int *)); +static int looks_latin1 __P((const unsigned char *, int, unichar *, int *)); +static int looks_extended __P((const unsigned char *, int, unichar *, int *)); +static void from_ebcdic __P((const unsigned char *, int, unsigned char *)); +static int ascmatch __P((const unsigned char *, const unichar *, int)); int ascmagic(buf, nbytes) -unsigned char *buf; -int nbytes; /* size actually read */ + unsigned char *buf; + int nbytes; /* size actually read */ { - int i, has_escapes = 0; + int i; unsigned char *s; - char nbuf[HOWMANY+1]; /* one extra for terminating '\0' */ + char nbuf[HOWMANY+1]; /* one extra for terminating '\0' */ + unichar ubuf[HOWMANY+1]; /* one extra for terminating '\0' */ + int ulen; char *token; - register struct names *p; + struct names *p; + + char *code = NULL; + char *code_mime = NULL; + char *type = NULL; + char *subtype = NULL; + char *subtype_mime = NULL; + + int has_escapes = 0; + int has_backspace = 0; + + int n_crlf = 0; + int n_lf = 0; + int n_cr = 0; + int n_nel = 0; + + int last_line_end = -1; + int has_long_lines = 0; /* * Do the tar test first, because if the first file in the tar @@ -69,58 +107,582 @@ int nbytes; /* size actually read */ return 1; } + /* Undo the NUL-termination kindly 
provided by process() */ + + while (nbytes > 0 && buf[nbytes - 1] == '\0') + nbytes--; + + /* + * Then try to determine whether it's any character code we can + * identify. Each of these tests, if it succeeds, will leave + * the text converted into one-unichar-per-character Unicode in + * ubuf, and the number of characters converted in ulen. + */ + if (looks_ascii(buf, nbytes, ubuf, &ulen)) { + code = "ASCII"; + code_mime = "us-ascii"; + type = "text"; + } else if (looks_utf8(buf, nbytes, ubuf, &ulen)) { + code = "UTF-8 Unicode"; + code_mime = "utf-8"; + type = "text"; + } else if ((i = looks_unicode(buf, nbytes, ubuf, &ulen))) { + if (i == 1) + code = "Little-endian UTF-16 Unicode"; + else + code = "Big-endian UTF-16 Unicode"; + + type = "character data"; + code_mime = "utf-16"; /* is this defined? */ + } else if (looks_latin1(buf, nbytes, ubuf, &ulen)) { + code = "ISO-8859"; + type = "text"; + code_mime = "iso-8859-1"; + } else if (looks_extended(buf, nbytes, ubuf, &ulen)) { + code = "Non-ISO extended-ASCII"; + type = "text"; + code_mime = "unknown"; + } else { + from_ebcdic(buf, nbytes, nbuf); + + if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) { + code = "EBCDIC"; + type = "character data"; + code_mime = "ebcdic"; + } else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) { + code = "International EBCDIC"; + type = "character data"; + code_mime = "ebcdic"; + } else { + return 0; /* doesn't look like text at all */ + } + } + /* * for troff, look for . + letter + letter or .\"; * this must be done to disambiguate tar archives' ./file * and other trash from real troff input. + * + * I believe Plan 9 troff allows non-ASCII characters in the names + * of macros, so this test might possibly fail on such a file. 
*/ - if (*buf == '.') { - unsigned char *tp = buf + 1; + if (*ubuf == '.') { + unichar *tp = ubuf + 1; - while (isascii(*tp) && isspace(*tp)) + while (ISSPC(*tp)) ++tp; /* skip leading whitespace */ - if ((isascii(*tp) && (isalnum(*tp) || *tp=='\\') && - isascii(tp[1]) && (isalnum(tp[1]) || tp[1] == '"'))) { - ckfputs(iflag ? "text/troff" - : "troff or preprocessor input text", stdout); - return 1; + if ((tp[0] == '\\' && tp[1] == '\"') || + (isascii(tp[0]) && isalnum(tp[0]) && + isascii(tp[1]) && isalnum(tp[1]) && + ISSPC(tp[2]))) { + subtype_mime = "text/troff"; + subtype = "troff or preprocessor input"; + goto subtype_identified; } } - if ((*buf == 'c' || *buf == 'C') && - isascii(buf[1]) && isspace(buf[1])) { - ckfputs(iflag ? "text/fortran" : "fortran program text", stdout); + + if ((*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) { + subtype_mime = "text/fortran"; + subtype = "fortran program"; + goto subtype_identified; + } + + /* look for tokens from names.h - this is expensive! */ + + i = 0; + while (i < ulen) { + int end; + + /* + * skip past any leading space + */ + while (i < ulen && ISSPC(ubuf[i])) + i++; + if (i >= ulen) + break; + + /* + * find the next whitespace + */ + for (end = i + 1; end < nbytes; end++) + if (ISSPC(ubuf[end])) + break; + + /* + * compare the word thus isolated against the token list + */ + for (p = names; p < names + NNAMES; p++) { + if (ascmatch(p->name, ubuf + i, end - i)) { + subtype = types[p->type].human; + subtype_mime = types[p->type].mime; + goto subtype_identified; + } + } + + i = end; + } + +subtype_identified: + + /* + * Now try to discover other details about the file. 
+ */ + for (i = 0; i < ulen; i++) { + if (i > last_line_end + MAXLINELEN) + has_long_lines = 1; + + if (ubuf[i] == '\033') + has_escapes = 1; + if (ubuf[i] == '\b') + has_backspace = 1; + + if (ubuf[i] == '\r' && (i + 1 < ulen && ubuf[i + 1] == '\n')) { + n_crlf++; + last_line_end = i; + } + if (ubuf[i] == '\r' && (i + 1 >= ulen || ubuf[i + 1] != '\n')) { + n_cr++; + last_line_end = i; + } + if (ubuf[i] == '\n' && (i - 1 < 0 || ubuf[i - 1] != '\r')) { + n_lf++; + last_line_end = i; + } + if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */ + n_nel++; + last_line_end = i; + } + } + + if (iflag) { + if (subtype_mime) + ckfputs(subtype_mime, stdout); + else + ckfputs("text/plain", stdout); + + if (code_mime) { + ckfputs("; charset=", stdout); + ckfputs(code_mime, stdout); + } + } else { + ckfputs(code, stdout); + + if (subtype) { + ckfputs(" ", stdout); + ckfputs(subtype, stdout); + } + + ckfputs(" ", stdout); + ckfputs(type, stdout); + + if (has_long_lines) + ckfputs(", with very long lines", stdout); + + /* + * Only report line terminators if we find one other than LF, + * or if we find none at all. 
+ */ + if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) || + (n_crlf != 0 || n_cr != 0 || n_nel != 0)) { + ckfputs(", with", stdout); + + if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) + ckfputs(" no", stdout); + else { + if (n_crlf) { + ckfputs(" CRLF", stdout); + if (n_cr || n_lf || n_nel) + ckfputs(",", stdout); + } + if (n_cr) { + ckfputs(" CR", stdout); + if (n_lf || n_nel) + ckfputs(",", stdout); + } + if (n_lf) { + ckfputs(" LF", stdout); + if (n_nel) + ckfputs(",", stdout); + } + if (n_nel) + ckfputs(" NEL", stdout); + } + + ckfputs(" line terminators", stdout); + } + + if (has_escapes) + ckfputs(", with escape sequences", stdout); + if (has_backspace) + ckfputs(", with overstriking", stdout); + } + + return 1; +} + +static int +ascmatch(s, us, ulen) + const unsigned char *s; + const unichar *us; + int ulen; +{ + size_t i; + + for (i = 0; i < ulen; i++) { + if (s[i] != us[i]) + return 0; + } + + if (s[i]) + return 0; + else return 1; +} + +/* + * This table reflects a particular philosophy about what constitutes + * "text," and there is room for disagreement about it. + * + * Version 3.31 of the file command considered a file to be ASCII if + * each of its characters was approved by either the isascii() or + * isalpha() function. On most systems, this would mean that any + * file consisting only of characters in the range 0x00 ... 0x7F + * would be called ASCII text, but many systems might reasonably + * consider some characters outside this range to be alphabetic, + * so the file command would call such characters ASCII. It might + * have been more accurate to call this "considered textual on the + * local system" than "ASCII." + * + * It considered a file to be "International language text" if each + * of its characters was either an ASCII printing character (according + * to the real ASCII standard, not the above test), a character in + * the range 0x80 ... 
0xFF, or one of the following control characters: + * backspace, tab, line feed, vertical tab, form feed, carriage return, + * escape. No attempt was made to determine the language in which files + * of this type were written. + * + * + * The table below considers a file to be ASCII if all of its characters + * are either ASCII printing characters (again, according to the X3.4 + * standard, not isascii()) or any of the following controls: bell, + * backspace, tab, line feed, form feed, carriage return, esc, nextline. + * + * I include bell because some programs (particularly shell scripts) + * use it literally, even though it is rare in normal text. I exclude + * vertical tab because it never seems to be used in real text. I also + * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85), + * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline + * character to. It might be more appropriate to include it in the 8859 + * set instead of the ASCII set, but it's got to be included in *something* + * we recognize or EBCDIC files aren't going to be considered textual. + * Some old Unix source files use SO/SI (^N/^O) to shift between Greek + * and Latin characters, so these should possibly be allowed. But they + * make a real mess on VT100-style displays if they're not paired properly, + * so we are probably better off not calling them text. + * + * A file is considered to be ISO-8859 text if its characters are all + * either ASCII, according to the above definition, or printing characters + * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF. + * + * Finally, a file is considered to be international text from some other + * character code if its characters are all either ISO-8859 (according to + * the above definition) or characters in the range 0x80 ... 0x9F, which + * ISO-8859 considers to be control characters but the IBM PC and Macintosh + * consider to be printing characters. 
+ */ + +#define F 0 /* character never appears in text */ +#define T 1 /* character appears in plain ASCII text */ +#define I 2 /* character appears in ISO-8859 text */ +#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ + +static char text_chars[256] = { + /* BEL BS HT LF FF CR */ + F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ + /* ESC */ + F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ + /* NEL */ + X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ +}; + +static int +looks_ascii(buf, nbytes, ubuf, ulen) + const unsigned char *buf; + int nbytes; + unichar *ubuf; + int *ulen; +{ + int i; + + *ulen = 0; + + for (i = 0; i < nbytes; i++) { + int t = text_chars[buf[i]]; + + if (t != T) + return 0; + + ubuf[(*ulen)++] = buf[i]; } + return 1; +} + +static int +looks_latin1(buf, nbytes, ubuf, ulen) + const unsigned char *buf; + int nbytes; + unichar *ubuf; + int *ulen; +{ + int i; + + *ulen = 0; - /* Make sure we are dealing with ascii text before looking for tokens */ for (i = 0; i < nbytes; i++) { - if (!isascii(buf[i]) && !isalpha(buf[i])) - return 0; /* not all ASCII */ + int t = text_chars[buf[i]]; + + if (t != T && t != I) + 
return 0; + + ubuf[(*ulen)++] = buf[i]; } - /* look for tokens from names.h - this is expensive! */ - /* make a copy of the buffer here because strtok() will destroy it */ - s = (unsigned char*) memcpy(nbuf, buf, nbytes); - s[nbytes] = '\0'; - has_escapes = (memchr(s, '\033', nbytes) != NULL); - while ((token = strtok((char *) s, " \t\n\r\f")) != NULL) { - s = NULL; /* make strtok() keep on tokin' */ - for (p = names; p < names + NNAMES; p++) { - if (STREQ(p->name, token)) { - ckfputs(iflag ? types[p->type].mime : types[p->type].human, stdout); - if (has_escapes) - ckfputs(" (with escape sequences)", - stdout); - return 1; + return 1; +} + +static int +looks_extended(buf, nbytes, ubuf, ulen) + const unsigned char *buf; + int nbytes; + unichar *ubuf; + int *ulen; +{ + int i; + + *ulen = 0; + + for (i = 0; i < nbytes; i++) { + int t = text_chars[buf[i]]; + + if (t != T && t != I && t != X) + return 0; + + ubuf[(*ulen)++] = buf[i]; + } + + return 1; +} + +int +looks_utf8(buf, nbytes, ubuf, ulen) + const unsigned char *buf; + int nbytes; + unichar *ubuf; + int *ulen; +{ + int i, n; + unichar c; + int gotone = 0; + + *ulen = 0; + + for (i = 0; i < nbytes; i++) { + if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ + /* + * Even if the whole file is valid UTF-8 sequences, + * still reject it if it uses weird control characters. 
+ */ + + if (text_chars[buf[i]] != T) + return 0; + + ubuf[(*ulen)++] = buf[i]; + } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */ + return 0; + } else { /* 11xxxxxx begins UTF-8 */ + int following; + + if ((buf[i] & 0x20) == 0) { /* 110xxxxx */ + c = buf[i] & 0x1f; + following = 1; + } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */ + c = buf[i] & 0x0f; + following = 2; + } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */ + c = buf[i] & 0x07; + following = 3; + } else if ((buf[i] & 0x04) == 0) { /* 111110xx */ + c = buf[i] & 0x03; + following = 4; + } else if ((buf[i] & 0x02) == 0) { /* 1111110x */ + c = buf[i] & 0x01; + following = 5; + } else + return 0; + + for (n = 0; n < following; n++) { + i++; + if (i >= nbytes) + goto done; + + if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) + return 0; + + c = (c << 6) + (buf[i] & 0x3f); } + + ubuf[(*ulen)++] = c; + gotone = 1; } } +done: + return gotone; /* don't claim it's UTF-8 if it's all 7-bit */ +} + +static int +looks_unicode(buf, nbytes, ubuf, ulen) + const unsigned char *buf; + int nbytes; + unichar *ubuf; + int *ulen; +{ + int bigend; + int i; + + if (nbytes < 2) + return 0; + + if (buf[0] == 0xff && buf[1] == 0xfe) + bigend = 0; + else if (buf[0] == 0xfe && buf[1] == 0xff) + bigend = 1; + else + return 0; - /* all else fails, but it is ASCII... */ - ckfputs(iflag ? 
"text/plain, ASCII" : "ASCII text", stdout); - if (has_escapes) { - ckfputs(" (with escape sequences)", stdout); + *ulen = 0; + + for (i = 2; i + 1 < nbytes; i += 2) { + /* XXX fix to properly handle chars > 65536 */ + + if (bigend) + ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i]; + else + ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1]; + + if (ubuf[*ulen - 1] == 0xfffe) + return 0; + if (ubuf[*ulen - 1] < 128 && text_chars[ubuf[*ulen - 1]] != T) + return 0; } + return 1; } + +#undef F +#undef T +#undef I +#undef X + +/* + * This table maps each EBCDIC character to an (8-bit extended) ASCII + * character, as specified in the rationale for the dd(1) command in + * draft 11.2 (September, 1991) of the POSIX P1003.2 standard. + * + * Unfortunately it does not seem to correspond exactly to any of the + * five variants of EBCDIC documented in IBM's _Enterprise Systems + * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh + * Edition, July, 1999, pp. I-1 - I-4. + * + * Fortunately, though, all versions of EBCDIC, including this one, agree + * on most of the printing characters that also appear in (7-bit) ASCII. + * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all. + * + * Fortunately too, there is general agreement that codes 0x00 through + * 0x3F represent control characters, 0x41 a nonbreaking space, and the + * remainder printing characters. + * + * This is sufficient to allow us to identify EBCDIC text and to distinguish + * between old-style and internationalized examples of text. 
+ */ + +unsigned char ebcdic_to_ascii[] = { + 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, +128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, +144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, +' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|', +'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~', +'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?', +186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"', +195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201, +202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208, +209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215, +216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231, +'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237, +'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243, +'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249, +'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255 +}; + +/* + * The following EBCDIC-to-ASCII table may relate more closely to reality, + * or at least to modern reality. It comes from + * + * http://ftp.s390.ibm.com/products/oe/bpxqp9.html + * + * and maps the characters of EBCDIC code page 1047 (the code used for + * Unix-derived software on IBM's 390 systems) to the corresponding + * characters from ISO 8859-1. + * + * If this table is used instead of the above one, some of the special + * cases for the NEL character can be taken out of the code. 
+ */ + +unsigned char ebcdic_1047_to_8859[] = { +0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, +0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, +0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, +0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, +0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, +0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, +0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, +0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, +0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, +0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, +0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, +0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, +0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, +0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F +}; + +/* + * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII. 
+ */ +static void +from_ebcdic(buf, nbytes, out) + const unsigned char *buf; + int nbytes; + unsigned char *out; +{ + int i; + + for (i = 0; i < nbytes; i++) { + out[i] = ebcdic_to_ascii[buf[i]]; + } +} diff --git a/src/compress.c b/src/compress.c index 514e9db8..5f18d828 100644 --- a/src/compress.c +++ b/src/compress.c @@ -16,23 +16,24 @@ #include #endif #ifndef lint -FILE_RCSID("@(#)$Id: compress.c,v 1.16 2000/05/14 22:58:53 christos Exp $") +FILE_RCSID("@(#)$Id: compress.c,v 1.17 2000/08/05 17:36:47 christos Exp $") #endif static struct { - const char *magic; - int maglen; - const char *const argv[3]; - int silent; + const char *magic; + int maglen; + const char *const argv[3]; + int silent; } compr[] = { - { "\037\235", 2, { "uncompress", "-c", NULL }, 0 }, /* compressed */ - { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */ - { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */ - { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */ - /* the standard pack utilities do not accept standard input */ - { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */ - { "BZh", 3, { "bzip2", "-d", NULL }, 1 }, /* bzip2-ed */ + { "\037\235", 2, { "uncompress", "-c", NULL }, 0 }, /* compressed */ + { "\037\235", 2, { "gzip", "-cdq", NULL }, 1 }, /* compressed */ + { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */ + { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */ + { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */ + /* the standard pack utilities do not accept standard input */ + { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */ + { "BZh", 3, { "bzip2", "-d", NULL }, 1 }, /* bzip2-ed */ }; static int ncompr = sizeof(compr) / sizeof(compr[0]); @@ -42,8 +43,8 @@ static int uncompress __P((int, const unsigned char *, unsigned char **, int)); int zmagic(buf, nbytes) -unsigned char *buf; -int nbytes; + unsigned char *buf; + int nbytes; { unsigned char *newbuf; int newsize; @@ -52,30 +53,30 @@ int 
nbytes; for (i = 0; i < ncompr; i++) { if (nbytes < compr[i].maglen) continue; - if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0) - break; + if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 && + (newsize = uncompress(i, buf, &newbuf, nbytes)) != 0) { + tryit(newbuf, newsize, 1); + free(newbuf); + printf(" ("); + tryit(buf, nbytes, 0); + printf(")"); + return 1; + } } if (i == ncompr) return 0; - if ((newsize = uncompress(i, buf, &newbuf, nbytes)) != 0) { - tryit(newbuf, newsize, 1); - free(newbuf); - printf(" ("); - tryit(buf, nbytes, 0); - printf(")"); - } return 1; } static int uncompress(method, old, newch, n) -int method; -const unsigned char *old; -unsigned char **newch; -int n; + int method; + const unsigned char *old; + unsigned char **newch; + int n; { int fdin[2], fdout[2]; @@ -95,12 +96,11 @@ int n; (void) close(fdout[0]); (void) close(fdout[1]); if (compr[method].silent) - (void) close(2); + (void) close(2); execvp(compr[method].argv[0], (char *const *)compr[method].argv); - error("could not execute `%s' (%s).\n", - compr[method].argv[0], strerror(errno)); + exit(1); /*NOTREACHED*/ case -1: error("could not fork (%s).\n", strerror(errno)); @@ -109,19 +109,14 @@ int n; default: /* parent */ (void) close(fdin[0]); (void) close(fdout[1]); - if (write(fdin[1], old, n) != n) { - error("write failed (%s).\n", strerror(errno)); - /*NOTREACHED*/ - } + if (write(fdin[1], old, n) != n) + return 0; (void) close(fdin[1]); - if ((*newch = (unsigned char *) malloc(n)) == NULL) { - error("out of memory.\n"); - /*NOTREACHED*/ - } + if ((*newch = (unsigned char *) malloc(n)) == NULL) + return 0; if ((n = read(fdout[0], *newch, n)) <= 0) { free(*newch); - error("read failed (%s).\n", strerror(errno)); - /*NOTREACHED*/ + return 0; } (void) close(fdout[0]); (void) wait(NULL); diff --git a/src/file.c b/src/file.c index f46110bd..292da410 100644 --- a/src/file.c +++ b/src/file.c @@ -55,7 +55,7 @@ #include "patchlevel.h" #ifndef lint -FILE_RCSID("@(#)$Id: file.c,v 
1.52 2000/05/14 22:58:53 christos Exp $") +FILE_RCSID("@(#)$Id: file.c,v 1.53 2000/08/05 17:36:48 christos Exp $") #endif /* lint */ @@ -226,7 +226,7 @@ main(argc, argv) */ static void unwrap(fn) -char *fn; + char *fn; { char buf[MAXPATHLEN]; FILE *f; @@ -271,29 +271,28 @@ char *fn; */ static int byteconv4(from, same, big_endian) - int from; - int same; - int big_endian; + int from; + int same; + int big_endian; { - if (same) - return from; - else if (big_endian) /* lsb -> msb conversion on msb */ - { - union { - int i; - char c[4]; - } retval, tmpval; - - tmpval.i = from; - retval.c[0] = tmpval.c[3]; - retval.c[1] = tmpval.c[2]; - retval.c[2] = tmpval.c[1]; - retval.c[3] = tmpval.c[0]; - - return retval.i; - } - else - return ntohl(from); /* msb -> lsb conversion on lsb */ + if (same) + return from; + else if (big_endian) { /* lsb -> msb conversion on msb */ + union { + int i; + char c[4]; + } retval, tmpval; + + tmpval.i = from; + retval.c[0] = tmpval.c[3]; + retval.c[1] = tmpval.c[2]; + retval.c[2] = tmpval.c[1]; + retval.c[3] = tmpval.c[0]; + + return retval.i; + } + else + return ntohl(from); /* msb -> lsb conversion on lsb */ } /* @@ -306,23 +305,22 @@ byteconv2(from, same, big_endian) int same; int big_endian; { - if (same) - return from; - else if (big_endian) /* lsb -> msb conversion on msb */ - { - union { - short s; - char c[2]; - } retval, tmpval; - - tmpval.s = (short) from; - retval.c[0] = tmpval.c[1]; - retval.c[1] = tmpval.c[0]; - - return retval.s; - } - else - return ntohs(from); /* msb -> lsb conversion on lsb */ + if (same) + return from; + else if (big_endian) { /* lsb -> msb conversion on msb */ + union { + short s; + char c[2]; + } retval, tmpval; + + tmpval.s = (short) from; + retval.c[0] = tmpval.c[1]; + retval.c[1] = tmpval.c[0]; + + return retval.s; + } + else + return ntohs(from); /* msb -> lsb conversion on lsb */ } #endif @@ -331,8 +329,8 @@ byteconv2(from, same, big_endian) */ void process(inname, wid) -const char *inname; -int wid; + 
const char *inname; + int wid; { int fd = 0; static const char stdname[] = "standard input"; @@ -355,22 +353,22 @@ int wid; (int) (wid - strlen(inname)), ""); if (inname != stdname) { - /* - * first try judging the file based on its filesystem status - */ - if (fsmagic(inname, &sb) != 0) { - putchar('\n'); - return; - } - - if ((fd = open(inname, O_RDONLY)) < 0) { - /* We can't open it, but we were able to stat it. */ - if (sb.st_mode & 0002) ckfputs("writeable, ", stdout); - if (sb.st_mode & 0111) ckfputs("executable, ", stdout); - ckfprintf(stdout, "can't read `%s' (%s).\n", - inname, strerror(errno)); - return; - } + /* + * first try judging the file based on its filesystem status + */ + if (fsmagic(inname, &sb) != 0) { + putchar('\n'); + return; + } + + if ((fd = open(inname, O_RDONLY)) < 0) { + /* We can't open it, but we were able to stat it. */ + if (sb.st_mode & 0002) ckfputs("writeable, ", stdout); + if (sb.st_mode & 0111) ckfputs("executable, ", stdout); + ckfprintf(stdout, "can't read `%s' (%s).\n", + inname, strerror(errno)); + return; + } } @@ -390,8 +388,17 @@ int wid; } #ifdef BUILTIN_ELF - if (match == 's' && nbytes > 5) + if (match == 's' && nbytes > 5) { + /* + * We matched something in the file, so this *might* + * be an ELF file, and the file is at least 5 bytes long, + * so if it's an ELF file it has at least one byte + * past the ELF magic number - try extracting information + * from the ELF headers that can't easily be extracted + * with rules in the magic file. 
+ */ tryelf(fd, buf, nbytes); + } #endif if (inname != stdname) { @@ -424,8 +431,8 @@ int wid; int tryit(buf, nb, zflag) -unsigned char *buf; -int nb, zflag; + unsigned char *buf; + int nb, zflag; { /* try compression stuff */ if (zflag && zmagic(buf, nb)) @@ -439,10 +446,6 @@ int nb, zflag; if (ascmagic(buf, nb)) return 'a'; - /* see if it's international language text */ - if (internatmagic(buf, nb)) - return 'i'; - /* abandon hope, all ye who remain here */ ckfputs("data", stdout); return '\0'; diff --git a/src/fsmagic.c b/src/fsmagic.c index b072dc56..d6a900e1 100644 --- a/src/fsmagic.c +++ b/src/fsmagic.c @@ -54,13 +54,13 @@ #undef HAVE_MAJOR #ifndef lint -FILE_RCSID("@(#)$Id: fsmagic.c,v 1.32 2000/05/14 22:58:54 christos Exp $") +FILE_RCSID("@(#)$Id: fsmagic.c,v 1.33 2000/08/05 17:36:48 christos Exp $") #endif /* lint */ int fsmagic(fn, sb) -const char *fn; -struct stat *sb; + const char *fn; + struct stat *sb; { int ret = 0; @@ -168,7 +168,7 @@ struct stat *sb; case S_IFLNK: { char buf[BUFSIZ+4]; - register int nch; + int nch; struct stat tstatbuf; if ((nch = readlink(fn, buf, BUFSIZ-1)) <= 0) { @@ -249,4 +249,3 @@ struct stat *sb; } return 0; } - diff --git a/src/internat.c b/src/internat.c deleted file mode 100644 index 26411434..00000000 --- a/src/internat.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return. 
Joerg Wunsch - * ---------------------------------------------------------------------------- - */ - -#include "file.h" - -#include -#include - -#ifndef lint -FILE_RCSID("@(#)$Id: internat.c,v 1.4 1998/06/27 13:23:39 christos Exp $") -#endif - -#define F 0 -#define T 1 - -/* - * List of characters that look "reasonable" in international - * language texts. That's almost all characters :), except a - * few in the control range of ASCII (all the known international - * charactersets share the bottom half with ASCII). - */ -static char maybe_internat[256] = { - F, F, F, F, F, F, F, F, T, T, T, T, T, T, F, F, /* 0x0X */ - F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x8X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x9X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xaX */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xbX */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xcX */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xdX */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xeX */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T /* 0xfX */ -}; - -/* Maximal length of a line we consider "reasonable". */ -#define MAXLINELEN 300 - -int -internatmagic(buf, nbytes) - unsigned char *buf; - int nbytes; -{ - int i; - unsigned char *cp; - - nbytes--; - - /* First, look whether there are "unreasonable" characters. */ - for (i = 0, cp = buf; i < nbytes; i++, cp++) - if (!maybe_internat[*cp]) - return 0; - - /* - * Now, look whether the file consists of lines of - * "reasonable" length. 
- */ - - for (i = 0; i < nbytes;) { - cp = (unsigned char *) memchr(buf, '\n', nbytes - i); - if (cp == NULL) { - /* Don't fail if we hit the end of buffer. */ - if (i + MAXLINELEN >= nbytes) - break; - else - return 0; - } - if (cp - buf > MAXLINELEN) - return 0; - i += (cp - buf + 1); - buf = cp + 1; - } - ckfputs("International language text", stdout); - return 1; -} diff --git a/src/is_tar.c b/src/is_tar.c index f24212e7..6c5b61ce 100644 --- a/src/is_tar.c +++ b/src/is_tar.c @@ -5,7 +5,7 @@ * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). * * @(#)list.c 1.18 9/23/86 Public Domain - gnu - * $Id: is_tar.c,v 1.12 1999/02/14 17:16:08 christos Exp $ + * $Id: is_tar.c,v 1.13 2000/08/05 17:36:48 christos Exp $ * * Comments changed and some code/comments reformatted * for file command by Ian Darwin. @@ -18,7 +18,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$Id: is_tar.c,v 1.12 1999/02/14 17:16:08 christos Exp $") +FILE_RCSID("@(#)$Id: is_tar.c,v 1.13 2000/08/05 17:36:48 christos Exp $") #endif #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) @@ -33,13 +33,13 @@ static int from_oct __P((int, char *)); /* Decode octal number */ */ int is_tar(buf, nbytes) -unsigned char *buf; -int nbytes; + unsigned char *buf; + int nbytes; { - register union record *header = (union record *)buf; - register int i; - register int sum, recsum; - register char *p; + union record *header = (union record *)buf; + int i; + int sum, recsum; + char *p; if (nbytes < sizeof(union record)) return 0; @@ -78,12 +78,12 @@ int nbytes; */ static int from_oct(digs, where) - register int digs; - register char *where; + int digs; + char *where; { - register int value; + int value; - while (isspace((unsigned char)*where)) { /* Skip spaces */ + while (isspace((unsigned char)*where)) { /* Skip spaces */ where++; if (--digs <= 0) return -1; /* All blank field */ diff --git a/src/names.h b/src/names.h index 385790e7..856632a9 100644 --- a/src/names.h +++ b/src/names.h @@ 
-10,7 +10,7 @@ * * See LEGAL.NOTICE * - * $Id: names.h,v 1.17 2000/04/11 02:32:35 christos Exp $ + * $Id: names.h,v 1.18 2000/08/05 17:36:49 christos Exp $ */ /* @@ -32,24 +32,26 @@ #define L_JAVA 10 /* Java code */ #define L_HTML 11 /* HTML */ #define L_BCPL 12 /* BCPL */ +#define L_M4 13 /* M4 */ static const struct { char *human; char *mime; } types[] = { - { "C program text", "text/x-c", }, - { "C++ program text", "text/x-c++" }, - { "FORTRAN program text", "text/x-fortran" }, - { "make commands text", "text/x-makefile" }, - { "PL/1 program text", "text/x-pl1" }, - { "assembler program text", "text/x-asm" }, - { "English text", "text/plain, English" }, - { "Pascal program text", "text/x-pascal" }, - { "mail text", "text/x-mail" }, - { "news text", "text/x-news" }, - { "Java program text", "text/x-java" }, - { "HTML document text", "text/html", }, - { "BCPL program text", "text/x-bcpl" }, + { "C program", "text/x-c", }, + { "C++ program", "text/x-c++" }, + { "FORTRAN program", "text/x-fortran" }, + { "make commands", "text/x-makefile" }, + { "PL/1 program", "text/x-pl1" }, + { "assembler program", "text/x-asm" }, + { "English", "text/plain, English" }, + { "Pascal program", "text/x-pascal" }, + { "mail", "text/x-mail" }, + { "news", "text/x-news" }, + { "Java program", "text/x-java" }, + { "HTML document", "text/html", }, + { "BCPL program", "text/x-bcpl" }, + { "M4 macro language pre-processor", "text/x-m4" }, { "can't happen error on names.h/types", "error/x-error" }, { 0, 0} }; @@ -96,6 +98,7 @@ static struct names { } names[] = { /* These must be sorted by eye for optimal hit rate */ /* Add to this list only after substantial meditation */ + {"dnl", L_M4}, {"import", L_JAVA}, {"\"libhdr\"", L_BCPL}, {"\"LIBHDR\"", L_BCPL}, diff --git a/src/print.c b/src/print.c index 38dfcb3b..7992f5c5 100644 --- a/src/print.c +++ b/src/print.c @@ -41,14 +41,14 @@ #include #ifndef lint -FILE_RCSID("@(#)$Id: print.c,v 1.30 1999/11/28 20:02:29 christos Exp $") 
+FILE_RCSID("@(#)$Id: print.c,v 1.31 2000/08/05 17:36:49 christos Exp $") #endif /* lint */ #define SZOF(a) (sizeof(a) / sizeof(a[0])) void mdump(m) -struct magic *m; + struct magic *m; { static const char *typ[] = { "invalid", "byte", "short", "invalid", "long", "string", "date", "beshort", @@ -86,37 +86,37 @@ struct magic *m; (void) fprintf(stderr, ",%c", m->reln); if (m->reln != 'x') { - switch (m->type) { - case BYTE: - case SHORT: - case LONG: - case LESHORT: - case LELONG: - case BESHORT: - case BELONG: - (void) fprintf(stderr, "%d", m->value.l); - break; - case STRING: - showstr(stderr, m->value.s, -1); - break; - case DATE: - case LEDATE: - case BEDATE: - { - time_t t = m->value.l; - char *rt, *pp = ctime(&t); + switch (m->type) { + case BYTE: + case SHORT: + case LONG: + case LESHORT: + case LELONG: + case BESHORT: + case BELONG: + (void) fprintf(stderr, "%d", m->value.l); + break; + case STRING: + showstr(stderr, m->value.s, -1); + break; + case DATE: + case LEDATE: + case BEDATE: + { + time_t t = m->value.l; + char *rt, *pp = ctime(&t); - if ((rt = strchr(pp, '\n')) != NULL) - *rt = '\0'; - (void) fprintf(stderr, "%s,", pp); - if (rt) - *rt = '\n'; - } - break; - default: - (void) fputs("*bad*", stderr); - break; - } + if ((rt = strchr(pp, '\n')) != NULL) + *rt = '\0'; + (void) fprintf(stderr, "%s,", pp); + if (rt) + *rt = '\n'; + } + break; + default: + (void) fputs("*bad*", stderr); + break; + } } (void) fprintf(stderr, ",\"%s\"]\n", m->desc); } @@ -127,8 +127,8 @@ struct magic *m; */ void ckfputs(str, fil) - const char *str; - FILE *fil; + const char *str; + FILE *fil; { if (fputs(str,fil) == EOF) error("write failed.\n"); diff --git a/src/readelf.c b/src/readelf.c index 61c9a887..4d013c70 100644 --- a/src/readelf.c +++ b/src/readelf.c @@ -14,7 +14,7 @@ #include "readelf.h" #ifndef lint -FILE_RCSID("@(#)$Id: readelf.c,v 1.13 2000/07/28 23:03:08 christos Exp $") +FILE_RCSID("@(#)$Id: readelf.c,v 1.14 2000/08/05 17:36:49 christos Exp $") #endif #ifdef 
ELFCORE @@ -195,8 +195,10 @@ dophn_exec(class, swap, fd, off, num, size) #ifdef ELFCORE size_t prpsoffsets32[] = { + 8, /* FreeBSD */ 84, /* SunOS 5.x */ - 32, /* Linux */ + 32, /* Linux (I forget which kernel version) */ + 28, /* Linux 2.0.36 */ }; size_t prpsoffsets64[] = { @@ -210,14 +212,23 @@ size_t prpsoffsets64[] = { /* * Look through the program headers of an executable image, searching - * for a PT_NOTE section of type NT_PRPSINFO, with a name "CORE"; if one - * is found, try looking in various places in its contents for a 16-character - * string containing only printable characters - if found, that string - * should be the name of the program that dropped core. - * Note: right after that 16-character string is, at least in SunOS 5.x - * (and possibly other SVR4-flavored systems) and Linux, a longer string - * (80 characters, in 5.x, probably other SVR4-flavored systems, and Linux) - * containing the start of the command line for that program. + * for a PT_NOTE section of type NT_PRPSINFO, with a name "CORE" or + * "FreeBSD"; if one is found, try looking in various places in its + * contents for a 16-character string containing only printable + * characters - if found, that string should be the name of the program + * that dropped core. Note: right after that 16-character string is, + * at least in SunOS 5.x (and possibly other SVR4-flavored systems) and + * Linux, a longer string (80 characters, in 5.x, probably other + * SVR4-flavored systems, and Linux) containing the start of the + * command line for that program. + * + * The signal number probably appears in a section of type NT_PRSTATUS, + * but that's also rather OS-dependent, in ways that are harder to + * dissect with heuristics, so I'm not bothering with the signal number. 
+ * (I suppose the signal number could be of interest in situations where + * you don't have the binary of the program that dropped core; if you + * *do* have that binary, the debugger will probably tell you what + * signal it was.) */ static void dophn_core(class, swap, fd, off, num, size) @@ -232,12 +243,16 @@ dophn_core(class, swap, fd, off, num, size) Elf32_Nhdr *nh32; Elf64_Phdr ph64; Elf64_Nhdr *nh64; - size_t offset, noffset, reloffset; + size_t offset, nameoffset, noffset, reloffset; unsigned char c; int i, j; char nbuf[BUFSIZ]; int bufsize; + int is_freebsd; + /* + * Loop through all the program headers. + */ for ( ; num; num--) { if (lseek(fd, off, SEEK_SET) == -1) error("lseek failed (%s).\n", strerror(errno)); @@ -246,6 +261,11 @@ dophn_core(class, swap, fd, off, num, size) off += size; if (ph_type != PT_NOTE) continue; + + /* + * This is a PT_NOTE section; loop through all the notes + * in the section. + */ if (lseek(fd, (off_t) ph_offset, SEEK_SET) == -1) error("lseek failed (%s).\n", strerror(errno)); bufsize = read(fd, nbuf, BUFSIZ); @@ -262,19 +282,8 @@ dophn_core(class, swap, fd, off, num, size) offset += nh_size; /* - * If this note isn't an NT_PRPSINFO note, it's - * not what we're looking for. - */ - if (nh_type != NT_PRPSINFO) { - offset += nh_namesz; - offset = ((offset + 3)/4)*4; - offset += nh_descsz; - offset = ((offset + 3)/4)*4; - continue; - } - - /* - * Make sure this note has the name "CORE". + * Check whether this note has the name "CORE" or + * "FreeBSD". */ if (offset + nh_namesz >= bufsize) { /* @@ -282,62 +291,109 @@ dophn_core(class, swap, fd, off, num, size) */ break; } - if (nh_namesz != 5 - || strcmp(&nbuf[offset], "CORE") != 0) - continue; + + nameoffset = offset; offset += nh_namesz; offset = ((offset + 3)/4)*4; /* - * Extract the program name. We assume it to be - * 16 characters (that's what it is in SunOS 5.x - * and Linux). 
- * - * Unfortunately, it's at a different offset in - * SunOS 5.x and Linux, so try multiple offsets. - * If the characters aren't all printable, reject - * it. + * Sigh. The 2.0.36 kernel in Debian 2.1, at + * least, doesn't correctly implement name + * sections, in core dumps, as specified by + * the "Program Linking" section of "UNIX(R) System + * V Release 4 Programmer's Guide: ANSI C and + * Programming Support Tools", because my copy + * clearly says "The first 'namesz' bytes in 'name' + * contain a *null-terminated* [emphasis mine] + * character representation of the entry's owner + * or originator", but the 2.0.36 kernel code + * doesn't include the terminating null in the + * name.... */ - for (i = 0; i < NOFFSETS; i++) { - reloffset = prpsoffsets(i); - noffset = offset + reloffset; - for (j = 0; j < 16; - j++, noffset++, reloffset++) { - /* - * Make sure we're not past the end - * of the buffer; if we are, just - * give up. - */ - if (noffset >= bufsize) - return; + if ((nh_namesz == 4 && + strncmp(&nbuf[nameoffset], "CORE", 4) == 0) || + (nh_namesz == 5 && + strcmp(&nbuf[nameoffset], "CORE") == 0)) + is_freebsd = 0; + else if ((nh_namesz == 8 && + strcmp(&nbuf[nameoffset], "FreeBSD") == 0)) + is_freebsd = 1; + else + continue; + if (nh_type == NT_PRPSINFO) { + /* + * Extract the program name. We assume + * it to be 16 characters (that's what it + * is in SunOS 5.x and Linux). + * + * Unfortunately, it's at a different offset + * in varous OSes, so try multiple offsets. + * If the characters aren't all printable, + * reject it. + */ + for (i = 0; i < NOFFSETS; i++) { + reloffset = prpsoffsets(i); + noffset = offset + reloffset; + for (j = 0; j < 16; + j++, noffset++, reloffset++) { + /* + * Make sure we're not past + * the end of the buffer; if + * we are, just give up. + */ + if (noffset >= bufsize) + goto tryanother; + + /* + * Make sure we're not past + * the end of the contents; + * if we are, this obviously + * isn't the right offset. 
+ */ + if (reloffset >= nh_descsz) + goto tryanother; + + c = nbuf[noffset]; + if (c == '\0') { + /* + * A '\0' at the + * beginning is + * obviously wrong. + * Any other '\0' + * means we're done. + */ + if (j == 0) + goto tryanother; + else + break; + } else { + /* + * A nonprintable + * character is also + * wrong. + */ + if (!isprint(c)) + goto tryanother; + } + } /* - * Make sure we're not past the - * end of the contents; if we - * are, this obviously isn't - * the right offset. + * Well, that worked. */ - if (reloffset >= nh_descsz) - goto tryanother; + printf(", from '%.16s'", + &nbuf[offset + prpsoffsets(i)]); + break; - c = nbuf[noffset]; - if (c != '\0' && !isprint(c)) - goto tryanother; + tryanother: + ; } - - /* - * Well, that worked. - */ - printf(", from '%.16s'", - &nbuf[offset + prpsoffsets(i)]); - return; - - tryanother: - ; + break; } offset += nh_descsz; offset = ((offset + 3)/4)*4; } + out: + ; } } #endif diff --git a/src/softmagic.c b/src/softmagic.c index 6f0328cb..2fb4c4a5 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$Id: softmagic.c,v 1.41 2000/05/14 17:58:36 christos Exp $") +FILE_RCSID("@(#)$Id: softmagic.c,v 1.42 2000/08/05 17:36:49 christos Exp $") #endif /* lint */ static int match __P((unsigned char *, int)); @@ -54,8 +54,8 @@ static int mconvert __P((union VALUETYPE *, struct magic *)); /*ARGSUSED1*/ /* nbytes passed for regularity, maybe need later */ int softmagic(buf, nbytes) -unsigned char *buf; -int nbytes; + unsigned char *buf; + int nbytes; { if (match(buf, nbytes)) return 1; @@ -92,8 +92,8 @@ int nbytes; */ static int match(s, nbytes) -unsigned char *s; -int nbytes; + unsigned char *s; + int nbytes; { int magindex = 0; int cont_level = 0; @@ -203,8 +203,8 @@ int nbytes; static int32 mprint(p, m) -union VALUETYPE *p; -struct magic *m; + union VALUETYPE *p; + struct magic *m; { char *pp, *rt; uint32 v; @@ -277,8 +277,8 @@ struct magic *m; */ static int 
mconvert(p, m) -union VALUETYPE *p; -struct magic *m; + union VALUETYPE *p; + struct magic *m; { switch (m->type) { case BYTE: @@ -321,9 +321,9 @@ struct magic *m; static void mdebug(offset, str, len) -int32 offset; -char *str; -int len; + int32 offset; + char *str; + int len; { (void) fprintf(stderr, "mget @%d: ", offset); showstr(stderr, (char *) str, len); @@ -333,10 +333,10 @@ int len; static int mget(p, s, m, nbytes) -union VALUETYPE* p; -unsigned char *s; -struct magic *m; -int nbytes; + union VALUETYPE* p; + unsigned char *s; + struct magic *m; + int nbytes; { int32 offset = m->offset; @@ -408,11 +408,11 @@ int nbytes; static int mcheck(p, m) -union VALUETYPE* p; -struct magic *m; + union VALUETYPE* p; + struct magic *m; { - register uint32 l = m->value.l; - register uint32 v; + uint32 l = m->value.l; + uint32 v; int matched; if ( (m->value.s[0] == 'x') && (m->value.s[1] == '\0') ) { @@ -448,9 +448,9 @@ struct magic *m; * but ignoring any nulls. bcmp doesn't give -/+/0 * and isn't universally available anyway. */ - register unsigned char *a = (unsigned char*)m->value.s; - register unsigned char *b = (unsigned char*)p->s; - register int len = m->vallen; + unsigned char *a = (unsigned char*)m->value.s; + unsigned char *b = (unsigned char*)p->s; + int len = m->vallen; l = 0; v = 0; if (0L == m->mask) { /* normal string: do it fast */ -- 2.40.0