From: Christos Zoulas Date: Tue, 11 Apr 2000 02:32:34 +0000 (+0000) Subject: mime changes X-Git-Tag: FILE3_30~5 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=299a91d4b191ad2a713e84c92746ff91599de13d;p=file mime changes --- diff --git a/Makefile.am b/Makefile.am index a20da5ca..f54ad908 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,7 +3,7 @@ AUTOMAKE_OPTIONS = foreign no-dependencies bin_PROGRAMS = file -data_DATA = magic +data_DATA = magic magic.mime MAGIC = @datadir@/magic CPPFLAGS = -DMAGIC='"$(MAGIC)"' @@ -59,7 +59,7 @@ Magdir/magic Magdir/mail.news Magdir/mcrypt Magdir/mime Magdir/mirage Magdir/mki Magdir/mmdf Magdir/modem Magdir/motorola Magdir/msdos Magdir/ncr \ Magdir/netbsd Magdir/news Magdir/octave Magdir/olf Magdir/os2 Magdir/os9 \ Magdir/osf1 Magdir/palm Magdir/pbm Magdir/pdf Magdir/pdp Magdir/pgp Magdir/pkgadd \ -Magdir/plus5 Magdir/printer Magdir/psdbms Magdir/pyramid Magdir/riff \ +Magdir/project Magdir/plus5 Magdir/printer Magdir/psdbms Magdir/pyramid Magdir/riff \ Magdir/rpm Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent \ Magdir/sgi Magdir/sgml Magdir/sniffer Magdir/softquad Magdir/spectrum Magdir/sun \ Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x Magdir/timezone \ diff --git a/Makefile.in b/Makefile.in index c45b012a..5060b133 100644 --- a/Makefile.in +++ b/Makefile.in @@ -70,7 +70,7 @@ AUTOMAKE_OPTIONS = foreign no-dependencies bin_PROGRAMS = file -data_DATA = magic +data_DATA = magic magic.mime MAGIC = @datadir@/magic CPPFLAGS = -DMAGIC='"$(MAGIC)"' @@ -85,7 +85,7 @@ EXTRA_DIST = LEGAL.NOTICE MAINT PORTING Makefile.std magic2mime Localstuff Head CLEANFILES = $(man_MANS) magic -magic_FRAGMENTS = Magdir/adventure Magdir/alliant Magdir/alpha Magdir/amanda Magdir/amigaos Magdir/animation Magdir/apl Magdir/apple Magdir/applix Magdir/archive Magdir/asterix Magdir/att3b Magdir/audio Magdir/blit Magdir/bsdi Magdir/c-lang Magdir/chi Magdir/cisco Magdir/clipper Magdir/commands Magdir/compress Magdir/convex 
Magdir/database Magdir/diamond Magdir/diff Magdir/digital Magdir/dump Magdir/elf Magdir/encore Magdir/filesystems Magdir/flash Magdir/fonts Magdir/frame Magdir/freebsd Magdir/gimp Magdir/gnu Magdir/hp Magdir/ibm370 Magdir/ibm6000 Magdir/iff Magdir/images Magdir/intel Magdir/interleaf Magdir/island Magdir/ispell Magdir/java Magdir/karma Magdir/lecter Magdir/lex Magdir/lif Magdir/linux Magdir/lisp Magdir/mach Magdir/macintosh Magdir/magic Magdir/mail.news Magdir/mime Magdir/mirage Magdir/mkid Magdir/mmdf Magdir/modem Magdir/motorola Magdir/msdos Magdir/ncr Magdir/netbsd Magdir/news Magdir/octave Magdir/olf Magdir/os2 Magdir/os9 Magdir/osf1 Magdir/pbm Magdir/pdf Magdir/pdp Magdir/pgp Magdir/pkgadd Magdir/plus5 Magdir/printer Magdir/psdbms Magdir/pyramid Magdir/riff Magdir/rpm Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent Magdir/sgi Magdir/sgml Magdir/sniffer Magdir/softquad Magdir/sun Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x Magdir/timezone Magdir/troff Magdir/typeset Magdir/unknown Magdir/uuencode Magdir/varied.out Magdir/vax Magdir/vicar Magdir/visx Magdir/vms Magdir/wordperfect Magdir/xenix Magdir/zilog Magdir/zyxel +magic_FRAGMENTS = Magdir/adventure Magdir/alliant Magdir/alpha Magdir/amanda Magdir/amigaos Magdir/animation Magdir/apl Magdir/apple Magdir/applix Magdir/archive Magdir/asterix Magdir/att3b Magdir/audio Magdir/blit Magdir/bsdi Magdir/c-lang Magdir/chi Magdir/cisco Magdir/clipper Magdir/commands Magdir/compress Magdir/convex Magdir/database Magdir/diamond Magdir/diff Magdir/digital Magdir/dump Magdir/elf Magdir/encore Magdir/filesystems Magdir/flash Magdir/fonts Magdir/frame Magdir/freebsd Magdir/gimp Magdir/gnu Magdir/grace Magdir/hp Magdir/ibm370 Magdir/ibm6000 Magdir/iff Magdir/images Magdir/intel Magdir/interleaf Magdir/island Magdir/ispell Magdir/java Magdir/jpeg Magdir/karma Magdir/lecter Magdir/lex Magdir/lif Magdir/linux Magdir/lisp Magdir/mach Magdir/macintosh Magdir/magic Magdir/mail.news Magdir/mcrypt 
Magdir/mime Magdir/mirage Magdir/mkid Magdir/mmdf Magdir/modem Magdir/motorola Magdir/msdos Magdir/ncr Magdir/netbsd Magdir/news Magdir/octave Magdir/olf Magdir/os2 Magdir/os9 Magdir/osf1 Magdir/palm Magdir/pbm Magdir/pdf Magdir/pdp Magdir/pgp Magdir/pkgadd Magdir/project Magdir/plus5 Magdir/printer Magdir/psdbms Magdir/pyramid Magdir/riff Magdir/rpm Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent Magdir/sgi Magdir/sgml Magdir/sniffer Magdir/softquad Magdir/spectrum Magdir/sun Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x Magdir/timezone Magdir/troff Magdir/typeset Magdir/unknown Magdir/uuencode Magdir/varied.out Magdir/vax Magdir/vicar Magdir/visx Magdir/vms Magdir/wordperfect Magdir/xenix Magdir/zilog Magdir/zyxel ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs diff --git a/doc/file.man b/doc/file.man index 09ea664f..60683ca0 100644 --- a/doc/file.man +++ b/doc/file.man @@ -1,12 +1,12 @@ .TH FILE __CSECTION__ "Copyright but distributable" -.\" $Id: file.man,v 1.33 1999/02/14 17:16:07 christos Exp $ +.\" $Id: file.man,v 1.34 2000/04/11 02:32:35 christos Exp $ .SH NAME file \- determine file type .SH SYNOPSIS .B file [ -.B \-bcnsvzL +.B \-bcinsvzL ] [ .B \-f @@ -19,6 +19,7 @@ file ... This manual page documents version __VERSION__ of the .B file command. +.PP .B File tests each argument in an attempt to classify it. There are three sets of tests, performed in this order: @@ -124,6 +125,14 @@ Either or at least one filename argument must be present; to test the standard input, use ``-'' as a filename argument. .TP 8 +.B \-i +Causes the file command to output mime type strings rather than the more +traditional human readable ones. Thus it may say "text/plain, ASCII" rather +than "ASCII text". In order for this option to work, file changes the way +it handles files recognised by the command itself (such as many of the +text file types, directories etc), and makes use of an alternative "magic" file. 
+(See the "FILES" section, below). +.TP 8 .B \-m list Specify an alternate list of files containing magic numbers. This can be a single file, or a colon-separated list of files. @@ -167,6 +176,11 @@ since on some systems it reports a zero size for raw disk partitions. .SH FILES .I __MAGIC__ \- default list of magic numbers +.PP +.I __MAGIC__.mime +\- default list of magic numbers, used to output mime types when the -i option +is specified. + .SH ENVIRONMENT The environment variable .B MAGIC @@ -252,6 +266,12 @@ $ file -s /dev/hda{,1,2,3,4,5,6,7,8,9,10} /dev/hda8: Linux/i386 swap file /dev/hda9: empty /dev/hda10: empty + +$ file -i file.c file /dev/hda +file.c: text/x-c +file: application/x-executable, dynamically linked (uses shared libs), not stripped +/dev/hda: application/x-not-regular-file + .fi .SH HISTORY There has been a @@ -299,6 +319,10 @@ the process running .PP Changes by Ian Darwin and various authors including Christos Zoulas (christos@astron.com), 1990-1999. +.PP +Altered by Chris Lowth, chris@lowth.com, 2000: +Handle the "-i" option to output mime type strings using an alternative +magic file and internal logic. .SH LEGAL NOTICE Copyright (c) Ian F. Darwin, Toronto, Canada, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993. 
diff --git a/magic/Magdir/apple b/magic/Magdir/apple index 1d157f77..20231293 100644 --- a/magic/Magdir/apple +++ b/magic/Magdir/apple @@ -4,6 +4,7 @@ # 0 string FiLeStArTfIlEsTaRt binscii (apple ][) text 0 string \x0aGL Binary II (apple ][) data +0 string \x76\xff Squeezed (apple ][) data 0 string NuFile NuFile archive (apple ][) data 0 string N\xf5F\xe9l\xe5 NuFile archive (apple ][) data 0 belong 0x00051600 AppleSingle encoded Macintosh file diff --git a/magic/Magdir/images b/magic/Magdir/images index 0b75a721..eb7a2b03 100644 --- a/magic/Magdir/images +++ b/magic/Magdir/images @@ -150,6 +150,19 @@ # Conflicts with other entries [BABYL] #0 string BA PC bitmap array data +# JPEG images +# SunOS 5.5.1 had +# +# 0 string \377\330\377\340 JPEG file +# 0 string \377\330\377\356 JPG file +# +# both of which turn into "JPEG image data" here. +# +0 beshort 0xffd8 JPEG image data +>6 string JFIF \b, JFIF standard +# HSI is Handmade Software's proprietary JPEG encoding scheme +0 string hsi1 JPEG image data, HSI proprietary + # XPM icons (Greg Roelofs, newt@uchicago.edu) # note possible collision with C/REXX entry in c-lang; currently commented out 0 string /*\ XPM\ */ X pixmap image text diff --git a/magic/magic.mime b/magic/magic.mime new file mode 100644 index 00000000..c01840f0 --- /dev/null +++ b/magic/magic.mime @@ -0,0 +1,531 @@ +# Magic data for KMimeMagic (originally for file(1) command) +# +# The format is 4-5 columns: +# Column #1: byte number to begin checking from, ">" indicates continuation +# Column #2: type of data to match +# Column #3: contents of data to match +# Column #4: MIME type of result +# Column #5: MIME encoding of result (optional) + +#------------------------------------------------------------------------------ +# Localstuff: file(1) magic for locally observed files +# Add any locally observed files here. 
+ +#------------------------------------------------------------------------------ +# end local stuff +#------------------------------------------------------------------------------ + +#------------------------------------------------------------------------------ +# Java + +0 short 0xcafe +>2 short 0xbabe application/java + +#------------------------------------------------------------------------------ +# audio: file(1) magic for sound formats +# +# from Jan Nicolai Langfeldt , +# + +# Sun/NeXT audio data +0 string .snd +>12 belong 1 audio/basic +>12 belong 2 audio/basic +>12 belong 3 audio/basic +>12 belong 4 audio/basic +>12 belong 5 audio/basic +>12 belong 6 audio/basic +>12 belong 7 audio/basic + +>12 belong 23 audio/x-adpcm + +# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format +# that uses little-endian encoding and has a different magic number +# (0x0064732E in little-endian encoding). +0 lelong 0x0064732E +>12 lelong 1 audio/x-dec-basic +>12 lelong 2 audio/x-dec-basic +>12 lelong 3 audio/x-dec-basic +>12 lelong 4 audio/x-dec-basic +>12 lelong 5 audio/x-dec-basic +>12 lelong 6 audio/x-dec-basic +>12 lelong 7 audio/x-dec-basic +# compressed (G.721 ADPCM) +>12 lelong 23 audio/x-dec-adpcm + +# Bytes 0-3 of AIFF, AIFF-C, & 8SVX audio files are "FORM" +# AIFF audio data +8 string AIFF audio/x-aiff +# AIFF-C audio data +8 string AIFC audio/x-aiff +# IFF/8SVX audio data +8 string 8SVX audio/x-aiff + +# Creative Labs AUDIO stuff +# Standard MIDI data +0 string MThd audio/unknown +#>9 byte >0 (format %d) +#>11 byte >1 using %d channels +# Creative Music (CMF) data +0 string CTMF audio/unknown +# SoundBlaster instrument data +0 string SBI audio/unknown +# Creative Labs voice data +0 string Creative\ Voice\ File audio/unknown +## is this next line right? it came this way... +#>19 byte 0x1A +#>23 byte >0 - version %d +#>22 byte >0 \b.%d + +# [GRR 950115: is this also Creative Labs? 
Guessing that first line +# should be string instead of unknown-endian long...] +#0 long 0x4e54524b MultiTrack sound data +#0 string NTRK MultiTrack sound data +#>4 long x - version %ld + +# Microsoft WAVE format (*.wav) +# [GRR 950115: probably all of the shorts and longs should be leshort/lelong] +# Microsoft RIFF +0 string RIFF audio/unknown +# - WAVE format +>8 string WAVE audio/x-wav +# +0 belong 0x2e7261fd application/x-realaudio + +# MPEG Layer 3 sound files +# Modified the 11/20/97 at 15:59:04 by Christophe Prud'homme +0 belong 0xfffb audio/x-mp3 + +#------------------------------------------------------------------------------ +# c-lang: file(1) magic for C programs or various scripts +# + +# XPM icons (Greg Roelofs, newt@uchicago.edu) +# ideally should go into "images", but entries below would tag XPM as C source +0 string /*\ XPM image/x-xpm 7bit + +# this first will upset you if you're a PL/1 shop... (are there any left?) +# in which case rm it; ascmagic will catch real C programs +# C or REXX program text +#0 string /* text/x-c +# C++ program text +#0 string // text/x-c++ + +#------------------------------------------------------------------------------ +# commands: file(1) magic for various shells and interpreters +# +#0 string :\ shell archive or commands for antique kernel text +0 string #!/bin/sh application/x-shellscript +0 string #!\ /bin/sh application/x-shellscript +0 string #!/bin/csh application/x-shellscript +0 string #!\ /bin/csh application/x-shellscript +# korn shell magic, sent by George Wu, gwu@clyde.att.com +0 string #!/bin/ksh application/x-shellscript +0 string #!\ /bin/ksh application/x-shellscript +0 string #!/bin/tcsh application/x-shellscript +0 string #!\ /bin/tcsh application/x-shellscript +0 string #!/usr/local/tcsh application/x-shellscript +0 string #!\ /usr/local/tcsh application/x-shellscript +0 string #!/usr/local/bin/tcsh application/x-shellscript +0 string #!\ /usr/local/bin/tcsh application/x-shellscript +# bash shell 
magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de) +0 string #!/bin/bash application/x-shellscript +0 string #!\ /bin/bash application/x-shellscript +0 string #!/usr/local/bin/bash application/x-shellscript +0 string #!\ /usr/local/bin/bash application/x-shellscript + +# +# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson) +0 string #!/usr/local/bin/zsh application/x-shellscript +0 string #!\ /usr/local/bin/zsh application/x-shellscript +0 string #!/usr/local/bin/ash application/x-shellscript +0 string #!\ /usr/local/bin/ash application/x-shellscript +#0 string #!/usr/local/bin/ae Neil Brown's ae +#0 string #!\ /usr/local/bin/ae Neil Brown's ae +0 string #!/bin/nawk application/x-nawk +0 string #!\ /bin/nawk application/x-nawk +0 string #!/usr/bin/nawk application/x-nawk +0 string #!\ /usr/bin/nawk application/x-nawk +0 string #!/usr/local/bin/nawk application/x-nawk +0 string #!\ /usr/local/bin/nawk application/x-nawk +0 string #!/bin/gawk application/x-gawk +0 string #!\ /bin/gawk application/x-gawk +0 string #!/usr/bin/gawk application/x-gawk +0 string #!\ /usr/bin/gawk application/x-gawk +0 string #!/usr/local/bin/gawk application/x-gawk +0 string #!\ /usr/local/bin/gawk application/x-gawk +# +0 string #!/bin/awk application/x-awk +0 string #!\ /bin/awk application/x-awk +0 string #!/usr/bin/awk application/x-awk +0 string #!\ /usr/bin/awk application/x-awk +0 string BEGIN application/x-awk + +# For Larry Wall's perl language. The ``eval'' line recognizes an +# outrageously clever hack for USG systems. 
+# Keith Waclena +0 string #!/bin/perl application/x-perl +0 string #!\ /bin/perl application/x-perl +0 string eval\ "exec\ /bin/perl application/x-perl +0 string #!/usr/bin/perl application/x-perl +0 string #!\ /usr/bin/perl application/x-perl +0 string eval\ "exec\ /usr/bin/perl application/x-perl +0 string #!/usr/local/bin/perl application/x-perl +0 string #!\ /usr/local/bin/perl application/x-perl +0 string eval\ "exec\ /usr/local/bin/perl application/x-perl + +#------------------------------------------------------------------------------ +# compress: file(1) magic for pure-compression formats (no archives) +# +# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc. +# +# Formats for various forms of compressed data +# Formats for "compress" proper have been moved into "compress.c", +# because it tries to uncompress it to figure out what's inside. + +# standard unix compress +0 string \037\235 application/x-compress + +# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver) +0 string \037\213 application/x-gzip + +0 string PK\003\004 application/x-zip + +# According to gzip.h, this is the correct byte order for packed data. +0 string \037\036 application/octet-stream +# +# This magic number is byte-order-independent. +# +0 short 017437 application/octet-stream + +# XXX - why *two* entries for "compacted data", one of which is +# byte-order independent, and one of which is byte-order dependent? +# +# compacted data +0 short 0x1fff application/octet-stream +0 string \377\037 application/octet-stream +# huf output +0 short 0145405 application/octet-stream + +# Squeeze and Crunch... +# These numbers were gleaned from the Unix versions of the programs to +# handle these formats. Note that I can only uncrunch, not crunch, and +# I didn't have a crunched file handy, so the crunch number is untested. 
+# Keith Waclena +#0 leshort 0x76FF squeezed data (CP/M, DOS) +#0 leshort 0x76FE crunched data (CP/M, DOS) + +# Freeze +#0 string \037\237 Frozen file 2.1 +#0 string \037\236 Frozen file 1.0 (or gzip 0.5) + +# lzh? +#0 string \037\240 LZH compressed data + +257 string ustar\0 application/x-tar posix +257 string ustar\040\040\0 application/x-tar gnu + +0 short 070707 application/x-cpio +0 short 0143561 application/x-cpio swapped + +0 string = application/x-archive +0 string ! application/x-archive +>8 string debian application/x-debian-package + +#------------------------------------------------------------------------------ +# +# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com) +# +0 beshort 0xedab +>2 beshort 0xeedb application/x-rpm + +0 lelong&0x8080ffff 0x0000081a application/x-arc lzw +0 lelong&0x8080ffff 0x0000091a application/x-arc squashed +0 lelong&0x8080ffff 0x0000021a application/x-arc uncompressed +0 lelong&0x8080ffff 0x0000031a application/x-arc packed +0 lelong&0x8080ffff 0x0000041a application/x-arc squeezed +0 lelong&0x8080ffff 0x0000061a application/x-arc crunched + +0 leshort 0xea60 application/octet-stream x-arj + +# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) +2 string -lh0- application/x-lharc lh0 +2 string -lh1- application/x-lharc lh1 +2 string -lz4- application/x-lharc lz4 +2 string -lz5- application/x-lharc lz5 +# [never seen any but the last; -lh4- reported in comp.compression:] +2 string -lzs- application/x-lha lzs +2 string -lh\ - application/x-lha lh +2 string -lhd- application/x-lha lhd +2 string -lh2- application/x-lha lh2 +2 string -lh3- application/x-lha lh3 +2 string -lh4- application/x-lha lh4 +2 string -lh5- application/x-lha lh5 +# Shell archives +10 string #\ This\ is\ a\ shell\ archive application/octet-stream x-shell + +#------------------------------------------------------------------------------ +# frame: file(1) magic for FrameMaker files +# +# This stuff came on a FrameMaker demo tape, most of 
which is +# copyright, but this file is "published" as witness the following: +# +0 string \ +# +0 string \14 byte 12 (OS/2 1.x format) +#>14 byte 64 (OS/2 2.x format) +#>14 byte 40 (Windows 3.x format) +#0 string IC icon +#0 string PI pointer +#0 string CI color icon +#0 string CP color pointer +#0 string BA bitmap array + + +#------------------------------------------------------------------------------ +# lisp: file(1) magic for lisp programs +# +# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com) +0 string ;; text/plain 8bit +# Emacs 18 - this is always correct, but not very magical. +0 string \012( application/x-elc +# Emacs 19 +0 string ;ELC\023\000\000\000 application/x-elc + +#------------------------------------------------------------------------------ +# mail.news: file(1) magic for mail and news +# +# There are tests to ascmagic.c to cope with mail and news. +0 string Relay-Version: message/rfc822 7bit +0 string #!\ rnews message/rfc822 7bit +0 string N#!\ rnews message/rfc822 7bit +0 string Forward\ to message/rfc822 7bit +0 string Pipe\ to message/rfc822 7bit +0 string Return-Path: message/rfc822 7bit +0 string Path: message/news 8bit +0 string Xref: message/news 8bit +0 string From: message/rfc822 7bit +0 string Article message/news 8bit +#------------------------------------------------------------------------------ +# msword: file(1) magic for MS Word files +# +# Contributor claims: +# Reversed-engineered MS Word magic numbers +# + +0 string \376\067\0\043 application/msword +0 string \320\317\021\340\241\261 application/msword +0 string \333\245-\0\0\0 application/msword + + + +#------------------------------------------------------------------------------ +# printer: file(1) magic for printer-formatted files +# + +# PostScript +0 string %! application/postscript +0 string \004%! 
application/postscript + +# Acrobat +# (due to clamen@cs.cmu.edu) +0 string %PDF- application/pdf + +#------------------------------------------------------------------------------ +# sc: file(1) magic for "sc" spreadsheet +# +38 string Spreadsheet application/x-sc + +#------------------------------------------------------------------------------ +# tex: file(1) magic for TeX files +# +# XXX - needs byte-endian stuff (big-endian and little-endian DVI?) +# +# From + +# Although we may know the offset of certain text fields in TeX DVI +# and font files, we can't use them reliably because they are not +# zero terminated. [but we do anyway, christos] +0 string \367\002 application/x-dvi +#0 string \367\203 TeX generic font data +#0 string \367\131 TeX packed font data +#0 string \367\312 TeX virtual font data +#0 string This\ is\ TeX, TeX transcript text +#0 string This\ is\ METAFONT, METAFONT transcript text + +# There is no way to detect TeX Font Metric (*.tfm) files without +# breaking them apart and reading the data. The following patterns +# match most *.tfm files generated by METAFONT or afm2tfm. 
+2 string \000\021 application/x-tex-tfm +2 string \000\022 application/x-tex-tfm +#>34 string >\0 (%s) + +# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com) +#0 string \\input\ texinfo Texinfo source text +#0 string This\ is\ Info\ file GNU Info text + +# correct TeX magic for Linux (and maybe more) +# from Peter Tobias (tobias@server.et-inf.fho-emden.de) +# +0 leshort 0x02f7 application/x-dvi + +# RTF - Rich Text Format +0 string {\\rtf text/rtf + +#------------------------------------------------------------------------------ +# animation: file(1) magic for animation/movie formats +# +# animation formats, originally from vax@ccwf.cc.utexas.edu (VaX#n8) +# MPEG file +0 string \000\000\001\263 video/mpeg +# FLI animation format +0 leshort 0xAF11 video/fli +# FLC animation format +0 leshort 0xAF12 video/flc +# AVI +>8 string AVI\ video/avi +# +# SGI and Apple formats +# +0 string MOVI video/sgi +4 string moov video/quicktime moov +4 string mdat video/quicktime mdat +# The contributor claims: +# I couldn't find a real magic number for these, however, this +# -appears- to work. Note that it might catch other files, too, +# so BE CAREFUL! +# +# Note that title and author appear in the two 20-byte chunks +# at decimal offsets 2 and 22, respectively, but they are XOR'ed with +# 255 (hex FF)! DL format SUCKS BIG ROCKS. +# +# DL file version 1 , medium format (160x100, 4 images/screen) +0 byte 1 video/unknown +0 byte 2 video/unknown +# +# Databases +# +# GDBM magic numbers +# Will be maintained as part of the GDBM distribution in the future. 
+# +0 belong 0x13579ace application/x-gdbm +0 lelong 0x13579ace application/x-gdbm +0 string GDBM application/x-gdbm +# +0 belong 0x061561 application/x-dbm +# +# Executables +# +0 string \177ELF +>4 byte 0 +>4 byte 1 +>4 byte 2 +>5 byte 0 +>5 byte 1 +>>16 leshort 0 +>>16 leshort 1 application/x-object +>>16 leshort 2 application/x-executable +>>16 leshort 3 application/x-sharedlib +>>16 leshort 4 application/x-coredump +# +# DOS +0 string MZ application/x-dosexec +# +# KDE +0 string [KDE\ Desktop\ Entry] application/x-kdelnk +0 string \#\ KDE\ Config\ File application/x-kdelnk +# xmcd database file for kscd +0 string \#\ xmcd text/xmcd + +#------------------------------------------------------------------------------ +# pkgadd: file(1) magic for SysV R4 PKG Datastreams +# +0 string #\ PaCkAgE\ DaTaStReAm application/x-svr4-package + + diff --git a/src/ascmagic.c b/src/ascmagic.c index bda23915..f56be214 100644 --- a/src/ascmagic.c +++ b/src/ascmagic.c @@ -38,7 +38,7 @@ #include "names.h" #ifndef lint -FILE_RCSID("@(#)$Id: ascmagic.c,v 1.25 1999/11/28 20:02:29 christos Exp $") +FILE_RCSID("@(#)$Id: ascmagic.c,v 1.26 2000/04/11 02:32:35 christos Exp $") #endif /* lint */ /* an optimisation over plain strcmp() */ @@ -61,10 +61,11 @@ int nbytes; /* size actually read */ */ switch (is_tar(buf, nbytes)) { case 1: - ckfputs("tar archive", stdout); + ckfputs(iflag ? "application/x-tar" : "tar archive", stdout); return 1; case 2: - ckfputs("POSIX tar archive", stdout); + ckfputs(iflag ? "application/x-tar, POSIX" + : "POSIX tar archive", stdout); return 1; } @@ -80,13 +81,14 @@ int nbytes; /* size actually read */ ++tp; /* skip leading whitespace */ if ((isascii(*tp) && (isalnum(*tp) || *tp=='\\') && isascii(tp[1]) && (isalnum(tp[1]) || tp[1] == '"'))) { - ckfputs("troff or preprocessor input text", stdout); + ckfputs(iflag ? 
"text/troff" + : "troff or preprocessor input text", stdout); return 1; } } if ((*buf == 'c' || *buf == 'C') && isascii(buf[1]) && isspace(buf[1])) { - ckfputs("fortran program text", stdout); + ckfputs(iflag ? "text/fortran" : "fortran program text", stdout); return 1; } @@ -106,7 +108,7 @@ int nbytes; /* size actually read */ s = NULL; /* make strtok() keep on tokin' */ for (p = names; p < names + NNAMES; p++) { if (STREQ(p->name, token)) { - ckfputs(types[p->type], stdout); + ckfputs(iflag ? types[p->type].mime : types[p->type].human, stdout); if (has_escapes) ckfputs(" (with escape sequences)", stdout); @@ -116,11 +118,9 @@ int nbytes; /* size actually read */ } /* all else fails, but it is ASCII... */ - ckfputs("ASCII text", stdout); + ckfputs(iflag ? "text/plain, ASCII" : "ASCII text", stdout); if (has_escapes) { ckfputs(" (with escape sequences)", stdout); } return 1; } - - diff --git a/src/file.c b/src/file.c index c32ccbef..d5718bb0 100644 --- a/src/file.c +++ b/src/file.c @@ -52,20 +52,21 @@ #ifdef HAVE_LOCALE_H #include #endif +#include #include /* for byte swapping */ #include "patchlevel.h" #ifndef lint -FILE_RCSID("@(#)$Id: file.c,v 1.48 1999/11/28 20:02:29 christos Exp $") +FILE_RCSID("@(#)$Id: file.c,v 1.49 2000/04/11 02:32:35 christos Exp $") #endif /* lint */ #ifdef S_IFLNK -# define USAGE "Usage: %s [-bcnvzL] [-f namefile] [-m magicfiles] file...\n" +# define USAGE "Usage: %s [-bcinvzL] [-f namefile] [-m magicfiles] file...\n" #else -# define USAGE "Usage: %s [-bcnvz] [-f namefile] [-m magicfiles] file...\n" +# define USAGE "Usage: %s [-bcinvz] [-f namefile] [-m magicfiles] file...\n" #endif #ifndef MAGIC @@ -82,13 +83,16 @@ int /* Global command-line options */ bflag = 0, /* brief output format */ zflag = 0, /* follow (uncompress) compressed files */ sflag = 0, /* read block special files */ + iflag = 0, nobuffer = 0; /* Do not buffer stdout */ + int /* Misc globals */ nmagic = 0; /* number of valid magic[]s */ struct magic *magic; /* array of 
magic entries */ const char *magicfile; /* where magic be found */ +const char *default_magicfile = MAGIC; char *progname; /* used throughout */ int lineno; /* line number in the magic file */ @@ -112,6 +116,9 @@ main(argc, argv) { int c; int check = 0, didsomefiles = 0, errflg = 0, ret = 0, app = 0; + char *mime; + + setlocale(LC_CTYPE, ""); /* makes islower etc work for other langs */ #ifdef LC_CTYPE setlocale(LC_CTYPE, ""); /* makes islower etc work for other langs */ @@ -123,9 +130,9 @@ main(argc, argv) progname = argv[0]; if (!(magicfile = getenv("MAGIC"))) - magicfile = MAGIC; + magicfile = default_magicfile; - while ((c = getopt(argc, argv, "bcdnf:m:svzL")) != EOF) + while ((c = getopt(argc, argv, "bcdinf:m:svzL")) != EOF) switch (c) { case 'v': (void) fprintf(stdout, "%s-%d.%d\n", progname, @@ -169,6 +176,14 @@ main(argc, argv) case 's': sflag++; break; + case 'i': + iflag++; + if ((mime = malloc(strlen(magicfile) + 6)) != NULL) { + (void)strcpy(mime, magicfile); + (void)strcat(mime, ".mime"); + magicfile = mime; + } + break; case '?': default: errflg++; @@ -370,7 +385,7 @@ int wid; } if (nbytes == 0) - ckfputs("empty", stdout); + ckfputs(iflag ? "application/x-empty" : "empty", stdout); else { buf[nbytes++] = '\0'; /* null-terminate it */ match = tryit(buf, nbytes, zflag); diff --git a/src/file.h b/src/file.h index 0206fa89..20ad8d0b 100644 --- a/src/file.h +++ b/src/file.h @@ -1,6 +1,6 @@ /* * file.h - definitions for file(1) program - * @(#)$Id: file.h,v 1.30 1999/11/28 20:02:29 christos Exp $ + * @(#)$Id: file.h,v 1.31 2000/04/11 02:32:35 christos Exp $ * * Copyright (c) Ian F. Darwin, 1987. * Written by Ian F. Darwin. @@ -121,7 +121,6 @@ extern uint32 signextend __P((struct magic *, unsigned int32)); extern int internatmagic __P((unsigned char *, int)); extern void tryelf __P((int, unsigned char *, int)); - extern int errno; /* Some unixes don't define this.. 
*/ extern char *progname; /* the program name */ @@ -136,6 +135,7 @@ extern int debug; /* enable debugging? */ extern int zflag; /* process compressed files? */ extern int lflag; /* follow symbolic links? */ extern int sflag; /* read/analyze block special files? */ +extern int iflag; /* Output types as mime-types */ extern int optind; /* From getopt(3) */ extern char *optarg; diff --git a/src/fsmagic.c b/src/fsmagic.c index ef5b5fbd..7b789673 100644 --- a/src/fsmagic.c +++ b/src/fsmagic.c @@ -57,7 +57,7 @@ #undef HAVE_MAJOR #ifndef lint -FILE_RCSID("@(#)$Id: fsmagic.c,v 1.30 1999/10/31 22:23:03 christos Exp $") +FILE_RCSID("@(#)$Id: fsmagic.c,v 1.31 2000/04/11 02:32:35 christos Exp $") #endif /* lint */ int @@ -86,15 +86,23 @@ struct stat *sb; return 1; } + if (iflag) { + if ((sb->st_mode & S_IFMT) != S_IFREG) { + ckfputs("application/x-not-regular-file", stdout); + return 1; + } + } + else { #ifdef S_ISUID - if (sb->st_mode & S_ISUID) ckfputs("setuid ", stdout); + if (sb->st_mode & S_ISUID) ckfputs("setuid ", stdout); #endif #ifdef S_ISGID - if (sb->st_mode & S_ISGID) ckfputs("setgid ", stdout); + if (sb->st_mode & S_ISGID) ckfputs("setgid ", stdout); #endif #ifdef S_ISVTX - if (sb->st_mode & S_ISVTX) ckfputs("sticky ", stdout); + if (sb->st_mode & S_ISVTX) ckfputs("sticky ", stdout); #endif + } switch (sb->st_mode & S_IFMT) { case S_IFDIR: @@ -239,7 +247,7 @@ struct stat *sb; * when we read the file.) */ if (!sflag && sb->st_size == 0) { - ckfputs("empty", stdout); + ckfputs(iflag ? "application/x-empty" : "empty", stdout); return 1; } return 0; diff --git a/src/names.h b/src/names.h index a5d339d6..385790e7 100644 --- a/src/names.h +++ b/src/names.h @@ -10,9 +10,14 @@ * * See LEGAL.NOTICE * - * $Id: names.h,v 1.16 1999/01/13 15:44:08 christos Exp $ + * $Id: names.h,v 1.17 2000/04/11 02:32:35 christos Exp $ */ +/* + modified by Chris Lowth - 9 April 2000 + to add mime type strings to the types table. 
+*/ + /* these types are used to index the table 'types': keep em in sync! */ #define L_C 0 /* first and foremost on UNIX */ #define L_CC 1 /* Bjarne's postincrement */ @@ -28,22 +33,26 @@ #define L_HTML 11 /* HTML */ #define L_BCPL 12 /* BCPL */ -static const char *types[] = { - "C program text", - "C++ program text", - "FORTRAN program text", - "make commands text" , - "PL/1 program text", - "assembler program text", - "English text", - "Pascal program text", - "mail text", - "news text", - "Java program text", - "HTML document text", - "BCPL program text", - "can't happen error on names.h/types", - 0}; +static const struct { + char *human; + char *mime; +} types[] = { + { "C program text", "text/x-c", }, + { "C++ program text", "text/x-c++" }, + { "FORTRAN program text", "text/x-fortran" }, + { "make commands text", "text/x-makefile" }, + { "PL/1 program text", "text/x-pl1" }, + { "assembler program text", "text/x-asm" }, + { "English text", "text/plain, English" }, + { "Pascal program text", "text/x-pascal" }, + { "mail text", "text/x-mail" }, + { "news text", "text/x-news" }, + { "Java program text", "text/x-java" }, + { "HTML document text", "text/html", }, + { "BCPL program text", "text/x-bcpl" }, + { "can't happen error on names.h/types", "error/x-error" }, + { 0, 0} +}; /* * XXX - how should we distinguish Java from C++? 
diff --git a/src/readelf.c b/src/readelf.c index 74ee3325..bff338c9 100644 --- a/src/readelf.c +++ b/src/readelf.c @@ -14,7 +14,7 @@ #include "readelf.h" #ifndef lint -FILE_RCSID("@(#)$Id: readelf.c,v 1.11 1999/10/31 22:23:04 christos Exp $") +FILE_RCSID("@(#)$Id: readelf.c,v 1.12 2000/04/11 02:32:35 christos Exp $") #endif #ifdef ELFCORE @@ -250,7 +250,7 @@ dophn_core(class, swap, fd, off, num, size) error("lseek failed (%s).\n", strerror(errno)); bufsize = read(fd, nbuf, BUFSIZ); if (bufsize == -1) - error("read failed (%s).\n", strerror(errno)); + error(": " "read failed (%s).\n", strerror(errno)); offset = 0; for (;;) { if (offset >= bufsize)