From 574eacad815601aaaa827d7cddeac57034f08813 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Sun, 6 Mar 2005 05:58:20 +0000 Subject: [PATCH] Add indirect magic offset support and search support. --- ChangeLog | 5 + magic/Magdir/archive | 327 +++++++++++++++++++++++++++++++++++-- magic/Magdir/audio | 2 +- magic/Magdir/bout | 2 +- magic/Magdir/compress | 4 +- magic/Magdir/ctags | 2 +- magic/Magdir/digital | 4 +- magic/Magdir/images | 2 +- magic/Magdir/msdos | 371 ++++++++++++++++++++++++++++-------------- magic/Magdir/sharc | 2 +- src/apprentice.c | 51 ++++-- src/compress.c | 6 +- src/file.c | 4 +- src/file.h | 19 ++- src/magic.c | 15 +- src/softmagic.c | 354 +++++++++++++++++++++++++--------------- 16 files changed, 868 insertions(+), 302 deletions(-) diff --git a/ChangeLog b/ChangeLog index d5f0ba7d..8c4dfb60 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ + +2005-03-06 00:00 Joerg Walter + + * Add indirect magic offset support, and search mode. + 2005-01-12 00:00 Stepan Kasal * src/ascmagic.c (file_ascmagic): Fix three bugs about text files: diff --git a/magic/Magdir/archive b/magic/Magdir/archive index f545bd80..9f8d1913 100644 --- a/magic/Magdir/archive +++ b/magic/Magdir/archive @@ -29,7 +29,7 @@ # Debian package (needs to go before regular portable archives) # -0 string !\ndebian +0 string =!\ndebian >8 string debian-split part of multipart Debian package >8 string debian-binary Debian binary package >68 string >\0 (format %s) @@ -47,7 +47,7 @@ # MIPS archive (needs to go before regular portable archives) # -0 string !\n__________E MIPS archive +0 string =!\n__________E MIPS archive >20 string U with MIPS Ucode members >21 string L with MIPSEL members >21 string B with MIPSEB members @@ -61,7 +61,7 @@ # XXX - why are there multiple thingies? Note that 0x213c6172 is # "! current ar archive +# 0 string =! current ar archive # 0 long 0x213c6172 archive file # # and for SVR1 archives, we have: @@ -73,7 +73,7 @@ # and absolute code program modules in the same format as new-style # "ar" archives? # -0 string ! current ar archive +0 string =! current ar archive >8 string __.SYMDEF random library >0 belong =65538 - pre SR9.5 >0 belong =65539 - post SR9.5 @@ -121,6 +121,10 @@ 0 lelong&0x8080ffff 0x0000031a ARC archive data, packed 0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed 0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched +# [JW] stuff taken from idarc, obviously ARC successors: +0 lelong&0x8080ffff 0x00000a1a PAK archive data +0 lelong&0x8080ffff 0x0000141a ARC+ archive data +0 lelong&0x8080ffff 0x0000481a HYP archive data # Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk) # I can't create either SPARK or ArcFS archives so I have not tested this stuff @@ -132,6 +136,292 @@ 0 string \032 RISC OS archive (spark format) 0 string Archive\000 RISC OS archive (ArcFS format) +# All these were taken from idarc, many could not be verified. Unfortunately, +# there were many low-quality sigs, i.e. easy to trigger false positives. +# Please notify me of any real-world fishy/ambiguous signatures and I'll try +# to get my hands on the actual archiver and see if I find something better. [JW] +# probably many can be enhanced by finding some 0-byte or control char near the start + +# idarc calls this Crush/Uncompressed... *shrug* +0 string CRUSH Crush archive data +# Squeeze It (.sqz) +0 string HLSQZ Squeeze It archive data +# SQWEZ +0 string SQWEZ SQWEZ archive data +# HPack (.hpk) +0 string HPAK HPack archive data +# HAP +0 string \x91\x33HF HAP archive data +# MD/MDCD +0 string MDmd MDCD archive data +# LIM +0 string LIM\x1a LIM archive data +# SAR +3 string LH5 SAR archive data +# BSArc/BS2 +0 string \212\3SB \0 BSArc/BS2 archive data +# MAR +2 string =-ah MAR archive data +# ACB +0 belong&0x00f800ff 0x00800000 ACB archive data +# CPZ +# TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data +# JRC +0 string JRchive JRC archive data +# Quantum +0 string DS\0 Quantum archive data +# ReSOF +0 string PK\3\6 ReSOF archive data +# QuArk +0 string 7\4 QuArk archive data +# YAC +14 string YC YAC archive data +# X1 +0 string X1 X1 archive data +0 string XhDr X1 archive data +# CDC Codec (.dqt) +0 belong&0xffffe000 0x76ff2000 CDC Codec archive data +# AMGC +0 string \xad6" AMGC archive data +# NuLIB +0 string NõFélå NuLIB archive data +# PakLeo +0 string LEOLZW PAKLeo archive data +# ChArc +0 string SChF ChArc archive data +# PSA +0 string PSA PSA archive data +# CrossePAC +0 string DSIGDCC CrossePAC archive data +# Freeze +0 string \x1f\x9f\x4a\x10\x0a Freeze archive data +# KBoom +0 string ¨MP¨ KBoom archive data +# NSQ, must go after CDC Codec +0 string \x76\xff NSQ archive data +# DPA +0 string Dirk\ Paehl DPA archive data +# BA +# TODO: idarc says "bytes 0-2 == bytes 3-5" +# TTComp +0 string \0\6 TTComp archive data +# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation? +0 string ESP ESP archive data +# ZPack +0 string \1ZPK\1 ZPack archive data +# Sky +0 string \xbc\x40 Sky archive data +# UFA +0 string UFA UFA archive data +# Dry +0 string =-H2O DRY archive data +# FoxSQZ +0 string FOXSQZ FoxSQZ archive data +# AR7 +0 string ,AR7 AR7 archive data +# PPMZ +0 string PPMZ PPMZ archive data +# MS Compress +4 string \x88\xf0\x27 MS Compress archive data +# MP3 (archiver, not lossy audio compression) +0 string MP3\x1a MP3-Archiver archive data +# ZET +0 string OZÝ ZET archive data +# TSComp +0 string \x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data +# ARQ +0 string gW\4\1 ARQ archive data +# Squash +3 string OctSqu Squash archive data +# Terse +0 string \5\1\1\0 Terse archive data +# PUCrunch +0 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data +# UHarc +0 string UHA UHarc archive data +# ABComp +0 string \2AB ABComp archive data +0 string \3AB2 ABComp archive data +# CMP +0 string CO\0 CMP archive data +# Splint +0 string \x93\xb9\x06 Splint archive data +# InstallShield +0 string \x13\x5d\x65\x8c InstallShield Z archive Data +# Gather +1 string GTH Gather archive data +# BOA +0 string BOA BOA archive data +# RAX +0 string ULEB\xa RAX archive data +# Xtreme +0 string ULEB\0 Xtreme archive data +# Pack Magic +0 string @â\1\0 Pack Magic archive data +# BTS +0 belong&0xfeffffff 0x1a034465 BTS archive data +# ELI 5750 +0 string Ora\ ELI 5750 archive data +# QFC +0 string \x1aFC\x1a QFC archive data +0 string \x1aQF\x1a QFC archive data +# PRO-PACK +0 string RNC PRO-PACK archive data +# 777 +0 string 777 777 archive data +# LZS221 +0 string sTaC LZS221 archive data +# HPA +0 string HPA HPA archive data +# Arhangel +0 string LG Arhangel archive data +# EXP1, uses bzip2 +0 string 0123456789012345BZh EXP1 archive data +# IMP +0 string IMP\xa IMP archive data +# NRV +0 string \x00\x9E\x6E\x72\x76\xFF NRV archive data +# Squish +0 string \x73\xb2\x90\xf4 Squish archive data +# Par +0 string PHILIPP Par archive data +0 string PAR Par archive data +# HIT +0 string UB HIT archive data +# SBX +0 belong&0xfffff000 0x53423000 SBX archive data +# NaShrink +0 string NSK NaShrink archive data +# SAPCAR +0 string #\ CAR\ archive\ header SAPCAR archive data +0 string CAR\ 2.00RG SAPCAR archive data +# Disintegrator +0 string DST Disintegrator archive data +# ASD +0 string ASD ASD archive data +# InstallShield CAB +0 string ISc( InstallShield CAB +# TOP4 +0 string T4\x1a TOP4 archive data +# BatComp left out: sig looks like COM executable +# so TODO: get real 4dos batcomp file and find sig +# BlakHole +0 string BH\5\7 BlakHole archive data +# BIX +0 string BIX0 BIX archive data +# ChiefLZA +0 string ChfLZ ChiefLZA archive data +# Blink +0 string Blink Blink archive data +# Logitech Compress +0 string \xda\xfa Logitech Compress archive data +# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE) +1 string (C)\ STEPANYUK ARS-Sfx archive data +# AKT/AKT32 +0 string AKT32 AKT32 archive data +0 string AKT AKT archive data +# NPack +0 string MSTSM NPack archive data +# PFT +0 string \0\x50\0\x14 PFT archive data +# SemOne +0 string SEM SemOne archive data +# PPMD +0 string \x8f\xaf\xac\x84 PPMD archive data +# FIZ +0 string FIZ FIZ archive data +# MSXiE +0 belong&0xfffff0f0 0x4d530000 MSXiE archive data +# DeepFreezer +0 belong&0xfffffff0 0x797a3030 DeepFreezer archive data +# DC +0 string =2 byte x \b, version %i +>3 byte x \b.%i +# ZZip archiver (.zz) +0 string ZZ\ \0\0 ZZip archive data +0 string ZZ0 ZZip archive data +# PAQ archiver (.paq) +0 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data +0 string PAQ PAQ archive data +>3 byte&0xf0 0x30 +>>3 byte x (v%c) +# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP) +0xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data +0 string JARCS JAR (ARJ Software, Inc.) archive data + # ARJ archiver (jason@jarthur.Claremont.EDU) 0 leshort 0xea60 ARJ archive data >5 byte x \b, v%d, @@ -150,6 +440,8 @@ >7 byte 8 os: NeXT >7 byte 9 os: VAX/VMS >3 byte >0 %d] +# [JW] idarc says this is also possible +2 leshort 0xea60 ARJ archive data # HA archiver (Greg Roelofs, newt@uchicago.edu) # This is a really bad format. A file containing HAWAII will match this... @@ -161,6 +453,15 @@ #>4 byte&0x0f =2 first is type HSC #>4 byte&0x0f =0x0e first is type DIR #>4 byte&0x0f =0x0f first is type SPECIAL +# suggestion: at least identify small archives (<1024 files) +0 belong&0xffff00fc 0x48410000 HA archive data +>2 leshort =1 1 file, +>2 leshort >1 %u files, +>4 byte&0x0f =0 first is type CPY +>4 byte&0x0f =1 first is type ASC +>4 byte&0x0f =2 first is type HSC +>4 byte&0x0f =0x0e first is type DIR +>4 byte&0x0f =0x0f first is type SPECIAL # HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz) 0 string HPAK HPACK archive data @@ -174,12 +475,12 @@ >>0x36 string >\0 fstype %.8s # LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) -2 string -lh0- LHarc 1.x archive data [lh0] -2 string -lh1- LHarc 1.x archive data [lh1] +2 string -lh0- LHarc 1.x/ARX archive data [lh0] +2 string -lh1- LHarc 1.x/ARX archive data [lh1] 2 string -lz4- LHarc 1.x archive data [lz4] 2 string -lz5- LHarc 1.x archive data [lz5] # [never seen any but the last; -lh4- reported in comp.compression:] -2 string -lzs- LHa 2.x? archive data [lzs] +2 string -lzs- LHa/LZS archive data [lzs] 2 string -lh\40- LHa 2.x? archive data [lh ] 2 string -lhd- LHa 2.x? archive data [lhd] 2 string -lh2- LHa 2.x? archive data [lh2] @@ -187,8 +488,12 @@ 2 string -lh4- LHa (2.x) archive data [lh4] 2 string -lh5- LHa (2.x) archive data [lh5] 2 string -lh6- LHa (2.x) archive data [lh6] -2 string -lh7- LHa (2.x) archive data [lh7] +2 string -lh7- LHa (2.x)/LHark archive data [lh7] >20 byte x - header level %d +# taken from idarc [JW] +2 string -lZ PUT archive data +2 string -lz LZS archive data +2 string -sw1- Swag archive data # RAR archiver (Greg Roelofs, newt@uchicago.edu) 0 string Rar! RAR archive data, @@ -197,12 +502,14 @@ >35 byte 1 os: OS/2 >35 byte 2 os: Win32 >35 byte 3 os: Unix +# some old version? idarc says: +0 string RE\x7e\x5e RAR archive data # SQUISH archiver (Greg Roelofs, newt@uchicago.edu) 0 string SQSH squished archive data (Acorn RISCOS) # UC2 archiver (Greg Roelofs, newt@uchicago.edu) -# I can't figure out the self-extracting form of these buggers... +# [JW] see exe section for self-extracting version 0 string UC2\x1a UC2 archive data # ZIP archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) @@ -284,7 +591,7 @@ # ACE archive (from http://www.wotsit.org/download.asp?f=ace) # by Stefan `Sec` Zehl -7 string **ACE** ACE compressed archive +7 string **ACE** ACE archive data >15 byte >0 version %d >16 byte =0x00 \b, from MS-DOS >16 byte =0x01 \b, from OS/2 diff --git a/magic/Magdir/audio b/magic/Magdir/audio index ccdf14cc..18c9c1ad 100644 --- a/magic/Magdir/audio +++ b/magic/Magdir/audio @@ -131,7 +131,7 @@ 0 string Extended\ Module: Fasttracker II module sound data >17 string >\0 Title: "%s" -21 string/c !SCREAM! Screamtracker 2 module sound data +21 string/c =!SCREAM! Screamtracker 2 module sound data 21 string BMOD2STM Screamtracker 2 module sound data 1080 string M.K. 4-channel Protracker module sound data >0 string >\0 Title: "%s" diff --git a/magic/Magdir/bout b/magic/Magdir/bout index ab771fed..4cd6f76e 100644 --- a/magic/Magdir/bout +++ b/magic/Magdir/bout @@ -5,5 +5,5 @@ >16 long >0 not stripped # # b.out archive (hp-rt on i960) -0 string ! b.out archive +0 string =! b.out archive >8 string __.SYMDEF random library diff --git a/magic/Magdir/compress b/magic/Magdir/compress index 71e689c1..39a3d110 100644 --- a/magic/Magdir/compress +++ b/magic/Magdir/compress @@ -161,10 +161,10 @@ >4 belong 0x090A0C0C very good compression >4 belong 0x090A0C0D best compression -# 7z archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at) +# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at) # http://www.7-zip.org or DOC/7zFormat.txt # -0 string 7z\274\257\047\034 7z archive data, +0 string 7z\274\257\047\034 7-zip archive data, >6 byte x version %d >7 byte x \b.%d diff --git a/magic/Magdir/ctags b/magic/Magdir/ctags index 5c039e7b..84c5b7f9 100644 --- a/magic/Magdir/ctags +++ b/magic/Magdir/ctags @@ -2,4 +2,4 @@ # ---------------------------------------------------------------------------- # ctags: file (1) magic for Exuberant Ctags files # From: Alexander Mai -0 string !_TAG Exuberant Ctags tag file text +0 string =!_TAG Exuberant Ctags tag file text diff --git a/magic/Magdir/digital b/magic/Magdir/digital index 6a573a6e..615ef7a5 100644 --- a/magic/Magdir/digital +++ b/magic/Magdir/digital @@ -1,6 +1,6 @@ # Digital UNIX - Info # -0 string !\n________64E Alpha archive +0 string =!\n________64E Alpha archive >22 string X -- out of date # # Alpha COFF Based Executables @@ -32,7 +32,7 @@ 0 string \033c\033 LN03 output 0 long 04553207 X image # -0 string !!\n profiling data file +0 string =!!\n profiling data file # # Locale data tables (MIPS and Alpha). # diff --git a/magic/Magdir/images b/magic/Magdir/images index 6143df4b..70fd84c2 100644 --- a/magic/Magdir/images +++ b/magic/Magdir/images @@ -260,7 +260,7 @@ # other images 0 string This\ is\ a\ BitMap\ file Lisp Machine bit-array-file -0 string !! Bennet Yee's "face" format +0 string =!! Bennet Yee's "face" format # From SunOS 5.5.1 "/etc/magic" - appeared right before Sun raster image # stuff. diff --git a/magic/Magdir/msdos b/magic/Magdir/msdos index c81b2a5a..b3e91d94 100644 --- a/magic/Magdir/msdos +++ b/magic/Magdir/msdos @@ -6,68 +6,11 @@ # .BAT files (Daniel Quinlan, quinlan@yggdrasil.com) 0 string/c @echo\ off MS-DOS batch file text -# XXX - according to Microsoft's spec, at an offset of 0x3c in a -# PE-format executable is the offset in the file of the PE header; -# unfortunately, that's a little-endian offset, and there's no way -# to specify an indirect offset with a specified byte order. -# So, for now, we assume the standard MS-DOS stub, which puts the -# PE header at 0x80 = 128. -# -# Required OS version and subsystem version were 4.0 on some NT 3.51 -# executables built with Visual C++ 4.0, so it's not clear that -# they're interesting. The user version was 0.0, but there's -# probably some linker directive to set it. The linker version was -# 3.0, except for one ".exe" which had it as 4.20 (same damn linker!). -# -128 string PE\0\0 MS Windows PE ->150 leshort&0x0100 >0 32-bit ->132 leshort 0x0 unknown processor ->132 leshort 0x14c Intel 80386 ->132 leshort 0x166 MIPS R4000 ->132 leshort 0x184 Alpha ->132 leshort 0x268 Motorola 68000 ->132 leshort 0x1f0 PowerPC ->132 leshort 0x290 PA-RISC ->148 leshort >27 ->>220 leshort 0 unknown subsystem ->>220 leshort 1 native ->>220 leshort 2 GUI ->>220 leshort 3 console ->>220 leshort 7 POSIX ->150 leshort&0x2000 =0 executable -#>>136 ledate x stamp %s, ->>150 leshort&0x0001 >0 not relocatable -#>>150 leshort&0x0004 =0 with line numbers, -#>>150 leshort&0x0008 =0 with local symbols, -#>>150 leshort&0x0200 =0 with debug symbols, ->>150 leshort&0x1000 >0 system file -#>>148 leshort >0 -#>>>154 byte x linker %d -#>>>155 byte x \b.%d, -#>>148 leshort >27 -#>>>192 leshort x requires OS %d -#>>>194 leshort x \b.%d, -#>>>196 leshort x user version %d -#>>>198 leshort x \b.%d, -#>>>200 leshort x subsystem version %d -#>>>202 leshort x \b.%d, ->150 leshort&0x2000 >0 DLL -#>>136 ledate x stamp %s, ->>150 leshort&0x0001 >0 not relocatable -#>>150 leshort&0x0004 =0 with line numbers, -#>>150 leshort&0x0008 =0 with local symbols, -#>>150 leshort&0x0200 =0 with debug symbols, ->>150 leshort&0x1000 >0 system file -#>>148 leshort >0 -#>>>154 byte x linker %d -#>>>155 byte x \b.%d, -#>>148 leshort >27 -#>>>192 leshort x requires OS %d -#>>>194 leshort x \b.%d, -#>>>196 leshort x user version %d -#>>>198 leshort x \b.%d, -#>>>200 leshort x subsystem version %d -#>>>202 leshort x \b.%d, +# OS/2 batch files are REXX. the second regex is a bit generic, oh well +# the matched commands seem to be common in REXX and uncommon elsewhere +100 regex/c =^\\s*call\s+rxfuncadd.*sysloadfu OS/2 REXX batch file text +100 regex/c =^\\s*say\ ['"] OS/2 REXX batch file text + 0 leshort 0x14c MS Windows COFF Intel 80386 object file #>4 ledate x stamp %s 0 leshort 0x166 MS Windows COFF MIPS R4000 object file @@ -81,47 +24,228 @@ 0 leshort 0x290 MS Windows COFF PA-RISC object file #>4 ledate x stamp %s -# .EXE formats (Greg Roelofs, newt@uchicago.edu) +# XXX - according to Microsoft's spec, at an offset of 0x3c in a +# PE-format executable is the offset in the file of the PE header; +# unfortunately, that's a little-endian offset, and there's no way +# to specify an indirect offset with a specified byte order. +# So, for now, we assume the standard MS-DOS stub, which puts the +# PE header at 0x80 = 128. # -0 string MZ MS-DOS executable (EXE) ->24 string @ \b, OS/2 or MS Windows ->>0xe7 string LH/2\ Self-Extract \b, %s ->>0xe9 string PKSFX2 \b, %s ->>122 string Windows\ self-extracting\ ZIP \b, %s ->0x1c string RJSX\xff\xff \b, ARJ SFX ->0x1c string diet\xf9\x9c \b, diet compressed ->0x1c string LZ09 \b, LZEXE v0.90 compressed ->0x1c string LZ91 \b, LZEXE v0.91 compressed ->0x1e string Copyright\ 1989-1990\ PKWARE\ Inc. \b, PKSFX -# JM: 0x1e "PKLITE Copr. 1990-92 PKWARE Inc. All Rights Reserved\7\0\0\0" ->0x1e string PKLITE\ Copr. \b, %.6s compressed ->0x24 string LHa's\ SFX \b, %.15s ->0x24 string LHA's\ SFX \b, %.15s ->1638 string -lh5- \b, LHa SFX archive v2.13S ->7195 string Rar! \b, RAR self-extracting archive +# Required OS version and subsystem version were 4.0 on some NT 3.51 +# executables built with Visual C++ 4.0, so it's not clear that +# they're interesting. The user version was 0.0, but there's +# probably some linker directive to set it. The linker version was +# 3.0, except for one ".exe" which had it as 4.20 (same damn linker!). # -# [GRR 950118: file 3.15 has a buffer-size limitation; offsets bigger than -# 8161 bytes are ignored. To make the following entries work, increase -# HOWMANY in file.h to 32K at least, and maybe to 70K or more for OS/2, -# NT/Win32 and VMS.] -# [GRR: some company sells a self-extractor/displayer for image data(!)] +# many of the compressed formats were extraced from IDARC 1.23 source code # ->11696 string PK\003\004 \b, PKZIP SFX archive v1.1 ->13297 string PK\003\004 \b, PKZIP SFX archive v1.93a ->15588 string PK\003\004 \b, PKZIP2 SFX archive v1.09 ->15770 string PK\003\004 \b, PKZIP SFX archive v2.04g ->28374 string PK\003\004 \b, PKZIP2 SFX archive v1.02 +0 string MZ +>0 string MZ\0\0\0\0\0\0\0\0\0\0PE\0\0 PE executable for MS Windows +>>&18 leshort&0x2000 >0 (DLL) +>>&88 leshort 0 (unknown subsystem) +>>&88 leshort 1 (native) +>>&88 leshort 2 (GUI) +>>&88 leshort 3 (console) +>>&88 leshort 7 (POSIX) +>>&0 leshort 0x0 unknown processor +>>&0 leshort 0x14c Intel 80386 +>>&0 leshort 0x166 MIPS R4000 +>>&0 leshort 0x184 Alpha +>>&0 leshort 0x268 Motorola 68000 +>>&0 leshort 0x1f0 PowerPC +>>&0 leshort 0x290 PA-RISC +>>&18 leshort&0x0100 >0 32-bit +>>&18 leshort&0x1000 >0 system file +>>&0xf4 search/0x140 \x0\x40\x1\x0 +>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive + +>0x18 leshort >0x3f +>>(0x3c.l) string PE\0\0 PE executable +# hooray, there's a DOS extender using the PE format, with a valid PE +# executable inside (which just prints a message and exits if run in win) +>>>(8.s*16) string 32STUB for MS-DOS, 32rtm DOS extender +>>>(8.s*16) string !32STUB for MS Windows +>>>>(0x3c.l+22) leshort&0x2000 >0 (DLL) +>>>>(0x3c.l+92) leshort 0 (unknown subsystem) +>>>>(0x3c.l+92) leshort 1 (native) +>>>>(0x3c.l+92) leshort 2 (GUI) +>>>>(0x3c.l+92) leshort 3 (console) +>>>>(0x3c.l+92) leshort 7 (POSIX) +>>>>(0x3c.l+4) leshort 0x0 unknown processor +>>>>(0x3c.l+4) leshort 0x14c Intel 80386 +>>>>(0x3c.l+4) leshort 0x166 MIPS R4000 +>>>>(0x3c.l+4) leshort 0x184 Alpha +>>>>(0x3c.l+4) leshort 0x268 Motorola 68000 +>>>>(0x3c.l+4) leshort 0x1f0 PowerPC +>>>>(0x3c.l+4) leshort 0x290 PA-RISC +>>>>(0x3c.l+22) leshort&0x0100 >0 32-bit +>>>>(0x3c.l+22) leshort&0x1000 >0 system file + +>>>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed +>>>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed +>>>>(0x3c.l+0xf8) search/0x140 UPX2 +>>>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip) +>>>>(0x3c.l+0xf8) search/0x140 .idata +>>>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip) +>>>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive +>>>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive +>>>>(0x3c.l+0xf8) search/0x140 .rsrc +>>>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive +>>>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive +>>>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive +>>>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive +>>>>(0x3c.l+0xf8) search/0x140 .data +>>>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive +>>>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed +>>>>>(0x3c.l+0xf7) byte x +>>>>>>(&0x104.l+(-4)) string =!sfx! \b, ACE self-extracting archive +>>>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive +>>>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive +>>>>(0x3c.l+0xf8) search/0x140 .reloc +>>>>>(&0xe.l+(-4)) search/0x180 PK\3\4 \b, ZIP self-extracting archive (WinZip) + +>>>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip) +>>>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive +>>>>0x30 string Inno \b, InnoSetup self-extracting archive + +>>(0x3c.l) string NE NE executable +>>>(0x3c.l+0x36) byte 0 (unknown OS) +>>>(0x3c.l+0x36) byte 1 for OS/2 1.x +>>>(0x3c.l+0x36) byte 2 for MS Windows 3.x +>>>(0x3c.l+0x36) byte 3 for MS-DOS +>>>(0x3c.l+0x36) byte >3 (unknown OS) +>>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender +>>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL) +>>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver) +>>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive +>>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip) + +>>(0x3c.l) string LX\0\0 LX executable +>>>(0x3c.l+0x0a) leshort <1 (unknown OS) +>>>(0x3c.l+0x0a) leshort 1 for OS/2 +>>>(0x3c.l+0x0a) leshort 2 for MS Windows +>>>(0x3c.l+0x0a) leshort 3 for DOS +>>>(0x3c.l+0x0a) leshort >3 (unknown OS) +>>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL) +>>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver) +>>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI) +>>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console) +>>>(0x3c.l+0x08) leshort 1 i80286 +>>>(0x3c.l+0x08) leshort 2 i80386 +>>>(0x3c.l+0x08) leshort 3 i80486 +>>>(8.s*16) string emx \b, emx +>>>>&1 string x %s +>>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive + +# MS Windows system file, supposedly a collection of LE executables +>>(0x3c.l) string W3 W3 executable for MS Windows + +>>(0x3c.l) string LE\0\0 LE executable +>>>(0x3c.l+0x0a) leshort 1 +# some DOS extenders use LE files with OS/2 header +>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender +>>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender +>>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender +>>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender +>>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub) +>>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub) +>>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded) +# this is a wild guess; hopefully it is a specific signature +>>>>&0x24 lelong <0x50 +>>>>>(&0x4c.l) string \xfc\xb8WATCOM +>>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed +# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP +#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2 +# fails with DOS-Extenders. +>>>(0x3c.l+0x0a) leshort 2 for MS Windows +>>>(0x3c.l+0x0a) leshort 3 for MS-DOS +>>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD) +>>>(&0x7c.l+0x26) string UPX \b, UPX compressed +>>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive + +# looks like ASCII, probably some embedded copyright message. +# and definitely not NE/LE/LX/PE +>>0x3c lelong >0x20000000 +>>>(4.s*512) leshort !0x014c MZ executable for MS-DOS +# header data too small for extended executable +>2 long !0 +>>0x18 leshort <0x40 +>>>(4.s*512) leshort !0x014c + +>>>>&(2.s-514) string !LE +>>>>>&-2 string !BW MZ executable for MS-DOS +>>>>&(2.s-514) string LE LE executable +>>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender +# educated guess since indirection is still not capable enough for complex offset +# calculations (next embedded executable would be at &(&2*512+&0-2) +# I suspect there are only LE executables in these multi-exe files +>>>>&(2.s-514) string BW +>>>>>0x240 search/0x100 DOS/4G LE executable for MS-DOS, DOS4GW DOS extender (embedded) +>>>>>0x240 search/0x100 !DOS/4G BW executable collection for MS-DOS + +# This sequence skips to the first COFF segment, usually .text +>(4.s*512) leshort 0x014c COFF executable +>>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender +>>(8.s*16) string emx +>>>&1 string x for DOS, Win or OS/2, emx %s +>>&(&0x42.l-3) byte x +>>>&0x26 string UPX \b, UPX compressed +# and yet another guess: small .text, and after large .data is unusal, could be 32lite +>>&0x2c search/0xa0 .text +>>>&0x0b lelong <0x2000 +>>>>&0 lelong >0x6000 \b, 32lite compressed + +>(8.s*16) string $WdX \b, WDos/X DOS extender + +# .EXE formats (Greg Roelofs, newt@uchicago.edu) # -# Info-ZIP self-extractors -# these are the DOS versions: ->25115 string PK\003\004 \b, Info-ZIP SFX archive v5.12 ->26331 string PK\003\004 \b, Info-ZIP SFX archive v5.12 w/decryption -# these are the OS/2 versions (OS/2 is flagged above): ->47031 string PK\003\004 \b, Info-ZIP SFX archive v5.12 ->49845 string PK\003\004 \b, Info-ZIP SFX archive v5.12 w/decryption -# this is the NT/Win32 version: ->69120 string PK\003\004 \b, Info-ZIP NT SFX archive v5.12 w/decryption +>0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed +>0xe7 string LH/2\ Self-Extract \b, %s +>0x1c string diet \b, diet compressed +>0x1c string LZ09 \b, LZEXE v0.90 compressed +>0x1c string LZ91 \b, LZEXE v0.91 compressed +>0x1c string tz \b, TinyProg compressed +>0x1e string PKLITE \b, %s compressed +>0x64 string W\ Collis\0\0 \b, Compack compressed +>0x24 string LHa's\ SFX \b, LHa self-extracting archive +>0x24 string LHA's\ SFX \b, LHa self-extracting archive +>0x24 string \ $ARX \b, ARX self-extracting archive +>0x24 string \ $LHarc \b, LHarc self-extracting archive +>0x20 string SFX\ by\ LARC \b, LARC self-extracting archive +>1638 string -lh5- \b, LHa self-extracting archive v2.13S +>0x17888 string Rar! \b, RAR self-extracting archive +>0x40 string aPKG \b, aPackage self-extracting archive + +>32 string AIN +>>35 string 2 \b, AIN 2.x compressed +>>35 string <2 \b, AIN 1.x compressed +>>35 string >2 \b, AIN 1.x compressed +>28 string UC2X \b, UCEXE compressed +>28 string WWP\ \b, WWPACK compressed + +# skip to the end of the exe +>(4.s*512) long x +>>&(2.s-517) byte x +>>>&0 string PK\3\4 \b, ZIP self-extracting archive +>>>&0 string Rar! \b, RAR self-extracting archive +>>>&0 string =!\x11 \b, AIN 2.x self-extracting archive +>>>&0 string =!\x12 \b, AIN 2.x self-extracting archive +>>>&0 string =!\x17 \b, AIN 1.x self-extracting archive +>>>&0 string =!\x18 \b, AIN 1.x self-extracting archive +>>>&7 search/400 **ACE** \b, ACE self-extracting archive +>>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive + +>0x1c string RJSX \b, ARJ self-extracting archive +# winarj stores a message in the stub instead of the sig in the MZ header +>0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive + +# a few unknown ZIP sfxes, no idea if they are needed or if they are +# already captured by the generic patterns above +>122 string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive +>(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP) +# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive # + # TELVOX Teleinformatica CODEC self-extractor for OS/2: >49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21 >>49824 leshort =1 \b, 1 file @@ -131,12 +255,21 @@ # Uncommenting only the first two lines will cover about 2/3 of COM files, # but it isn't feasible to match all COM files since there must be at least # two dozen different one-byte "magics". -#0 byte 0xe9 MS-DOS executable (COM) -#>6 string SFX\ of\ LHarc (%s) -#0 byte 0x8c MS-DOS executable (COM) +0 byte 0xe9 MS-DOS executable (COM) +>6 string SFX\ of\ LHarc (%s) +0 byte 0x8c MS-DOS executable (COM) # 0xeb conflicts with "sequent" magic -#0 byte 0xeb MS-DOS executable (COM) -#0 byte 0xb8 MS-DOS executable (COM) +0 byte 0xeb MS-DOS executable (COM) +>4 string \ $ARX \b, ARX self-extracting archive +>4 string \ $LHarc \b, LHarc self-extracting archive +>0x20e string SFX\ by\ LARC \b, LARC self-extracting archive +0 byte 0xb8 COM executable for MS-DOS +# many compressed/converted COMs start with a copy loop instead of a jump +0x6 search/0xa \xfc\x57\xf3\xa5\xc3 COM executable for MS-DOS +0x6 search/0xa \xfc\x57\xf3\xa4\xc3 COM executable for MS-DOS +>0x18 search/0x10 \x50\xa4\xff\xd5\x73 \b, aPack compressed +0x3c string W\ Collis\0\0 COM executable for MS-DOS, Compack compressed +# FIXME: missing diet .com compression # miscellaneous formats 0 string LZ MS-DOS executable (built-in) @@ -224,24 +357,24 @@ 0 string \211\000\225\003\005\000\062\122\207\304\100\345\042 PGP sig # windows zips files .dmf -0 string MDIF\032\000\010\000\000\000\372\046\100\175\001\000\001\036\001\000 Ms-windows special zipped file +0 string MDIF\032\000\010\000\000\000\372\046\100\175\001\000\001\036\001\000 MS Windows special zipped file # Windows help file FTG FTS -0 string \164\146\115\122\012\000\000\000\001\000\000\000 ms-windows help cache +0 string \164\146\115\122\012\000\000\000\001\000\000\000 MS Windows help cache # grp old windows 3.1 group files -0 string \120\115\103\103 Ms-windows 3.1 group files +0 string \120\115\103\103 MS Windows 3.1 group files # lnk files windows symlinks -0 string \114\000\000\000\001\024\002\000\000\000\000\000\300\000\000\000\000\000\000\106 ms-Windows shortcut +0 string \114\000\000\000\001\024\002\000\000\000\000\000\300\000\000\000\000\000\000\106 MS Windows shortcut #ico files -0 string \102\101\050\000\000\000\056\000\000\000\000\000\000\000 Icon for ms-windows +0 string \102\101\050\000\000\000\056\000\000\000\000\000\000\000 Icon for MS Windows # Windows icons (Ian Springer ) -0 string \000\000\001\000 ms-windows icon resource +0 string \000\000\001\000 MS Windows icon resource >4 byte 1 - 1 icon >4 byte >1 - %d icons >>6 byte >0 \b, %dx @@ -263,7 +396,7 @@ # recycled/info the windows trash bin index -9 string \000\000\000\030\001\000\000\000 ms-windows recycled bin info +9 string \000\000\000\030\001\000\000\000 MS Windows recycled bin info ##### put in Either Magic/font or Magic/news @@ -283,8 +416,8 @@ 0 string GERBIL First Choice device file 9 string RABBITGRAPH RabbitGraph file 0 string DCU1 Borland Delphi .DCU file -0 string ! MKS Spell hash list (old format) -0 string ! MKS Spell hash list +0 string =! MKS Spell hash list (old format) +0 string =! MKS Spell hash list # Too simple - MPi #0 string AH Halo(TM) bitmapped font file 0 lelong 0x08086b70 TurboC BGI file @@ -324,26 +457,16 @@ # GFA-BASIC (Wolfram Kleff) 2 string GFA-BASIC3 GFA-BASIC 3 data -# DJGPP compiled files -# v >2, uses DPMI & small(2k) stub (Robert vd Boon, rjvdboon@europe.com) -0x200 string go32stub DOS-executable compiled w/DJGPP ->0x20c string >0 (stub v%.4s) ->>0x8b2 string djp [compressed w/%s ->>>&1 string >\0 %.4s] ->>0x8ad string UPX [compressed w/%s ->>>&1 string >\0 %.4s] ->>0x1c string pmodedj stubbed with %s - #------------------------------------------------------------------------------ # From Stuart Caie (developer of cabextract) # Microsoft Cabinet files -0 string MSCF\0\0\0\0 Microsoft Cabinet file +0 string MSCF\0\0\0\0 Microsoft Cabinet archive data >8 lelong x \b, %u bytes >28 leshort 1 \b, 1 file >28 leshort >1 \b, %u files # InstallShield Cabinet files -0 string ISc( InstallShield Cabinet file +0 string ISc( InstallShield Cabinet archive data >5 byte&0xf0 =0x60 version 6, >5 byte&0xf0 !0x60 version 4/5, >(12.l+40) lelong x %u files diff --git a/magic/Magdir/sharc b/magic/Magdir/sharc index 7201e856..8c2cae4c 100644 --- a/magic/Magdir/sharc +++ b/magic/Magdir/sharc @@ -12,7 +12,7 @@ #------------------------------------------------------------------------ # SHARC DSP stuff (based on the FGM SHARC DSP SDK) -0 string ! Assembler source +0 string =! Assembler source 0 string Analog ADi asm listing file 0 string .SYSTEM SHARC architecture file 0 string .system SHARC architecture file diff --git a/src/apprentice.c b/src/apprentice.c index 22e2d847..16c6e847 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -45,7 +45,7 @@ #endif #ifndef lint -FILE_RCSID("@(#)$Id: apprentice.c,v 1.82 2004/11/24 18:56:04 christos Exp $") +FILE_RCSID("@(#)$Id: apprentice.c,v 1.83 2005/03/06 05:58:22 christos Exp $") #endif /* lint */ #define EATAB {while (isascii((unsigned char) *l) && \ @@ -74,8 +74,11 @@ FILE_RCSID("@(#)$Id: apprentice.c,v 1.82 2004/11/24 18:56:04 christos Exp $") #define MAXPATHLEN 1024 #endif -#define IS_STRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \ +#define IS_PLAINSTRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \ (t) == FILE_BESTRING16 || (t) == FILE_LESTRING16) + +#define IS_STRING(t) (IS_PLAINSTRING(t) || (t) == FILE_REGEX || \ + (t) == FILE_SEARCH) private int getvalue(struct magic_set *ms, struct magic *, char **); private int hextoint(int); @@ -375,8 +378,8 @@ file_signextend(struct magic_set *ms, struct magic *m, uint32_t v) case FILE_PSTRING: case FILE_BESTRING16: case FILE_LESTRING16: - break; case FILE_REGEX: + case FILE_SEARCH: break; default: if (ms->flags & MAGIC_CHECK) @@ -423,9 +426,15 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, m->cont_level++; } + if (m->cont_level != 0 && *l == '&') { + ++l; /* step over */ + m->flag |= OFFADD; + } if (m->cont_level != 0 && *l == '(') { ++l; /* step over */ m->flag |= INDIR; + if (m->flag & OFFADD) + m->flag = (m->flag & ~OFFADD) | INDIROFFADD; } if (m->cont_level != 0 && *l == '&') { ++l; /* step over */ @@ -478,7 +487,7 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, l++; } if (*l == '~') { - m->in_op = FILE_OPINVERSE; + m->in_op |= FILE_OPINVERSE; l++; } switch (*l) { @@ -515,11 +524,16 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, l++; break; } - if (isdigit((unsigned char)*l)) - m->in_offset = (uint32_t)strtoul(l, &t, 0); + if (*l == '(') { + m->in_op |= FILE_OPINDIRECT; + l++; + } + if (isdigit((unsigned char)*l) || *l == '-') + m->in_offset = (int32_t)strtol(l, &t, 0); else t = l; - if (*t++ != ')') + if (*t++ != ')' || + ((m->in_op & FILE_OPINDIRECT) && *t++ != ')')) if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "missing ')' in indirect offset"); @@ -549,6 +563,7 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, #define NREGEX 5 #define NBESTRING16 10 #define NLESTRING16 10 +#define NSEARCH 6 if (*l == 'u') { ++l; @@ -613,6 +628,9 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, } else if (strncmp(l, "lestring16", NLESTRING16)==0) { m->type = FILE_LESTRING16; l += NLESTRING16; + } else if (strncmp(l, "search", NSEARCH)==0) { + m->type = FILE_SEARCH; + l += NSEARCH; } else { if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "type `%s' invalid", l); @@ -622,12 +640,12 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ if (*l == '~') { if (!IS_STRING(m->type)) - m->mask_op = FILE_OPINVERSE; + m->mask_op |= FILE_OPINVERSE; ++l; } if ((t = strchr(fops, *l)) != NULL) { uint32_t op = (uint32_t)(t - fops); - if (op != FILE_OPDIVIDE || !IS_STRING(m->type)) { + if (op != FILE_OPDIVIDE || !IS_PLAINSTRING(m->type)) { ++l; m->mask_op |= op; val = (uint32_t)strtoul(l, &l, 0); @@ -655,6 +673,7 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, return -1; } } + ++l; } } /* @@ -678,15 +697,12 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, } break; case '!': - if (!IS_STRING(m->type)) { - m->reln = *l; - ++l; - break; - } - /*FALLTHROUGH*/ + m->reln = *l; + ++l; + break; default: - if (*l == 'x' && isascii((unsigned char)l[1]) && - isspace((unsigned char)l[1])) { + if (*l == 'x' && ((isascii((unsigned char)l[1]) && + isspace((unsigned char)l[1])) || !l[1])) { m->reln = *l; ++l; goto GetDesc; /* Bill The Cat */ @@ -801,6 +817,7 @@ getvalue(struct magic_set *ms, struct magic *m, char **p) case FILE_STRING: case FILE_PSTRING: case FILE_REGEX: + case FILE_SEARCH: *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen); if (*p == NULL) { if (ms->flags & MAGIC_CHECK) diff --git a/src/compress.c b/src/compress.c index a176b8e2..a33c7e93 100644 --- a/src/compress.c +++ b/src/compress.c @@ -50,7 +50,7 @@ #endif #ifndef lint -FILE_RCSID("@(#)$Id: compress.c,v 1.41 2005/01/07 19:17:26 christos Exp $") +FILE_RCSID("@(#)$Id: compress.c,v 1.42 2005/03/06 05:58:22 christos Exp $") #endif @@ -69,6 +69,8 @@ private struct { { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */ /* the standard pack utilities do not accept standard input */ { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */ + { "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */ + /* ...only first file examined */ { "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */ }; @@ -349,7 +351,7 @@ uncompressbuf(struct magic_set *ms, int fd, size_t method, #endif execvp(compr[method].argv[0], - (char *const *)compr[method].argv); + (char *const *)(intptr_t)compr[method].argv); #ifdef DEBUG (void)fprintf(stderr, "exec `%s' failed (%s)\n", compr[method].argv[0], strerror(errno)); diff --git a/src/file.c b/src/file.c index 80f5c410..bace2579 100644 --- a/src/file.c +++ b/src/file.c @@ -72,7 +72,7 @@ #include "patchlevel.h" #ifndef lint -FILE_RCSID("@(#)$Id: file.c,v 1.95 2004/09/27 15:28:37 christos Exp $") +FILE_RCSID("@(#)$Id: file.c,v 1.96 2005/03/06 05:58:22 christos Exp $") #endif /* lint */ @@ -95,7 +95,7 @@ private int /* Global command-line options */ private const char *magicfile = 0; /* where the magic is */ private const char *default_magicfile = MAGIC; -private char *separator = ":"; /* Default field separator */ +private const char *separator = ":"; /* Default field separator */ private char *progname; /* used throughout */ diff --git a/src/file.h b/src/file.h index 1fa77586..e9e8d809 100644 --- a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$Id: file.h,v 1.65 2005/01/07 19:17:26 christos Exp $ + * @(#)$Id: file.h,v 1.66 2005/03/06 05:58:22 christos Exp $ */ #ifndef __file_h__ @@ -67,7 +67,7 @@ #ifndef HOWMANY # define HOWMANY 65536 /* how much of the file to look at */ #endif -#define MAXMAGIS 4096 /* max entries in /etc/magic */ +#define MAXMAGIS 8192 /* max entries in /etc/magic */ #define MAXDESC 64 /* max leng of text description */ #define MAXstring 32 /* max leng of "string" types */ @@ -87,6 +87,7 @@ struct magic { #define INDIR 1 /* if '>(...)' appears, */ #define UNSIGNED 2 /* comparison is unsigned */ #define OFFADD 4 /* if '>&' appears, */ +#define INDIROFFADD 8 /* if '>&(' appears, */ /* Word 2 */ uint8_t reln; /* relation (0=eq, '>'=gt, etc) */ uint8_t vallen; /* length of string value, if any */ @@ -110,6 +111,7 @@ struct magic { #define FILE_REGEX 17 #define FILE_BESTRING16 18 #define FILE_LESTRING16 19 +#define FILE_SEARCH 20 #define FILE_FORMAT_NAME \ /* 0 */ "invalid 0", \ @@ -121,7 +123,7 @@ struct magic { /* 6 */ "date", \ /* 7 */ "beshort", \ /* 8 */ "belong", \ -/* 9 */ "bedate" \ +/* 9 */ "bedate", \ /* 10 */ "leshort", \ /* 11 */ "lelong", \ /* 12 */ "ledate", \ @@ -131,7 +133,8 @@ struct magic { /* 16 */ "leldate", \ /* 17 */ "regex", \ /* 18 */ "bestring16", \ -/* 19 */ "lestring16", +/* 19 */ "lestring16", \ +/* 20 */ "search", #define FILE_FMT_NUM "cduxXi" #define FILE_FMT_STR "s" @@ -156,7 +159,8 @@ struct magic { /* 16 */ FILE_FMT_STR, \ /* 17 */ FILE_FMT_STR, \ /* 18 */ FILE_FMT_STR, \ -/* 19 */ FILE_FMT_STR, +/* 19 */ FILE_FMT_STR, \ +/* 20 */ FILE_FMT_STR, /* Word 3 */ uint8_t in_op; /* operator for indirection */ @@ -172,11 +176,12 @@ struct magic { #define FILE_OPMULTIPLY 5 #define FILE_OPDIVIDE 6 #define FILE_OPMODULO 7 -#define FILE_OPINVERSE 0x80 +#define FILE_OPINVERSE 0x40 +#define FILE_OPINDIRECT 0x80 /* Word 4 */ uint32_t offset; /* offset to magic number */ /* Word 5 */ - uint32_t in_offset; /* offset from indirection */ + int32_t in_offset; /* offset from indirection */ /* Word 6 */ uint32_t mask; /* mask before comparison with value */ /* Word 7 */ diff --git a/src/magic.c b/src/magic.c index 872b62d0..34ce1994 100644 --- a/src/magic.c +++ b/src/magic.c @@ -63,7 +63,7 @@ #include "patchlevel.h" #ifndef lint -FILE_RCSID("@(#)$Id: magic.c,v 1.25 2005/01/07 19:17:27 christos Exp $") +FILE_RCSID("@(#)$Id: magic.c,v 1.26 2005/03/06 05:58:22 christos Exp $") #endif /* lint */ #ifdef __EMX__ @@ -211,7 +211,12 @@ public const char * magic_file(struct magic_set *ms, const char *inname) { int fd = 0; - unsigned char buf[HOWMANY+1]; /* one extra for terminating '\0' */ + /* + * one extra for terminating '\0', and + * some overlapping space for matches near EOF + */ +#define SLOP (1 + sizeof(union VALUETYPE)) + unsigned char buf[HOWMANY + SLOP]; struct stat sb; ssize_t nbytes = 0; /* number of bytes read from a datafile */ @@ -251,6 +256,9 @@ magic_file(struct magic_set *ms, const char *inname) /* * try looking at the first HOWMANY bytes */ +/*###259 [cc] error: `buf' undeclared (first use in this function)%%%*/ +/*###259 [cc] error: (Each undeclared identifier is reported only once%%%*/ +/*###259 [cc] error: for each function it appears in.)%%%*/ if ((nbytes = read(fd, (char *)buf, HOWMANY)) == -1) { file_error(ms, errno, "cannot read `%s'", inname); goto done; @@ -266,7 +274,8 @@ magic_file(struct magic_set *ms, const char *inname) goto done; goto gotit; } else { - buf[nbytes] = '\0'; /* null-terminate it */ +/*###274 [cc] error: parse error before ';' token%%%*/ + (void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */ #ifdef __EMX__ switch (file_os2_apptype(ms, inname, buf, nbytes)) { case -1: diff --git a/src/softmagic.c b/src/softmagic.c index 1202f726..7c32e7fc 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -39,13 +39,13 @@ #ifndef lint -FILE_RCSID("@(#)$Id: softmagic.c,v 1.72 2004/11/24 17:38:25 christos Exp $") +FILE_RCSID("@(#)$Id: softmagic.c,v 1.73 2005/03/06 05:58:22 christos Exp $") #endif /* lint */ private int match(struct magic_set *, struct magic *, uint32_t, const unsigned char *, size_t); private int mget(struct magic_set *, union VALUETYPE *, const unsigned char *, - struct magic *, size_t); + struct magic *, size_t, int); private int mcheck(struct magic_set *, union VALUETYPE *, struct magic *); private int32_t mprint(struct magic_set *, union VALUETYPE *, struct magic *); private void mdebug(uint32_t, const char *, size_t); @@ -115,15 +115,20 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, for (magindex = 0; magindex < nmagic; magindex++) { /* if main entry matches, print it... */ - int flush = !mget(ms, &p, s, &magic[magindex], nbytes); - switch (mcheck(ms, &p, &magic[magindex])) { - case -1: - return -1; - case 0: - flush++; - break; - default: - break; + int flush = !mget(ms, &p, s, &magic[magindex], nbytes, + cont_level); + if (flush) { + if (magic[magindex].reln == '!') flush = 0; + } else { + switch (mcheck(ms, &p, &magic[magindex])) { + case -1: + return -1; + case 0: + flush++; + break; + default: + break; + } } if (flush) { /* @@ -166,14 +171,18 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, */ cont_level = magic[magindex].cont_level; } + oldoff = magic[magindex].offset; if (magic[magindex].flag & OFFADD) { - oldoff=magic[magindex].offset; - magic[magindex].offset += ms->c.off[cont_level-1]; + magic[magindex].offset += + ms->c.off[cont_level - 1]; } - if (!mget(ms, &p, s, &magic[magindex], nbytes)) + + flush = !mget(ms, &p, s, &magic[magindex], nbytes, + cont_level); + if (flush && magic[magindex].reln != '!') goto done; - switch (mcheck(ms, &p, &magic[magindex])) { + switch (flush ? 1 : mcheck(ms, &p, &magic[magindex])) { case -1: return -1; case 0: @@ -209,9 +218,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, return -1; } done: - if (magic[magindex].flag & OFFADD) { - magic[magindex].offset = oldoff; - } + magic[magindex].offset = oldoff; } firstline = 0; returnval = 1; @@ -275,10 +282,10 @@ mprint(struct magic_set *ms, union VALUETYPE *p, struct magic *m) case FILE_PSTRING: case FILE_BESTRING16: case FILE_LESTRING16: - if (m->reln == '=') { + if (m->reln == '=' || m->reln == '!') { if (file_printf(ms, m->desc, m->value.s) == -1) return -1; - t = m->offset + strlen(m->value.s); + t = m->offset + m->vallen; } else { if (*m->value.s == '\0') { @@ -312,6 +319,11 @@ mprint(struct magic_set *ms, union VALUETYPE *p, struct magic *m) return -1; t = m->offset + strlen(p->s); break; + case FILE_SEARCH: + if (file_printf(ms, m->desc, m->value.s) == -1) + return -1; + t = m->offset + m->vallen; + break; default: file_error(ms, 0, "invalid m->type (%d) in mprint()", m->type); @@ -331,7 +343,7 @@ mconvert(struct magic_set *ms, union VALUETYPE *p, struct magic *m) switch (m->type) { case FILE_BYTE: if (m->mask) - switch (m->mask_op&0x7F) { + switch (m->mask_op & 0x7F) { case FILE_OPAND: p->b &= m->mask; break; @@ -362,7 +374,7 @@ mconvert(struct magic_set *ms, union VALUETYPE *p, struct magic *m) return 1; case FILE_SHORT: if (m->mask) - switch (m->mask_op&0x7F) { + switch (m->mask_op & 0x7F) { case FILE_OPAND: p->h &= m->mask; break; @@ -395,7 +407,7 @@ mconvert(struct magic_set *ms, union VALUETYPE *p, struct magic *m) case FILE_DATE: case FILE_LDATE: if (m->mask) - switch (m->mask_op&0x7F) { + switch (m->mask_op & 0x7F) { case FILE_OPAND: p->l &= m->mask; break; @@ -586,6 +598,7 @@ mconvert(struct magic_set *ms, union VALUETYPE *p, struct magic *m) p->l = ~p->l; return 1; case FILE_REGEX: + case FILE_SEARCH: return 1; default: file_error(ms, 0, "invalid type %d in mconvert()", m->type); @@ -612,15 +625,17 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, * offset is interpreted as last line to search, * (starting at 1), not as bytes-from start-of-file */ - unsigned char *b, *last = NULL; + char *b, *c, *last = NULL; if ((p->buf = strdup((const char *)s)) == NULL) { file_oomem(ms); return -1; } - for (b = (unsigned char *)p->buf; offset && - (b = (unsigned char *)strchr((char *)b, '\n')) != NULL; - offset--, s++) + for (b = p->buf; offset && + ((b = strchr(c = b, '\n')) || (b = strchr(c, '\r'))); + offset--, b++) { last = b; + if (b[0] == '\r' && b[1] == '\n') b++; + } if (last != NULL) *last = '\0'; return 0; @@ -669,80 +684,77 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, private int mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, - struct magic *m, size_t nbytes) + struct magic *m, size_t nbytes, int cont_level) { uint32_t offset = m->offset; if (mcopy(ms, p, m->type, m->flag & INDIR, s, offset, nbytes) == -1) return -1; - /* Verify we have enough data to match magic type */ - switch (m->type) { - case FILE_BYTE: - if (nbytes < (offset + 1)) /* should alway be true */ - return 0; - break; - - case FILE_SHORT: - case FILE_BESHORT: - case FILE_LESHORT: - if (nbytes < (offset + 2)) - return 0; - break; - - case FILE_LONG: - case FILE_BELONG: - case FILE_LELONG: - case FILE_DATE: - case FILE_BEDATE: - case FILE_LEDATE: - case FILE_LDATE: - case FILE_BELDATE: - case FILE_LELDATE: - if (nbytes < (offset + 4)) - return 0; - break; - - case FILE_STRING: - case FILE_PSTRING: - if (nbytes < (offset + m->vallen)) - return 0; - break; - } - if ((ms->flags & MAGIC_DEBUG) != 0) { mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE)); file_mdump(m); } if (m->flag & INDIR) { + int off = m->in_offset; + if (m->in_op & FILE_OPINDIRECT) { + const union VALUETYPE *q = + ((const union VALUETYPE *)(s + offset + off)); + switch (m->in_type) { + case FILE_BYTE: + off = q->b; + break; + case FILE_SHORT: + off = q->h; + break; + case FILE_BESHORT: + off = (short)((q->hs[0]<<8)|(q->hs[1])); + break; + case FILE_LESHORT: + off = (short)((q->hs[1]<<8)|(q->hs[0])); + break; + case FILE_LONG: + off = q->l; + break; + case FILE_BELONG: + off = (int32_t)((q->hl[0]<<24)|(q->hl[1]<<16)| + (q->hl[2]<<8)|(q->hl[3])); + break; + case FILE_LELONG: + off = (int32_t)((q->hl[3]<<24)|(q->hl[2]<<16)| + (q->hl[1]<<8)|(q->hl[0])); + break; + } + } switch (m->in_type) { case FILE_BYTE: - if (m->in_offset) { - switch (m->in_op&0x7F) { + if (nbytes < (offset + 1)) return 0; + if (off) { + switch (m->in_op & 0x3F) { case FILE_OPAND: - offset = p->b & m->in_offset; + offset = p->b & off; break; case FILE_OPOR: - offset = p->b | m->in_offset; + offset = p->b | off; break; case FILE_OPXOR: - offset = p->b ^ m->in_offset; + offset = p->b ^ off; break; case FILE_OPADD: - offset = p->b + m->in_offset; + offset = p->b + off; break; case FILE_OPMINUS: - offset = p->b - m->in_offset; + offset = p->b - off; break; case FILE_OPMULTIPLY: - offset = p->b * m->in_offset; + offset = p->b * off; break; case FILE_OPDIVIDE: - offset = p->b / m->in_offset; + offset = p->b / off; break; case FILE_OPMODULO: - offset = p->b % m->in_offset; + offset = p->b % off; break; } } else @@ -751,47 +763,49 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, offset = ~offset; break; case FILE_BESHORT: - if (m->in_offset) { + if (nbytes < (offset + 2)) + return 0; + if (off) { switch (m->in_op & 0x7F) { case FILE_OPAND: offset = (short)((p->hs[0]<<8)| (p->hs[1])) & - m->in_offset; + off; break; case FILE_OPOR: offset = (short)((p->hs[0]<<8)| (p->hs[1])) | - m->in_offset; + off; break; case FILE_OPXOR: offset = (short)((p->hs[0]<<8)| (p->hs[1])) ^ - m->in_offset; + off; break; case FILE_OPADD: offset = (short)((p->hs[0]<<8)| (p->hs[1])) + - m->in_offset; + off; break; case FILE_OPMINUS: offset = (short)((p->hs[0]<<8)| (p->hs[1])) - - m->in_offset; + off; break; case FILE_OPMULTIPLY: offset = (short)((p->hs[0]<<8)| (p->hs[1])) * - m->in_offset; + off; break; case FILE_OPDIVIDE: offset = (short)((p->hs[0]<<8)| (p->hs[1])) / - m->in_offset; + off; break; case FILE_OPMODULO: offset = (short)((p->hs[0]<<8)| (p->hs[1])) % - m->in_offset; + off; break; } } else @@ -801,47 +815,49 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, offset = ~offset; break; case FILE_LESHORT: - if (m->in_offset) { + if (nbytes < (offset + 2)) + return 0; + if (off) { switch (m->in_op & 0x7F) { case FILE_OPAND: offset = (short)((p->hs[1]<<8)| (p->hs[0])) & - m->in_offset; + off; break; case FILE_OPOR: offset = (short)((p->hs[1]<<8)| (p->hs[0])) | - m->in_offset; + off; break; case FILE_OPXOR: offset = (short)((p->hs[1]<<8)| (p->hs[0])) ^ - m->in_offset; + off; break; case FILE_OPADD: offset = (short)((p->hs[1]<<8)| (p->hs[0])) + - m->in_offset; + off; break; case FILE_OPMINUS: offset = (short)((p->hs[1]<<8)| (p->hs[0])) - - m->in_offset; + off; break; case FILE_OPMULTIPLY: offset = (short)((p->hs[1]<<8)| (p->hs[0])) * - m->in_offset; + off; break; case FILE_OPDIVIDE: offset = (short)((p->hs[1]<<8)| (p->hs[0])) / - m->in_offset; + off; break; case FILE_OPMODULO: offset = (short)((p->hs[1]<<8)| (p->hs[0])) % - m->in_offset; + off; break; } } else @@ -851,31 +867,33 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, offset = ~offset; break; case FILE_SHORT: - if (m->in_offset) { + if (nbytes < (offset + 2)) + return 0; + if (off) { switch (m->in_op & 0x7F) { case FILE_OPAND: - offset = p->h & m->in_offset; + offset = p->h & off; break; case FILE_OPOR: - offset = p->h | m->in_offset; + offset = p->h | off; break; case FILE_OPXOR: - offset = p->h ^ m->in_offset; + offset = p->h ^ off; break; case FILE_OPADD: - offset = p->h + m->in_offset; + offset = p->h + off; break; case FILE_OPMINUS: - offset = p->h - m->in_offset; + offset = p->h - off; break; case FILE_OPMULTIPLY: - offset = p->h * m->in_offset; + offset = p->h * off; break; case FILE_OPDIVIDE: - offset = p->h / m->in_offset; + offset = p->h / off; break; case FILE_OPMODULO: - offset = p->h % m->in_offset; + offset = p->h % off; break; } } @@ -885,63 +903,65 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, offset = ~offset; break; case FILE_BELONG: - if (m->in_offset) { + if (nbytes < (offset + 4)) + return 0; + if (off) { switch (m->in_op & 0x7F) { case FILE_OPAND: offset = (int32_t)((p->hl[0]<<24)| (p->hl[1]<<16)| (p->hl[2]<<8)| (p->hl[3])) & - m->in_offset; + off; break; case FILE_OPOR: offset = (int32_t)((p->hl[0]<<24)| (p->hl[1]<<16)| (p->hl[2]<<8)| (p->hl[3])) | - m->in_offset; + off; break; case FILE_OPXOR: offset = (int32_t)((p->hl[0]<<24)| (p->hl[1]<<16)| (p->hl[2]<<8)| (p->hl[3])) ^ - m->in_offset; + off; break; case FILE_OPADD: offset = (int32_t)((p->hl[0]<<24)| (p->hl[1]<<16)| (p->hl[2]<<8)| (p->hl[3])) + - m->in_offset; + off; break; case FILE_OPMINUS: offset = (int32_t)((p->hl[0]<<24)| (p->hl[1]<<16)| (p->hl[2]<<8)| (p->hl[3])) - - m->in_offset; + off; break; case FILE_OPMULTIPLY: offset = (int32_t)((p->hl[0]<<24)| (p->hl[1]<<16)| (p->hl[2]<<8)| (p->hl[3])) * - m->in_offset; + off; break; case FILE_OPDIVIDE: offset = (int32_t)((p->hl[0]<<24)| (p->hl[1]<<16)| (p->hl[2]<<8)| (p->hl[3])) / - m->in_offset; + off; break; case FILE_OPMODULO: offset = (int32_t)((p->hl[0]<<24)| (p->hl[1]<<16)| (p->hl[2]<<8)| (p->hl[3])) % - m->in_offset; + off; break; } } else @@ -953,63 +973,65 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, offset = ~offset; break; case FILE_LELONG: - if (m->in_offset) { + if (nbytes < (offset + 4)) + return 0; + if (off) { switch (m->in_op & 0x7F) { case FILE_OPAND: offset = (int32_t)((p->hl[3]<<24)| (p->hl[2]<<16)| (p->hl[1]<<8)| (p->hl[0])) & - m->in_offset; + off; break; case FILE_OPOR: offset = (int32_t)((p->hl[3]<<24)| (p->hl[2]<<16)| (p->hl[1]<<8)| (p->hl[0])) | - m->in_offset; + off; break; case FILE_OPXOR: offset = (int32_t)((p->hl[3]<<24)| (p->hl[2]<<16)| (p->hl[1]<<8)| (p->hl[0])) ^ - m->in_offset; + off; break; case FILE_OPADD: offset = (int32_t)((p->hl[3]<<24)| (p->hl[2]<<16)| (p->hl[1]<<8)| (p->hl[0])) + - m->in_offset; + off; break; case FILE_OPMINUS: offset = (int32_t)((p->hl[3]<<24)| (p->hl[2]<<16)| (p->hl[1]<<8)| (p->hl[0])) - - m->in_offset; + off; break; case FILE_OPMULTIPLY: offset = (int32_t)((p->hl[3]<<24)| (p->hl[2]<<16)| (p->hl[1]<<8)| (p->hl[0])) * - m->in_offset; + off; break; case FILE_OPDIVIDE: offset = (int32_t)((p->hl[3]<<24)| (p->hl[2]<<16)| (p->hl[1]<<8)| (p->hl[0])) / - m->in_offset; + off; break; case FILE_OPMODULO: offset = (int32_t)((p->hl[3]<<24)| (p->hl[2]<<16)| (p->hl[1]<<8)| (p->hl[0])) % - m->in_offset; + off; break; } } else @@ -1021,31 +1043,33 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, offset = ~offset; break; case FILE_LONG: - if (m->in_offset) { + if (nbytes < (offset + 4)) + return 0; + if (off) { switch (m->in_op & 0x7F) { case FILE_OPAND: - offset = p->l & m->in_offset; + offset = p->l & off; break; case FILE_OPOR: - offset = p->l | m->in_offset; + offset = p->l | off; break; case FILE_OPXOR: - offset = p->l ^ m->in_offset; + offset = p->l ^ off; break; case FILE_OPADD: - offset = p->l + m->in_offset; + offset = p->l + off; break; case FILE_OPMINUS: - offset = p->l - m->in_offset; + offset = p->l - off; break; case FILE_OPMULTIPLY: - offset = p->l * m->in_offset; + offset = p->l * off; break; case FILE_OPDIVIDE: - offset = p->l / m->in_offset; + offset = p->l / off; break; case FILE_OPMODULO: - offset = p->l % m->in_offset; + offset = p->l % off; break; /* case TOOMANYSWITCHBLOCKS: * ugh = p->eye % m->strain; @@ -1062,8 +1086,10 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, break; } + if (m->flag & INDIROFFADD) offset += ms->c.off[cont_level-1]; if (mcopy(ms, p, m->type, 0, s, offset, nbytes) == -1) return -1; + m->offset = offset; if ((ms->flags & MAGIC_DEBUG) != 0) { mdebug(offset, (char *)(void *)p, @@ -1071,8 +1097,53 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, file_mdump(m); } } + + /* Verify we have enough data to match magic type */ + switch (m->type) { + case FILE_BYTE: + if (nbytes < (offset + 1)) /* should alway be true */ + return 0; + break; + + case FILE_SHORT: + case FILE_BESHORT: + case FILE_LESHORT: + if (nbytes < (offset + 2)) + return 0; + break; + + case FILE_LONG: + case FILE_BELONG: + case FILE_LELONG: + case FILE_DATE: + case FILE_BEDATE: + case FILE_LEDATE: + case FILE_LDATE: + case FILE_BELDATE: + case FILE_LELDATE: + if (nbytes < (offset + 4)) + return 0; + break; + + case FILE_STRING: + case FILE_PSTRING: + case FILE_SEARCH: + if (nbytes < (offset + m->vallen)) + return 0; + break; + default: break; + } + + if (m->type == FILE_SEARCH) { + p->buf = malloc(m->mask + m->vallen); + if (p->buf == NULL) { + file_error(ms, errno, "Cannot allocate search buffer"); + return 0; + } + (void)memcpy(p->buf, s + offset, m->mask + m->vallen); + } if (!mconvert(ms, p, m)) - return 0; + return 0; return 1; } @@ -1166,7 +1237,9 @@ mcheck(struct magic_set *ms, union VALUETYPE *p, struct magic *m) regex_t rx; char errmsg[512]; - rc = regcomp(&rx, m->value.s, REG_EXTENDED|REG_NOSUB); + rc = regcomp(&rx, m->value.s, + REG_EXTENDED|REG_NOSUB|REG_NEWLINE| + ((m->mask & STRING_IGNORE_LOWERCASE) ? REG_ICASE : 0)); if (rc) { free(p->buf); regerror(rc, &rx, errmsg, sizeof(errmsg)); @@ -1179,6 +1252,31 @@ mcheck(struct magic_set *ms, union VALUETYPE *p, struct magic *m) return !rc; } } + case FILE_SEARCH: + { + /* + * search for a string in a certain range + */ + unsigned char *a = (unsigned char*)m->value.s; + unsigned char *b = (unsigned char*)p->buf; + int len = m->vallen; + int range = 0; + l = 0; + v = 0; + while (++range <= m->mask) { + while (len-- > 0 && (v = *b++ - *a++) == 0) + continue; + if (!v) { + m->offset += range-1; + break; + } + len = m->vallen; + a = (unsigned char*)m->value.s; + b = (unsigned char*)p->buf + range; + } + free(p->buf); + break; + } default: file_error(ms, 0, "invalid type %d in mcheck()", m->type); return -1; -- 2.40.0