/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* Portions of this software are based upon public domain software
* originally written at the National Center for Supercomputing Applications,
* University of Illinois, Urbana-Champaign.
*/
/*
* mod_mime_magic: MIME type lookup via file magic numbers
* Copyright (c) 1996-1997 Cisco Systems, Inc.
*
* This software was submitted by Cisco Systems to the Apache Software Foundation in July
* 1997. Future revisions and derivatives of this source code must
* acknowledge Cisco Systems as the original contributor of this module.
* All other licensing and usage conditions are those of the Apache Software Foundation.
*
* Some of this code is derived from the free version of the file command
* originally posted to comp.sources.unix. Copyright info for that program
* is included below as required.
* ---------------------------------------------------------------------------
* - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin.
*
* This software is not subject to any license of the American Telephone and
* Telegraph Company or of the Regents of the University of California.
*
* Permission is granted to anyone to use this software for any purpose on any
* computer system, and to alter it and redistribute it freely, subject to
* the following restrictions:
*
* 1. The author is not responsible for the consequences of use of this
* software, no matter how awful, even if they arise from flaws in it.
*
* 2. The origin of this software must not be misrepresented, either by
* explicit claim or by omission. Since few users ever read sources, credits
* must appear in the documentation.
*
* 3. Altered versions must be plainly marked as such, and must not be
* misrepresented as being the original software. Since few users ever read
* sources, credits must appear in the documentation.
*
* 4. This notice may not be removed or altered.
* -------------------------------------------------------------------------
*
* For compliance with Mr Darwin's terms: this has been very significantly
* modified from the free "file" command.
* - all-in-one file for compilation convenience when moving from one
* version of Apache to the next.
* - Memory allocation is done through the Apache API's apr_pool_t structure.
* - All functions have had necessary Apache API request or server
* structures passed to them where necessary to call other Apache API
* routines. (i.e. usually for logging, files, or memory allocation in
* itself or a called function.)
* - struct magic has been converted from an array to a single-ended linked
* list because it only grows one record at a time, it's only accessed
* sequentially, and the Apache API has no equivalent of realloc().
* - Functions have been changed to get their parameters from the server
* configuration instead of globals. (It should be reentrant now but has
* not been tested in a threaded environment.)
* - Places where it used to print results to stdout now save them in a
* list, from which they are used to set the MIME type in the Apache
* request record.
* - Command-line flags have been removed since they will never be used here.
*
* Ian Kluft
* Engineering Information Framework
* Central Engineering
* Cisco Systems, Inc.
* San Jose, CA, USA
*
* Initial installation July/August 1996
* Misc bug fixes May 1997
* Submission to Apache Software Foundation July 1997
*
*/
#include "apr_strings.h"
#include "ap_config.h"
#include "httpd.h"
#include "http_config.h"
#include "http_request.h"
#include "http_core.h"
#include "http_log.h"
#include "http_protocol.h"
#include "util_script.h"
/*
 * Optional system headers.  Each one is included only when the matching
 * HAVE_*_H feature macro is defined, so platforms lacking the header still
 * compile.
 */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_UTIME_H
#include <utime.h>
#endif
/*
* data structures and related constants
*/
/* Name of this module as a string. */
#define MODNAME "mod_mime_magic"
/* NOTE(review): presumably a compile-time debug switch (0 = off) -- confirm at its uses. */
#define MIME_MAGIC_DEBUG 0
/* NOTE(review): presumably the fallback type for unidentified binary data -- confirm at its uses. */
#define MIME_BINARY_UNKNOWN "application/octet-stream"
/* NOTE(review): presumably the fallback type for unidentified text data -- confirm at its uses. */
#define MIME_TEXT_UNKNOWN "text/plain"
/* NOTE(review): presumably an upper bound on the length of a MIME type string -- confirm at its uses. */
#define MAXMIMESTRING 256
/* HOWMANY must be at least 4096 to make gzip -dcq work */
#define HOWMANY 4096
/* SMALL_HOWMANY limits how much work we do to figure out text files */
#define SMALL_HOWMANY 1024
#define MAXDESC 50 /* max length of text description (size of magic.desc) */
#define MAXstring 64 /* max length of "string" types (size of VALUETYPE.s) */
/*
 * One entry read from the magic file.  Entries are chained into a
 * single-ended linked list through 'next' and are only walked sequentially.
 *
 * The #define lines interleaved with the members are ordinary file-wide
 * macros; they are placed next to the member they appear to belong to.
 * Likewise 'union VALUETYPE' is declared inside this struct but its tag is
 * used on its own by the function prototypes later in the file.
 */
struct magic {
struct magic *next; /* link to next entry */
int lineno; /* line number from magic file */
short flag; /* NOTE(review): presumably a bitmask of INDIR / UNSIGNED below -- confirm */
#define INDIR 1 /* set if '>(...)' (an indirect offset) appears */
#define UNSIGNED 2 /* comparison is unsigned */
short cont_level; /* level of ">" */
struct {
char type; /* byte short long */
long offset; /* offset from indirection */
} in;
long offset; /* offset to magic number */
unsigned char reln; /* relation (0=eq, '>'=gt, etc) */
char type; /* int, short, long or string. */
char vallen; /* length of string value, if any */
/* NOTE(review): the codes below are presumably the values stored in 'type' -- confirm */
#define BYTE 1
#define SHORT 2
#define LONG 4
#define STRING 5
#define DATE 6
#define BESHORT 7
#define BELONG 8
#define BEDATE 9
#define LESHORT 10
#define LELONG 11
#define LEDATE 12
union VALUETYPE {
unsigned char b;
unsigned short h;
unsigned long l;
char s[MAXstring];
unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */
unsigned char hl[4]; /* 4 bytes of a fixed-endian "long" */
} value; /* either number or string */
unsigned long mask; /* mask before comparison with value */
char nospflag; /* suppress space character */
/* NOTE: this string is suspected of overrunning - find it! */
char desc[MAXDESC]; /* description */
};
/*
* data structures for tar file recognition
* --------------------------------------------------------------------------
* Header file for public domain tar (tape archive) program.
*
* @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John
* Gilmore, ihnp4!hoptoad!gnu.
*
* Header block on tape.
*
* I'm going to use traditional DP naming conventions here. A "block" is a big
* chunk of stuff that we do I/O on. A "record" is a piece of info that we
* care about. Typically many "record"s fit into a "block".
*/
/* Size in bytes of one tar record; also the size of the raw 'charptr' view. */
#define RECORDSIZE 512
/* Size of the 'name' and 'linkname' header fields. */
#define NAMSIZ 100
/* Size of the 'uname' header field. */
#define TUNMLEN 32
/* Size of the 'gname' header field. */
#define TGNMLEN 32
/*
 * One tar record, viewable either as RECORDSIZE raw bytes (charptr) or as
 * the individual header fields.  All header members are char or char
 * arrays totalling 345 bytes, so they overlay only the leading part of the
 * 512-byte record.
 */
union record {
char charptr[RECORDSIZE];
struct header {
char name[NAMSIZ];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char chksum[8];
char linkflag;
char linkname[NAMSIZ];
char magic[8]; /* compared against TMAGIC, see its definition */
char uname[TUNMLEN];
char gname[TGNMLEN];
char devmajor[8];
char devminor[8];
} header;
};
/*
 * The magic field is filled with this if uname and gname are valid.
 * The literal is "ustar" followed by TWO spaces: 7 characters plus the
 * terminating null, which exactly fills the 8-byte 'magic' header field.
 */
#define TMAGIC "ustar  " /* 7 chars and a null */
/*
* file-function prototypes
*/
/*
 * Forward declarations for the file-local (static) helper routines.
 * NOTE(review): the definitions are outside the part of the file reviewed
 * here, so the grouping remarks below are inferred from the names and
 * parameter types only -- confirm against the definitions.
 */
/* Routines taking a byte buffer plus an int (presumably the buffer length). */
static int ascmagic(request_rec *, unsigned char *, int);
static int is_tar(unsigned char *, int);
static int softmagic(request_rec *, unsigned char *, int);
static void tryit(request_rec *, unsigned char *, int, int);
static int zmagic(request_rec *, unsigned char *, int);
/* Routines taking a server_rec (presumably used while reading the magic file). */
static int getvalue(server_rec *, struct magic *, char **);
static int hextoint(int);
static char *getstr(server_rec *, char *, char *, int, int *);
static int parse(server_rec *, apr_pool_t *p, char *, int);
/* Routines operating on struct magic entries and union VALUETYPE values. */
static int match(request_rec *, unsigned char *, int);
static int mget(request_rec *, union VALUETYPE *, unsigned char *,
struct magic *, int);
static int mcheck(request_rec *, union VALUETYPE *, struct magic *);
static void mprint(request_rec *, union VALUETYPE *, struct magic *);
static int uncompress(request_rec *, int,
unsigned char **, int);
static long from_oct(int, char *);
/* Takes the request and a file name ('fn'). */
static int fsmagic(request_rec *r, const char *fn);
/*
* includes for ASCII substring recognition formerly "names.h" in file
* command
*
* Original notes: names and types used by ascmagic in file(1). These tokens are
* here because they can appear anywhere in the first HOWMANY bytes, while
* tokens in /etc/magic must appear at fixed offsets into the file. Don't
* make HOWMANY too high unless you have a very fast CPU.
*/
/*
 * Token classes.  Each L_* value is used as an index into the 'types'
 * array defined right below: keep the two in sync!
 * HTML is listed first because this is a web server module now.
 */
#define L_HTML 0 /* HTML */
#define L_C    1 /* first and foremost on UNIX */
#define L_FORT 2 /* the oldest one */
#define L_MAKE 3 /* Makefiles */
#define L_PLI  4 /* PL/1 */
#define L_MACH 5 /* some kinda assembler */
#define L_ENG  6 /* English */
#define L_PAS  7 /* Pascal */
#define L_MAIL 8 /* Electronic mail */
#define L_NEWS 9 /* Usenet Netnews */

/*
 * MIME type strings, one per L_* index above, followed by a catch-all
 * entry and a null terminator.  The trailing comments give the description
 * text the original file(1) table used for each slot.
 */
static char *types[] = {
    /* L_HTML */ "text/html",          /* HTML */
    /* L_C    */ "text/plain",         /* "c program text" */
    /* L_FORT */ "text/plain",         /* "fortran program text" */
    /* L_MAKE */ "text/plain",         /* "make commands text" */
    /* L_PLI  */ "text/plain",         /* "pl/1 program text" */
    /* L_MACH */ "text/plain",         /* "assembler program text" */
    /* L_ENG  */ "text/plain",         /* "English text" */
    /* L_PAS  */ "text/plain",         /* "pascal program text" */
    /* L_MAIL */ "message/rfc822",     /* "mail text" */
    /* L_NEWS */ "message/news",       /* "news text" */
    /* (none) */ "application/binary", /* "can't happen error on names.h/types" */
    NULL                               /* end of table */
};
static struct names {
char *name;
short type;
} names[] = {
/* These must be sorted by eye for optimal hit rate */
/* Add to this list only after substantial meditation */
{
"", L_HTML
},
{
"", L_HTML
},
{
"", L_HTML
},
{
"", L_HTML
},
{
"", L_HTML
},
{
"", L_HTML
},
{
"", L_HTML
},
{
"", L_HTML
},
{
"