1 /* ====================================================================
2 * The Apache Software License, Version 1.1
4 * Copyright (c) 2000 The Apache Software Foundation. All rights
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
19 * 3. The end-user documentation included with the redistribution,
20 * if any, must include the following acknowledgment:
21 * "This product includes software developed by the
22 * Apache Software Foundation (http://www.apache.org/)."
23 * Alternately, this acknowledgment may appear in the software itself,
24 * if and wherever such third-party acknowledgments normally appear.
26 * 4. The names "Apache" and "Apache Software Foundation" must
27 * not be used to endorse or promote products derived from this
28 * software without prior written permission. For written
29 * permission, please contact apache@apache.org.
31 * 5. Products derived from this software may not be called "Apache",
32 * nor may "Apache" appear in their name, without prior written
33 * permission of the Apache Software Foundation.
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * ====================================================================
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation. For more
51 * information on the Apache Software Foundation, please see
52 * <http://www.apache.org/>.
54 * Portions of this software are based upon public domain software
55 * originally written at the National Center for Supercomputing Applications,
56 * University of Illinois, Urbana-Champaign.
60 * mod_mime_magic: MIME type lookup via file magic numbers
61 * Copyright (c) 1996-1997 Cisco Systems, Inc.
63 * This software was submitted by Cisco Systems to the Apache Software Foundation in July
64 * 1997. Future revisions and derivatives of this source code must
65 * acknowledge Cisco Systems as the original contributor of this module.
66 * All other licensing and usage conditions are those of the Apache Software Foundation.
68 * Some of this code is derived from the free version of the file command
69 * originally posted to comp.sources.unix. Copyright info for that program
70 * is included below as required.
71 * ---------------------------------------------------------------------------
72 * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin.
74 * This software is not subject to any license of the American Telephone and
75 * Telegraph Company or of the Regents of the University of California.
77 * Permission is granted to anyone to use this software for any purpose on any
78 * computer system, and to alter it and redistribute it freely, subject to
79 * the following restrictions:
81 * 1. The author is not responsible for the consequences of use of this
82 * software, no matter how awful, even if they arise from flaws in it.
84 * 2. The origin of this software must not be misrepresented, either by
85 * explicit claim or by omission. Since few users ever read sources, credits
86 * must appear in the documentation.
88 * 3. Altered versions must be plainly marked as such, and must not be
89 * misrepresented as being the original software. Since few users ever read
90 * sources, credits must appear in the documentation.
92 * 4. This notice may not be removed or altered.
93 * -------------------------------------------------------------------------
95 * For compliance with Mr Darwin's terms: this has been very significantly
96 * modified from the free "file" command.
97 * - all-in-one file for compilation convenience when moving from one
98 * version of Apache to the next.
99 * - Memory allocation is done through the Apache API's apr_pool_t structure.
100 * - All functions have had necessary Apache API request or server
101 * structures passed to them where necessary to call other Apache API
102 * routines. (i.e. usually for logging, files, or memory allocation in
103 * itself or a called function.)
104 * - struct magic has been converted from an array to a single-ended linked
105 * list because it only grows one record at a time, it's only accessed
106 * sequentially, and the Apache API has no equivalent of realloc().
107 * - Functions have been changed to get their parameters from the server
108 * configuration instead of globals. (It should be reentrant now but has
109 * not been tested in a threaded environment.)
110 * - Places where it used to print results to stdout now saves them in a
111 * list where they're used to set the MIME type in the Apache request
113 * - Command-line flags have been removed since they will never be used here.
115 * Ian Kluft <ikluft@cisco.com>
116 * Engineering Information Framework
117 * Central Engineering
118 * Cisco Systems, Inc.
121 * Initial installation July/August 1996
122 * Misc bug fixes May 1997
123 * Submission to Apache Software Foundation July 1997
128 #include "apr_strings.h"
130 #if APR_HAVE_UNISTD_H
134 #include "ap_config.h"
136 #include "http_config.h"
137 #include "http_request.h"
138 #include "http_core.h"
139 #include "http_log.h"
140 #include "http_protocol.h"
141 #include "util_script.h"
143 /* ### this isn't set by configure? does anybody set this? */
149 * data structures and related constants
152 #define MODNAME "mod_mime_magic"
153 #define MIME_MAGIC_DEBUG 0
155 #define MIME_BINARY_UNKNOWN "application/octet-stream"
156 #define MIME_TEXT_UNKNOWN "text/plain"
158 #define MAXMIMESTRING 256
160 /* HOWMANY must be at least 4096 to make gzip -dcq work */
162 /* SMALL_HOWMANY limits how much work we do to figure out text files */
163 #define SMALL_HOWMANY 1024
164 #define MAXDESC 50 /* max leng of text description */
165 #define MAXstring 64 /* max leng of "string" types */
168 struct magic *next; /* link to next entry */
169 int lineno; /* line number from magic file */
172 #define INDIR 1 /* set if '>(...)' appears: the offset is indirect */
173 #define UNSIGNED 2 /* comparison is unsigned */
174 short cont_level; /* level of ">" */
176 char type; /* type of the indirect value: byte, short or long */
177 long offset; /* offset from indirection */
179 long offset; /* offset to magic number */
180 unsigned char reln; /* relation (0=eq, '>'=gt, etc) */
181 char type; /* int, short, long or string. */
182 char vallen; /* length of string value, if any */
199 unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */
200 unsigned char hl[4]; /* 4 bytes of a fixed-endian "long" */
201 } value; /* either number or string */
202 unsigned long mask; /* mask before comparison with value */
203 char nospflag; /* suppress space character */
205 /* NOTE: this string is suspected of overrunning - find it! */
206 char desc[MAXDESC]; /* description; parse() stores at most MAXDESC - 1 chars plus '\0' */
210 * data structures for tar file recognition
211 * --------------------------------------------------------------------------
212 * Header file for public domain tar (tape archive) program.
214 * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John
215 * Gilmore, ihnp4!hoptoad!gnu.
217 * Header block on tape.
219 * I'm going to use traditional DP naming conventions here. A "block" is a big
220 * chunk of stuff that we do I/O on. A "record" is a piece of info that we
221 * care about. Typically many "record"s fit into a "block".
223 #define RECORDSIZE 512
229 char charptr[RECORDSIZE];
239 char linkname[NAMSIZ];
248 /* The magic field is filled with this if uname and gname are valid. */
249 #define TMAGIC "ustar " /* 7 chars and a null */
252 * file-function prototypes
254 static int ascmagic(request_rec *, unsigned char *, apr_size_t);
255 static int is_tar(unsigned char *, apr_size_t);
256 static int softmagic(request_rec *, unsigned char *, apr_size_t);
257 static void tryit(request_rec *, unsigned char *, apr_size_t, int);
258 static int zmagic(request_rec *, unsigned char *, apr_size_t);
260 static int getvalue(server_rec *, struct magic *, char **);
261 static int hextoint(int);
262 static char *getstr(server_rec *, char *, char *, int, int *);
263 static int parse(server_rec *, apr_pool_t *p, char *, int);
265 static int match(request_rec *, unsigned char *, apr_size_t);
266 static int mget(request_rec *, union VALUETYPE *, unsigned char *,
267 struct magic *, apr_size_t);
268 static int mcheck(request_rec *, union VALUETYPE *, struct magic *);
269 static void mprint(request_rec *, union VALUETYPE *, struct magic *);
271 static int uncompress(request_rec *, int,
272 unsigned char **, apr_size_t);
273 static long from_oct(int, char *);
274 static int fsmagic(request_rec *r, const char *fn);
277 * includes for ASCII substring recognition formerly "names.h" in file
280 * Original notes: names and types used by ascmagic in file(1). These tokens are
281 * here because they can appear anywhere in the first HOWMANY bytes, while
282 * tokens in /etc/magic must appear at fixed offsets into the file. Don't
283 * make HOWMANY too high unless you have a very fast CPU.
286 /* these values are used to index the string array 'types': keep em in sync! */
287 /* HTML inserted in first because this is a web server module now */
288 #define L_HTML 0 /* HTML */
289 #define L_C 1 /* first and foremost on UNIX */
290 #define L_FORT 2 /* the oldest one */
291 #define L_MAKE 3 /* Makefiles */
292 #define L_PLI 4 /* PL/1 */
293 #define L_MACH 5 /* some kinda assembler */
294 #define L_ENG 6 /* English */
295 #define L_PAS 7 /* Pascal */
296 #define L_MAIL 8 /* Electronic mail */
297 #define L_NEWS 9 /* Usenet Netnews */
299 static char *types[] =
301 "text/html", /* HTML */
302 "text/plain", /* "c program text", */
303 "text/plain", /* "fortran program text", */
304 "text/plain", /* "make commands text", */
305 "text/plain", /* "pl/1 program text", */
306 "text/plain", /* "assembler program text", */
307 "text/plain", /* "English text", */
308 "text/plain", /* "pascal program text", */
309 "message/rfc822", /* "mail text", */
310 "message/news", /* "news text", */
311 "application/binary", /* "can't happen error on names.h/types", */
315 static struct names {
320 /* These must be sorted by eye for optimal hit rate */
321 /* Add to this list only after substantial meditation */
350 "<!DOCTYPE HTML", L_HTML
354 }, /* must precede "The", "the", etc. */
398 * Too many files of text have these words in them. Find another way to
399 * recognize Fortrash.
455 "Return-Path:", L_MAIL
461 "Newsgroups:", L_NEWS
467 "Organization:", L_NEWS
474 #define NNAMES ((sizeof(names)/sizeof(struct names)) - 1)
477 * Result String List (RSL)
479 * The file(1) command prints its output. Instead, we store the various
480 * "printed" strings in a list (allocating memory as we go) and concatenate
481 * them at the end when we finally know how much space they'll need.
484 typedef struct magic_rsl_s {
485 char *str; /* string, possibly a fragment */
486 struct magic_rsl_s *next; /* pointer to next fragment */
490 * Apache module configuration structures
493 /* per-server info */
495 const char *magicfile; /* where magic be found */
496 struct magic *magic; /* head of magic config list */
498 } magic_server_config_rec;
500 /* per-request info */
502 magic_rsl *head; /* result string list */
504 unsigned suf_recursion; /* recursion depth in suffix check */
508 * configuration functions - called by Apache API routines
511 module mime_magic_module;
513 static void *create_magic_server_config(apr_pool_t *p, server_rec *d)
515 /* allocate the config - use pcalloc because it needs to be zeroed */
516 return apr_pcalloc(p, sizeof(magic_server_config_rec));
/* Merge two per-server configurations into a new record allocated from p.
 * basev is the inherited configuration, addv the overriding one. */
519 static void *merge_magic_server_config(apr_pool_t *p, void *basev, void *addv)
521 magic_server_config_rec *base = (magic_server_config_rec *) basev;
522 magic_server_config_rec *add = (magic_server_config_rec *) addv;
/* NOTE(review): this uses apr_palloc, not the apr_pcalloc that
 * create_magic_server_config uses "because it needs to be zeroed" -
 * confirm every field of new is assigned before use. */
523 magic_server_config_rec *new = (magic_server_config_rec *)
524 apr_palloc(p, sizeof(magic_server_config_rec));
/* the overriding MimeMagicFile wins; otherwise inherit the base one */
526 new->magicfile = add->magicfile ? add->magicfile : base->magicfile;
532 static const char *set_magicfile(cmd_parms *cmd, void *dummy, const char *arg)
534 magic_server_config_rec *conf = (magic_server_config_rec *)
535 ap_get_module_config(cmd->server->module_config,
539 return MODNAME ": server structure not allocated";
541 conf->magicfile = arg;
546 * configuration file commands - exported to Apache API
549 static const command_rec mime_magic_cmds[] =
551 AP_INIT_TAKE1("MimeMagicFile", set_magicfile, NULL, RSRC_CONF,
552 "Path to MIME Magic file (in file(1) format)"),
557 * RSL (result string list) processing routines
559 * These collect strings that would have been printed in fragments by file(1)
560 * into a list of magic_rsl structures with the strings. When complete,
561 * they're concatenated together to become the MIME content and encoding
564 * return value conventions for these functions: functions which return int:
565 * failure = -1, other = result functions which return pointers: failure = 0,
569 /* allocate a per-request structure and put it in the request record */
570 static magic_req_rec *magic_set_config(request_rec *r)
572 magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool,
573 sizeof(magic_req_rec));
575 req_dat->head = req_dat->tail = (magic_rsl *) NULL;
576 ap_set_module_config(r->request_config, &mime_magic_module, req_dat);
580 /* add a string to the result string list for this request */
581 /* it is the responsibility of the caller to allocate "str" */
582 static int magic_rsl_add(request_rec *r, char *str)
584 magic_req_rec *req_dat = (magic_req_rec *)
585 ap_get_module_config(r->request_config, &mime_magic_module);
588 /* make sure we have a list to put it in */
590 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, APR_EINVAL, r,
591 MODNAME ": request config should not be NULL");
592 if (!(req_dat = magic_set_config(r))) {
598 /* allocate the list entry */
599 rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl));
603 rsl->next = (magic_rsl *) NULL;
605 /* append to the list */
606 if (req_dat->head && req_dat->tail) {
607 req_dat->tail->next = rsl;
611 req_dat->head = req_dat->tail = rsl;
618 /* RSL hook for puts-type functions */
619 static int magic_rsl_puts(request_rec *r, char *str)
621 return magic_rsl_add(r, str);
624 /* RSL hook for printf-type functions: formats str and its arguments into a MAXMIMESTRING-byte buffer and appends a copy to r's result string list; returns what magic_rsl_add() returns */
625 static int magic_rsl_printf(request_rec *r, char *str,...)
629 char buf[MAXMIMESTRING];
631 /* assemble the string into the buffer (apr_vsnprintf is given sizeof(buf), so longer output is truncated) */
633 apr_vsnprintf(buf, sizeof(buf), str, ap);
636 /* add a copy of the buffer to the list; the copy is taken from r->pool, the same pool the list entries come from, instead of strdup() whose malloc'd block nothing here ever freed */
637 return magic_rsl_add(r, apr_pstrdup(r->pool, buf));
640 /* RSL hook for putchar-type functions */
641 static int magic_rsl_putchar(request_rec *r, char c)
645 /* high overhead for 1 char - just hope they don't do this much */
648 return magic_rsl_add(r, str);
651 /* allocate and copy a contiguous string from a result string list */
652 static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len)
654 char *result; /* return value */
655 int cur_frag, /* current fragment number/counter */
656 cur_pos, /* current position within fragment */
657 res_pos; /* position in result string */
658 magic_rsl *frag; /* list-traversal pointer */
659 magic_req_rec *req_dat = (magic_req_rec *)
660 ap_get_module_config(r->request_config, &mime_magic_module);
662 /* allocate the result string */
663 result = (char *) apr_palloc(r->pool, len + 1);
665 /* loop through and collect the string */
667 for (frag = req_dat->head, cur_frag = 0;
669 frag = frag->next, cur_frag++) {
670 /* loop to the first fragment */
671 if (cur_frag < start_frag)
674 /* loop through and collect chars */
675 for (cur_pos = (cur_frag == start_frag) ? start_pos : 0;
678 if (cur_frag >= start_frag
679 && cur_pos >= start_pos
681 result[res_pos++] = frag->str[cur_pos];
689 /* clean up and return */
692 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
693 MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result);
698 /* states for the state-machine algorithm in magic_rsl_to_request() */
700 rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
703 /* process the RSL and set the MIME info in the request record */
/* Walks the result-string fragments with a small state machine
 * (rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding),
 * recording in which fragment and at which position the content type and
 * the optional content encoding start, then copies them out with
 * rsl_strdup() into r->content_type and r->content_encoding.
 * Returns HTTP_INTERNAL_SERVER_ERROR if a required string is missing after
 * the copy. */
704 static int magic_rsl_to_request(request_rec *r)
706 int cur_frag, /* current fragment number/counter */
707 cur_pos, /* current position within fragment */
708 type_frag, /* content type starting point: fragment */
709 type_pos, /* content type starting point: position */
710 type_len, /* content type length */
711 encoding_frag, /* content encoding starting point: fragment */
712 encoding_pos, /* content encoding starting point: position */
713 encoding_len; /* content encoding length */
715 magic_rsl *frag; /* list-traversal pointer */
718 magic_req_rec *req_dat = (magic_req_rec *)
719 ap_get_module_config(r->request_config, &mime_magic_module);
721 /* check if we have a result */
722 if (!req_dat || !req_dat->head) {
723 /* empty - no match, we defer to other Apache modules */
727 /* start searching for the type and encoding */
728 state = rsl_leading_space;
729 type_frag = type_pos = type_len = 0;
730 encoding_frag = encoding_pos = encoding_len = 0;
731 for (frag = req_dat->head, cur_frag = 0;
733 frag = frag->next, cur_frag++) {
734 /* loop through the characters in the fragment */
735 for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) {
736 if (apr_isspace(frag->str[cur_pos])) {
737 /* process whitespace actions for each state */
738 if (state == rsl_leading_space) {
739 /* eat whitespace in this state */
742 else if (state == rsl_type) {
743 /* whitespace: type has no slash! */
746 else if (state == rsl_subtype) {
747 /* whitespace: end of MIME type */
751 else if (state == rsl_separator) {
752 /* eat whitespace in this state */
755 else if (state == rsl_encoding) {
756 /* whitespace: end of MIME encoding */
758 frag = req_dat->tail;
762 /* should not be possible */
763 /* abandon malfunctioning module */
764 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
765 MODNAME ": bad state %d (ws)", state);
770 else if (state == rsl_type &&
771 frag->str[cur_pos] == '/') {
772 /* copy the char and go to rsl_subtype state */
777 /* process non-space actions for each state */
778 if (state == rsl_leading_space) {
779 /* non-space: begin MIME type */
781 type_frag = cur_frag;
786 else if (state == rsl_type ||
787 state == rsl_subtype) {
788 /* non-space: adds to type */
792 else if (state == rsl_separator) {
793 /* non-space: begin MIME encoding */
795 encoding_frag = cur_frag;
796 encoding_pos = cur_pos;
800 else if (state == rsl_encoding) {
801 /* non-space: adds to encoding */
806 /* should not be possible */
807 /* abandon malfunctioning module */
808 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
809 MODNAME ": bad state %d (ns)", state);
818 /* if we ended prior to state rsl_subtype, we had incomplete info */
819 if (state != rsl_subtype && state != rsl_separator &&
820 state != rsl_encoding) {
821 /* defer to other modules */
825 /* save the info in the request record; rsl_separator (a type followed only by whitespace) carries a complete type as well, so it must be accepted here just as the check above accepts it */
826 if (state == rsl_subtype || state == rsl_separator ||
827 state == rsl_encoding) {
829 tmp = rsl_strdup(r, type_frag, type_pos, type_len);
830 /* XXX: this could be done at config time I'm sure... but I'm
831 * confused by all this magic_rsl stuff. -djg */
832 ap_content_type_tolower(tmp);
833 r->content_type = tmp;
835 if (state == rsl_encoding) {
837 tmp = rsl_strdup(r, encoding_frag,
838 encoding_pos, encoding_len);
839 /* XXX: this could be done at config time I'm sure... but I'm
840 * confused by all this magic_rsl stuff. -djg */
842 r->content_encoding = tmp;
845 /* detect memory allocation errors */
846 if (!r->content_type ||
847 (state == rsl_encoding && !r->content_encoding)) {
848 return HTTP_INTERNAL_SERVER_ERROR;
856 * magic_process - process the file named by r->filename; r is the Apache API request record
857 * (formerly called "process" in file command, prefix added for clarity). Opens
858 * the file and reads a fixed-size buffer to begin processing the contents.
860 static int magic_process(request_rec *r)
862 apr_file_t *fd = NULL;
863 unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
864 apr_size_t nbytes = 0; /* number of bytes read from a datafile */
868 * first try judging the file based on its filesystem status
870 switch ((result = fsmagic(r, r->filename))) { /* fsmagic() decides from r->finfo; the file is not opened yet */
872 magic_rsl_putchar(r, '\n');
877 /* fatal error, bail out */
881 if (apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
882 /* We can't open it, but we were able to stat it. */
883 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
884 MODNAME ": can't read `%s'", r->filename);
885 /* let some other handler decide what the problem is */
890 * try looking at the first HOWMANY bytes
892 nbytes = sizeof(buf) - 1; /* ask for at most HOWMANY bytes, keeping one byte free for the terminator */
893 if ((result = apr_file_read(fd, (char *) buf, &nbytes)) != APR_SUCCESS) {
894 ap_log_rerror(APLOG_MARK, APLOG_ERR, result, r,
895 MODNAME ": read failed: %s", r->filename);
896 return HTTP_INTERNAL_SERVER_ERROR;
900 magic_rsl_puts(r, MIME_TEXT_UNKNOWN);
902 buf[nbytes++] = '\0'; /* null-terminate it; nbytes now counts the terminator too */
903 tryit(r, buf, nbytes, 1); /* 1: let tryit() run the compression check (zmagic) as well */
906 (void) apr_file_close(fd);
907 (void) magic_rsl_putchar(r, '\n'); /* trailing whitespace marks the end of the result for magic_rsl_to_request() */
/* Run the content tests in order on buf (nb bytes): the compression check
 * (zmagic, only when checkzmagic == 1), then the magic-file rules
 * (softmagic), then keyword/ASCII detection (ascmagic).  If execution
 * reaches the end, MIME_BINARY_UNKNOWN is recorded as the result.
 * NOTE(review): the statements guarded by the "== 1" checks are not visible
 * here; presumably each successful test returns early - confirm. */
913 static void tryit(request_rec *r, unsigned char *buf, apr_size_t nb, int checkzmagic)
916 * Try compression stuff
918 if (checkzmagic == 1) {
919 if (zmagic(r, buf, nb) == 1)
924 * try tests in /etc/magic (or surrogate magic file)
926 if (softmagic(r, buf, nb) == 1)
930 * try known keywords, check for ascii-ness too.
932 if (ascmagic(r, buf, nb) == 1)
936 * abandon hope, all ye who remain here
938 magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
941 #define EATAB {while (apr_isspace((unsigned char) *l)) ++l;}
944 * apprentice - load configuration from the magic file; s is the server record, p the pool used for the file and the parsed entries
947 static int apprentice(server_rec *s, apr_pool_t *p)
949 apr_file_t *f = NULL;
951 char line[BUFSIZ + 1];
956 struct magic *m, *prevm;
958 magic_server_config_rec *conf = (magic_server_config_rec *)
959 ap_get_module_config(s->module_config, &mime_magic_module);
961 const char *fname = ap_server_root_relative(p, conf->magicfile);
962 result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED, APR_OS_DEFAULT, p);
963 if (result != APR_SUCCESS) {
964 ap_log_error(APLOG_MARK, APLOG_ERR, result, s,
965 MODNAME ": can't read magic file %s", fname);
969 /* set up the magic list (empty) */
970 conf->magic = conf->last = NULL;
973 for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) {
978 line[strlen(line) - 1] = '\0';
981 /* skip leading whitespace */
983 while (line[ws_offset] && apr_isspace(line[ws_offset])) {
987 /* skip blank lines */
988 if (line[ws_offset] == 0) {
992 /* comment, do not parse */
993 if (line[ws_offset] == '#')
997 /* if we get here, we're going to use it so count it */
1002 if (parse(s, p, line + ws_offset, lineno) != 0)
1006 (void) apr_file_close(f);
1008 #if MIME_MAGIC_DEBUG
1009 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, s,
1010 MODNAME ": apprentice conf=%x file=%s m=%s m->next=%s last=%s",
1012 conf->magicfile ? conf->magicfile : "NULL",
1013 conf->magic ? "set" : "NULL",
1014 (conf->magic && conf->magic->next) ? "set" : "NULL",
1015 conf->last ? "set" : "NULL");
1016 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, s,
1017 MODNAME ": apprentice read %d lines, %d rules, %d errors",
1018 lineno, rule, errs);
1021 #if MIME_MAGIC_DEBUG
1023 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, s,
1024 MODNAME ": apprentice test");
1025 for (m = conf->magic; m; m = m->next) {
1026 if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
1027 apr_isprint((((unsigned long) m) >> 16) & 255) &&
1028 apr_isprint((((unsigned long) m) >> 8) & 255) &&
1029 apr_isprint(((unsigned long) m) & 255)) {
1030 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, s,
1031 MODNAME ": apprentice: POINTER CLOBBERED! "
1032 "m=\"%c%c%c%c\" line=%d",
1033 (((unsigned long) m) >> 24) & 255,
1034 (((unsigned long) m) >> 16) & 255,
1035 (((unsigned long) m) >> 8) & 255,
1036 ((unsigned long) m) & 255,
1037 prevm ? prevm->lineno : -1);
1044 return (errs ? -1 : 0);
1048 * extend the sign bit if the comparison is to be signed
1050 static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v)
1052 if (!(m->flag & UNSIGNED))
1055 * Do not remove the casts below. They are vital. When later
1056 * compared with the data, the sign extension must have happened.
1077 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, s,
1078 MODNAME ": can't happen: m->type=%d", m->type);
1085 * parse one line from the magic file into a new entry appended to the server's magic linked list
1087 static int parse(server_rec *serv, apr_pool_t *p, char *l, int lineno)
1091 magic_server_config_rec *conf = (magic_server_config_rec *)
1092 ap_get_module_config(serv->module_config, &mime_magic_module);
1094 /* allocate magic structure entry */
1095 m = (struct magic *) apr_pcalloc(p, sizeof(struct magic));
1097 /* append to linked list */
1099 if (!conf->magic || !conf->last) {
1100 conf->magic = conf->last = m;
1103 conf->last->next = m;
1107 /* set values in magic structure */
1113 ++l; /* step over */
1117 if (m->cont_level != 0 && *l == '(') {
1118 ++l; /* step over */
1122 /* get offset, then skip over it */
1123 m->offset = (int) strtol(l, &t, 0);
1125 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, serv,
1126 MODNAME ": offset %s invalid", l);
1130 if (m->flag & INDIR) {
1134 * read [.lbs][+-]nnnnn)
1148 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, serv,
1149 MODNAME ": indirect offset type %c invalid", *l);
1155 if (*l == '+' || *l == '-')
1157 if (apr_isdigit((unsigned char) *l)) {
1158 m->in.offset = strtol(l, &t, 0);
1160 m->in.offset = -m->in.offset;
1165 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, serv,
1166 MODNAME ": missing ')' in indirect offset");
1172 while (apr_isdigit((unsigned char) *l))
1190 m->flag |= UNSIGNED;
1193 /* get type, skip it */
1194 if (strncmp(l, "byte", NBYTE) == 0) {
1198 else if (strncmp(l, "short", NSHORT) == 0) {
1202 else if (strncmp(l, "long", NLONG) == 0) {
1206 else if (strncmp(l, "string", NSTRING) == 0) {
1210 else if (strncmp(l, "date", NDATE) == 0) {
1214 else if (strncmp(l, "beshort", NBESHORT) == 0) {
1218 else if (strncmp(l, "belong", NBELONG) == 0) {
1222 else if (strncmp(l, "bedate", NBEDATE) == 0) {
1226 else if (strncmp(l, "leshort", NLESHORT) == 0) {
1230 else if (strncmp(l, "lelong", NLELONG) == 0) {
1234 else if (strncmp(l, "ledate", NLEDATE) == 0) {
1239 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, serv,
1240 MODNAME ": type %s invalid", l);
1243 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1246 m->mask = signextend(serv, m, strtol(l, &l, 0));
1255 /* Old-style anding: "0 byte &0x80 dynamically linked" */
1263 if (m->type != STRING) {
1270 if (*l == 'x' && apr_isspace((unsigned char) l[1])) {
1273 goto GetDesc; /* Bill The Cat */
1280 if (getvalue(serv, m, &l))
1283 * now get last part - the description
1291 else if ((l[0] == '\\') && (l[1] == 'b')) {
1298 strncpy(m->desc, l, sizeof(m->desc) - 1);
1299 m->desc[sizeof(m->desc) - 1] = '\0';
1301 #if MIME_MAGIC_DEBUG
1302 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, serv,
1303 MODNAME ": parse line=%d m=%x next=%x cont=%d desc=%s",
1304 lineno, m, m->next, m->cont_level, m->desc);
1305 #endif /* MIME_MAGIC_DEBUG */
1311 * Read a numeric value from a pointer, into the value union of a magic
1312 * pointer, according to the magic type. Update the string pointer to point
1313 * just after the number read. Return 0 for success, non-zero for failure.
1315 static int getvalue(server_rec *s, struct magic *m, char **p)
1319 if (m->type == STRING) {
1320 *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen);
1323 else if (m->reln != 'x')
1324 m->value.l = signextend(s, m, strtol(*p, p, 0));
1329 * Convert a string containing C character escapes. Stop at an unescaped
1330 * space or tab. Copy the converted version to "p", returning its length in
1331 * *slen. Return updated scan pointer as function result.
1333 static char *getstr(server_rec *serv, register char *s, register char *p,
1334 int plen, int *slen)
1336 char *origs = s, *origp = p;
1337 char *pmax = p + plen - 1;
1341 while ((c = *s++) != '\0') {
1342 if (apr_isspace((unsigned char) c))
1345 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, serv,
1346 MODNAME ": string too long: %s", origs);
1383 /* \ and up to 3 octal digits */
1393 c = *s++; /* try for 2 */
1394 if (c >= '0' && c <= '7') {
1395 val = (val << 3) | (c - '0');
1396 c = *s++; /* try for 3 */
1397 if (c >= '0' && c <= '7')
1398 val = (val << 3) | (c - '0');
1407 /* \x and up to 3 hex digits */
1409 val = 'x'; /* Default if no digits */
1410 c = hextoint(*s++); /* Get next char */
1415 val = (val << 4) + c;
1418 val = (val << 4) + c;
1442 /* Single hex char to int; -1 if not a hex char. */
1443 static int hextoint(int c)
1445 if (apr_isdigit((unsigned char) c))
1447 if ((c >= 'a') && (c <= 'f'))
1448 return c + 10 - 'a';
1449 if ((c >= 'A') && (c <= 'F'))
1450 return c + 10 - 'A';
1456 * return DONE to indicate it's been handled
1457 * return OK to indicate it's a regular file still needing handling
1458 * other returns indicate a failure of some sort
1460 static int fsmagic(request_rec *r, const char *fn)
1462 switch (r->finfo.filetype) {
1464 magic_rsl_puts(r, DIR_MAGIC_TYPE);
1468 * (void) magic_rsl_printf(r,"character special (%d/%d)",
1469 * major(sb->st_rdev), minor(sb->st_rdev));
1471 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1475 * (void) magic_rsl_printf(r,"block special (%d/%d)",
1476 * major(sb->st_rdev), minor(sb->st_rdev));
1478 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1480 /* TODO add code to handle V7 MUX and Blit MUX files */
1483 * magic_rsl_puts(r,"fifo (named pipe)");
1485 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1488 /* We used stat(), the only possible reason for this is that the
1489 * symlink is broken.
1491 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
1492 MODNAME ": broken symlink (%s)", fn);
1493 return HTTP_INTERNAL_SERVER_ERROR;
1495 magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1500 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
1501 MODNAME ": invalid file type %d.", r->finfo.filetype);
1502 return HTTP_INTERNAL_SERVER_ERROR;
1506 * regular file, check next possibility
1508 if (r->finfo.size == 0) {
1509 magic_rsl_puts(r, MIME_TEXT_UNKNOWN);
1516 * softmagic - look up one file in the database (already loaded from the magic file by
1517 * apprentice()). Passed the request record and a buffer of nbytes bytes of the data to be typed.
1519 /* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */
1520 static int softmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
1522 if (match(r, buf, nbytes))
1529 * Go through the whole list, stopping if you find a match. Process all the
1530 * continuations of that match before returning.
1532 * We support multi-level continuations:
1534 * At any time when processing a successful top-level match, there is a current
1535 * continuation level; it represents the level of the last successfully
1536 * matched continuation.
1538 * Continuations above that level are skipped as, if we see one, it means that
1539 * the continuation that controls them - i.e., the lower-level continuation
1540 * preceding them - failed to match.
1542 * Continuations below that level are processed as, if we see one, it means
1543 * we've finished processing or skipping higher-level continuations under the
1544 * control of a successful or unsuccessful lower-level continuation, and are
1545 * now seeing the next lower-level continuation and should process it. The
1546 * current continuation level reverts to the level of the one we're seeing.
1548 * Continuations at the current level are processed as, if we see one, there's
1549 * no lower-level continuation that may have failed.
1551 * If a continuation matches, we bump the current continuation level so that
1552 * higher-level continuations are processed.
/*
 * match - run the file data against every entry of the server magic list.
 *
 *   r       request; supplies the server configuration (conf->magic is the
 *           head of the list) and receives the printed description
 *   s       leading bytes of the file being typed
 *   nbytes  number of valid bytes at s
 *
 * Each top-level entry is tested with mget() followed by mcheck().  When
 * it fails, the continuation entries that follow it (cont_level != 0) are
 * stepped over.  When it succeeds, its continuations are evaluated under
 * the continuation-level rules described before this function, a blank is
 * emitted between printed pieces where needed, and the function returns 1.
 * If the list is exhausted without a top-level match the result is 0.
 *
 * NOTE(review): the MIME_MAGIC_DEBUG messages format pointers (conf,
 * m_cont, m_cont->next) with %x; that is only correct where a pointer
 * has the width of an unsigned int - confirm before enabling debug builds.
 */
1554 static int match(request_rec *r, unsigned char *s, apr_size_t nbytes)
1556 #if MIME_MAGIC_DEBUG
1557 int rule_counter = 0;
1560 int need_separator = 0;
1562 magic_server_config_rec *conf = (magic_server_config_rec *)
1563 ap_get_module_config(r->server->module_config, &mime_magic_module);
1566 #if MIME_MAGIC_DEBUG
1567 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1568 MODNAME ": match conf=%x file=%s m=%s m->next=%s last=%s",
1570 conf->magicfile ? conf->magicfile : "NULL",
1571 conf->magic ? "set" : "NULL",
1572 (conf->magic && conf->magic->next) ? "set" : "NULL",
1573 conf->last ? "set" : "NULL");
1576 #if MIME_MAGIC_DEBUG
1577 for (m = conf->magic; m; m = m->next) {
1578 if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
1579 apr_isprint((((unsigned long) m) >> 16) & 255) &&
1580 apr_isprint((((unsigned long) m) >> 8) & 255) &&
1581 apr_isprint(((unsigned long) m) & 255)) {
1582 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1583 MODNAME ": match: POINTER CLOBBERED! "
1585 (((unsigned long) m) >> 24) & 255,
1586 (((unsigned long) m) >> 16) & 255,
1587 (((unsigned long) m) >> 8) & 255,
1588 ((unsigned long) m) & 255);
/* main scan: walk the magic list from its head */
1594 for (m = conf->magic; m; m = m->next) {
1595 #if MIME_MAGIC_DEBUG
1597 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1598 MODNAME ": line=%d desc=%s", m->lineno, m->desc);
1601 /* check if main entry matches */
1602 if (!mget(r, &p, s, m, nbytes) ||
1603 !mcheck(r, &p, m)) {
1604 struct magic *m_cont;
1607 * main entry didn't match, flush its continuations
1609 if (!m->next || (m->next->cont_level == 0)) {
1614 while (m_cont && (m_cont->cont_level != 0)) {
1615 #if MIME_MAGIC_DEBUG
1617 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1618 MODNAME ": line=%d mc=%x mc->next=%x cont=%d desc=%s",
1619 m_cont->lineno, m_cont,
1620 m_cont->next, m_cont->cont_level,
1624 * this trick allows us to keep *m in sync when the continue
1625 * advances the pointer
1628 m_cont = m_cont->next;
1633 /* if we get here, the main entry rule was a match */
1634 /* this will be the last run through the loop */
1635 #if MIME_MAGIC_DEBUG
1636 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1637 MODNAME ": rule matched, line=%d type=%d %s",
1639 (m->type == STRING) ? m->value.s : "");
1642 /* print the match */
1646 * If we printed something, we'll need to print a blank before we
1647 * print something else.
1651 /* and any continuations that match */
1654 * while (m && m->next && m->next->cont_level != 0 && ( m = m->next
/* continuation entries are the ones with a non-zero cont_level */
1658 while (m && (m->cont_level != 0)) {
1659 #if MIME_MAGIC_DEBUG
1660 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1661 MODNAME ": match line=%d cont=%d type=%d %s",
1662 m->lineno, m->cont_level, m->type,
1663 (m->type == STRING) ? m->value.s : "");
/* entries deeper than the current level are not evaluated */
1665 if (cont_level >= m->cont_level) {
1666 if (cont_level > m->cont_level) {
1668 * We're at the end of the level "cont_level"
1671 cont_level = m->cont_level;
1673 if (mget(r, &p, s, m, nbytes) &&
1676 * This continuation matched. Print its message, with a
1677 * blank before it if the previous item printed and this
1680 /* space if previous printed */
1682 && (m->nospflag == 0)
1683 && (m->desc[0] != '\0')
1685 (void) magic_rsl_putchar(r, ' ');
1693 * If we see any continuations at a higher level, process
1700 /* move to next continuation record */
1703 #if MIME_MAGIC_DEBUG
1704 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1705 MODNAME ": matched after %d rules", rule_counter);
1707 return 1; /* all through */
1709 #if MIME_MAGIC_DEBUG
1710 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1711 MODNAME ": failed after %d rules", rule_counter);
1713 return 0; /* no match at all */
/*
 * mprint - append the description of a matched entry to the result list.
 *
 *   r  request whose result list receives the text
 *   p  value fetched from the file for this entry
 *   m  the matched magic entry; m->desc is the text to emit
 *
 * m->desc is handed to magic_rsl_printf() as the format string and the
 * value is its single argument:
 *   - for the string comparison, m->value.s when the relation is '=' and
 *     the file data p->s otherwise;
 *   - for the date form, the ctime() text of p->l (the code looks for the
 *     newline that ctime() appends before printing);
 *   - for the numeric forms, the value after signextend() and masking
 *     with m->mask, passed as unsigned long.
 * An unknown m->type is logged at error level.
 *
 * NOTE(review): because m->desc acts as a printf format, descriptions must
 * carry at most one conversion and it must suit the argument; presumably
 * they come only from the administrator-supplied magic file - confirm.
 * NOTE(review): the cast (time_t *) & p->l assumes time_t and the l member
 * have the same size and representation.
 */
1716 static void mprint(request_rec *r, union VALUETYPE *p, struct magic *m)
1739 if (m->reln == '=') {
1740 (void) magic_rsl_printf(r, m->desc, m->value.s);
1743 (void) magic_rsl_printf(r, m->desc, p->s);
1750 /* XXX: not multithread safe */
1751 pp = ctime((time_t *) & p->l);
1752 if ((rt = strchr(pp, '\n')) != NULL)
1754 (void) magic_rsl_printf(r, m->desc, pp);
1757 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
1758 MODNAME ": invalid m->type (%d) in mprint().",
1763 v = signextend(r->server, m, v) & m->mask;
1764 (void) magic_rsl_printf(r, m->desc, (unsigned long) v);
1768 * Convert the byte order of the data we are looking at
/*
 * mconvert - normalise the raw bytes in *p for the type of entry m.
 *
 *   r  request, used for error logging
 *   p  value just copied out of the file buffer; rewritten in place
 *   m  magic entry; m->type selects the conversion
 *
 * String data is forced to end in a NUL at the last byte of p->s and is
 * searched for its first newline.  The two-byte and four-byte forms are
 * assembled from p->hs[] / p->hl[] in both byte orders (index 0 as the
 * most significant byte, and index 0 as the least significant byte).
 * An unknown m->type is logged at error level.
 *
 * Callers test the result with !mconvert(...), so a zero return means the
 * entry could not be converted.
 *
 * NOTE(review): p->hl[n] << 24 is evaluated in int after promotion; when
 * the top bit of that byte is set the shift reaches the sign bit - confirm
 * the element type and consider an unsigned intermediate.
 */
1770 static int mconvert(request_rec *r, union VALUETYPE *p, struct magic *m)
1781 /* Null terminate and eat the return */
1782 p->s[sizeof(p->s) - 1] = '\0';
1783 if ((rt = strchr(p->s, '\n')) != NULL)
1787 p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
1792 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
1795 p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
1800 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
1803 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
1804 MODNAME ": invalid type %d in mconvert().", m->type);
1810 static int mget(request_rec *r, union VALUETYPE *p, unsigned char *s,
1811 struct magic *m, apr_size_t nbytes)
1813 long offset = m->offset;
1815 if (offset + sizeof(union VALUETYPE) > nbytes)
1818 memcpy(p, s + offset, sizeof(union VALUETYPE));
1820 if (!mconvert(r, p, m))
1823 if (m->flag & INDIR) {
1825 switch (m->in.type) {
1827 offset = p->b + m->in.offset;
1830 offset = p->h + m->in.offset;
1833 offset = p->l + m->in.offset;
1837 if (offset + sizeof(union VALUETYPE) > nbytes)
1840 memcpy(p, s + offset, sizeof(union VALUETYPE));
1842 if (!mconvert(r, p, m))
/*
 * mcheck - decide whether the value fetched from the file satisfies the
 * test described by a magic entry.
 *
 *   r  request, used for logging
 *   p  value produced by mget() for this entry
 *   m  magic entry holding the expected value (m->value), mask, flags and
 *      the relation to apply
 *
 * An entry whose value string is exactly the single character x is
 * reported through ap_log_rerror() at error level.  String data is
 * compared byte by byte over m->vallen bytes, keeping the first non-zero
 * difference.  The numeric forms are passed through signextend() and
 * masked with m->mask.  The relation is then applied to the resulting
 * value v and the expected value l: any value, not equal, equal, greater
 * than and less than (unsigned when UNSIGNED is set in m->flag, signed
 * otherwise), all bits of l set in v, and not all bits of l set in v.
 * An invalid type or relation is logged and treated as no match.
 *
 * NOTE(review): in the signed greater-than and less-than debug messages
 * the %ld conversions receive the unsigned long variables v and l.
 */
1848 static int mcheck(request_rec *r, union VALUETYPE *p, struct magic *m)
1850 register unsigned long l = m->value.l;
1851 register unsigned long v;
1854 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
1855 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
1883 * What we want here is: v = strncmp(m->value.s, p->s, m->vallen);
1884 * but ignoring any nulls. bcmp doesn't give -/+/0 and isn't
1885 * universally available anyway.
1889 register unsigned char *a = (unsigned char *) m->value.s;
1890 register unsigned char *b = (unsigned char *) p->s;
1891 register int len = m->vallen;
1894 if ((v = *b++ - *a++) != 0)
1899 /* bogosity, pretend that it just wasn't a match */
1900 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
1901 MODNAME ": invalid type %d in mcheck().", m->type);
1905 v = signextend(r->server, m, v) & m->mask;
1909 #if MIME_MAGIC_DEBUG
1910 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1911 "%lu == *any* = 1", v);
1918 #if MIME_MAGIC_DEBUG
1919 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1920 "%lu != %lu = %d", v, l, matched);
1926 #if MIME_MAGIC_DEBUG
1927 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1928 "%lu == %lu = %d", v, l, matched);
1933 if (m->flag & UNSIGNED) {
1935 #if MIME_MAGIC_DEBUG
1936 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1937 "%lu > %lu = %d", v, l, matched);
1941 matched = (long) v > (long) l;
1942 #if MIME_MAGIC_DEBUG
1943 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1944 "%ld > %ld = %d", v, l, matched);
1950 if (m->flag & UNSIGNED) {
1952 #if MIME_MAGIC_DEBUG
1953 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1954 "%lu < %lu = %d", v, l, matched);
1958 matched = (long) v < (long) l;
1959 #if MIME_MAGIC_DEBUG
1960 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1961 "%ld < %ld = %d", v, l, matched);
1967 matched = (v & l) == l;
1968 #if MIME_MAGIC_DEBUG
1969 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1970 "((%lx & %lx) == %lx) = %d", v, l, l, matched);
1975 matched = (v & l) != l;
1976 #if MIME_MAGIC_DEBUG
1977 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
1978 "((%lx & %lx) != %lx) = %d", v, l, l, matched);
1983 /* bogosity, pretend it didn't match */
1985 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_ERR, 0, r,
1986 MODNAME ": mcheck: can't happen: invalid relation %d.",
1994 /* an optimization over plain strcmp() */
1995 #define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
1997 static int ascmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
1999 int has_escapes = 0;
2001 char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */
2003 register struct names *p;
2006 /* these are easy, do them first */
2009 * for troff, look for . + letter + letter or .\"; this must be done to
2010 * disambiguate tar archives' ./file and other trash from real troff
2014 unsigned char *tp = buf + 1;
2016 while (apr_isspace(*tp))
2017 ++tp; /* skip leading whitespace */
2018 if ((apr_isalnum(*tp) || *tp == '\\') &&
2019 (apr_isalnum(*(tp + 1)) || *tp == '"')) {
2020 magic_rsl_puts(r, "application/x-troff");
2024 if ((*buf == 'c' || *buf == 'C') && apr_isspace(*(buf + 1))) {
2026 magic_rsl_puts(r, "text/plain");
2030 /* look for tokens from names.h - this is expensive!, so we'll limit
2031 * ourselves to only SMALL_HOWMANY bytes */
2032 small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes;
2033 /* make a copy of the buffer here because strtok() will destroy it */
2034 s = (unsigned char *) memcpy(nbuf, buf, small_nbytes);
2035 s[small_nbytes] = '\0';
2036 has_escapes = (memchr(s, '\033', small_nbytes) != NULL);
2037 /* XXX: not multithread safe */
2038 while ((token = strtok((char *) s, " \t\n\r\f")) != NULL) {
2039 s = NULL; /* make strtok() keep on tokin' */
2040 for (p = names; p < names + NNAMES; p++) {
2041 if (STREQ(p->name, token)) {
2042 magic_rsl_puts(r, types[p->type]);
2044 magic_rsl_puts(r, " (with escape sequences)");
2050 switch (is_tar(buf, nbytes)) {
2052 /* V7 tar archive */
2053 magic_rsl_puts(r, "application/x-tar");
2056 /* POSIX tar archive */
2057 magic_rsl_puts(r, "application/x-tar");
2061 /* all else fails, but it is ascii... */
2063 /* text with escape sequences */
2064 /* we leave this open for further differentiation later */
2065 magic_rsl_puts(r, "text/plain");
2069 magic_rsl_puts(r, "text/plain");
/*
 * Table of recognised compressed-file formats.
 *
 * Code elsewhere in this file reads these members of each entry:
 *   magic, maglen  signature bytes and their count, compared with the
 *                  start of the file by zmagic()
 *   argv           decompressor command; elements 0 and 1 become the
 *                  program name and first argument in uncompress_child()
 *   silent         consulted by uncompress_child() before the spawn
 *   encoding       stored in r->content_encoding by zmagic()
 * ncompr is the number of entries.  Every command line visible here is
 * gzip -dcq.
 */
2076 * compress routines: zmagic() - returns 0 if not recognized, uncompresses
2077 * and prints information if recognized uncompress(s, method, old, n, newch)
2078 * - uncompress old into new, using method, return sizeof new
2086 char *encoding; /* MUST be lowercase */
2089 /* we use gzip here rather than uncompress because we have to pass
2090 * it a full filename -- and uncompress only considers filenames
2095 "gzip", "-dcq", NULL
2100 "gzip", "-dcq", NULL
2104 * XXX pcat does not work, cause I don't know how to make it read stdin,
2109 "gzip", "-dcq", NULL
2114 static int ncompr = sizeof(compr) / sizeof(compr[0]);
2116 static int zmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
2118 unsigned char *newbuf;
2122 for (i = 0; i < ncompr; i++) {
2123 if (nbytes < compr[i].maglen)
2125 if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0)
2132 if ((newsize = uncompress(r, i, &newbuf, nbytes)) > 0) {
2133 tryit(r, newbuf, newsize, 0);
2135 /* set encoding type in the request record */
2136 r->content_encoding = compr[i].encoding;
2142 struct uncompress_parms {
2147 static int uncompress_child(struct uncompress_parms *parm, apr_pool_t *cntxt,
2148 apr_file_t **pipe_in)
2151 const char *new_argv[4];
2152 const char *const *env;
2153 request_rec *r = parm->r;
2154 apr_pool_t *child_context = cntxt;
2155 apr_procattr_t *procattr;
2156 apr_proc_t *procnew;
2158 env = (const char *const *)ap_create_environment(child_context, r->subprocess_env);
2160 if ((apr_procattr_create(&procattr, child_context) != APR_SUCCESS) ||
2161 (apr_procattr_io_set(procattr, APR_FULL_BLOCK,
2162 APR_FULL_BLOCK, APR_NO_PIPE) != APR_SUCCESS) ||
2163 (apr_procattr_dir_set(procattr, r->filename) != APR_SUCCESS) ||
2164 (apr_procattr_cmdtype_set(procattr, APR_PROGRAM) != APR_SUCCESS)) {
2165 /* Something bad happened, tell the world. */
2166 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r,
2167 "couldn't setup child process: %s", r->filename);
2170 new_argv[0] = compr[parm->method].argv[0];
2171 new_argv[1] = compr[parm->method].argv[1];
2172 new_argv[2] = r->filename;
2175 if (compr[parm->method].silent) {
2176 close(STDERR_FILENO);
2179 procnew = apr_pcalloc(child_context, sizeof(*procnew));
2180 rc = apr_proc_create(procnew, compr[parm->method].argv[0],
2181 new_argv, env, procattr, child_context);
2183 if (rc != APR_SUCCESS) {
2184 /* Bad things happened. Everyone should have cleaned up. */
2185 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r,
2186 MODNAME ": could not execute `%s'.",
2187 compr[parm->method].argv[0]);
2190 apr_pool_note_subprocess(child_context, procnew, kill_after_timeout);
2191 *pipe_in = procnew->out;
/*
 * uncompress - run the decompressor for a compr[] method and collect the
 * start of its output.
 *
 *   r       request whose file is decompressed
 *   method  index into compr[]
 *   newch   receives a buffer of n bytes allocated from r->pool and
 *           filled by a single apr_file_read() from the child
 *   n       capacity of that buffer; apr_file_read() updates the local
 *           copy with the count actually read
 *
 * A sub pool of r->pool is created for the child process and destroyed as
 * soon as the read has been done, so the child is collected early.  A
 * failure to spawn and a failed read are each logged at error level.
 *
 * NOTE(review): on the spawn-failure branch no apr_pool_destroy() of
 * sub_context is visible, unlike on the read paths - confirm whether the
 * sub pool should be released there as well.
 */
2198 static int uncompress(request_rec *r, int method,
2199 unsigned char **newch, apr_size_t n)
2201 struct uncompress_parms parm;
2202 apr_file_t *pipe_out = NULL;
2203 apr_pool_t *sub_context;
2207 parm.method = method;
2209 /* We make a sub_pool so that we can collect our child early, otherwise
2210 * there are cases (i.e. generating directory indicies with mod_autoindex)
2211 * where we would end up with LOTS of zombies.
2213 if (apr_pool_create(&sub_context, r->pool) != APR_SUCCESS)
2216 if ((rv = uncompress_child(&parm, sub_context, &pipe_out)) != APR_SUCCESS) {
2217 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
2218 MODNAME ": couldn't spawn uncompress process: %s", r->uri);
2222 *newch = (unsigned char *) apr_palloc(r->pool, n);
2223 rv = apr_file_read(pipe_out, *newch, &n);
2225 apr_pool_destroy(sub_context);
2226 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
2227 MODNAME ": read failed %s", r->filename);
2230 apr_pool_destroy(sub_context);
2235 * is_tar() -- figure out whether file is a tar archive.
2237 * Stolen (by author of file utility) from the public domain tar program: Public
2238 * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
2240 * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
2241 * 1997/06/24 00:41:02 ikluft Exp ikluft $
2243 * Comments changed and some code/comments reformatted for file command by Ian
2247 #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
2250 * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
2251 * old UNIX tar file, 2 for Unix Std (POSIX) tar file.
2254 static int is_tar(unsigned char *buf, apr_size_t nbytes)
2256 register union record *header = (union record *) buf;
2258 register long sum, recsum;
2261 if (nbytes < sizeof(union record))
2264 recsum = from_oct(8, header->header.chksum);
2267 p = header->charptr;
2268 for (i = sizeof(union record); --i >= 0;) {
2270 * We can't use unsigned char here because of old compilers, e.g. V7.
2275 /* Adjust checksum to count the "chksum" field as blanks. */
2276 for (i = sizeof(header->header.chksum); --i >= 0;)
2277 sum -= 0xFF & header->header.chksum[i];
2278 sum += ' ' * sizeof header->header.chksum;
2281 return 0; /* Not a tar archive */
2283 if (0 == strcmp(header->header.magic, TMAGIC))
2284 return 2; /* Unix Standard tar archive */
2286 return 1; /* Old fashioned tar archive */
/*
 * Quick and dirty octal conversion.
 *
 *   digs   width of the field in characters
 *   where  start of the field
 *
 * Leading white space is skipped, then octal digits are accumulated until
 * the field width is used up or a non-octal character is met.
 *
 * Result is -1 if the field is invalid (all blank, or nonoctal).
 */
static long from_oct(int digs, char *where)
{
    long result = 0;

    /* Skip spaces; running out of field here means it was all blank */
    while (apr_isspace(*where)) {
        ++where;
        digs--;
        if (digs < 1) {
            return -1;
        }
    }

    /* Scan til nonoctal */
    for (; digs > 0 && isodigit(*where); --digs) {
        result = (result << 3) | (*where++ - '0');
    }

    /* Anything but a space or NUL inside the field makes it invalid */
    if (digs > 0 && *where != '\0' && !apr_isspace(*where)) {
        return -1;
    }

    return result;
}
2317 * Check for file-revision suffix
2319 * This is for an obscure document control system used on an intranet.
2320 * The web representation of each file's revision has an @1, @2, etc
2321 * appended with the revision number. This needs to be stripped off to
2322 * find the file suffix, which can be recognized by sending the name back
2323 * through a sub-request. The base file name (without the @num suffix)
2324 * must exist because its type will be used as the result.
2326 static int revision_suffix(request_rec *r)
2328 int suffix_pos, result;
2332 #if MIME_MAGIC_DEBUG
2333 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
2334 MODNAME ": revision_suffix checking %s", r->filename);
2335 #endif /* MIME_MAGIC_DEBUG */
2337 /* check for recognized revision suffix */
2338 suffix_pos = strlen(r->filename) - 1;
2339 if (!apr_isdigit(r->filename[suffix_pos])) {
2342 while (suffix_pos >= 0 && apr_isdigit(r->filename[suffix_pos]))
2344 if (suffix_pos < 0 || r->filename[suffix_pos] != '@') {
2348 /* perform sub-request for the file name without the suffix */
2350 sub_filename = apr_pstrndup(r->pool, r->filename, suffix_pos);
2351 #if MIME_MAGIC_DEBUG
2352 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
2353 MODNAME ": subrequest lookup for %s", sub_filename);
2354 #endif /* MIME_MAGIC_DEBUG */
2355 sub = ap_sub_req_lookup_file(sub_filename, r, NULL);
2357 /* extract content type/encoding/language from sub-request */
2358 if (sub->content_type) {
2359 r->content_type = apr_pstrdup(r->pool, sub->content_type);
2360 #if MIME_MAGIC_DEBUG
2361 ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, r,
2362 MODNAME ": subrequest %s got %s",
2363 sub_filename, r->content_type);
2364 #endif /* MIME_MAGIC_DEBUG */
2365 if (sub->content_encoding)
2366 r->content_encoding =
2367 apr_pstrdup(r->pool, sub->content_encoding);
2368 if (sub->content_language)
2369 r->content_language =
2370 apr_pstrdup(r->pool, sub->content_language);
2375 ap_destroy_sub_req(sub);
2381 * initialize the module
/*
 * magic_init - post-config hook (registered in register_hooks()) that
 * prepares the magic data of every server.
 *
 *   p            pool handed on to apprentice()
 *   plog, ptemp  not referenced in the statements visible here
 *   main_server  head of the server list; also the server a virtual host
 *                without a magic file of its own inherits from
 *
 * For each server in the list the module configuration is fetched.  A
 * server other than main_server with no magicfile takes the branch marked
 * as inheriting from the parent; a server that names a magicfile has it
 * loaded through apprentice(s, p).
 *
 * With MIME_MAGIC_DEBUG the freshly built list is walked and any entry
 * whose pointer value consists of four printable bytes is reported as a
 * clobbered pointer, together with the line number of the previous entry.
 *
 * NOTE(review): the statements that copy the parent configuration and that
 * act on the result of apprentice() are not visible in this listing.
 */
2383 static void magic_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server)
2386 magic_server_config_rec *conf;
2387 magic_server_config_rec *main_conf;
2389 #if MIME_MAGIC_DEBUG
2390 struct magic *m, *prevm;
2391 #endif /* MIME_MAGIC_DEBUG */
2393 main_conf = ap_get_module_config(main_server->module_config, &mime_magic_module);
2394 for (s = main_server; s; s = s->next) {
2395 conf = ap_get_module_config(s->module_config, &mime_magic_module);
2396 if (conf->magicfile == NULL && s != main_server) {
2397 /* inherits from the parent */
2400 else if (conf->magicfile) {
2401 result = apprentice(s, p);
2404 #if MIME_MAGIC_DEBUG
2406 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, s,
2407 MODNAME ": magic_init 1 test");
2408 for (m = conf->magic; m; m = m->next) {
2409 if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
2410 apr_isprint((((unsigned long) m) >> 16) & 255) &&
2411 apr_isprint((((unsigned long) m) >> 8) & 255) &&
2412 apr_isprint(((unsigned long) m) & 255)) {
2413 ap_log_error(APLOG_MARK, APLOG_NOERRNO | APLOG_DEBUG, 0, s,
2414 MODNAME ": magic_init 1: POINTER CLOBBERED! "
2415 "m=\"%c%c%c%c\" line=%d",
2416 (((unsigned long) m) >> 24) & 255,
2417 (((unsigned long) m) >> 16) & 255,
2418 (((unsigned long) m) >> 8) & 255,
2419 ((unsigned long) m) & 255,
2420 prevm ? prevm->lineno : -1);
2431 * Find the Content-Type from any resource this module has available
2434 static int magic_find_ct(request_rec *r)
2437 magic_server_config_rec *conf;
2439 /* the file has to exist */
2440 if (r->finfo.filetype == 0 || !r->filename) {
2444 /* was someone else already here? */
2445 if (r->content_type) {
2449 conf = ap_get_module_config(r->server->module_config, &mime_magic_module);
2450 if (!conf || !conf->magic) {
2454 /* initialize per-request info */
2455 if (!magic_set_config(r)) {
2456 return HTTP_INTERNAL_SERVER_ERROR;
2459 /* try excluding file-revision suffixes */
2460 if (revision_suffix(r) != 1) {
2461 /* process it based on the file contents */
2462 if ((result = magic_process(r)) != OK) {
2467 /* if we have any results, put them in the request structure */
2468 return magic_rsl_to_request(r);
2471 static void register_hooks(apr_pool_t *p)
2473 static const char * const aszPre[]={ "mod_mime.c", NULL };
2475 /* mod_mime_magic should be run after mod_mime, if at all. */
2477 ap_hook_type_checker(magic_find_ct, aszPre, NULL, APR_HOOK_MIDDLE);
2478 ap_hook_post_config(magic_init, NULL, NULL, APR_HOOK_FIRST);
2482 * Apache API module interface
/*
 * Module record through which the server core reaches this module.
 * There is no per-directory configuration; per-server configuration is
 * created and merged by the two functions named below, the directives
 * come from mime_magic_cmds, and register_hooks() installs the hooks.
 */
2485 module mime_magic_module =
2487 STANDARD20_MODULE_STUFF,
2488 NULL, /* dir config creator */
2489 NULL, /* dir merger --- default is to override */
2490 create_magic_server_config, /* server config */
2491 merge_magic_server_config, /* merge server config */
2492 mime_magic_cmds, /* command apr_table_t */
2493 register_hooks /* register hooks */