1 /* ====================================================================
2 * The Apache Software License, Version 1.1
4 * Copyright (c) 2000-2002 The Apache Software Foundation. All rights
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
19 * 3. The end-user documentation included with the redistribution,
20 * if any, must include the following acknowledgment:
21 * "This product includes software developed by the
22 * Apache Software Foundation (http://www.apache.org/)."
23 * Alternately, this acknowledgment may appear in the software itself,
24 * if and wherever such third-party acknowledgments normally appear.
26 * 4. The names "Apache" and "Apache Software Foundation" must
27 * not be used to endorse or promote products derived from this
28 * software without prior written permission. For written
29 * permission, please contact apache@apache.org.
31 * 5. Products derived from this software may not be called "Apache",
32 * nor may "Apache" appear in their name, without prior written
33 * permission of the Apache Software Foundation.
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * ====================================================================
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation. For more
51 * information on the Apache Software Foundation, please see
52 * <http://www.apache.org/>.
54 * Portions of this software are based upon public domain software
55 * (zlib functions gz_open and gzwrite)
59 * mod_deflate.c: Perform deflate transfer-encoding on the fly
61 * Written by Ian Holsman (IanH@apache.org)
66 #include "http_config.h"
68 #include "apr_strings.h"
69 #include "apr_general.h"
70 #include "util_filter.h"
71 #include "apr_buckets.h"
72 #include "http_request.h"
73 #define APR_WANT_STRFUNC
81 /* As part of the encoding process, we must send what our OS_CODE is
82 * (or so it seems based on what I can tell of how gzip encoding works).
84 * zutil.h is not always included with zlib distributions (it is a private
85 * header), so this is straight from zlib 1.1.3's zutil.h.
91 #ifdef WIN32 /* Window 95 & Windows NT */
95 #if defined(VAXC) || defined(VMS)
103 #if defined(ATARI) || defined(atarist)
107 #if defined(MACOS) || defined(TARGET_OS_MAC)
111 #ifdef __50SERIES /* Prime/PRIMOS */
120 #define OS_CODE 0x03 /* assume Unix */
/* OS_CODE is written as the tenth and last byte of the gzip header that
 * deflate_out_filter builds.
 * NOTE(review): only the conditions of this #if chain and the final Unix
 * fallback are visible in this listing; the per-platform OS_CODE values
 * and the matching #endif lines are not shown, so verify them against
 * zutil.h before relying on them.
 */
/* Name under which register_hooks() registers both the output filter
 * (deflate_out_filter) and the input filter (deflate_in_filter).
 */
124 static const char deflateFilterName[] = "DEFLATE";
/* Forward declaration of this module's record so that the directive
 * handlers and filters can look up their configuration with
 * ap_get_module_config(); the record itself is defined at the end of the
 * file.
 */
125 module AP_MODULE_DECLARE_DATA deflate_module;
/* Per-server configuration of the deflate filters, created by
 * create_deflate_server_config() and filled in by the Deflate* directive
 * handlers.
 *
 * bufferSize is the size in bytes of the scratch buffer each filter
 * instance allocates for zlib output (ctx->buffer).
 *
 * NOTE(review): code elsewhere in this file also reads and writes the
 * members windowSize, memlevel and noteName; their declarations are not
 * visible in this listing.
 */
127 typedef struct deflate_filter_config_t
131 apr_size_t bufferSize;
133 } deflate_filter_config;
/* Compiled-in defaults that create_deflate_server_config() copies into a
 * fresh server configuration.
 *
 * The window size is negative to suppress the zlib header: the filters
 * write and parse the gzip framing themselves.  The value is parenthesized
 * so the macro expands safely inside any expression.
 */
#define DEFAULT_WINDOWSIZE (-15)
#define DEFAULT_MEMLEVEL   9
#define DEFAULT_BUFFERSIZE 8096
/* Stores the low 32 bits of x into string[0..3], least significant byte
 * first, independent of the host byte order.  Only the bottom 4 bytes of x
 * are needed for the deflate/gzip file format; any higher bits are
 * discarded.
 *
 * The output filter uses this to append the CRC and the total input length
 * after the compressed data.
 *
 * string: destination buffer; must have room for at least 4 bytes.
 * x:      value to encode.
 */
static void putLong(unsigned char *string, unsigned long x)
{
    string[0] = (unsigned char)(x & 0xff);
    string[1] = (unsigned char)((x >> 8) & 0xff);
    string[2] = (unsigned char)((x >> 16) & 0xff);
    string[3] = (unsigned char)((x >> 24) & 0xff);
}
/* Reads four bytes from string, least significant byte first, and returns
 * them as an unsigned long.  This is the inverse of putLong() and is used
 * by the input filter to decode the CRC and length that follow the
 * compressed data.
 *
 * string: source buffer; at least 4 bytes must be readable.  The buffer is
 *         only read, hence the const qualifier.
 *
 * Returns the decoded value, in the range 0 .. 0xffffffff.
 */
static unsigned long getLong(const unsigned char *string)
{
    return ((unsigned long)string[0])
         | (((unsigned long)string[1]) << 8)
         | (((unsigned long)string[2]) << 16)
         | (((unsigned long)string[3]) << 24);
}
/* Server-config constructor referenced from the deflate_module record.
 *
 * Allocates a deflate_filter_config from pool p with apr_pcalloc() and
 * fills in the compiled-in defaults for memory level, window size and
 * buffer size.  The server_rec argument s is not used in the visible
 * lines.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the new structure c is returned - confirm.
 */
161 static void *create_deflate_server_config(apr_pool_t *p, server_rec *s)
163 deflate_filter_config *c = apr_pcalloc(p, sizeof *c);
165 c->memlevel = DEFAULT_MEMLEVEL;
166 c->windowSize = DEFAULT_WINDOWSIZE;
167 c->bufferSize = DEFAULT_BUFFERSIZE;
/* Handler for the DeflateWindowSize directive (see deflate_filter_cmds).
 *
 * Looks up this module's configuration for the server the directive
 * appears in and stores the requested size negated in windowSize; the
 * negative sign is what suppresses the zlib header when the value is
 * later handed to zlib.
 *
 * Returns an error message when the value is rejected.
 *
 * NOTE(review): the declaration of i, the conversion of the directive
 * argument and the range test are not visible in this listing; the error
 * text states the accepted range as 1 to 15.  The success return is not
 * visible either.
 */
172 static const char *deflate_set_window_size(cmd_parms *cmd, void *dummy,
175 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
182 return "DeflateWindowSize must be between 1 and 15";
184 c->windowSize = i * -1;
/* Handler for the DeflateBufferSize directive (see deflate_filter_cmds).
 *
 * Looks up this module's configuration for the server the directive
 * appears in and stores n, converted to apr_size_t, in bufferSize.
 *
 * Returns an error message when the value is rejected.
 *
 * NOTE(review): the declaration of n, the conversion of the directive
 * argument and the test guarding the error are not visible in this
 * listing; the error text implies that only positive values are accepted.
 * The success return is not visible either.
 */
189 static const char *deflate_set_buffer_size(cmd_parms *cmd, void *dummy,
192 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
197 return "DeflateBufferSize should be positive";
200 c->bufferSize = (apr_size_t)n;
/* Handler for the DeflateFilterNote directive (see deflate_filter_cmds).
 *
 * Stores a copy of arg, allocated from the command pool, as noteName in
 * this module's configuration for the server.  deflate_out_filter uses
 * that name as the key of the r->notes entry in which it records the
 * compression ratio.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
204 static const char *deflate_set_note(cmd_parms *cmd, void *dummy,
207 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
209 c->noteName = apr_pstrdup(cmd->pool, arg);
/* Handler for the DeflateMemLevel directive (see deflate_filter_cmds).
 *
 * Looks up this module's configuration for the server the directive
 * appears in and returns an error message when the value is rejected.
 *
 * NOTE(review): only the configuration lookup and the error string are
 * visible in this listing.  The conversion of the argument, the range
 * test (the error text gives 1 to 9) and the assignment to the
 * configuration - presumably the memlevel member - are not shown.
 */
214 static const char *deflate_set_memlevel(cmd_parms *cmd, void *dummy,
217 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
224 return "DeflateMemLevel must be between 1 and 9";
/* The two gzip identification bytes (ID1 = 0x1f, ID2 = 0x8b).  The output
 * filter emits them as the first two bytes of the gzip header and the
 * input filter compares the first two bytes of the request body against
 * them.  The table is only ever read, so it is declared const.
 */
static const char deflate_magic[2] = { '\037', '\213' };
/* Per-filter-instance state, stored in f->ctx by both filters.
 *
 * buffer  - scratch area of c->bufferSize bytes that zlib writes its
 *           output into.
 * bb      - in the output filter, the brigade in which the gzip header,
 *           the compressed data and the trailer are collected before
 *           being passed on; in the input filter, the brigade that
 *           receives the raw data read from the next filter.
 * proc_bb - created and used by the input filter only; holds inflated
 *           data not yet handed to the caller.
 *
 * NOTE(review): the filters also use the members stream (the zlib stream
 * state) and crc; their declarations and the closing line of this typedef
 * are not visible in this listing.
 */
234 typedef struct deflate_ctx_t
237 unsigned char *buffer;
239 apr_bucket_brigade *bb, *proc_bb;
/* Output filter registered under the DEFLATE name: gzip-compresses the
 * response body on the fly.
 *
 * f  - this filter; f->ctx carries the deflate_ctx once compression has
 *      been set up.
 * bb - brigade holding the next piece of the response body.
 *
 * First call for a request (no context yet): decides whether compression
 * may be used.  The filter removes itself and passes bb on untouched when
 * the "no-gzip" environment variable is set, when "gzip-only-text/html"
 * is set and the content type does not start with "text/html", when the
 * response already carries a gzip Content-Encoding, or when the request's
 * Accept-Encoding header is absent or has no "gzip" token.  Otherwise it
 * allocates the context, initialises zlib, queues the 10-byte gzip header
 * and adjusts the Content-Encoding, Vary and Content-Length response
 * headers.
 *
 * Every call: walks the buckets of bb.  Data buckets are fed through
 * deflate() while the CRC of the uncompressed data is kept up to date.  A
 * FLUSH bucket triggers a Z_SYNC_FLUSH and the output gathered so far is
 * passed on.  At EOS the stream is finished, the 8-byte trailer (CRC and
 * input length) is appended, the optional ratio note is set, and the
 * collected brigade is passed to the next filter.
 *
 * Returns the result of ap_pass_brigade() at the return points visible
 * here.
 *
 * NOTE(review): this listing omits many short lines (braces, several
 * conditions, local declarations and the final return), so the exact
 * control flow must be checked against the complete source.  In the FLUSH
 * branch the result of ap_pass_brigade() is not checked on the visible
 * line.
 */
242 static apr_status_t deflate_out_filter(ap_filter_t *f,
243 apr_bucket_brigade *bb)
246 request_rec *r = f->r;
247 deflate_ctx *ctx = f->ctx;
249 deflate_filter_config *c = ap_get_module_config(r->server->module_config,
252 /* If we don't have a context, we need to ensure that it is okay to send
253 * the deflated content. If we have a context, that means we've done
254 * this before and we liked it.
255 * This could be not so nice if we always fail. But, if we succeed,
256 * we're in better shape.
260 const char *encoding, *accepts;
262 /* only work on main request/no subrequests */
/* NOTE(review): the condition guarding this early exit is not visible in
 * the listing.
 */
264 ap_remove_output_filter(f);
265 return ap_pass_brigade(f->next, bb);
268 /* some browsers might have problems, so set no-gzip
269 * (with browsermatch) for them
271 if (apr_table_get(r->subprocess_env, "no-gzip")) {
272 ap_remove_output_filter(f);
273 return ap_pass_brigade(f->next, bb);
276 /* Some browsers might have problems with content types
277 * other than text/html, so set gzip-only-text/html
278 * (with browsermatch) for them
280 if ((r->content_type == NULL
281 || strncmp(r->content_type, "text/html", 9))
282 && apr_table_get(r->subprocess_env, "gzip-only-text/html")) {
283 ap_remove_output_filter(f);
284 return ap_pass_brigade(f->next, bb);
287 /* Let's see what our current Content-Encoding is.
288 * If gzip is present, don't gzip again. (We could, but let's not.)
290 encoding = apr_table_get(r->headers_out, "Content-Encoding");
292 const char *tmp = encoding;
294 token = ap_get_token(r->pool, &tmp, 0);
295 while (token && token[0]) {
296 if (!strcasecmp(token, "gzip")) {
297 ap_remove_output_filter(f);
298 return ap_pass_brigade(f->next, bb);
300 /* Otherwise, skip token */
302 token = ap_get_token(r->pool, &tmp, 0);
306 /* if they don't have the line, then they can't play */
307 accepts = apr_table_get(r->headers_in, "Accept-Encoding");
308 if (accepts == NULL) {
309 ap_remove_output_filter(f);
310 return ap_pass_brigade(f->next, bb);
/* Step through the Accept-Encoding tokens until one equals "gzip"
 * (compared without regard to case) or the list is exhausted.
 */
313 token = ap_get_token(r->pool, &accepts, 0);
314 while (token && token[0] && strcasecmp(token, "gzip")) {
317 token = ap_get_token(r->pool, &accepts, 0);
320 /* No acceptable token found. */
321 if (token == NULL || token[0] == '\0') {
322 ap_remove_output_filter(f);
323 return ap_pass_brigade(f->next, bb);
326 /* We're cool with filtering this. */
327 ctx = f->ctx = apr_pcalloc(r->pool, sizeof(*ctx));
328 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
329 ctx->buffer = apr_palloc(r->pool, c->bufferSize);
/* The compression level is fixed at Z_BEST_SPEED; the window size and
 * memory level come from the server configuration.
 */
331 zRC = deflateInit2(&ctx->stream, Z_BEST_SPEED, Z_DEFLATED,
332 c->windowSize, c->memlevel,
337 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
338 "unable to init Zlib: "
339 "deflateInit2 returned %d: URL %s",
341 return ap_pass_brigade(f->next, bb);
344 /* RFC 1952 Section 2.3 dictates the gzip header:
346 * +---+---+---+---+---+---+---+---+---+---+
347 * |ID1|ID2|CM |FLG| MTIME |XFL|OS |
348 * +---+---+---+---+---+---+---+---+---+---+
350 * If we wish to populate in MTIME (as hinted in RFC 1952), do:
351 * putLong(date_array, apr_time_now() / APR_USEC_PER_SEC);
352 * where date_array is a char[4] and then print date_array in the
353 * MTIME position. WARNING: ENDIANNESS ISSUE HERE.
355 buf = apr_psprintf(r->pool, "%c%c%c%c%c%c%c%c%c%c", deflate_magic[0],
356 deflate_magic[1], Z_DEFLATED, 0 /* flags */,
357 0, 0, 0, 0 /* 4 chars for mtime */,
358 0 /* xflags */, OS_CODE);
359 e = apr_bucket_pool_create(buf, 10, r->pool, f->c->bucket_alloc);
360 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
362 /* If the entire Content-Encoding is "identity", we can replace it. */
363 if (!encoding || !strcasecmp(encoding, "identity")) {
364 apr_table_setn(r->headers_out, "Content-Encoding", "gzip");
367 apr_table_mergen(r->headers_out, "Content-Encoding", "gzip");
/* NOTE(review): apr_table_setn replaces any Vary value that is already
 * present in headers_out - confirm that overwriting, rather than merging,
 * is intended.  Content-Length is dropped because the compressed size is
 * not known at this point.
 */
369 apr_table_setn(r->headers_out, "Vary", "Accept-Encoding");
370 apr_table_unset(r->headers_out, "Content-Length");
372 /* initialize deflate output buffer */
373 ctx->stream.next_out = ctx->buffer;
374 ctx->stream.avail_out = c->bufferSize;
/* Process the incoming brigade one bucket at a time. */
377 APR_BRIGADE_FOREACH(e, bb) {
384 if (APR_BUCKET_IS_EOS(e)) {
386 unsigned int deflate_len;
388 ctx->stream.avail_in = 0; /* should be zero already anyway */
/* deflate_len is the number of bytes zlib has placed in ctx->buffer that
 * have not been moved into ctx->bb yet.
 */
390 deflate_len = c->bufferSize - ctx->stream.avail_out;
392 if (deflate_len != 0) {
393 b = apr_bucket_heap_create((char *)ctx->buffer,
396 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
397 ctx->stream.next_out = ctx->buffer;
398 ctx->stream.avail_out = c->bufferSize;
405 zRC = deflate(&ctx->stream, Z_FINISH);
407 if (deflate_len == 0 && zRC == Z_BUF_ERROR) {
411 done = (ctx->stream.avail_out != 0 || zRC == Z_STREAM_END);
413 if (zRC != Z_OK && zRC != Z_STREAM_END) {
/* 8-byte trailer: the CRC accumulated over the uncompressed data,
 * followed by the total number of input bytes, each written least
 * significant byte first by putLong().
 */
418 buf = apr_palloc(r->pool, 8);
419 putLong((unsigned char *)&buf[0], ctx->crc);
420 putLong((unsigned char *)&buf[4], ctx->stream.total_in);
422 b = apr_bucket_pool_create(buf, 8, r->pool, f->c->bucket_alloc);
423 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
/* NOTE(review): total_in and total_out are printed with %ld - confirm
 * that this matches their declared type in zlib.h.
 */
424 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
425 "Zlib: Compressed %ld to %ld : URL %s",
426 ctx->stream.total_in, ctx->stream.total_out, r->uri);
/* Ratio note: the compressed size as a percentage of the input size, or
 * "-" when no input was consumed.
 * NOTE(review): the test that a note name has been configured is not
 * visible in this listing.
 */
429 if (ctx->stream.total_in > 0) {
432 total = ctx->stream.total_out * 100 / ctx->stream.total_in;
434 apr_table_setn(r->notes, c->noteName,
435 apr_itoa(r->pool, total));
438 apr_table_setn(r->notes, c->noteName, "-");
442 deflateEnd(&ctx->stream);
444 /* Remove EOS from the old list, and insert into the new. */
445 APR_BUCKET_REMOVE(e);
446 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
448 /* Okay, we've seen the EOS.
449 * Time to pass it along down the chain.
451 return ap_pass_brigade(f->next, ctx->bb);
454 if (APR_BUCKET_IS_FLUSH(e)) {
456 zRC = deflate(&(ctx->stream), Z_SYNC_FLUSH);
461 ctx->stream.next_out = ctx->buffer;
462 len = c->bufferSize - ctx->stream.avail_out;
464 b = apr_bucket_heap_create((char *)ctx->buffer, len,
465 NULL, f->c->bucket_alloc);
466 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
467 ctx->stream.avail_out = c->bufferSize;
469 bkt = apr_bucket_flush_create(f->c->bucket_alloc);
470 APR_BRIGADE_INSERT_TAIL(ctx->bb, bkt);
471 ap_pass_brigade(f->next, ctx->bb);
476 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
478 /* This crc32 function is from zlib. */
479 ctx->crc = crc32(ctx->crc, (const Bytef *)data, len);
482 ctx->stream.next_in = (unsigned char *)data; /* We just lost const-ness,
483 * but we'll just have to
485 ctx->stream.avail_in = len;
487 while (ctx->stream.avail_in != 0) {
488 if (ctx->stream.avail_out == 0) {
489 ctx->stream.next_out = ctx->buffer;
490 len = c->bufferSize - ctx->stream.avail_out;
492 b = apr_bucket_heap_create((char *)ctx->buffer, len,
493 NULL, f->c->bucket_alloc);
494 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
495 ctx->stream.avail_out = c->bufferSize;
498 zRC = deflate(&(ctx->stream), Z_NO_FLUSH);
505 apr_brigade_destroy(bb);
509 /* This is the deflate input filter (inflates).
 *
 * Registered under the DEFLATE name as an input filter; it inflates a
 * gzip-encoded request body.
 *
 * f         - this filter; f->ctx carries the deflate_ctx once inflation
 *             has been set up.
 * bb        - brigade that receives the data handed back to the caller.
 * mode      - read mode; anything other than AP_MODE_READBYTES is passed
 *             straight to the next filter.
 * block     - blocking behaviour, forwarded to the next filter.
 * readbytes - amount of data requested; also used as the partition point
 *             when inflated data is handed back.
 *
 * First call (no context yet): the request's Content-Encoding is scanned
 * for a "gzip" token, 10 bytes are read from the next filter and checked
 * against the gzip magic bytes and the flags byte, and zlib is
 * initialised with the configured window size.  Later calls read more
 * input when no inflated data is pending, run it through inflate() while
 * keeping a CRC of the inflated output, and on Z_STREAM_END compare the
 * CRC and the length against the 8 bytes that follow the compressed data
 * before appending an EOS bucket.
 *
 * Returns the result of ap_get_brigade() on the pass-through paths
 * visible here.
 *
 * NOTE(review): this listing omits many short lines (braces, several
 * conditions, local declarations, error returns and the final return), so
 * the exact control flow must be checked against the complete source.
 */
510 static apr_status_t deflate_in_filter(ap_filter_t *f,
511 apr_bucket_brigade *bb,
512 ap_input_mode_t mode,
513 apr_read_type_e block,
517 request_rec *r = f->r;
518 deflate_ctx *ctx = f->ctx;
521 deflate_filter_config *c;
523 /* just get out of the way of things we don't want. */
524 if (mode != AP_MODE_READBYTES) {
525 return ap_get_brigade(f->next, bb, mode, block, readbytes);
528 c = ap_get_module_config(r->server->module_config, &deflate_module);
532 char *token, deflate_hdr[10];
533 const char *encoding;
536 /* only work on main request/no subrequests */
/* NOTE(review): the condition guarding this early exit is not visible in
 * the listing.
 */
538 ap_remove_input_filter(f);
539 return ap_get_brigade(f->next, bb, mode, block, readbytes);
542 /* Let's see what our current Content-Encoding is.
543 * If gzip is present, don't gzip again. (We could, but let's not.)
545 encoding = apr_table_get(r->headers_in, "Content-Encoding");
547 const char *tmp = encoding;
549 token = ap_get_token(r->pool, &tmp, 0);
550 while (token && token[0]) {
551 if (!strcasecmp(token, "gzip")) {
555 /* Otherwise, skip token */
557 token = ap_get_token(r->pool, &tmp, 0);
562 ap_remove_input_filter(f);
563 return ap_get_brigade(f->next, bb, mode, block, readbytes);
566 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
567 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
568 ctx->proc_bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
569 ctx->buffer = apr_palloc(r->pool, c->bufferSize);
/* Fetch the first 10 bytes from the next filter; deflate_hdr has room
 * for exactly that many.
 */
571 rv = ap_get_brigade(f->next, ctx->bb, AP_MODE_READBYTES, block, 10);
572 if (rv != APR_SUCCESS) {
/* NOTE(review): apr_brigade_flatten takes the capacity of the destination
 * in len on entry; the line that sets len before this call is not visible
 * in the listing - confirm that it is limited to sizeof(deflate_hdr).
 */
577 rv = apr_brigade_flatten(ctx->bb, deflate_hdr, &len);
578 if (rv != APR_SUCCESS) {
582 /* We didn't get the magic bytes. */
584 deflate_hdr[0] != deflate_magic[0] ||
585 deflate_hdr[1] != deflate_magic[1]) {
589 /* We can't handle flags for now. */
590 if (deflate_hdr[3] != 0) {
/* windowSize is stored as a negative number, so zlib is asked for a
 * stream without a zlib header.
 */
594 zRC = inflateInit2(&ctx->stream, c->windowSize);
598 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
599 "unable to init Zlib: "
600 "inflateInit2 returned %d: URL %s",
602 ap_remove_input_filter(f);
603 return ap_get_brigade(f->next, bb, mode, block, readbytes);
606 /* initialize deflate output buffer */
607 ctx->stream.next_out = ctx->buffer;
608 ctx->stream.avail_out = c->bufferSize;
610 apr_brigade_cleanup(ctx->bb);
/* Only read more input when no inflated data is waiting in proc_bb. */
613 if (APR_BRIGADE_EMPTY(ctx->proc_bb)) {
614 rv = ap_get_brigade(f->next, ctx->bb, mode, block, readbytes);
616 if (rv != APR_SUCCESS) {
620 APR_BRIGADE_FOREACH(bkt, ctx->bb) {
624 /* If we actually see the EOS, that means we screwed up! */
625 if (APR_BUCKET_IS_EOS(bkt)) {
629 if (APR_BUCKET_IS_FLUSH(bkt)) {
630 apr_bucket *tmp_heap;
631 zRC = inflate(&(ctx->stream), Z_SYNC_FLUSH);
636 ctx->stream.next_out = ctx->buffer;
637 len = c->bufferSize - ctx->stream.avail_out;
/* The CRC is accumulated over the inflated output, not over the
 * compressed input.
 */
639 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
640 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
641 NULL, f->c->bucket_alloc);
642 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
643 ctx->stream.avail_out = c->bufferSize;
645 /* Move everything to the returning brigade. */
/* NOTE(review): the concatenation below appends ctx->bb - the raw input
 * brigade currently being iterated - to the caller's brigade, while the
 * inflated data was just placed in ctx->proc_bb.  Confirm that ctx->bb is
 * really the brigade meant here.
 */
646 APR_BUCKET_REMOVE(bkt);
647 APR_BRIGADE_CONCAT(bb, ctx->bb);
652 apr_bucket_read(bkt, &data, &len, APR_BLOCK_READ);
654 /* pass through zlib inflate. */
655 ctx->stream.next_in = (unsigned char *)data;
656 ctx->stream.avail_in = len;
660 while (ctx->stream.avail_in != 0) {
661 if (ctx->stream.avail_out == 0) {
662 apr_bucket *tmp_heap;
663 ctx->stream.next_out = ctx->buffer;
664 len = c->bufferSize - ctx->stream.avail_out;
666 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
667 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
668 NULL, f->c->bucket_alloc);
669 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
670 ctx->stream.avail_out = c->bufferSize;
673 zRC = inflate(&ctx->stream, Z_NO_FLUSH);
675 if (zRC == Z_STREAM_END) {
683 if (zRC == Z_STREAM_END) {
684 apr_bucket *tmp_heap, *eos;
686 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
687 "Zlib: Inflated %ld to %ld : URL %s",
688 ctx->stream.total_in, ctx->stream.total_out,
691 len = c->bufferSize - ctx->stream.avail_out;
693 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
694 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
695 NULL, f->c->bucket_alloc);
696 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
697 ctx->stream.avail_out = c->bufferSize;
699 /* Is the remaining 8 bytes already in the avail stream? */
/* The first four bytes are compared with the CRC computed here, the next
 * four with the number of bytes inflated; both are decoded with
 * getLong().
 */
700 if (ctx->stream.avail_in >= 8) {
701 unsigned long compCRC, compLen;
702 compCRC = getLong(ctx->stream.next_in);
703 if (ctx->crc != compCRC) {
706 ctx->stream.next_in += 4;
707 compLen = getLong(ctx->stream.next_in);
708 if (ctx->stream.total_out != compLen) {
713 /* FIXME: We need to grab the 8 verification bytes
718 inflateEnd(&ctx->stream);
720 eos = apr_bucket_eos_create(f->c->bucket_alloc);
721 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, eos);
726 apr_brigade_cleanup(ctx->bb);
729 /* If we are about to return nothing for a 'blocking' read and we have
730 * some data in our zlib buffer, flush it out so we can return something.
732 if (block == APR_BLOCK_READ &&
733 APR_BRIGADE_EMPTY(ctx->proc_bb) &&
734 ctx->stream.avail_out < c->bufferSize) {
735 apr_bucket *tmp_heap;
737 ctx->stream.next_out = ctx->buffer;
738 len = c->bufferSize - ctx->stream.avail_out;
740 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
741 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
742 NULL, f->c->bucket_alloc);
743 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
744 ctx->stream.avail_out = c->bufferSize;
747 if (!APR_BRIGADE_EMPTY(ctx->proc_bb)) {
748 apr_bucket_brigade *newbb;
750 /* May return APR_INCOMPLETE which is fine by us. */
751 apr_brigade_partition(ctx->proc_bb, readbytes, &bkt);
/* Everything before the partition point goes to the caller's brigade;
 * the remainder is put back into proc_bb for the next call.
 */
753 newbb = apr_brigade_split(ctx->proc_bb, bkt);
754 APR_BRIGADE_CONCAT(bb, ctx->proc_bb);
755 APR_BRIGADE_CONCAT(ctx->proc_bb, newbb);
/* Hook-registration callback referenced from the deflate_module record.
 *
 * Registers deflate_out_filter as an output filter and deflate_in_filter
 * as an input filter, both under the name held in deflateFilterName and
 * both with filter type AP_FTYPE_CONTENT_SET.  The pool argument p is not
 * used in the visible lines.
 */
761 static void register_hooks(apr_pool_t *p)
763 ap_register_output_filter(deflateFilterName, deflate_out_filter,
764 AP_FTYPE_CONTENT_SET);
765 ap_register_input_filter(deflateFilterName, deflate_in_filter,
766 AP_FTYPE_CONTENT_SET);
/* Configuration directives provided by this module.  Each entry takes
 * exactly one argument (AP_INIT_TAKE1), is declared with RSRC_CONF and is
 * dispatched to the handler named in the entry.
 *
 * NOTE(review): the terminating entry and the closing brace of the table
 * are not visible in this listing.
 */
769 static const command_rec deflate_filter_cmds[] = {
770 AP_INIT_TAKE1("DeflateFilterNote", deflate_set_note, NULL, RSRC_CONF,
771 "Set a note to report on compression ratio"),
772 AP_INIT_TAKE1("DeflateWindowSize", deflate_set_window_size, NULL,
773 RSRC_CONF, "Set the Deflate window size (1-15)"),
774 AP_INIT_TAKE1("DeflateBufferSize", deflate_set_buffer_size, NULL, RSRC_CONF,
775 "Set the Deflate Buffer Size"),
776 AP_INIT_TAKE1("DeflateMemLevel", deflate_set_memlevel, NULL, RSRC_CONF,
777 "Set the Deflate Memory Level (1-9)"),
/* The module record.  It wires up the per-server configuration
 * constructor, the directive table and the hook-registration callback.
 * No per-directory configuration is used, and no merge function is
 * supplied for the server configuration.
 *
 * NOTE(review): the closing line of the initializer is not visible in
 * this listing.
 */
781 module AP_MODULE_DECLARE_DATA deflate_module = {
782 STANDARD20_MODULE_STUFF,
783 NULL, /* dir config creater */
784 NULL, /* dir merger --- default is to override */
785 create_deflate_server_config, /* server config */
786 NULL, /* merge server config */
787 deflate_filter_cmds, /* command table */
788 register_hooks /* register hooks */