1 /* ====================================================================
2 * The Apache Software License, Version 1.1
4 * Copyright (c) 2000-2002 The Apache Software Foundation. All rights
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
19 * 3. The end-user documentation included with the redistribution,
20 * if any, must include the following acknowledgment:
21 * "This product includes software developed by the
22 * Apache Software Foundation (http://www.apache.org/)."
23 * Alternately, this acknowledgment may appear in the software itself,
24 * if and wherever such third-party acknowledgments normally appear.
26 * 4. The names "Apache" and "Apache Software Foundation" must
27 * not be used to endorse or promote products derived from this
28 * software without prior written permission. For written
29 * permission, please contact apache@apache.org.
31 * 5. Products derived from this software may not be called "Apache",
32 * nor may "Apache" appear in their name, without prior written
33 * permission of the Apache Software Foundation.
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * ====================================================================
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation. For more
51 * information on the Apache Software Foundation, please see
52 * <http://www.apache.org/>.
54 * Portions of this software are based upon public domain software
55 * (zlib functions gz_open and gzwrite)
59 * mod_deflate.c: Perform deflate transfer-encoding on the fly
61 * Written by Ian Holsman (IanH@apache.org)
66 #include "http_config.h"
68 #include "apr_strings.h"
69 #include "apr_general.h"
70 #include "util_filter.h"
71 #include "apr_buckets.h"
72 #include "http_request.h"
73 #define APR_WANT_STRFUNC
/* Selects OS_CODE, the byte written in the OS position of the gzip header
 * built by the output filter.
 * NOTE(review): the per-platform "#define OS_CODE" lines and the matching
 * #endif directives are not visible in this listing; only the platform tests
 * and the final Unix fallback (0x03) can be seen here. */
81 /* As part of the encoding process, we must send what our OS_CODE is
82 * (or so it seems based on what I can tell of how gzip encoding works).
84 * zutil.h is not always included with zlib distributions (it is a private
85 * header), so this is straight from zlib 1.1.3's zutil.h.
91 #ifdef WIN32 /* Window 95 & Windows NT */
95 #if defined(VAXC) || defined(VMS)
103 #if defined(ATARI) || defined(atarist)
107 #if defined(MACOS) || defined(TARGET_OS_MAC)
111 #ifdef __50SERIES /* Prime/PRIMOS */
120 #define OS_CODE 0x03 /* assume Unix */
/* Name under which register_hooks() registers both the output filter and
 * the input filter. */
124 static const char deflateFilterName[] = "DEFLATE";
/* Forward declaration so the functions below can pass &deflate_module to
 * ap_get_module_config(); the record itself is defined at the end of the
 * file. */
125 module AP_MODULE_DECLARE_DATA deflate_module;
/* Per-server settings for the deflate filters, created by
 * create_deflate_server_config() and changed by the Deflate* directives.
 * NOTE(review): only bufferSize is visible in this listing; other code in
 * this file also reads windowSize, memlevel and noteName from this struct. */
127 typedef struct deflate_filter_config_t
/* Size passed to apr_palloc() for each filter context's zlib buffer. */
131 apr_size_t bufferSize;
133 } deflate_filter_config;
/* Defaults installed by create_deflate_server_config(). */
135 /* windowsize is negative to suppress Zlib header */
136 #define DEFAULT_WINDOWSIZE -15
137 #define DEFAULT_MEMLEVEL 9
/* NOTE(review): 8096 is not a power of two; possibly 8192 was intended --
 * confirm before changing, since it is the shipped default. */
138 #define DEFAULT_BUFFERSIZE 8096
/* Stores the low 32 bits of x into string[0..3], least significant byte
 * first.  Only the bottom 4 bytes of x are needed: the output filter uses
 * this to write the CRC and the input length at the end of the stream.
 *
 * string must point to at least 4 writable bytes.
 */
static void putLong(unsigned char *string, unsigned long x)
{
    string[0] = (unsigned char)(x & 0xff);
    string[1] = (unsigned char)((x >> 8) & 0xff);
    string[2] = (unsigned char)((x >> 16) & 0xff);
    string[3] = (unsigned char)((x >> 24) & 0xff);
}
/* Reads four bytes from string, least significant byte first, and returns
 * them combined into an unsigned long (the inverse of putLong).
 *
 * string must point to at least 4 readable bytes.  The buffer is only read,
 * so it is const-qualified; callers holding a non-const pointer are
 * unaffected.
 */
static unsigned long getLong(const unsigned char *string)
{
    return ((unsigned long)string[0])
         | (((unsigned long)string[1]) << 8)
         | (((unsigned long)string[2]) << 16)
         | (((unsigned long)string[3]) << 24);
}
/* Server-config constructor named in the module record.  Allocates a
 * deflate_filter_config from pool p with apr_pcalloc() and fills in the
 * default memory level, window size and buffer size.
 * NOTE(review): the braces and the return statement are not visible in this
 * listing; s is not used on any visible line. */
161 static void *create_deflate_server_config(apr_pool_t *p, server_rec *s)
163 deflate_filter_config *c = apr_pcalloc(p, sizeof *c);
165 c->memlevel = DEFAULT_MEMLEVEL;
166 c->windowSize = DEFAULT_WINDOWSIZE;
167 c->bufferSize = DEFAULT_BUFFERSIZE;
/* Handler for the DeflateWindowSize directive.  Returns an error string for
 * values outside 1..15 and otherwise stores the value negated, which matches
 * the negative DEFAULT_WINDOWSIZE convention (negative suppresses the zlib
 * header).
 * NOTE(review): the rest of the parameter list, the declaration and parsing
 * of i, and the range test itself are not visible in this listing. */
172 static const char *deflate_set_window_size(cmd_parms *cmd, void *dummy,
175 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
182 return "DeflateWindowSize must be between 1 and 15";
184 c->windowSize = i * -1;
/* Handler for the DeflateBufferSize directive.  Stores n as the server's
 * bufferSize, or returns an error string when the value is rejected.
 * NOTE(review): the parsing of n and the exact rejection condition are not
 * visible in this listing; the message implies n must be greater than zero. */
189 static const char *deflate_set_buffer_size(cmd_parms *cmd, void *dummy,
192 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
197 return "DeflateBufferSize should be positive";
200 c->bufferSize = (apr_size_t)n;
/* Handler for the DeflateFilterNote directive.  Stores a copy of arg,
 * duplicated into cmd->pool, as the server's noteName.  The output filter
 * uses that name as the key in r->notes when it records the compression
 * ratio at end of stream.
 * NOTE(review): the rest of the parameter list and the return statement are
 * not visible in this listing. */
204 static const char *deflate_set_note(cmd_parms *cmd, void *dummy,
207 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
209 c->noteName = apr_pstrdup(cmd->pool, arg);
/* Handler for the DeflateMemLevel directive.  Values outside 1..9 are
 * rejected with the error string below.
 * NOTE(review): the parsing, the range test and the assignment into the
 * config are not visible in this listing. */
214 static const char *deflate_set_memlevel(cmd_parms *cmd, void *dummy,
217 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
224 return "DeflateMemLevel must be between 1 and 9";
/* The two gzip identification bytes (ID1, ID2), written in octal:
 * 0x1f 0x8b.  The output filter emits them at the start of the header and
 * the input filter compares the first two header bytes against them. */
232 static char deflate_magic[2] = { '\037', '\213' };
/* Per-filter-instance state, stored in f->ctx by both filters.
 * NOTE(review): only buffer, bb and proc_bb are visible in this listing; the
 * filters also use ctx->stream (the zlib stream) and ctx->crc. */
234 typedef struct deflate_ctx_t
/* zlib output buffer, allocated with c->bufferSize bytes. */
237 unsigned char *buffer;
/* bb: in the output filter, the brigade of compressed data passed
 * downstream; in the input filter, the raw data fetched from the next
 * filter.  proc_bb: created by the input filter only; holds inflated data
 * not yet returned to the caller. */
239 apr_bucket_brigade *bb, *proc_bb;
/* Output filter: gzip-compresses the response body with zlib.
 *
 * On the first call (no f->ctx yet) it decides whether compression applies.
 * It removes itself and passes bb through unchanged when:
 *   - "no-gzip" is set in r->subprocess_env,
 *   - the content type is missing or does not start with "text/html" and
 *     "gzip-only-text/html" is set to "1",
 *   - Content-Encoding already contains a "gzip" token,
 *   - the request has no Accept-Encoding header, or none of its tokens is
 *     "gzip".
 * Otherwise it allocates the context, initialises zlib, queues the 10-byte
 * gzip header in ctx->bb and adjusts the response headers.
 *
 * Each bucket of bb is then handled by type: EOS finishes the stream,
 * appends the 8-byte CRC/length trailer and passes ctx->bb downstream;
 * FLUSH syncs the stream and passes the data along; anything else is read
 * and fed to deflate().
 *
 * NOTE(review): this listing omits many short lines (braces, local
 * declarations, some conditions such as the subrequest test), so the
 * comments here describe only what the visible lines show.
 */
242 static apr_status_t deflate_out_filter(ap_filter_t *f,
243 apr_bucket_brigade *bb)
246 request_rec *r = f->r;
247 deflate_ctx *ctx = f->ctx;
249 deflate_filter_config *c = ap_get_module_config(r->server->module_config,
252 /* If we don't have a context, we need to ensure that it is okay to send
253 * the deflated content. If we have a context, that means we've done
254 * this before and we liked it.
255 * This could be not so nice if we always fail. But, if we succeed,
256 * we're in better shape.
260 const char *encoding, *accepts;
262 /* only work on main request/no subrequests */
264 ap_remove_output_filter(f);
265 return ap_pass_brigade(f->next, bb);
268 /* some browsers might have problems, so set no-gzip
269 * (with browsermatch) for them
271 if (apr_table_get(r->subprocess_env, "no-gzip")) {
272 ap_remove_output_filter(f);
273 return ap_pass_brigade(f->next, bb);
276 /* Some browsers might have problems with content types
277 * other than text/html, so set gzip-only-text/html
278 * (with browsermatch) for them
280 if (r->content_type == NULL
281 || strncmp(r->content_type, "text/html", 9)) {
282 const char *env_value = apr_table_get(r->subprocess_env,
283 "gzip-only-text/html");
284 if ( env_value && (strcmp(env_value,"1") == 0) ) {
285 ap_remove_output_filter(f);
286 return ap_pass_brigade(f->next, bb);
290 /* Let's see what our current Content-Encoding is.
291 * If gzip is present, don't gzip again. (We could, but let's not.)
293 encoding = apr_table_get(r->headers_out, "Content-Encoding");
295 const char *tmp = encoding;
297 token = ap_get_token(r->pool, &tmp, 0);
298 while (token && token[0]) {
299 if (!strcasecmp(token, "gzip")) {
300 ap_remove_output_filter(f);
301 return ap_pass_brigade(f->next, bb);
303 /* Otherwise, skip token */
305 token = ap_get_token(r->pool, &tmp, 0);
309 /* if they don't have the line, then they can't play */
310 accepts = apr_table_get(r->headers_in, "Accept-Encoding");
311 if (accepts == NULL) {
312 ap_remove_output_filter(f);
313 return ap_pass_brigade(f->next, bb);
316 token = ap_get_token(r->pool, &accepts, 0);
317 while (token && token[0] && strcasecmp(token, "gzip")) {
320 token = ap_get_token(r->pool, &accepts, 0);
323 /* No acceptable token found. */
324 if (token == NULL || token[0] == '\0') {
325 ap_remove_output_filter(f);
326 return ap_pass_brigade(f->next, bb);
329 /* We're cool with filtering this. */
330 ctx = f->ctx = apr_pcalloc(r->pool, sizeof(*ctx));
331 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
332 ctx->buffer = apr_palloc(r->pool, c->bufferSize);
334 zRC = deflateInit2(&ctx->stream, Z_BEST_SPEED, Z_DEFLATED,
335 c->windowSize, c->memlevel,
340 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
341 "unable to init Zlib: "
342 "deflateInit2 returned %d: URL %s",
344 return ap_pass_brigade(f->next, bb);
347 /* RFC 1952 Section 2.3 dictates the gzip header:
349 * +---+---+---+---+---+---+---+---+---+---+
350 * |ID1|ID2|CM |FLG| MTIME |XFL|OS |
351 * +---+---+---+---+---+---+---+---+---+---+
353 * If we wish to populate in MTIME (as hinted in RFC 1952), do:
354 * putLong(date_array, apr_time_now() / APR_USEC_PER_SEC);
355 * where date_array is a char[4] and then print date_array in the
356 * MTIME position. WARNING: ENDIANNESS ISSUE HERE.
358 buf = apr_psprintf(r->pool, "%c%c%c%c%c%c%c%c%c%c", deflate_magic[0],
359 deflate_magic[1], Z_DEFLATED, 0 /* flags */,
360 0, 0, 0, 0 /* 4 chars for mtime */,
361 0 /* xflags */, OS_CODE);
362 e = apr_bucket_pool_create(buf, 10, r->pool, f->c->bucket_alloc);
363 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
365 /* If the entire Content-Encoding is "identity", we can replace it. */
366 if (!encoding || !strcasecmp(encoding, "identity")) {
367 apr_table_setn(r->headers_out, "Content-Encoding", "gzip");
370 apr_table_mergen(r->headers_out, "Content-Encoding", "gzip");
/* NOTE(review): setn replaces any Vary value already present in
 * headers_out; merging "Accept-Encoding" into it may be what is
 * wanted -- confirm. */
372 apr_table_setn(r->headers_out, "Vary", "Accept-Encoding");
373 apr_table_unset(r->headers_out, "Content-Length");
375 /* initialize deflate output buffer */
376 ctx->stream.next_out = ctx->buffer;
377 ctx->stream.avail_out = c->bufferSize;
380 APR_BRIGADE_FOREACH(e, bb) {
387 if (APR_BUCKET_IS_EOS(e)) {
389 unsigned int deflate_len;
391 ctx->stream.avail_in = 0; /* should be zero already anyway */
393 deflate_len = c->bufferSize - ctx->stream.avail_out;
395 if (deflate_len != 0) {
396 b = apr_bucket_heap_create((char *)ctx->buffer,
399 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
400 ctx->stream.next_out = ctx->buffer;
401 ctx->stream.avail_out = c->bufferSize;
408 zRC = deflate(&ctx->stream, Z_FINISH);
410 if (deflate_len == 0 && zRC == Z_BUF_ERROR) {
414 done = (ctx->stream.avail_out != 0 || zRC == Z_STREAM_END);
416 if (zRC != Z_OK && zRC != Z_STREAM_END) {
/* 8-byte trailer: the running CRC followed by the number of input
 * bytes, each written least significant byte first by putLong(). */
421 buf = apr_palloc(r->pool, 8);
422 putLong((unsigned char *)&buf[0], ctx->crc);
423 putLong((unsigned char *)&buf[4], ctx->stream.total_in);
425 b = apr_bucket_pool_create(buf, 8, r->pool, f->c->bucket_alloc);
426 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
/* NOTE(review): %ld is used for total_in/total_out; zlib's manual
 * declares these counters as unsigned (uLong) -- confirm and consider
 * %lu. */
427 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
428 "Zlib: Compressed %ld to %ld : URL %s",
429 ctx->stream.total_in, ctx->stream.total_out, r->uri);
432 if (ctx->stream.total_in > 0) {
/* Compressed size as an integer percentage of the input size. */
435 total = ctx->stream.total_out * 100 / ctx->stream.total_in;
437 apr_table_setn(r->notes, c->noteName,
438 apr_itoa(r->pool, total));
441 apr_table_setn(r->notes, c->noteName, "-");
445 deflateEnd(&ctx->stream);
447 /* Remove EOS from the old list, and insert into the new. */
448 APR_BUCKET_REMOVE(e);
449 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
451 /* Okay, we've seen the EOS.
452 * Time to pass it along down the chain.
454 return ap_pass_brigade(f->next, ctx->bb);
457 if (APR_BUCKET_IS_FLUSH(e)) {
459 zRC = deflate(&(ctx->stream), Z_SYNC_FLUSH);
464 ctx->stream.next_out = ctx->buffer;
465 len = c->bufferSize - ctx->stream.avail_out;
/* A NULL free function presumably makes the heap bucket copy the data
 * (per the APR bucket docs), so ctx->buffer can be reused right away. */
467 b = apr_bucket_heap_create((char *)ctx->buffer, len,
468 NULL, f->c->bucket_alloc);
469 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
470 ctx->stream.avail_out = c->bufferSize;
472 bkt = apr_bucket_flush_create(f->c->bucket_alloc);
473 APR_BRIGADE_INSERT_TAIL(ctx->bb, bkt);
/* NOTE(review): the status returned by ap_pass_brigade() is discarded
 * here, so a downstream failure on flush goes unnoticed. */
474 ap_pass_brigade(f->next, ctx->bb);
/* NOTE(review): the status returned by apr_bucket_read() is not
 * checked on this line. */
479 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
481 /* This crc32 function is from zlib. */
482 ctx->crc = crc32(ctx->crc, (const Bytef *)data, len);
485 ctx->stream.next_in = (unsigned char *)data; /* We just lost const-ness,
486 * but we'll just have to
488 ctx->stream.avail_in = len;
490 while (ctx->stream.avail_in != 0) {
491 if (ctx->stream.avail_out == 0) {
492 ctx->stream.next_out = ctx->buffer;
/* avail_out is 0 in this branch, so len is the whole buffer. */
493 len = c->bufferSize - ctx->stream.avail_out;
495 b = apr_bucket_heap_create((char *)ctx->buffer, len,
496 NULL, f->c->bucket_alloc);
497 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
498 ctx->stream.avail_out = c->bufferSize;
501 zRC = deflate(&(ctx->stream), Z_NO_FLUSH);
508 apr_brigade_destroy(bb);
/* Input filter: inflates a gzip-encoded request body.
 *
 * Reads in any mode other than AP_MODE_READBYTES are forwarded to the next
 * filter untouched.  On the first call (no f->ctx yet) the filter scans the
 * request's Content-Encoding for a "gzip" token, reads the 10-byte gzip
 * header from the next filter, checks the two magic bytes, refuses a
 * non-zero FLG byte, and initialises zlib for inflation.
 *
 * On each call it fetches more input only if no inflated data is still
 * queued in ctx->proc_bb, inflates every data bucket into ctx->buffer
 * (keeping a CRC of the inflated bytes), and finally moves up to readbytes
 * bytes from ctx->proc_bb into bb.
 *
 * NOTE(review): this listing omits many short lines (braces, local
 * declarations, error returns, some conditions), so the comments here
 * describe only what the visible lines show.
 */
512 /* This is the deflate input filter (inflates). */
513 static apr_status_t deflate_in_filter(ap_filter_t *f,
514 apr_bucket_brigade *bb,
515 ap_input_mode_t mode,
516 apr_read_type_e block,
520 request_rec *r = f->r;
521 deflate_ctx *ctx = f->ctx;
524 deflate_filter_config *c;
526 /* just get out of the way of things we don't want. */
527 if (mode != AP_MODE_READBYTES) {
528 return ap_get_brigade(f->next, bb, mode, block, readbytes);
531 c = ap_get_module_config(r->server->module_config, &deflate_module);
535 char *token, deflate_hdr[10];
536 const char *encoding;
539 /* only work on main request/no subrequests */
541 ap_remove_input_filter(f);
542 return ap_get_brigade(f->next, bb, mode, block, readbytes);
545 /* Let's see what our current Content-Encoding is.
546 * If gzip is present, don't gzip again. (We could, but let's not.)
548 encoding = apr_table_get(r->headers_in, "Content-Encoding");
550 const char *tmp = encoding;
552 token = ap_get_token(r->pool, &tmp, 0);
553 while (token && token[0]) {
554 if (!strcasecmp(token, "gzip")) {
558 /* Otherwise, skip token */
560 token = ap_get_token(r->pool, &tmp, 0);
565 ap_remove_input_filter(f);
566 return ap_get_brigade(f->next, bb, mode, block, readbytes);
569 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
570 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
571 ctx->proc_bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
572 ctx->buffer = apr_palloc(r->pool, c->bufferSize);
/* Ask the next filter for the fixed 10-byte gzip header. */
574 rv = ap_get_brigade(f->next, ctx->bb, AP_MODE_READBYTES, block, 10);
575 if (rv != APR_SUCCESS) {
580 rv = apr_brigade_flatten(ctx->bb, deflate_hdr, &len);
581 if (rv != APR_SUCCESS) {
585 /* We didn't get the magic bytes. */
587 deflate_hdr[0] != deflate_magic[0] ||
588 deflate_hdr[1] != deflate_magic[1]) {
592 /* We can't handle flags for now. */
593 if (deflate_hdr[3] != 0) {
/* c->windowSize is negative by default; per the DEFAULT_WINDOWSIZE
 * comment that suppresses the zlib header, which fits here because the
 * gzip header has already been consumed above. */
597 zRC = inflateInit2(&ctx->stream, c->windowSize);
601 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
602 "unable to init Zlib: "
603 "inflateInit2 returned %d: URL %s",
605 ap_remove_input_filter(f);
606 return ap_get_brigade(f->next, bb, mode, block, readbytes);
609 /* initialize deflate output buffer */
610 ctx->stream.next_out = ctx->buffer;
611 ctx->stream.avail_out = c->bufferSize;
613 apr_brigade_cleanup(ctx->bb);
/* Only fetch more input when no inflated data is still queued. */
616 if (APR_BRIGADE_EMPTY(ctx->proc_bb)) {
617 rv = ap_get_brigade(f->next, ctx->bb, mode, block, readbytes);
619 if (rv != APR_SUCCESS) {
623 APR_BRIGADE_FOREACH(bkt, ctx->bb) {
627 /* If we actually see the EOS, that means we screwed up! */
628 if (APR_BUCKET_IS_EOS(bkt)) {
632 if (APR_BUCKET_IS_FLUSH(bkt)) {
633 apr_bucket *tmp_heap;
634 zRC = inflate(&(ctx->stream), Z_SYNC_FLUSH);
639 ctx->stream.next_out = ctx->buffer;
640 len = c->bufferSize - ctx->stream.avail_out;
642 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
643 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
644 NULL, f->c->bucket_alloc);
645 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
646 ctx->stream.avail_out = c->bufferSize;
648 /* Move everything to the returning brigade. */
649 APR_BUCKET_REMOVE(bkt);
650 APR_BRIGADE_CONCAT(bb, ctx->bb);
/* NOTE(review): the status returned by apr_bucket_read() is not
 * checked on this line. */
655 apr_bucket_read(bkt, &data, &len, APR_BLOCK_READ);
657 /* pass through zlib inflate. */
658 ctx->stream.next_in = (unsigned char *)data;
659 ctx->stream.avail_in = len;
663 while (ctx->stream.avail_in != 0) {
664 if (ctx->stream.avail_out == 0) {
665 apr_bucket *tmp_heap;
666 ctx->stream.next_out = ctx->buffer;
/* avail_out is 0 in this branch, so len is the whole buffer. */
667 len = c->bufferSize - ctx->stream.avail_out;
669 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
670 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
671 NULL, f->c->bucket_alloc);
672 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
673 ctx->stream.avail_out = c->bufferSize;
676 zRC = inflate(&ctx->stream, Z_NO_FLUSH);
678 if (zRC == Z_STREAM_END) {
686 if (zRC == Z_STREAM_END) {
687 apr_bucket *tmp_heap, *eos;
689 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
690 "Zlib: Inflated %ld to %ld : URL %s",
691 ctx->stream.total_in, ctx->stream.total_out,
694 len = c->bufferSize - ctx->stream.avail_out;
696 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
697 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
698 NULL, f->c->bucket_alloc);
699 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
700 ctx->stream.avail_out = c->bufferSize;
702 /* Is the remaining 8 bytes already in the avail stream? */
703 if (ctx->stream.avail_in >= 8) {
704 unsigned long compCRC, compLen;
/* Trailer layout read here: 4-byte CRC, then 4-byte length, each
 * decoded least significant byte first by getLong() and compared
 * with the CRC and byte count of the inflated data.
 * NOTE(review): when fewer than 8 bytes are buffered this check is
 * skipped (see the FIXME that follows). */
705 compCRC = getLong(ctx->stream.next_in);
706 if (ctx->crc != compCRC) {
709 ctx->stream.next_in += 4;
710 compLen = getLong(ctx->stream.next_in);
711 if (ctx->stream.total_out != compLen) {
716 /* FIXME: We need to grab the 8 verification bytes
721 inflateEnd(&ctx->stream);
723 eos = apr_bucket_eos_create(f->c->bucket_alloc);
724 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, eos);
729 apr_brigade_cleanup(ctx->bb);
732 /* If we are about to return nothing for a 'blocking' read and we have
733 * some data in our zlib buffer, flush it out so we can return something.
735 if (block == APR_BLOCK_READ &&
736 APR_BRIGADE_EMPTY(ctx->proc_bb) &&
737 ctx->stream.avail_out < c->bufferSize) {
738 apr_bucket *tmp_heap;
740 ctx->stream.next_out = ctx->buffer;
741 len = c->bufferSize - ctx->stream.avail_out;
743 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
744 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
745 NULL, f->c->bucket_alloc);
746 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
747 ctx->stream.avail_out = c->bufferSize;
750 if (!APR_BRIGADE_EMPTY(ctx->proc_bb)) {
751 apr_bucket_brigade *newbb;
753 /* May return APR_INCOMPLETE which is fine by us. */
754 apr_brigade_partition(ctx->proc_bb, readbytes, &bkt);
/* Split at the readbytes boundary: the front part goes to the caller's
 * bb, the remainder is put back into proc_bb for the next call. */
756 newbb = apr_brigade_split(ctx->proc_bb, bkt);
757 APR_BRIGADE_CONCAT(bb, ctx->proc_bb);
758 APR_BRIGADE_CONCAT(ctx->proc_bb, newbb);
/* Hook-registration callback named in the module record.  Registers
 * deflate_out_filter as an output filter and deflate_in_filter as an input
 * filter, both under deflateFilterName and both with type
 * AP_FTYPE_CONTENT_SET.  p is not used on any visible line.
 * NOTE(review): the function's braces are not visible in this listing. */
764 static void register_hooks(apr_pool_t *p)
766 ap_register_output_filter(deflateFilterName, deflate_out_filter, NULL,
767 AP_FTYPE_CONTENT_SET);
768 ap_register_input_filter(deflateFilterName, deflate_in_filter, NULL,
769 AP_FTYPE_CONTENT_SET);
/* Configuration directives provided by this module.  Each is declared with
 * AP_INIT_TAKE1 and RSRC_CONF and is bound to the matching deflate_set_*
 * handler.
 * NOTE(review): the table terminator and closing brace are not visible in
 * this listing. */
772 static const command_rec deflate_filter_cmds[] = {
773 AP_INIT_TAKE1("DeflateFilterNote", deflate_set_note, NULL, RSRC_CONF,
774 "Set a note to report on compression ratio"),
775 AP_INIT_TAKE1("DeflateWindowSize", deflate_set_window_size, NULL,
776 RSRC_CONF, "Set the Deflate window size (1-15)"),
777 AP_INIT_TAKE1("DeflateBufferSize", deflate_set_buffer_size, NULL, RSRC_CONF,
778 "Set the Deflate Buffer Size"),
779 AP_INIT_TAKE1("DeflateMemLevel", deflate_set_memlevel, NULL, RSRC_CONF,
780 "Set the Deflate Memory Level (1-9)"),
/* Module record.  Only a server-config constructor, the directive table and
 * the hook-registration function are supplied; there is no per-directory
 * config and no server-config merge function, so those slots are NULL.
 * NOTE(review): the closing brace is not visible in this listing. */
784 module AP_MODULE_DECLARE_DATA deflate_module = {
785 STANDARD20_MODULE_STUFF,
786 NULL, /* dir config creater */
787 NULL, /* dir merger --- default is to override */
788 create_deflate_server_config, /* server config */
789 NULL, /* merge server config */
790 deflate_filter_cmds, /* command table */
791 register_hooks /* register hooks */