1 /* ====================================================================
2 * The Apache Software License, Version 1.1
4 * Copyright (c) 2000-2002 The Apache Software Foundation. All rights
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
19 * 3. The end-user documentation included with the redistribution,
20 * if any, must include the following acknowledgment:
21 * "This product includes software developed by the
22 * Apache Software Foundation (http://www.apache.org/)."
23 * Alternately, this acknowledgment may appear in the software itself,
24 * if and wherever such third-party acknowledgments normally appear.
26 * 4. The names "Apache" and "Apache Software Foundation" must
27 * not be used to endorse or promote products derived from this
28 * software without prior written permission. For written
29 * permission, please contact apache@apache.org.
31 * 5. Products derived from this software may not be called "Apache",
32 * nor may "Apache" appear in their name, without prior written
33 * permission of the Apache Software Foundation.
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * ====================================================================
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation. For more
51 * information on the Apache Software Foundation, please see
52 * <http://www.apache.org/>.
54 * Portions of this software are based upon public domain software
55 * (zlib functions gz_open and gzwrite)
59 * mod_deflate.c: Perform deflate transfer-encoding on the fly
61 * Written by Ian Holsman (IanH@apache.org)
66 #include "http_config.h"
68 #include "apr_strings.h"
69 #include "apr_general.h"
70 #include "util_filter.h"
71 #include "apr_buckets.h"
72 #include "http_request.h"
73 #define APR_WANT_STRFUNC
/* OS_CODE selection.  The value picked here is written as the last byte of
 * the gzip header built in deflate_out_filter; 0x03 is the fallback.
 */
81 /* As part of the encoding process, we must send what our OS_CODE is
82 * (or so it seems based on what I can tell of how gzip encoding works).
84 * zutil.h is not always included with zlib distributions (it is a private
85 * header), so this is straight from zlib 1.1.3's zutil.h.
91 #ifdef WIN32 /* Windows 95 & Windows NT */
95 #if defined(VAXC) || defined(VMS)
103 #if defined(ATARI) || defined(atarist)
107 #if defined(MACOS) || defined(TARGET_OS_MAC)
111 #ifdef __50SERIES /* Prime/PRIMOS */
120 #define OS_CODE 0x03 /* assume Unix */
/* Name under which register_hooks registers both the output and the input
 * filter.
 */
124 static const char deflateFilterName[] = "DEFLATE";
/* Forward declaration: the module record is defined at the end of the file,
 * but the configuration lookups before it need its address.
 */
125 module AP_MODULE_DECLARE_DATA deflate_module;
/* Per-server settings for the DEFLATE filters.  Only bufferSize is declared
 * on the lines shown here; the rest of the file also reads and writes
 * memlevel, windowSize and noteName through this type.
 */
127 typedef struct deflate_filter_config_t
/* Size in bytes of the zlib output buffer each filter context allocates. */
131 apr_size_t bufferSize;
133 } deflate_filter_config;
135 /* windowsize is negative to suppress Zlib header */
136 #define DEFAULT_WINDOWSIZE -15
/* Default for the memlevel setting that is handed to deflateInit2. */
137 #define DEFAULT_MEMLEVEL 9
/* Default size in bytes of the per-request zlib output buffer. */
138 #define DEFAULT_BUFFERSIZE 8096
/* Store the low 32 bits of x into string[0..3], least significant byte
 * first.  The output filter uses this for the two 4-byte fields of the
 * gzip trailer (CRC and input length).  Bits of x above bit 31 are dropped.
 *
 * string must point to at least 4 writable bytes.
 */
static void putLong(unsigned char *string, unsigned long x)
{
    int i;

    for (i = 0; i < 4; i++) {
        string[i] = (unsigned char)((x >> (8 * i)) & 0xff);
    }
}
/* Assemble an unsigned 32-bit value from string[0..3], where string[0] is
 * the least significant byte.  The input filter uses this to read the CRC
 * and length fields that follow the compressed data.
 *
 * string must point to at least 4 readable bytes; it is not modified.
 */
static unsigned long getLong(const unsigned char *string)
{
    unsigned long value = 0;
    int i;

    /* Walk from the most significant byte down so each step shifts in 8 bits. */
    for (i = 3; i >= 0; i--) {
        value = (value << 8) | (unsigned long)string[i];
    }
    return value;
}
/* Server-config constructor listed in the deflate_module record.  Allocates
 * a deflate_filter_config from pool p and seeds memlevel, windowSize and
 * bufferSize with the compiled-in defaults.  s is not referenced on the
 * lines shown; the return statement is not part of this excerpt.
 */
161 static void *create_deflate_server_config(apr_pool_t *p, server_rec *s)
163 deflate_filter_config *c = apr_pcalloc(p, sizeof *c);
165 c->memlevel = DEFAULT_MEMLEVEL;
166 c->windowSize = DEFAULT_WINDOWSIZE;
167 c->bufferSize = DEFAULT_BUFFERSIZE;
/* Handler for the DeflateWindowSize directive.  Looks up this server's
 * deflate_filter_config, returns the error string below for a rejected
 * value, and otherwise stores the value negated -- the same negative
 * convention DEFAULT_WINDOWSIZE uses to suppress the zlib header.
 */
172 static const char *deflate_set_window_size(cmd_parms *cmd, void *dummy,
175 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
182 return "DeflateWindowSize must be between 1 and 15";
184 c->windowSize = i * -1;
/* Handler for the DeflateBufferSize directive.  Looks up this server's
 * deflate_filter_config, returns the error string below for a rejected
 * value, and otherwise stores n as the buffer size in bytes.
 */
189 static const char *deflate_set_buffer_size(cmd_parms *cmd, void *dummy,
192 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
197 return "DeflateBufferSize should be positive";
200 c->bufferSize = (apr_size_t)n;
/* Handler for the DeflateFilterNote directive.  Copies the directive
 * argument into cmd->pool and keeps it as noteName; deflate_out_filter uses
 * that name as the key under which it stores the compression ratio in
 * r->notes.
 */
204 static const char *deflate_set_note(cmd_parms *cmd, void *dummy,
207 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
209 c->noteName = apr_pstrdup(cmd->pool, arg);
/* Handler for the DeflateMemLevel directive.  Looks up this server's
 * deflate_filter_config and returns the error string below for a rejected
 * value.  The range check and the assignment are on lines not included in
 * this excerpt.
 */
214 static const char *deflate_set_memlevel(cmd_parms *cmd, void *dummy,
217 deflate_filter_config *c = ap_get_module_config(cmd->server->module_config,
224 return "DeflateMemLevel must be between 1 and 9";
/* The two identification bytes (ID1, ID2) that open a gzip stream: 0x1f
 * 0x8b.  The table is read-only, hence const.  It stays plain char (not
 * unsigned char) so its elements compare equal to header bytes that the
 * input filter reads into a char buffer.
 */
static const char deflate_magic[2] = { '\037', '\213' };
/* Per-request state shared by the output and input filters.  Only buffer,
 * bb and proc_bb are declared on the lines shown; the filters also use
 * stream and crc members of this type.
 */
234 typedef struct deflate_ctx_t
/* zlib output staging area; allocated with the configured bufferSize. */
237 unsigned char *buffer;
/* bb: buckets being collected for (or read from) the neighbouring filter.
 * proc_bb: created and used by the input filter only, for inflated data
 * that has not yet been handed to the caller.
 */
239 apr_bucket_brigade *bb, *proc_bb;
/* Output filter: gzip-compresses the response body.
 *
 * When no context exists yet the filter first decides whether compression
 * is acceptable; if not, it removes itself and passes the brigade on
 * untouched.  The opt-outs visible here are: the "no-gzip" environment
 * variable, "gzip-only-text/html" set to "1" for a content type that does
 * not start with text/html, a Content-Encoding that already lists gzip,
 * and a request without a gzip token in Accept-Encoding.  Once accepted it
 * allocates the context, initialises zlib, queues the 10-byte gzip header,
 * sets Content-Encoding and removes Content-Length.
 *
 * The brigade is then walked bucket by bucket: data is added to the
 * running CRC and fed to deflate(), full output buffers are appended to
 * ctx->bb, a FLUSH bucket triggers Z_SYNC_FLUSH and sends ctx->bb down the
 * chain, and EOS finishes the stream, appends the 8-byte trailer (CRC and
 * input length), stores the compression ratio in r->notes, and passes
 * everything on.
 *
 * NOTE(review): "Vary" is written with apr_table_setn while
 * Content-Encoding uses apr_table_mergen -- confirm that an existing Vary
 * value does not need to be preserved.
 * NOTE(review): on the lines shown, the results of ap_pass_brigade() in the
 * FLUSH branch and of apr_bucket_read() are not captured -- confirm that
 * errors are handled on lines not included in this excerpt.
 */
242 static apr_status_t deflate_out_filter(ap_filter_t *f,
243 apr_bucket_brigade *bb)
246 request_rec *r = f->r;
247 deflate_ctx *ctx = f->ctx;
249 deflate_filter_config *c = ap_get_module_config(r->server->module_config,
252 /* If we don't have a context, we need to ensure that it is okay to send
253 * the deflated content. If we have a context, that means we've done
254 * this before and we liked it.
255 * This could be not so nice if we always fail. But, if we succeed,
256 * we're in better shape.
260 const char *encoding, *accepts;
262 /* only work on main request/no subrequests */
264 ap_remove_output_filter(f);
265 return ap_pass_brigade(f->next, bb);
268 /* some browsers might have problems, so set no-gzip
269 * (with browsermatch) for them
271 if (apr_table_get(r->subprocess_env, "no-gzip")) {
272 ap_remove_output_filter(f);
273 return ap_pass_brigade(f->next, bb);
276 /* Some browsers might have problems with content types
277 * other than text/html, so set gzip-only-text/html
278 * (with browsermatch) for them
280 if (r->content_type == NULL
281 || strncmp(r->content_type, "text/html", 9)) {
282 const char *env_value = apr_table_get(r->subprocess_env,
283 "gzip-only-text/html");
284 if ( env_value && (strcmp(env_value,"1") == 0) ) {
285 ap_remove_output_filter(f);
286 return ap_pass_brigade(f->next, bb);
290 /* Let's see what our current Content-Encoding is.
291 * If gzip is present, don't gzip again. (We could, but let's not.)
293 encoding = apr_table_get(r->headers_out, "Content-Encoding");
295 const char *tmp = encoding;
297 token = ap_get_token(r->pool, &tmp, 0);
298 while (token && token[0]) {
299 if (!strcasecmp(token, "gzip")) {
300 ap_remove_output_filter(f);
301 return ap_pass_brigade(f->next, bb);
303 /* Otherwise, skip token */
305 token = ap_get_token(r->pool, &tmp, 0);
309 /* Even if we don't accept this request based on it not having
310 * the Accept-Encoding, we need to note that we were looking
311 * for this header and downstream proxies should be aware of that.
313 apr_table_setn(r->headers_out, "Vary", "Accept-Encoding");
315 /* if they don't have the line, then they can't play */
316 accepts = apr_table_get(r->headers_in, "Accept-Encoding");
317 if (accepts == NULL) {
318 ap_remove_output_filter(f);
319 return ap_pass_brigade(f->next, bb);
322 token = ap_get_token(r->pool, &accepts, 0);
323 while (token && token[0] && strcasecmp(token, "gzip")) {
326 token = ap_get_token(r->pool, &accepts, 0);
329 /* No acceptable token found. */
330 if (token == NULL || token[0] == '\0') {
331 ap_remove_output_filter(f);
332 return ap_pass_brigade(f->next, bb);
335 /* We're cool with filtering this. */
336 ctx = f->ctx = apr_pcalloc(r->pool, sizeof(*ctx));
337 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
338 ctx->buffer = apr_palloc(r->pool, c->bufferSize);
340 zRC = deflateInit2(&ctx->stream, Z_BEST_SPEED, Z_DEFLATED,
341 c->windowSize, c->memlevel,
346 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
347 "unable to init Zlib: "
348 "deflateInit2 returned %d: URL %s",
350 return ap_pass_brigade(f->next, bb);
353 /* RFC 1952 Section 2.3 dictates the gzip header:
355 * +---+---+---+---+---+---+---+---+---+---+
356 * |ID1|ID2|CM |FLG| MTIME |XFL|OS |
357 * +---+---+---+---+---+---+---+---+---+---+
359 * If we wish to populate in MTIME (as hinted in RFC 1952), do:
360 * putLong(date_array, apr_time_now() / APR_USEC_PER_SEC);
361 * where date_array is a char[4] and then print date_array in the
362 * MTIME position. WARNING: ENDIANNESS ISSUE HERE.
364 buf = apr_psprintf(r->pool, "%c%c%c%c%c%c%c%c%c%c", deflate_magic[0],
365 deflate_magic[1], Z_DEFLATED, 0 /* flags */,
366 0, 0, 0, 0 /* 4 chars for mtime */,
367 0 /* xflags */, OS_CODE);
/* The header contains zero bytes, so its length (10) is passed explicitly. */
368 e = apr_bucket_pool_create(buf, 10, r->pool, f->c->bucket_alloc);
369 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
371 /* If the entire Content-Encoding is "identity", we can replace it. */
372 if (!encoding || !strcasecmp(encoding, "identity")) {
373 apr_table_setn(r->headers_out, "Content-Encoding", "gzip");
376 apr_table_mergen(r->headers_out, "Content-Encoding", "gzip");
378 apr_table_unset(r->headers_out, "Content-Length");
380 /* initialize deflate output buffer */
381 ctx->stream.next_out = ctx->buffer;
382 ctx->stream.avail_out = c->bufferSize;
385 APR_BRIGADE_FOREACH(e, bb) {
392 if (APR_BUCKET_IS_EOS(e)) {
394 unsigned int deflate_len;
396 ctx->stream.avail_in = 0; /* should be zero already anyway */
398 deflate_len = c->bufferSize - ctx->stream.avail_out;
400 if (deflate_len != 0) {
401 b = apr_bucket_heap_create((char *)ctx->buffer,
404 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
405 ctx->stream.next_out = ctx->buffer;
406 ctx->stream.avail_out = c->bufferSize;
413 zRC = deflate(&ctx->stream, Z_FINISH);
415 if (deflate_len == 0 && zRC == Z_BUF_ERROR) {
419 done = (ctx->stream.avail_out != 0 || zRC == Z_STREAM_END);
421 if (zRC != Z_OK && zRC != Z_STREAM_END) {
/* 8-byte trailer: running CRC first, then the total input length, each
 * written by putLong.
 */
426 buf = apr_palloc(r->pool, 8);
427 putLong((unsigned char *)&buf[0], ctx->crc);
428 putLong((unsigned char *)&buf[4], ctx->stream.total_in);
430 b = apr_bucket_pool_create(buf, 8, r->pool, f->c->bucket_alloc);
431 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
432 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
433 "Zlib: Compressed %ld to %ld : URL %s",
434 ctx->stream.total_in, ctx->stream.total_out, r->uri);
437 if (ctx->stream.total_in > 0) {
/* Compressed size as a percentage of the uncompressed size. */
440 total = ctx->stream.total_out * 100 / ctx->stream.total_in;
442 apr_table_setn(r->notes, c->noteName,
443 apr_itoa(r->pool, total));
446 apr_table_setn(r->notes, c->noteName, "-");
450 deflateEnd(&ctx->stream);
452 /* Remove EOS from the old list, and insert into the new. */
453 APR_BUCKET_REMOVE(e);
454 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
456 /* Okay, we've seen the EOS.
457 * Time to pass it along down the chain.
459 return ap_pass_brigade(f->next, ctx->bb);
462 if (APR_BUCKET_IS_FLUSH(e)) {
464 zRC = deflate(&(ctx->stream), Z_SYNC_FLUSH);
469 ctx->stream.next_out = ctx->buffer;
470 len = c->bufferSize - ctx->stream.avail_out;
472 b = apr_bucket_heap_create((char *)ctx->buffer, len,
473 NULL, f->c->bucket_alloc);
474 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
475 ctx->stream.avail_out = c->bufferSize;
477 bkt = apr_bucket_flush_create(f->c->bucket_alloc);
478 APR_BRIGADE_INSERT_TAIL(ctx->bb, bkt);
479 ap_pass_brigade(f->next, ctx->bb);
484 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
486 /* This crc32 function is from zlib. */
487 ctx->crc = crc32(ctx->crc, (const Bytef *)data, len);
490 ctx->stream.next_in = (unsigned char *)data; /* We just lost const-ness,
491 * but we'll just have to
493 ctx->stream.avail_in = len;
495 while (ctx->stream.avail_in != 0) {
496 if (ctx->stream.avail_out == 0) {
497 ctx->stream.next_out = ctx->buffer;
498 len = c->bufferSize - ctx->stream.avail_out;
500 b = apr_bucket_heap_create((char *)ctx->buffer, len,
501 NULL, f->c->bucket_alloc);
502 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
503 ctx->stream.avail_out = c->bufferSize;
506 zRC = deflate(&(ctx->stream), Z_NO_FLUSH);
513 apr_brigade_destroy(bb);
517 /* This is the deflate input filter (inflates). */
/* Only AP_MODE_READBYTES reads are handled; any other mode is forwarded to
 * the next filter unchanged.
 *
 * Setup (presumably on the first call, when no context exists -- the guard
 * is not part of this excerpt): the request's Content-Encoding tokens are
 * scanned for gzip, the context and its two brigades are allocated, 10
 * bytes are read for the gzip header, the two magic bytes and the flags
 * byte (deflate_hdr[3], which must be zero) are checked, and zlib is
 * initialised with inflateInit2.
 *
 * When proc_bb is empty, more input is pulled into ctx->bb and run through
 * inflate().  Inflated data is folded into the running CRC and queued on
 * proc_bb.  At Z_STREAM_END the stored CRC and length are compared with the
 * computed ones if all 8 trailer bytes are already in the zlib input, and
 * an EOS bucket is queued.  Finally proc_bb is partitioned at readbytes:
 * the leading part is moved into bb and the remainder stays in proc_bb.
 *
 * NOTE(review): the result of apr_bucket_read() is not captured on the line
 * shown -- confirm that read errors are handled.
 */
518 static apr_status_t deflate_in_filter(ap_filter_t *f,
519 apr_bucket_brigade *bb,
520 ap_input_mode_t mode,
521 apr_read_type_e block,
525 request_rec *r = f->r;
526 deflate_ctx *ctx = f->ctx;
529 deflate_filter_config *c;
531 /* just get out of the way of things we don't want. */
532 if (mode != AP_MODE_READBYTES) {
533 return ap_get_brigade(f->next, bb, mode, block, readbytes);
536 c = ap_get_module_config(r->server->module_config, &deflate_module);
540 char *token, deflate_hdr[10];
541 const char *encoding;
544 /* only work on main request/no subrequests */
546 ap_remove_input_filter(f);
547 return ap_get_brigade(f->next, bb, mode, block, readbytes);
550 /* Let's see what our current Content-Encoding is.
551 * A gzip token is what this filter looks for; the earlier wording here ("don't gzip again") described the output filter -- TODO confirm against the lines not shown.
553 encoding = apr_table_get(r->headers_in, "Content-Encoding");
555 const char *tmp = encoding;
557 token = ap_get_token(r->pool, &tmp, 0);
558 while (token && token[0]) {
559 if (!strcasecmp(token, "gzip")) {
563 /* Otherwise, skip token */
565 token = ap_get_token(r->pool, &tmp, 0);
570 ap_remove_input_filter(f);
571 return ap_get_brigade(f->next, bb, mode, block, readbytes);
574 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
575 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
576 ctx->proc_bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
577 ctx->buffer = apr_palloc(r->pool, c->bufferSize);
579 rv = ap_get_brigade(f->next, ctx->bb, AP_MODE_READBYTES, block, 10);
580 if (rv != APR_SUCCESS) {
585 rv = apr_brigade_flatten(ctx->bb, deflate_hdr, &len);
586 if (rv != APR_SUCCESS) {
590 /* We didn't get the magic bytes. */
592 deflate_hdr[0] != deflate_magic[0] ||
593 deflate_hdr[1] != deflate_magic[1]) {
597 /* We can't handle flags for now. */
598 if (deflate_hdr[3] != 0) {
602 zRC = inflateInit2(&ctx->stream, c->windowSize);
606 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
607 "unable to init Zlib: "
608 "inflateInit2 returned %d: URL %s",
610 ap_remove_input_filter(f);
611 return ap_get_brigade(f->next, bb, mode, block, readbytes);
614 /* initialize deflate output buffer */
615 ctx->stream.next_out = ctx->buffer;
616 ctx->stream.avail_out = c->bufferSize;
618 apr_brigade_cleanup(ctx->bb);
621 if (APR_BRIGADE_EMPTY(ctx->proc_bb)) {
622 rv = ap_get_brigade(f->next, ctx->bb, mode, block, readbytes);
624 if (rv != APR_SUCCESS) {
628 APR_BRIGADE_FOREACH(bkt, ctx->bb) {
632 /* If we actually see the EOS, that means we screwed up! */
633 if (APR_BUCKET_IS_EOS(bkt)) {
637 if (APR_BUCKET_IS_FLUSH(bkt)) {
638 apr_bucket *tmp_heap;
639 zRC = inflate(&(ctx->stream), Z_SYNC_FLUSH);
644 ctx->stream.next_out = ctx->buffer;
645 len = c->bufferSize - ctx->stream.avail_out;
647 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
648 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
649 NULL, f->c->bucket_alloc);
650 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
651 ctx->stream.avail_out = c->bufferSize;
653 /* Move everything to the returning brigade. */
654 APR_BUCKET_REMOVE(bkt);
655 APR_BRIGADE_CONCAT(bb, ctx->bb);
660 apr_bucket_read(bkt, &data, &len, APR_BLOCK_READ);
662 /* pass through zlib inflate. */
663 ctx->stream.next_in = (unsigned char *)data;
664 ctx->stream.avail_in = len;
668 while (ctx->stream.avail_in != 0) {
669 if (ctx->stream.avail_out == 0) {
670 apr_bucket *tmp_heap;
671 ctx->stream.next_out = ctx->buffer;
672 len = c->bufferSize - ctx->stream.avail_out;
674 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
675 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
676 NULL, f->c->bucket_alloc);
677 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
678 ctx->stream.avail_out = c->bufferSize;
681 zRC = inflate(&ctx->stream, Z_NO_FLUSH);
683 if (zRC == Z_STREAM_END) {
691 if (zRC == Z_STREAM_END) {
692 apr_bucket *tmp_heap, *eos;
694 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
695 "Zlib: Inflated %ld to %ld : URL %s",
696 ctx->stream.total_in, ctx->stream.total_out,
699 len = c->bufferSize - ctx->stream.avail_out;
701 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
702 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
703 NULL, f->c->bucket_alloc);
704 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
705 ctx->stream.avail_out = c->bufferSize;
707 /* Are the remaining 8 bytes already in the avail stream? */
708 if (ctx->stream.avail_in >= 8) {
709 unsigned long compCRC, compLen;
710 compCRC = getLong(ctx->stream.next_in);
711 if (ctx->crc != compCRC) {
714 ctx->stream.next_in += 4;
715 compLen = getLong(ctx->stream.next_in);
716 if (ctx->stream.total_out != compLen) {
721 /* FIXME: We need to grab the 8 verification bytes
726 inflateEnd(&ctx->stream);
728 eos = apr_bucket_eos_create(f->c->bucket_alloc);
729 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, eos);
734 apr_brigade_cleanup(ctx->bb);
737 /* If we are about to return nothing for a 'blocking' read and we have
738 * some data in our zlib buffer, flush it out so we can return something.
740 if (block == APR_BLOCK_READ &&
741 APR_BRIGADE_EMPTY(ctx->proc_bb) &&
742 ctx->stream.avail_out < c->bufferSize) {
743 apr_bucket *tmp_heap;
745 ctx->stream.next_out = ctx->buffer;
746 len = c->bufferSize - ctx->stream.avail_out;
748 ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer, len);
749 tmp_heap = apr_bucket_heap_create((char *)ctx->buffer, len,
750 NULL, f->c->bucket_alloc);
751 APR_BRIGADE_INSERT_TAIL(ctx->proc_bb, tmp_heap);
752 ctx->stream.avail_out = c->bufferSize;
755 if (!APR_BRIGADE_EMPTY(ctx->proc_bb)) {
756 apr_bucket_brigade *newbb;
758 /* May return APR_INCOMPLETE which is fine by us. */
759 apr_brigade_partition(ctx->proc_bb, readbytes, &bkt);
761 newbb = apr_brigade_split(ctx->proc_bb, bkt);
762 APR_BRIGADE_CONCAT(bb, ctx->proc_bb);
763 APR_BRIGADE_CONCAT(ctx->proc_bb, newbb);
/* Hook-registration callback listed in the deflate_module record.  Registers
 * deflate_out_filter and deflate_in_filter under the same name
 * (deflateFilterName, "DEFLATE"), both with type AP_FTYPE_CONTENT_SET.
 * p is not referenced on the lines shown.
 */
769 static void register_hooks(apr_pool_t *p)
771 ap_register_output_filter(deflateFilterName, deflate_out_filter, NULL,
772 AP_FTYPE_CONTENT_SET);
773 ap_register_input_filter(deflateFilterName, deflate_in_filter, NULL,
774 AP_FTYPE_CONTENT_SET);
/* Configuration directives provided by this module.  Each entry takes one
 * argument (AP_INIT_TAKE1), is declared with RSRC_CONF, and names the
 * handler that stores the value in the per-server deflate_filter_config.
 * The end of the table is not part of this excerpt.
 */
777 static const command_rec deflate_filter_cmds[] = {
778 AP_INIT_TAKE1("DeflateFilterNote", deflate_set_note, NULL, RSRC_CONF,
779 "Set a note to report on compression ratio"),
780 AP_INIT_TAKE1("DeflateWindowSize", deflate_set_window_size, NULL,
781 RSRC_CONF, "Set the Deflate window size (1-15)"),
782 AP_INIT_TAKE1("DeflateBufferSize", deflate_set_buffer_size, NULL, RSRC_CONF,
783 "Set the Deflate Buffer Size"),
784 AP_INIT_TAKE1("DeflateMemLevel", deflate_set_memlevel, NULL, RSRC_CONF,
785 "Set the Deflate Memory Level (1-9)"),
/* Module record.  It supplies a server-config constructor, the directive
 * table and the hook-registration callback; the per-directory slots and
 * the server-config merge slot are left NULL.
 */
789 module AP_MODULE_DECLARE_DATA deflate_module = {
790 STANDARD20_MODULE_STUFF,
791 NULL, /* dir config creator */
792 NULL, /* dir merger --- default is to override */
793 create_deflate_server_config, /* server config */
794 NULL, /* merge server config */
795 deflate_filter_cmds, /* command table */
796 register_hooks /* register hooks */