/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * simple hokey charset recoding configuration module
 *
 * See mod_ebcdic and mod_charset for more thought-out examples.  This
 * one is just so Jeff can learn how a module works and experiment with
 * basic character set recoding configuration.
 *
 * !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!!
 */
#include "httpd.h"
#include "http_config.h"
#include "http_core.h"
#include "http_log.h"
#include "http_main.h"
#include "http_protocol.h"
#include "http_request.h"
#include "util_charset.h"
#include "apr_buckets.h"
#include "util_filter.h"
#include "apr_strings.h"
#include "apr_xlate.h"
#define APR_WANT_STRFUNC
#include "apr_want.h"
#define OUTPUT_XLATE_BUF_SIZE (16*1024) /* size of translation buffer used on output */
#define INPUT_XLATE_BUF_SIZE  (8*1024)  /* size of translation buffer used on input */

#define XLATE_MIN_BUFF_LEFT 128  /* flush once there is no more than this much
                                  * space left in the translation buffer
                                  */

#define FATTEST_CHAR  8          /* we don't handle chars wider than this that
                                  * straddle two buckets
                                  */
/* extended error status codes; this is used in addition to an apr_status_t to
 * track errors in the translation filter
 */
typedef enum {
    EES_INIT = 0,        /* no error info yet; value must be 0 for easy init */
    EES_LIMIT,           /* built-in restriction encountered */
    EES_INCOMPLETE_CHAR, /* incomplete multi-byte char at end of content */
    EES_BUCKET_READ,     /* reading data out of a bucket failed */
    EES_DOWNSTREAM,      /* something bad happened in a filter below xlate */
    EES_BAD_INPUT        /* input data invalid */
} ees_t;
/* name under which the output (response body) translation filter is registered */
#define XLATEOUT_FILTER_NAME "XLATEOUT"
/* name under which the input (request body) translation filter is registered */
#define XLATEIN_FILTER_NAME  "XLATEIN"
/* Per-directory configuration.  A zero-filled instance means "nothing
 * configured": both names are NULL and both option enums are at *_INIT.
 */
typedef struct charset_dir_t {
    const char *charset_source;  /* source encoding */
    const char *charset_default; /* how to ship on wire */
    /** module does ap_add_*_filter()? */
    enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add;
    /** treat all mimetypes as text? */
    enum {FX_INIT, FX_FORCE, FX_NOFORCE} force_xlate;
} charset_dir_t;
81 /* charset_filter_ctx_t is created for each filter instance; because the same
82 * filter code is used for translating in both directions, we need this context
83 * data to tell the filter which translation handle to use; it also can hold a
84 * character which was split between buckets
86 typedef struct charset_filter_ctx_t {
88 int is_sb; /* single-byte translation? */
90 ees_t ees; /* extended error status */
92 char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */
93 int ran; /* has filter instance run before? */
94 int noop; /* should we pass brigades through unchanged? */
95 char *tmp; /* buffer for input filtering */
96 apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */
97 apr_bucket_brigade *tmpbb; /* used for passing downstream */
98 } charset_filter_ctx_t;
100 /* charset_req_t is available via r->request_config if any translation is
103 typedef struct charset_req_t {
105 charset_filter_ctx_t *output_ctx, *input_ctx;
108 module AP_MODULE_DECLARE_DATA charset_lite_module;
110 static void *create_charset_dir_conf(apr_pool_t *p,char *dummy)
112 charset_dir_t *dc = (charset_dir_t *)apr_pcalloc(p,sizeof(charset_dir_t));
117 static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv)
119 charset_dir_t *a = (charset_dir_t *)apr_pcalloc (p, sizeof(charset_dir_t));
120 charset_dir_t *base = (charset_dir_t *)basev,
121 *over = (charset_dir_t *)overridesv;
123 /* If it is defined in the current container, use it. Otherwise, use the one
124 * from the enclosing container.
128 over->charset_default ? over->charset_default : base->charset_default;
130 over->charset_source ? over->charset_source : base->charset_source;
132 over->implicit_add != IA_INIT ? over->implicit_add : base->implicit_add;
134 over->force_xlate != FX_INIT ? over->force_xlate : base->force_xlate;
138 /* CharsetSourceEnc charset
140 static const char *add_charset_source(cmd_parms *cmd, void *in_dc,
143 charset_dir_t *dc = in_dc;
145 dc->charset_source = name;
149 /* CharsetDefault charset
151 static const char *add_charset_default(cmd_parms *cmd, void *in_dc,
154 charset_dir_t *dc = in_dc;
156 dc->charset_default = name;
160 /* CharsetOptions optionflag...
162 static const char *add_charset_options(cmd_parms *cmd, void *in_dc,
165 charset_dir_t *dc = in_dc;
167 if (!strcasecmp(flag, "ImplicitAdd")) {
168 dc->implicit_add = IA_IMPADD;
170 else if (!strcasecmp(flag, "NoImplicitAdd")) {
171 dc->implicit_add = IA_NOIMPADD;
173 else if (!strcasecmp(flag, "TranslateAllMimeTypes")) {
174 dc->force_xlate = FX_FORCE;
176 else if (!strcasecmp(flag, "NoTranslateAllMimeTypes")) {
177 dc->force_xlate = FX_NOFORCE;
180 return apr_pstrcat(cmd->temp_pool,
181 "Invalid CharsetOptions option: ",
189 /* find_code_page() is a fixup hook that checks if the module is
190 * configured and the input or output potentially need to be translated.
191 * If so, context is initialized for the filters.
193 static int find_code_page(request_rec *r)
195 charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
196 &charset_lite_module);
197 charset_req_t *reqinfo;
198 charset_filter_ctx_t *input_ctx, *output_ctx;
201 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
202 "uri: %s file: %s method: %d "
203 "imt: %s flags: %s%s%s %s->%s",
205 r->filename ? r->filename : "(none)",
207 r->content_type ? r->content_type : "(unknown)",
208 r->main ? "S" : "", /* S if subrequest */
209 r->prev ? "R" : "", /* R if redirect */
210 r->proxyreq ? "P" : "", /* P if proxy */
211 dc->charset_source, dc->charset_default);
213 /* If we don't have a full directory configuration, bail out.
215 if (!dc->charset_source || !dc->charset_default) {
216 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01448)
217 "incomplete configuration: src %s, dst %s",
218 dc->charset_source ? dc->charset_source : "unspecified",
219 dc->charset_default ? dc->charset_default : "unspecified");
223 /* catch proxy requests */
228 /* mod_rewrite indicators */
230 && (!strncmp(r->filename, "redirect:", 9)
231 || !strncmp(r->filename, "gone:", 5)
232 || !strncmp(r->filename, "passthrough:", 12)
233 || !strncmp(r->filename, "forbidden:", 10))) {
237 /* no translation when server and network charsets are set to the same value */
238 if (!strcasecmp(dc->charset_source, dc->charset_default)) {
242 /* Get storage for the request data and the output filter context.
243 * We rarely need the input filter context, so allocate that separately.
245 reqinfo = (charset_req_t *)apr_pcalloc(r->pool,
246 sizeof(charset_req_t) +
247 sizeof(charset_filter_ctx_t));
248 output_ctx = (charset_filter_ctx_t *)(reqinfo + 1);
252 output_ctx->tmpbb = apr_brigade_create(r->pool,
253 r->connection->bucket_alloc);
254 ap_set_module_config(r->request_config, &charset_lite_module, reqinfo);
256 reqinfo->output_ctx = output_ctx;
258 switch (r->method_number) {
261 /* Set up input translation. Note: A request body can be included
262 * with the OPTIONS method, but for now we don't set up translation
265 input_ctx = apr_pcalloc(r->pool, sizeof(charset_filter_ctx_t));
266 input_ctx->bb = apr_brigade_create(r->pool,
267 r->connection->bucket_alloc);
268 input_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE);
270 reqinfo->input_ctx = input_ctx;
271 rv = apr_xlate_open(&input_ctx->xlate, dc->charset_source,
272 dc->charset_default, r->pool);
273 if (rv != APR_SUCCESS) {
274 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01449)
275 "can't open translation %s->%s",
276 dc->charset_default, dc->charset_source);
277 return HTTP_INTERNAL_SERVER_ERROR;
279 if (apr_xlate_sb_get(input_ctx->xlate, &input_ctx->is_sb) != APR_SUCCESS) {
280 input_ctx->is_sb = 0;
287 static int configured_in_list(request_rec *r, const char *filter_name,
288 struct ap_filter_t *filter_list)
290 struct ap_filter_t *filter = filter_list;
293 if (!strcasecmp(filter_name, filter->frec->name)) {
296 filter = filter->next;
301 static int configured_on_input(request_rec *r, const char *filter_name)
303 return configured_in_list(r, filter_name, r->input_filters);
306 static int configured_on_output(request_rec *r, const char *filter_name)
308 return configured_in_list(r, filter_name, r->output_filters);
311 /* xlate_insert_filter() is a filter hook which decides whether or not
312 * to insert a translation filter for the current request.
314 static void xlate_insert_filter(request_rec *r)
316 /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */
317 charset_req_t *reqinfo = ap_get_module_config(r->request_config,
318 &charset_lite_module);
319 charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
320 &charset_lite_module);
322 if (dc && (dc->implicit_add == IA_NOIMPADD)) {
323 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, r,
324 "xlate output filter not added implicitly because "
325 "CharsetOptions included 'NoImplicitAdd'");
330 if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) {
331 ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r,
334 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
335 "xlate output filter not added implicitly because %s",
336 !reqinfo->output_ctx ?
337 "no output configuration available" :
338 "another module added the filter");
340 if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) {
341 ap_add_input_filter(XLATEIN_FILTER_NAME, reqinfo->input_ctx, r,
344 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
345 "xlate input filter not added implicitly because %s",
346 !reqinfo->input_ctx ?
347 "no input configuration available" :
348 "another module added the filter");
/* stuff that sucks that I know of:
 *
 * bucket handling:
 *  why create an eos bucket when we see it come down the stream?  just send the one
 *  passed as input...  news flash: this will be fixed when xlate_out_filter() starts
 *  using the more generic xlate_brigade()
 *
 * translation mechanics:
 *   we don't handle characters that straddle more than two buckets; an error
 *   will be generated
 */
364 static apr_status_t send_bucket_downstream(ap_filter_t *f, apr_bucket *b)
366 charset_filter_ctx_t *ctx = f->ctx;
369 APR_BRIGADE_INSERT_TAIL(ctx->tmpbb, b);
370 rv = ap_pass_brigade(f->next, ctx->tmpbb);
371 if (rv != APR_SUCCESS) {
372 ctx->ees = EES_DOWNSTREAM;
374 apr_brigade_cleanup(ctx->tmpbb);
378 /* send_downstream() is passed the translated data; it puts it in a single-
379 * bucket brigade and passes the brigade to the next filter
381 static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len)
383 request_rec *r = f->r;
384 conn_rec *c = r->connection;
387 b = apr_bucket_transient_create(tmp, len, c->bucket_alloc);
388 return send_bucket_downstream(f, b);
391 static apr_status_t send_eos(ap_filter_t *f)
393 request_rec *r = f->r;
394 conn_rec *c = r->connection;
395 apr_bucket_brigade *bb;
397 charset_filter_ctx_t *ctx = f->ctx;
400 bb = apr_brigade_create(r->pool, c->bucket_alloc);
401 b = apr_bucket_eos_create(c->bucket_alloc);
402 APR_BRIGADE_INSERT_TAIL(bb, b);
403 rv = ap_pass_brigade(f->next, bb);
404 if (rv != APR_SUCCESS) {
405 ctx->ees = EES_DOWNSTREAM;
410 static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx,
412 apr_size_t partial_len)
416 if (sizeof(ctx->buf) > partial_len) {
417 ctx->saved = partial_len;
418 memcpy(ctx->buf, partial, partial_len);
423 ctx->ees = EES_LIMIT; /* we don't handle chars this wide which straddle
430 static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx,
432 const char **cur_str,
439 apr_size_t tmp_input_len;
441 /* Keep adding bytes from the input string to the saved string until we
442 * 1) finish the input char
444 * or 3) run out of bytes to add
448 ctx->buf[ctx->saved] = **cur_str;
452 tmp_input_len = ctx->saved;
453 rv = apr_xlate_conv_buffer(ctx->xlate,
458 } while (rv == APR_INCOMPLETE && *cur_len);
460 if (rv == APR_SUCCESS) {
464 ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars
465 * straddling more than two buckets
472 static void log_xlate_error(ap_filter_t *f, apr_status_t rv)
474 charset_filter_ctx_t *ctx = f->ctx;
482 msg = APLOGNO(02193) "xlate filter - a built-in restriction was encountered";
486 msg = APLOGNO(02194) "xlate filter - an input character was invalid";
488 case EES_BUCKET_READ:
490 msg = APLOGNO(02195) "xlate filter - bucket read routine failed";
492 case EES_INCOMPLETE_CHAR:
494 strcpy(msgbuf, APLOGNO(02196) "xlate filter - incomplete char at end of input - ");
497 /* We must ensure not to process more than what would fit in the
498 * remaining of the destination buffer, including terminating NULL */
499 if (len > (sizeof(msgbuf) - strlen(msgbuf) - 1) / 2)
500 len = (sizeof(msgbuf) - strlen(msgbuf) - 1) / 2;
502 ap_bin2hex(ctx->buf, len, msgbuf + strlen(msgbuf));
506 msg = APLOGNO(02197) "xlate filter - an error occurred in a lower filter";
509 msg = APLOGNO(02198) "xlate filter - returning error";
511 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(02997) "%s", msg);
514 /* chk_filter_chain() is called once per filter instance; it tries to
515 * determine if the current filter instance should be disabled because
516 * its translation is incompatible with the translation of an existing
517 * instance of the translate filter
519 * Example bad scenario:
521 * configured filter chain for the request:
522 * INCLUDES XLATEOUT(8859-1->UTS-16)
523 * configured filter chain for the subrequest:
524 * XLATEOUT(8859-1->UTS-16)
526 * When the subrequest is processed, the filter chain will be
527 * XLATEOUT(8859-1->UTS-16) XLATEOUT(8859-1->UTS-16)
528 * This makes no sense, so the instance of XLATEOUT added for the
529 * subrequest will be noop-ed.
531 * Example good scenario:
533 * configured filter chain for the request:
534 * INCLUDES XLATEOUT(8859-1->UTS-16)
535 * configured filter chain for the subrequest:
536 * XLATEOUT(IBM-1047->8859-1)
538 * When the subrequest is processed, the filter chain will be
539 * XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTS-16)
540 * This makes sense, so the instance of XLATEOUT added for the
541 * subrequest will be left alone and it will translate from
544 static void chk_filter_chain(ap_filter_t *f)
547 charset_filter_ctx_t *curctx, *last_xlate_ctx = NULL,
549 int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME);
555 /* walk the filter chain; see if it makes sense for our filter to
558 curf = output ? f->r->output_filters : f->r->input_filters;
560 if (!strcasecmp(curf->frec->name, f->frec->name) &&
562 curctx = (charset_filter_ctx_t *)curf->ctx;
563 if (!last_xlate_ctx) {
564 last_xlate_ctx = curctx;
567 if (strcmp(last_xlate_ctx->dc->charset_default,
568 curctx->dc->charset_source)) {
569 /* incompatible translation
570 * if our filter instance is incompatible with an instance
571 * already in place, noop our instance
573 * . We are only willing to noop our own instance.
574 * . It is possible to noop another instance which has not
575 * yet run, but this is not currently implemented.
576 * Hopefully it will not be needed.
577 * . It is not possible to noop an instance which has
580 if (last_xlate_ctx == f->ctx) {
581 last_xlate_ctx->noop = 1;
582 if (APLOGrtrace1(f->r)) {
583 const char *symbol = output ? "->" : "<-";
585 ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
586 0, f->r, APLOGNO(01451)
588 "translation %s%s%s; existing "
589 "translation %s%s%s",
590 f->r->uri ? "uri" : "file",
591 f->r->uri ? f->r->uri : f->r->filename,
592 last_xlate_ctx->dc->charset_source,
594 last_xlate_ctx->dc->charset_default,
595 curctx->dc->charset_source,
597 curctx->dc->charset_default);
601 const char *symbol = output ? "->" : "<-";
603 ap_log_rerror(APLOG_MARK, APLOG_ERR,
604 0, f->r, APLOGNO(01452)
605 "chk_filter_chain() - can't disable "
606 "translation %s%s%s; existing "
607 "translation %s%s%s",
608 last_xlate_ctx->dc->charset_source,
610 last_xlate_ctx->dc->charset_default,
611 curctx->dc->charset_source,
613 curctx->dc->charset_default);
623 /* xlate_brigade() is used to filter request and response bodies
625 * we'll stop when one of the following occurs:
626 * . we run out of buckets
627 * . we run out of space in the output buffer
628 * . we hit an error or metadata
631 * bb: brigade to process
632 * buffer: storage to hold the translated characters
633 * buffer_avail: size of buffer
634 * (and a few more uninteresting parms)
637 * return value: APR_SUCCESS or some error code
638 * bb: we've removed any buckets representing the
639 * translated characters; the eos bucket, if
640 * present, will be left in the brigade
641 * buffer: filled in with translated characters
642 * buffer_avail: updated with the bytes remaining
643 * hit_eos: did we hit an EOS bucket?
645 static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx,
646 apr_bucket_brigade *bb,
648 apr_size_t *buffer_avail,
651 apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */
652 apr_bucket *consumed_bucket;
654 apr_size_t bytes_in_bucket; /* total bytes read from current bucket */
655 apr_size_t bucket_avail; /* bytes left in current bucket */
656 apr_status_t rv = APR_SUCCESS;
660 consumed_bucket = NULL;
662 if (!bucket_avail) { /* no bytes left to process in the current bucket... */
663 if (consumed_bucket) {
664 apr_bucket_delete(consumed_bucket);
665 consumed_bucket = NULL;
667 b = APR_BRIGADE_FIRST(bb);
668 if (b == APR_BRIGADE_SENTINEL(bb) ||
669 APR_BUCKET_IS_METADATA(b)) {
672 rv = apr_bucket_read(b, &bucket, &bytes_in_bucket, APR_BLOCK_READ);
673 if (rv != APR_SUCCESS) {
674 ctx->ees = EES_BUCKET_READ;
677 bucket_avail = bytes_in_bucket;
678 consumed_bucket = b; /* for axing when we're done reading it */
681 /* We've got data, so translate it. */
683 /* Rats... we need to finish a partial character from the previous
686 * Strangely, finish_partial_char() increments the input buffer
687 * pointer but does not increment the output buffer pointer.
689 apr_size_t old_buffer_avail = *buffer_avail;
690 rv = finish_partial_char(ctx,
691 &bucket, &bucket_avail,
692 &buffer, buffer_avail);
693 buffer += old_buffer_avail - *buffer_avail;
696 apr_size_t old_buffer_avail = *buffer_avail;
697 apr_size_t old_bucket_avail = bucket_avail;
698 rv = apr_xlate_conv_buffer(ctx->xlate,
699 bucket, &bucket_avail,
702 buffer += old_buffer_avail - *buffer_avail;
703 bucket += old_bucket_avail - bucket_avail;
705 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
706 /* We need to save the final byte(s) for next time; we can't
707 * convert it until we look at the next bucket.
709 rv = set_aside_partial_char(ctx, bucket, bucket_avail);
713 if (rv != APR_SUCCESS) {
714 /* bad input byte or partial char too big to store */
717 if (*buffer_avail < XLATE_MIN_BUFF_LEFT) {
718 /* if any data remains in the current bucket, split there */
720 apr_bucket_split(b, bytes_in_bucket - bucket_avail);
722 apr_bucket_delete(b);
728 if (!APR_BRIGADE_EMPTY(bb)) {
729 b = APR_BRIGADE_FIRST(bb);
730 if (APR_BUCKET_IS_EOS(b)) {
731 /* Leave the eos bucket in the brigade for reporting to
732 * subsequent filters.
736 /* Oops... we have a partial char from the previous bucket
737 * that won't be completed because there's no more data.
740 ctx->ees = EES_INCOMPLETE_CHAR;
748 /* xlate_out_filter() handles (almost) arbitrary conversions from one charset
750 * translation is determined in the fixup hook (find_code_page), which is
751 * where the filter's context data is set up... the context data gives us
752 * the translation handle
754 static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
756 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
757 &charset_lite_module);
758 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
759 &charset_lite_module);
760 charset_filter_ctx_t *ctx = f->ctx;
761 apr_bucket *dptr, *consumed_bucket;
763 apr_size_t cur_len, cur_avail;
764 char tmp[OUTPUT_XLATE_BUF_SIZE];
765 apr_size_t space_avail;
767 apr_status_t rv = APR_SUCCESS;
770 /* this is SetOutputFilter path; grab the preallocated context,
771 * if any; note that if we decided not to do anything in an earlier
772 * handler, we won't even have a reqinfo
775 ctx = f->ctx = reqinfo->output_ctx;
776 reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice
777 * in the filter chain; we can't have two
778 * instances using the same context
781 if (!ctx) { /* no idea how to translate; don't do anything */
782 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
788 /* Check the mime type to see if translation should be performed.
790 if (!ctx->noop && ctx->xlate == NULL) {
791 const char *mime_type = f->r->content_type;
793 if (mime_type && (ap_cstr_casecmpn(mime_type, "text/", 5) == 0 ||
794 #if APR_CHARSET_EBCDIC
795 /* On an EBCDIC machine, be willing to translate mod_autoindex-
796 * generated output. Otherwise, it doesn't look too cool.
798 * XXX This isn't a perfect fix because this doesn't trigger us
799 * to convert from the charset of the source code to ASCII. The
800 * general solution seems to be to allow a generator to set an
801 * indicator in the r specifying that the body is coded in the
802 * implementation character set (i.e., the charset of the source
803 * code). This would get several different types of documents
804 * translated properly: mod_autoindex output, mod_status output,
805 * mod_info output, hard-coded error documents, etc.
807 strcmp(mime_type, DIR_MAGIC_TYPE) == 0 ||
809 ap_cstr_casecmpn(mime_type, "message/", 8) == 0 ||
810 dc->force_xlate == FX_FORCE)) {
812 rv = apr_xlate_open(&ctx->xlate,
813 dc->charset_default, dc->charset_source, f->r->pool);
814 if (rv != APR_SUCCESS) {
815 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01453)
816 "can't open translation %s->%s",
817 dc->charset_source, dc->charset_default);
821 if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) {
829 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
830 "mime type is %s; no translation selected",
836 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
837 "xlate_out_filter() - "
838 "charset_source: %s charset_default: %s",
839 dc && dc->charset_source ? dc->charset_source : "(none)",
840 dc && dc->charset_default ? dc->charset_default : "(none)");
842 if (!ctx->ran) { /* filter never ran before */
845 if (!ctx->noop && !ctx->is_sb) {
846 /* We're not converting between two single-byte charsets, so unset
847 * Content-Length since it is unlikely to remain the same.
849 apr_table_unset(f->r->headers_out, "Content-Length");
854 return ap_pass_brigade(f->next, bb);
857 dptr = APR_BRIGADE_FIRST(bb);
860 space_avail = sizeof(tmp);
861 consumed_bucket = NULL;
863 if (!cur_len) { /* no bytes left to process in the current bucket... */
864 if (consumed_bucket) {
865 apr_bucket_delete(consumed_bucket);
866 consumed_bucket = NULL;
868 if (dptr == APR_BRIGADE_SENTINEL(bb)) {
871 if (APR_BUCKET_IS_EOS(dptr)) {
872 cur_len = -1; /* XXX yuck, but that tells us to send
873 * eos down; when we minimize our bb construction
874 * we'll fix this crap */
876 /* Oops... we have a partial char from the previous bucket
877 * that won't be completed because there's no more data.
880 ctx->ees = EES_INCOMPLETE_CHAR;
884 if (APR_BUCKET_IS_METADATA(dptr)) {
885 apr_bucket *metadata_bucket;
886 metadata_bucket = dptr;
887 dptr = APR_BUCKET_NEXT(dptr);
888 APR_BUCKET_REMOVE(metadata_bucket);
889 rv = send_bucket_downstream(f, metadata_bucket);
890 if (rv != APR_SUCCESS) {
895 rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ);
896 if (rv != APR_SUCCESS) {
897 ctx->ees = EES_BUCKET_READ;
900 consumed_bucket = dptr; /* for axing when we're done reading it */
901 dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the
904 /* Try to fill up our tmp buffer with translated data. */
907 if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */
909 /* Rats... we need to finish a partial character from the previous
914 tmp_tmp = tmp + sizeof(tmp) - space_avail;
915 rv = finish_partial_char(ctx,
917 &tmp_tmp, &space_avail);
920 rv = apr_xlate_conv_buffer(ctx->xlate,
922 tmp + sizeof(tmp) - space_avail, &space_avail);
924 /* Update input ptr and len after consuming some bytes */
925 cur_str += cur_len - cur_avail;
928 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
929 /* We need to save the final byte(s) for next time; we can't
930 * convert it until we look at the next bucket.
932 rv = set_aside_partial_char(ctx, cur_str, cur_len);
938 if (rv != APR_SUCCESS) {
939 /* bad input byte or partial char too big to store */
943 if (space_avail < XLATE_MIN_BUFF_LEFT) {
944 /* It is time to flush, as there is not enough space left in the
945 * current output buffer to bother with converting more data.
947 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
948 if (rv != APR_SUCCESS) {
952 /* tmp is now empty */
953 space_avail = sizeof(tmp);
957 if (rv == APR_SUCCESS) {
958 if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */
959 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
962 if (rv == APR_SUCCESS) {
968 log_xlate_error(f, rv);
974 static apr_status_t xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb,
975 ap_input_mode_t mode, apr_read_type_e block,
979 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
980 &charset_lite_module);
981 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
982 &charset_lite_module);
983 charset_filter_ctx_t *ctx = f->ctx;
984 apr_size_t buffer_size;
987 /* just get out of the way of things we don't want. */
988 if (mode != AP_MODE_READBYTES) {
989 return ap_get_brigade(f->next, bb, mode, block, readbytes);
993 /* this is SetInputFilter path; grab the preallocated context,
994 * if any; note that if we decided not to do anything in an earlier
995 * handler, we won't even have a reqinfo
998 ctx = f->ctx = reqinfo->input_ctx;
999 reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice
1000 * in the filter chain; we can't have two
1001 * instances using the same context
1004 if (!ctx) { /* no idea how to translate; don't do anything */
1005 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
1011 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
1012 "xlate_in_filter() - "
1013 "charset_source: %s charset_default: %s",
1014 dc && dc->charset_source ? dc->charset_source : "(none)",
1015 dc && dc->charset_default ? dc->charset_default : "(none)");
1017 if (!ctx->ran) { /* filter never ran before */
1018 chk_filter_chain(f);
1020 if (!ctx->noop && !ctx->is_sb
1021 && apr_table_get(f->r->headers_in, "Content-Length")) {
1022 /* A Content-Length header is present, but it won't be valid after
1023 * conversion because we're not converting between two single-byte
1024 * charsets. This will affect most CGI scripts and may affect
1026 * Content-Length can't be unset here because that would break
1027 * being able to read the request body.
1028 * Processing of chunked request bodies is not impacted by this
1029 * filter since the length was not declared anyway.
1031 ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r,
1032 "Request body length may change, resulting in "
1033 "misprocessing by some modules or scripts");
1038 return ap_get_brigade(f->next, bb, mode, block, readbytes);
1041 if (APR_BRIGADE_EMPTY(ctx->bb)) {
1042 if ((rv = ap_get_brigade(f->next, bb, mode, block,
1043 readbytes)) != APR_SUCCESS) {
1048 APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */
1051 buffer_size = INPUT_XLATE_BUF_SIZE;
1052 rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos);
1053 if (rv == APR_SUCCESS) {
1055 /* move anything leftover into our context for next time;
1056 * we don't currently "set aside" since the data came from
1057 * down below, but I suspect that for long-term we need to
1060 APR_BRIGADE_CONCAT(ctx->bb, bb);
1062 if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */
1065 e = apr_bucket_heap_create(ctx->tmp,
1066 INPUT_XLATE_BUF_SIZE - buffer_size,
1067 NULL, f->r->connection->bucket_alloc);
1068 /* make sure we insert at the head, because there may be
1069 * an eos bucket already there, and the eos bucket should
1070 * come after the data
1072 APR_BRIGADE_INSERT_HEAD(bb, e);
1075 /* XXX need to get some more data... what if the last brigade
1076 * we got had only the first byte of a multibyte char? we need
1077 * to grab more data from the network instead of returning an
1081 /* If we have any metadata at the head of ctx->bb, go ahead and move it
1082 * onto the end of bb to be returned to our caller.
1084 if (!APR_BRIGADE_EMPTY(ctx->bb)) {
1085 apr_bucket *b = APR_BRIGADE_FIRST(ctx->bb);
1086 while (b != APR_BRIGADE_SENTINEL(ctx->bb)
1087 && APR_BUCKET_IS_METADATA(b)) {
1088 APR_BUCKET_REMOVE(b);
1089 APR_BRIGADE_INSERT_TAIL(bb, b);
1090 b = APR_BRIGADE_FIRST(ctx->bb);
1095 log_xlate_error(f, rv);
1101 static const command_rec cmds[] =
1103 AP_INIT_TAKE1("CharsetSourceEnc",
1107 "source (html,cgi,ssi) file charset"),
1108 AP_INIT_TAKE1("CharsetDefault",
1109 add_charset_default,
1112 "name of default charset"),
1113 AP_INIT_ITERATE("CharsetOptions",
1114 add_charset_options,
1117 "valid options: ImplicitAdd, NoImplicitAdd, TranslateAllMimeTypes, "
1118 "NoTranslateAllMimeTypes"),
1122 static void charset_register_hooks(apr_pool_t *p)
1124 ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE);
1125 ap_hook_insert_filter(xlate_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST);
1126 #if APR_CHARSET_EBCDIC
1127 ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL,
1128 AP_FTYPE_RESOURCE+1);
1129 ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL,
1130 AP_FTYPE_RESOURCE+1);
1132 ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL,
1134 ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL,
1139 AP_DECLARE_MODULE(charset_lite) =
1141 STANDARD20_MODULE_STUFF,
1142 create_charset_dir_conf,
1143 merge_charset_dir_conf,
1147 charset_register_hooks