1 /* Copyright 2000-2006 The Apache Software Foundation or its licensors, as
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * simple hokey charset recoding configuration module
20 * See mod_ebcdic and mod_charset for more thought-out examples. This
21 * one is just so Jeff can learn how a module works and experiment with
22 * basic character set recoding configuration.
24 * !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!!
28 #include "http_config.h"
30 #include "http_core.h"
32 #include "http_main.h"
33 #include "http_protocol.h"
34 #include "http_request.h"
35 #include "util_charset.h"
36 #include "apr_buckets.h"
37 #include "util_filter.h"
38 #include "apr_strings.h"
40 #include "apr_xlate.h"
41 #define APR_WANT_STRFUNC
/* Sizing knobs for the translation code: the two staging-buffer sizes, the
 * low-water mark tested against the space left in a staging buffer, and the
 * size of the per-context scratch array (charset_filter_ctx_t.buf) that
 * holds a character split across buckets.
 */
44 #define OUTPUT_XLATE_BUF_SIZE (16*1024) /* size of translation buffer used on output */
45 #define INPUT_XLATE_BUF_SIZE (8*1024) /* size of translation buffer used on input */
47 #define XLATE_MIN_BUFF_LEFT 128 /* flush once there is no more than this much
48 * space left in the translation buffer
51 #define FATTEST_CHAR 8 /* we don't handle chars wider than this that straddle
55 /* extended error status codes; this is used in addition to an apr_status_t to
56 * track errors in the translation filter
/* Enumerators of the extended error status kept in charset_filter_ctx_t.ees;
 * log_xlate_error() switches on them to pick a message.
 * NOTE(review): EES_BUCKET_READ is used by the filters and by
 * log_xlate_error() but its enumerator is not visible in this excerpt.
 */
59 EES_INIT = 0, /* no error info yet; value must be 0 for easy init */
60 EES_LIMIT, /* built-in restriction encountered */
61 EES_INCOMPLETE_CHAR, /* incomplete multi-byte char at end of content */
63 EES_DOWNSTREAM, /* something bad happened in a filter below xlate */
64 EES_BAD_INPUT /* input data invalid */
/* Filter names. The same strings are used when the filters are registered,
 * when they are added to a request, and when a filter chain is searched for
 * an existing instance (the searches compare names case-insensitively).
 */
67 /* registered name of the output translation filter */
68 #define XLATEOUT_FILTER_NAME "XLATEOUT"
69 /* registered name of input translation filter */
70 #define XLATEIN_FILTER_NAME "XLATEIN"
/* Per-directory configuration. Instances are allocated by
 * create_charset_dir_conf(), combined by merge_charset_dir_conf() and
 * filled in by the CharsetSourceEnc, CharsetDefault and CharsetOptions
 * directive handlers.
 * NOTE(review): the debug member described by the first comment below is
 * used throughout the file (dc->debug) but its declaration line is not
 * visible in this excerpt.
 */
72 typedef struct charset_dir_t {
73 /** debug level; -1 means uninitialized, 0 means no debug */
75 const char *charset_source; /* source encoding */
76 const char *charset_default; /* how to ship on wire */
77 /** module does ap_add_*_filter()? */
/* IA_INIT is the "not set in this container" value that the merge function
 * tests for before falling back to the enclosing container's setting.
 */
78 enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add;
81 /* charset_filter_ctx_t is created for each filter instance; because the same
82 * filter code is used for translating in both directions, we need this context
83 * data to tell the filter which translation handle to use; it also can hold a
84 * character which was split between buckets
/* State of one translation filter instance (input or output).
 * NOTE(review): other code in this file also reads ctx->xlate, ctx->dc and
 * ctx->saved; those member declarations are not visible in this excerpt.
 */
86 typedef struct charset_filter_ctx_t {
88 int is_sb; /* single-byte translation? */
90 ees_t ees; /* extended error status */
92 char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */
93 int ran; /* has filter instance run before? */
94 int noop; /* should we pass brigades through unchanged? */
95 char *tmp; /* buffer for input filtering */
96 apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */
97 } charset_filter_ctx_t;
99 /* charset_req_t is available via r->request_config if any translation is
/* Per-request record that find_code_page() stores in r->request_config.
 * It carries the prepared filter contexts: xlate_insert_filter() passes
 * them to ap_add_output_filter()/ap_add_input_filter(), and the filters
 * themselves claim them (and reset the pointer to NULL) when they were
 * configured explicitly instead.
 * Only the context pointers are visible in this excerpt.
 */
102 typedef struct charset_req_t {
104 charset_filter_ctx_t *output_ctx, *input_ctx;
/* Thresholds compared against charset_dir_t.debug (set through
 * CharsetOptions DebugLevel=n): a message guarded by one of these is logged
 * when the configured level is greater than or equal to the threshold.
 */
107 /* debug level definitions */
108 #define DBGLVL_GORY 9 /* gory details */
109 #define DBGLVL_FLOW 4 /* enough messages to see what happens on
111 #define DBGLVL_PMC 2 /* messages about possible misconfiguration */
/* Forward declaration: the hooks and filters below pass &charset_lite_module
 * to ap_get_module_config()/ap_set_module_config() before the module record
 * is defined at the end of the file.
 */
113 module AP_MODULE_DECLARE_DATA charset_lite_module;
/* Per-directory configuration constructor (listed in the module record).
 *
 * p:     pool the configuration is allocated from
 * dummy: not referenced in the visible lines
 *
 * Allocates a zero-filled charset_dir_t from p. The rest of the body is not
 * visible in this excerpt; presumably it marks debug as uninitialized (-1)
 * and returns the new structure -- TODO confirm.
 */
115 static void *create_charset_dir_conf(apr_pool_t *p,char *dummy)
117 charset_dir_t *dc = (charset_dir_t *)apr_pcalloc(p,sizeof(charset_dir_t));
/* Per-directory configuration merge (listed in the module record).
 *
 * p:          pool for the merged configuration
 * basev:      charset_dir_t of the enclosing container
 * overridesv: charset_dir_t of the current container
 *
 * Allocates a fresh zero-filled charset_dir_t and, for each setting, takes
 * the override when it is set (debug != -1, non-NULL charset names,
 * implicit_add != IA_INIT) and the base value otherwise.
 * The left-hand sides of the assignments and the return statement are not
 * visible in this excerpt; presumably they target and return a -- TODO
 * confirm.
 */
123 static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv)
125 charset_dir_t *a = (charset_dir_t *)apr_pcalloc (p, sizeof(charset_dir_t));
126 charset_dir_t *base = (charset_dir_t *)basev,
127 *over = (charset_dir_t *)overridesv;
129 /* If it is defined in the current container, use it. Otherwise, use the one
130 * from the enclosing container.
134 over->debug != -1 ? over->debug : base->debug;
136 over->charset_default ? over->charset_default : base->charset_default;
138 over->charset_source ? over->charset_source : base->charset_source;
140 over->implicit_add != IA_INIT ? over->implicit_add : base->implicit_add;
144 /* CharsetSourceEnc charset
/* Directive handler that records the source charset.
 *
 * cmd:   directive context (not referenced in the visible lines)
 * in_dc: the per-directory charset_dir_t being configured
 *
 * Stores name in dc->charset_source; the pointer is kept as-is, not copied.
 * name is a parameter declared on a line that is not visible in this
 * excerpt, as is the return statement (presumably NULL for success -- TODO
 * confirm).
 */
146 static const char *add_charset_source(cmd_parms *cmd, void *in_dc,
149 charset_dir_t *dc = in_dc;
151 dc->charset_source = name;
155 /* CharsetDefault charset
/* Directive handler for CharsetDefault (see the cmds table).
 *
 * cmd:   directive context (not referenced in the visible lines)
 * in_dc: the per-directory charset_dir_t being configured
 *
 * Stores name in dc->charset_default; the pointer is kept as-is, not copied.
 * name is a parameter declared on a line that is not visible in this
 * excerpt, as is the return statement (presumably NULL for success -- TODO
 * confirm).
 */
157 static const char *add_charset_default(cmd_parms *cmd, void *in_dc,
160 charset_dir_t *dc = in_dc;
162 dc->charset_default = name;
166 /* CharsetOptions optionflag...
/* Directive handler for CharsetOptions (AP_INIT_ITERATE in the cmds table).
 *
 * cmd:   directive context; its temp_pool backs the error message
 * in_dc: the per-directory charset_dir_t being configured
 *
 * flag (a parameter declared on a line not visible in this excerpt) is
 * matched case-insensitively:
 *   ImplicitAdd    -> implicit_add = IA_IMPADD
 *   NoImplicitAdd  -> implicit_add = IA_NOIMPADD
 *   DebugLevel=n   -> debug = atoi(n)
 * Anything else yields an error string starting with
 * "Invalid CharsetOptions option: ".
 *
 * NOTE(review): atoi() does not report errors, so a non-numeric DebugLevel
 * value silently becomes 0 and no range check is visible; consider strtol
 * with validation.
 */
168 static const char *add_charset_options(cmd_parms *cmd, void *in_dc,
171 charset_dir_t *dc = in_dc;
173 if (!strcasecmp(flag, "ImplicitAdd")) {
174 dc->implicit_add = IA_IMPADD;
176 else if (!strcasecmp(flag, "NoImplicitAdd")) {
177 dc->implicit_add = IA_NOIMPADD;
179 else if (!strncasecmp(flag, "DebugLevel=", 11)) {
180 dc->debug = atoi(flag + 11);
183 return apr_pstrcat(cmd->temp_pool,
184 "Invalid CharsetOptions option: ",
192 /* find_code_page() is a fixup hook that decides if translation should be
193 * enabled; if so, it sets up request data for use by the filter registration
194 * hook so that it knows what to do
/* Fixup hook (registered in charset_register_hooks()).
 *
 * r: the request being processed
 *
 * Visible flow:
 *  - optionally trace the request at DBGLVL_FLOW;
 *  - detect an incomplete configuration (either charset name missing) and
 *    log it at DBGLVL_PMC;
 *  - decline proxy requests and filenames carrying the mod_rewrite
 *    prefixes redirect:, gone:, passthrough: and forbidden:;
 *  - log (at DBGLVL_GORY) when the content type is neither text/ nor
 *    message/ (nor DIR_MAGIC_TYPE under the #if), without deciding on it
 *    here, as the in-body comment explains;
 *  - allocate charset_req_t and the output context in a single block, store
 *    the record in r->request_config, and leave output_ctx->xlate NULL so
 *    the output filter opens the handle later;
 *  - for some request methods (the case labels are not visible in this
 *    excerpt) allocate an input context, its leftover brigade and its
 *    INPUT_XLATE_BUF_SIZE buffer, and open the input translation handle;
 *    failure to open it returns HTTP_INTERNAL_SERVER_ERROR.
 *
 * NOTE(review): r->filename is passed to strncmp() and to a %s conversion
 * with no visible NULL check -- confirm it cannot be NULL at fixup time.
 * NOTE(review): dc->charset_source and dc->charset_default are passed to %s
 * in the first trace message before the NULL test that follows it.
 * NOTE(review): the later "dc && ..." tests come after dc has already been
 * dereferenced, so they cannot protect against a NULL dc.
 * Several lines of this function (braces, returns, case labels, the rv
 * declaration) are not visible in this excerpt.
 */
196 static int find_code_page(request_rec *r)
198 charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
199 &charset_lite_module);
200 charset_req_t *reqinfo;
201 charset_filter_ctx_t *input_ctx, *output_ctx;
203 const char *mime_type;
205 if (dc->debug >= DBGLVL_FLOW) {
206 ap_log_rerror(APLOG_MARK,APLOG_DEBUG, 0, r,
207 "uri: %s file: %s method: %d "
208 "imt: %s flags: %s%s%s %s->%s",
209 r->uri, r->filename, r->method_number,
210 r->content_type ? r->content_type : "(unknown)",
211 r->main ? "S" : "", /* S if subrequest */
212 r->prev ? "R" : "", /* R if redirect */
213 r->proxyreq ? "P" : "", /* P if proxy */
214 dc->charset_source, dc->charset_default);
217 /* If we don't have a full directory configuration, bail out.
219 if (!dc->charset_source || !dc->charset_default) {
220 if (dc->debug >= DBGLVL_PMC) {
221 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
222 "incomplete configuration: src %s, dst %s",
223 dc->charset_source ? dc->charset_source : "unspecified",
224 dc->charset_default ? dc->charset_default : "unspecified");
229 /* catch proxy requests */
230 if (r->proxyreq) return DECLINED;
231 /* mod_rewrite indicators */
232 if (!strncmp(r->filename, "redirect:", 9)) return DECLINED;
233 if (!strncmp(r->filename, "gone:", 5)) return DECLINED;
234 if (!strncmp(r->filename, "passthrough:", 12)) return DECLINED;
235 if (!strncmp(r->filename, "forbidden:", 10)) return DECLINED;
237 mime_type = r->content_type ? r->content_type : ap_default_type(r);
239 /* If mime type isn't text or message, bail out.
242 /* XXX When we handle translation of the request body, watch out here as
243 * 1.3 allowed additional mime types: multipart and
244 * application/x-www-form-urlencoded
247 if (strncasecmp(mime_type, "text/", 5) &&
248 #if APR_CHARSET_EBCDIC || AP_WANT_DIR_TRANSLATION
249 /* On an EBCDIC machine, be willing to translate mod_autoindex-
250 * generated output. Otherwise, it doesn't look too cool.
252 * XXX This isn't a perfect fix because this doesn't trigger us
253 * to convert from the charset of the source code to ASCII. The
254 * general solution seems to be to allow a generator to set an
255 * indicator in the r specifying that the body is coded in the
256 * implementation character set (i.e., the charset of the source
257 * code). This would get several different types of documents
258 * translated properly: mod_autoindex output, mod_status output,
259 * mod_info output, hard-coded error documents, etc.
261 strcmp(mime_type, DIR_MAGIC_TYPE) &&
263 strncasecmp(mime_type, "message/", 8)) {
264 if (dc->debug >= DBGLVL_GORY) {
265 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
266 "mime type is %s; no translation selected",
269 /* We must not bail out here (i.e., the MIME test must be in the filter
270 * itself, not in the fixup, because only then is the final MIME type known.
271 * Examples for late changes to the MIME type include CGI handling (MIME
272 * type is set in the Content-Type header produced by the CGI script), or
273 * PHP (until PHP runs, the MIME type is set to application/x-httpd-php)
277 if (dc->debug >= DBGLVL_GORY) {
278 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
279 "charset_source: %s charset_default: %s",
280 dc && dc->charset_source ? dc->charset_source : "(none)",
281 dc && dc->charset_default ? dc->charset_default : "(none)");
284 /* Get storage for the request data and the output filter context.
285 * We rarely need the input filter context, so allocate that separately.
287 reqinfo = (charset_req_t *)apr_pcalloc(r->pool,
288 sizeof(charset_req_t) +
289 sizeof(charset_filter_ctx_t));
290 output_ctx = (charset_filter_ctx_t *)(reqinfo + 1);
294 ap_set_module_config(r->request_config, &charset_lite_module, reqinfo);
296 reqinfo->output_ctx = output_ctx;
298 /* We must not open the xlation table here yet, because the final MIME
299 * type is not known until we are actually called in the output filter.
300 * With POST or PUT request, the case is different, because their MIME
301 * type is set in the request headers, and their data are prerequisites
302 * for actually calling, e.g., the CGI handler later on.
304 output_ctx->xlate = NULL;
306 switch (r->method_number) {
309 /* Set up input translation. Note: A request body can be included
310 * with the OPTIONS method, but for now we don't set up translation
313 input_ctx = apr_pcalloc(r->pool, sizeof(charset_filter_ctx_t));
314 input_ctx->bb = apr_brigade_create(r->pool,
315 r->connection->bucket_alloc);
316 input_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE);
318 reqinfo->input_ctx = input_ctx;
319 rv = apr_xlate_open(&input_ctx->xlate, dc->charset_source,
320 dc->charset_default, r->pool);
321 if (rv != APR_SUCCESS) {
322 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
323 "can't open translation %s->%s",
324 dc->charset_default, dc->charset_source);
325 return HTTP_INTERNAL_SERVER_ERROR;
327 if (apr_xlate_sb_get(input_ctx->xlate, &input_ctx->is_sb) != APR_SUCCESS) {
328 input_ctx->is_sb = 0;
/* Search a filter chain for a filter with the given registered name.
 *
 * r:           the request (not referenced in the visible lines)
 * filter_name: name to look for; compared case-insensitively
 * filter_list: head of the chain, followed through ->next
 *
 * The loop construct and the return statements are not visible in this
 * excerpt; callers treat the result as a boolean, so presumably non-zero
 * means the name was found -- TODO confirm.
 */
335 static int configured_in_list(request_rec *r, const char *filter_name,
336 struct ap_filter_t *filter_list)
338 struct ap_filter_t *filter = filter_list;
341 if (!strcasecmp(filter_name, filter->frec->name)) {
344 filter = filter->next;
/* Wrapper around configured_in_list() that searches r->input_filters for
 * filter_name and returns its result unchanged.
 */
349 static int configured_on_input(request_rec *r, const char *filter_name)
351 return configured_in_list(r, filter_name, r->input_filters);
/* Wrapper around configured_in_list() that searches r->output_filters for
 * filter_name and returns its result unchanged.
 */
354 static int configured_on_output(request_rec *r, const char *filter_name)
356 return configured_in_list(r, filter_name, r->output_filters);
359 /* xlate_insert_filter() is a filter hook which decides whether or not
360 * to insert a translation filter for the current request.
/* insert_filter hook (registered with APR_HOOK_REALLY_LAST).
 *
 * r: the request whose filter chains may be extended
 *
 * For each direction: when the request record holds a context for that
 * direction and no filter of that name is already on the chain, the filter
 * is added with that context; otherwise, at DBGLVL_FLOW, a message says
 * whether the context was missing or the filter was already present.
 *
 * NOTE(review): the existing comment below warns that reqinfo may be NULL,
 * and charset_dir_t has an implicit_add setting; the lines that presumably
 * guard on both are not visible in this excerpt -- confirm before relying
 * on reqinfo being dereferenced safely.
 */
362 static void xlate_insert_filter(request_rec *r)
364 /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */
365 charset_req_t *reqinfo = ap_get_module_config(r->request_config,
366 &charset_lite_module);
367 charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
368 &charset_lite_module);
371 if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) {
372 ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r,
375 else if (dc->debug >= DBGLVL_FLOW) {
376 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
377 "xlate output filter not added implicitly because %s",
378 !reqinfo->output_ctx ?
379 "no output configuration available" :
380 "another module added the filter");
383 if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) {
384 ap_add_input_filter(XLATEIN_FILTER_NAME, reqinfo->input_ctx, r,
387 else if (dc->debug >= DBGLVL_FLOW) {
388 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
389 "xlate input filter not added implicitly because %s",
390 !reqinfo->input_ctx ?
391 "no input configuration available" :
392 "another module added the filter");
397 /* stuff that sucks that I know of:
400 * why create an eos bucket when we see it come down the stream? just send the one
401 * passed as input... news flash: this will be fixed when xlate_out_filter() starts
402 * using the more generic xlate_brigade()
404 * translation mechanics:
405 * we don't handle characters that straddle more than two buckets; an error
409 /* send_downstream() is passed the translated data; it puts it in a single-
410 * bucket brigade and passes the brigade to the next filter
/* Pass len bytes starting at tmp to the next output filter.
 *
 * f:   the current filter; f->next receives the data and f->ctx is the
 *      charset_filter_ctx_t whose ees field is updated on failure
 * tmp: translated bytes (wrapped in a transient bucket)
 * len: number of bytes at tmp
 *
 * A new brigade is created from r->pool on every call. When
 * ap_pass_brigade() fails, ctx->ees is set to EES_DOWNSTREAM. The return
 * statement is not visible in this excerpt; presumably rv is returned --
 * TODO confirm.
 */
412 static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len)
414 request_rec *r = f->r;
415 conn_rec *c = r->connection;
416 apr_bucket_brigade *bb;
418 charset_filter_ctx_t *ctx = f->ctx;
421 bb = apr_brigade_create(r->pool, c->bucket_alloc);
422 b = apr_bucket_transient_create(tmp, len, c->bucket_alloc);
423 APR_BRIGADE_INSERT_TAIL(bb, b);
424 rv = ap_pass_brigade(f->next, bb);
425 if (rv != APR_SUCCESS) {
426 ctx->ees = EES_DOWNSTREAM;
/* Pass a brigade holding a single newly created EOS bucket to the next
 * output filter.
 *
 * f: the current filter; f->ctx is the charset_filter_ctx_t whose ees field
 *    is set to EES_DOWNSTREAM when ap_pass_brigade() fails
 *
 * The return statement is not visible in this excerpt; presumably rv is
 * returned -- TODO confirm.
 */
431 static apr_status_t send_eos(ap_filter_t *f)
433 request_rec *r = f->r;
434 conn_rec *c = r->connection;
435 apr_bucket_brigade *bb;
437 charset_filter_ctx_t *ctx = f->ctx;
440 bb = apr_brigade_create(r->pool, c->bucket_alloc);
441 b = apr_bucket_eos_create(c->bucket_alloc);
442 APR_BRIGADE_INSERT_TAIL(bb, b);
443 rv = ap_pass_brigade(f->next, bb);
444 if (rv != APR_SUCCESS) {
445 ctx->ees = EES_DOWNSTREAM;
/* Save the leading bytes of an incomplete character in the filter context
 * so they can be completed from the next bucket.
 *
 * ctx:         filter context; buf and saved are written
 * partial_len: number of bytes to save
 * (partial, the source pointer, is a parameter declared on a line that is
 * not visible in this excerpt)
 *
 * The bytes are stored only when partial_len is strictly smaller than
 * sizeof(ctx->buf), which leaves room for at least one more byte;
 * otherwise ctx->ees is set to EES_LIMIT. The returned values are not
 * visible in this excerpt.
 */
450 static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx,
452 apr_size_t partial_len)
456 if (sizeof(ctx->buf) > partial_len) {
457 ctx->saved = partial_len;
458 memcpy(ctx->buf, partial, partial_len);
463 ctx->ees = EES_LIMIT; /* we don't handle chars this wide which straddle
/* Complete a character whose first bytes were saved by
 * set_aside_partial_char().
 *
 * ctx:     filter context holding the saved bytes (buf, saved) and the
 *          translation handle
 * cur_str: in/out pointer to the current input position
 * (the remaining parameters, including cur_len, are declared on lines that
 * are not visible in this excerpt)
 *
 * Each iteration appends one input byte to ctx->buf and retries
 * apr_xlate_conv_buffer() on the accumulated bytes; the loop continues
 * while the conversion reports APR_INCOMPLETE and input remains. A result
 * other than APR_SUCCESS ends with ctx->ees = EES_LIMIT.
 *
 * NOTE(review): no check that ctx->saved is still below sizeof(ctx->buf)
 * is visible before the write to ctx->buf[ctx->saved] -- confirm the loop
 * cannot run past the FATTEST_CHAR-byte array.
 */
470 static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx,
472 const char **cur_str,
479 apr_size_t tmp_input_len;
481 /* Keep adding bytes from the input string to the saved string until we
482 * 1) finish the input char
484 * or 3) run out of bytes to add
488 ctx->buf[ctx->saved] = **cur_str;
492 tmp_input_len = ctx->saved;
493 rv = apr_xlate_conv_buffer(ctx->xlate,
498 } while (rv == APR_INCOMPLETE && *cur_len);
500 if (rv == APR_SUCCESS) {
504 ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars
505 * straddling more than two buckets
/* Log a translation failure at APLOG_ERR for the filter's request.
 *
 * f:  the filter; f->ctx supplies the extended error status
 * rv: the status code passed on to ap_log_rerror()
 *
 * The message is chosen from the extended status. For EES_INCOMPLETE_CHAR
 * the saved bytes of the unfinished character are appended in hex to a
 * local buffer (msgbuf, declared on a line not visible in this excerpt).
 *
 * NOTE(review): ctx->buf is an array of plain char and each element is cast
 * to unsigned for %02X; where char is signed, bytes of 0x80 and above would
 * be sign-extended and print as eight hex digits -- confirm, and consider
 * casting through unsigned char.
 */
512 static void log_xlate_error(ap_filter_t *f, apr_status_t rv)
514 charset_filter_ctx_t *ctx = f->ctx;
522 msg = "xlate filter - a built-in restriction was encountered";
526 msg = "xlate filter - an input character was invalid";
528 case EES_BUCKET_READ:
530 msg = "xlate filter - bucket read routine failed";
532 case EES_INCOMPLETE_CHAR:
534 strcpy(msgbuf, "xlate filter - incomplete char at end of input - ");
536 while ((apr_size_t)cur < ctx->saved) {
537 apr_snprintf(msgbuf + strlen(msgbuf), sizeof(msgbuf) - strlen(msgbuf),
538 "%02X", (unsigned)ctx->buf[cur]);
544 msg = "xlate filter - an error occurred in a lower filter";
547 msg = "xlate filter - returning error";
549 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
553 /* chk_filter_chain() is called once per filter instance; it tries to
554 * determine if the current filter instance should be disabled because
555 * its translation is incompatible with the translation of an existing
556 * instance of the translate filter
558 * Example bad scenario:
560 * configured filter chain for the request:
561 * INCLUDES XLATEOUT(8859-1->UTF-16)
562 * configured filter chain for the subrequest:
563 * XLATEOUT(8859-1->UTF-16)
565 * When the subrequest is processed, the filter chain will be
566 * XLATEOUT(8859-1->UTF-16) XLATEOUT(8859-1->UTF-16)
567 * This makes no sense, so the instance of XLATEOUT added for the
568 * subrequest will be noop-ed.
570 * Example good scenario:
572 * configured filter chain for the request:
573 * INCLUDES XLATEOUT(8859-1->UTF-16)
574 * configured filter chain for the subrequest:
575 * XLATEOUT(IBM-1047->8859-1)
577 * When the subrequest is processed, the filter chain will be
578 * XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTF-16)
579 * This makes sense, so the instance of XLATEOUT added for the
580 * subrequest will be left alone and it will translate from
/* Decide whether this filter instance should be turned into a no-op.
 *
 * f: the filter instance being checked; its registered name selects which
 *    chain of f->r is walked (output_filters for XLATEOUT_FILTER_NAME,
 *    input_filters otherwise)
 *
 * Visible behaviour: filters with the same registered name are visited in
 * chain order. The destination charset of the previously seen instance is
 * compared (strcmp, so case-sensitively) with the source charset of the
 * current one. On a mismatch, if the previously seen instance is this
 * filter's own context it is marked noop (with a DBGLVL_PMC debug message);
 * otherwise an error is logged saying the translation cannot be disabled.
 *
 * The loop construct, part of the loop condition and the ctx declaration
 * are on lines that are not visible in this excerpt.
 */
583 static void chk_filter_chain(ap_filter_t *f)
586 charset_filter_ctx_t *curctx, *last_xlate_ctx = NULL,
588 int debug = ctx->dc->debug;
589 int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME);
595 /* walk the filter chain; see if it makes sense for our filter to
598 curf = output ? f->r->output_filters : f->r->input_filters;
600 if (!strcasecmp(curf->frec->name, f->frec->name) &&
602 curctx = (charset_filter_ctx_t *)curf->ctx;
603 if (!last_xlate_ctx) {
604 last_xlate_ctx = curctx;
607 if (strcmp(last_xlate_ctx->dc->charset_default,
608 curctx->dc->charset_source)) {
609 /* incompatible translation
610 * if our filter instance is incompatible with an instance
611 * already in place, noop our instance
613 * . We are only willing to noop our own instance.
614 * . It is possible to noop another instance which has not
615 * yet run, but this is not currently implemented.
616 * Hopefully it will not be needed.
617 * . It is not possible to noop an instance which has
620 if (last_xlate_ctx == f->ctx) {
621 last_xlate_ctx->noop = 1;
622 if (debug >= DBGLVL_PMC) {
623 const char *symbol = output ? "->" : "<-";
625 ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
628 "translation %s%s%s; existing "
629 "translation %s%s%s",
630 f->r->uri ? "uri" : "file",
631 f->r->uri ? f->r->uri : f->r->filename,
632 last_xlate_ctx->dc->charset_source,
634 last_xlate_ctx->dc->charset_default,
635 curctx->dc->charset_source,
637 curctx->dc->charset_default);
641 const char *symbol = output ? "->" : "<-";
643 ap_log_rerror(APLOG_MARK, APLOG_ERR,
645 "chk_filter_chain() - can't disable "
646 "translation %s%s%s; existing "
647 "translation %s%s%s",
648 last_xlate_ctx->dc->charset_source,
650 last_xlate_ctx->dc->charset_default,
651 curctx->dc->charset_source,
653 curctx->dc->charset_default);
663 /* xlate_brigade() is used to filter request and response bodies
665 * we'll stop when one of the following occurs:
666 * . we run out of buckets
667 * . we run out of space in the output buffer
671 * bb: brigade to process
672 * buffer: storage to hold the translated characters
673 * buffer_avail: size of buffer
674 * (and a few more uninteresting parms)
677 * return value: APR_SUCCESS or some error code
678 * bb: we've removed any buckets representing the
679 * translated characters; the eos bucket, if
680 * present, will be left in the brigade
681 * buffer: filled in with translated characters
682 * buffer_avail: updated with the bytes remaining
683 * hit_eos: did we hit an EOS bucket?
/* Translate as much of brigade bb as fits into the caller's buffer.
 *
 * ctx:          filter context (translation handle, saved partial char,
 *               extended error status)
 * bb:           brigade to consume; fully translated buckets are deleted
 * buffer_avail: in/out count of free bytes in the output buffer
 * (the output buffer pointer and the EOS flag are parameters declared on
 * lines that are not visible in this excerpt)
 *
 * Visible flow: when the current bucket is exhausted the previous one is
 * deleted and the next is read with a blocking read (a read failure sets
 * EES_BUCKET_READ); an empty brigade or an EOS bucket ends that step. Data
 * is converted either through finish_partial_char(), when bytes were saved
 * from the previous bucket, or directly with apr_xlate_conv_buffer();
 * APR_INCOMPLETE causes the trailing bytes to be saved with
 * set_aside_partial_char(). Once fewer than XLATE_MIN_BUFF_LEFT bytes
 * remain in the output buffer, the current bucket is split at the consumed
 * offset and its consumed part deleted. After the loop, an EOS bucket at
 * the head of the brigade is left in place, and EES_INCOMPLETE_CHAR is set
 * for a saved partial character that can no longer be completed.
 *
 * Loop constructs, several conditions and the return statement are on
 * lines that are not visible in this excerpt.
 */
685 static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx,
686 apr_bucket_brigade *bb,
688 apr_size_t *buffer_avail,
691 apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */
692 apr_bucket *consumed_bucket;
694 apr_size_t bytes_in_bucket; /* total bytes read from current bucket */
695 apr_size_t bucket_avail; /* bytes left in current bucket */
696 apr_status_t rv = APR_SUCCESS;
700 consumed_bucket = NULL;
702 if (!bucket_avail) { /* no bytes left to process in the current bucket... */
703 if (consumed_bucket) {
704 apr_bucket_delete(consumed_bucket);
705 consumed_bucket = NULL;
707 b = APR_BRIGADE_FIRST(bb);
708 if (b == APR_BRIGADE_SENTINEL(bb) ||
709 APR_BUCKET_IS_EOS(b)) {
712 rv = apr_bucket_read(b, &bucket, &bytes_in_bucket, APR_BLOCK_READ);
713 if (rv != APR_SUCCESS) {
714 ctx->ees = EES_BUCKET_READ;
717 bucket_avail = bytes_in_bucket;
718 consumed_bucket = b; /* for axing when we're done reading it */
721 /* We've got data, so translate it. */
723 /* Rats... we need to finish a partial character from the previous
726 * Strangely, finish_partial_char() increments the input buffer
727 * pointer but does not increment the output buffer pointer.
729 apr_size_t old_buffer_avail = *buffer_avail;
730 rv = finish_partial_char(ctx,
731 &bucket, &bucket_avail,
732 &buffer, buffer_avail);
733 buffer += old_buffer_avail - *buffer_avail;
736 apr_size_t old_buffer_avail = *buffer_avail;
737 apr_size_t old_bucket_avail = bucket_avail;
738 rv = apr_xlate_conv_buffer(ctx->xlate,
739 bucket, &bucket_avail,
742 buffer += old_buffer_avail - *buffer_avail;
743 bucket += old_bucket_avail - bucket_avail;
745 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
746 /* We need to save the final byte(s) for next time; we can't
747 * convert it until we look at the next bucket.
749 rv = set_aside_partial_char(ctx, bucket, bucket_avail);
753 if (rv != APR_SUCCESS) {
754 /* bad input byte or partial char too big to store */
757 if (*buffer_avail < XLATE_MIN_BUFF_LEFT) {
758 /* if any data remains in the current bucket, split there */
760 apr_bucket_split(b, bytes_in_bucket - bucket_avail);
762 apr_bucket_delete(b);
768 if (!APR_BRIGADE_EMPTY(bb)) {
769 b = APR_BRIGADE_FIRST(bb);
770 if (APR_BUCKET_IS_EOS(b)) {
771 /* Leave the eos bucket in the brigade for reporting to
772 * subsequent filters.
776 /* Oops... we have a partial char from the previous bucket
777 * that won't be completed because there's no more data.
780 ctx->ees = EES_INCOMPLETE_CHAR;
788 /* xlate_out_filter() handles (almost) arbitrary conversions from one charset
790 * translation is determined in the fixup hook (find_code_page), which is
791 * where the filter's context data is set up... the context data gives us
792 * the translation handle
/* Output filter entry point (registered under XLATEOUT_FILTER_NAME).
 *
 * f:  this filter instance
 * bb: brigade of response data to translate
 *
 * Visible flow:
 *  - pick up the context prepared by find_code_page() from
 *    reqinfo->output_ctx, or allocate a zeroed context when none exists;
 *  - when the instance is not a noop and has no handle yet, open the
 *    translation handle with apr_xlate_open() if the response content type
 *    starts with text/ or message/ (plus DIR_MAGIC_TYPE when
 *    APR_CHARSET_EBCDIC is set);
 *  - on the first run, unset Content-Length unless the instance is a noop
 *    or the translation is single-byte;
 *  - hand the brigade to the next filter untouched in the pass-through case
 *    (presumably when ctx->noop is set; the guarding line is not visible);
 *  - otherwise read each bucket, convert into tmp (finishing a character
 *    saved from the previous bucket first), call send_downstream() whenever
 *    fewer than XLATE_MIN_BUFF_LEFT bytes remain in tmp and once more for
 *    any remainder; failures are reported through log_xlate_error().
 *
 * NOTE(review): cur_len is an apr_size_t yet is assigned and compared with
 * -1 as an end-of-stream marker; this relies on unsigned conversion.
 * NOTE(review): the "dc && ..." tests in the debug message come after dc
 * has already been dereferenced.
 * Several control-flow lines of this function are not visible in this
 * excerpt, so the exact conditions guarding some statements cannot be
 * confirmed from here.
 */
794 static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
796 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
797 &charset_lite_module);
798 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
799 &charset_lite_module);
800 charset_filter_ctx_t *ctx = f->ctx;
801 apr_bucket *dptr, *consumed_bucket;
803 apr_size_t cur_len, cur_avail;
/* staging area for translated bytes; an automatic array of
 * OUTPUT_XLATE_BUF_SIZE bytes handed to send_downstream()
 */
804 char tmp[OUTPUT_XLATE_BUF_SIZE];
805 apr_size_t space_avail;
807 apr_status_t rv = APR_SUCCESS;
810 /* this is SetOutputFilter path; grab the preallocated context,
811 * if any; note that if we decided not to do anything in an earlier
812 * handler, we won't even have a reqinfo
815 ctx = f->ctx = reqinfo->output_ctx;
816 reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice
817 * in the filter chain; we can't have two
818 * instances using the same context
821 if (!ctx) { /* no idea how to translate; don't do anything */
822 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
828 /* Opening the output translation (this used to be done in the fixup hook,
829 * but that was too early: a subsequent type modification, e.g., by a
830 * CGI script, would go unnoticed. Now we do it in the filter itself.)
832 if (!ctx->noop && ctx->xlate == NULL)
834 const char *mime_type = f->r->content_type ? f->r->content_type : ap_default_type(f->r);
836 /* XXX When we handle translation of the request body, watch out here as
837 * 1.3 allowed additional mime types: multipart and
838 * application/x-www-form-urlencoded
840 if (strncasecmp(mime_type, "text/", 5) == 0 ||
841 #if APR_CHARSET_EBCDIC
842 /* On an EBCDIC machine, be willing to translate mod_autoindex-
843 * generated output. Otherwise, it doesn't look too cool.
845 * XXX This isn't a perfect fix because this doesn't trigger us
846 * to convert from the charset of the source code to ASCII. The
847 * general solution seems to be to allow a generator to set an
848 * indicator in the r specifying that the body is coded in the
849 * implementation character set (i.e., the charset of the source
850 * code). This would get several different types of documents
851 * translated properly: mod_autoindex output, mod_status output,
852 * mod_info output, hard-coded error documents, etc.
854 strcmp(mime_type, DIR_MAGIC_TYPE) == 0 ||
856 strncasecmp(mime_type, "message/", 8) == 0) {
858 rv = apr_xlate_open(&ctx->xlate,
859 dc->charset_default, dc->charset_source, f->r->pool);
860 if (rv != APR_SUCCESS) {
861 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
862 "can't open translation %s->%s",
863 dc->charset_source, dc->charset_default);
867 if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) {
874 if (dc->debug >= DBGLVL_GORY)
875 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
876 "mime type is %s; no translation selected",
881 if (dc->debug >= DBGLVL_GORY) {
882 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
883 "xlate_out_filter() - "
884 "charset_source: %s charset_default: %s",
885 dc && dc->charset_source ? dc->charset_source : "(none)",
886 dc && dc->charset_default ? dc->charset_default : "(none)");
889 if (!ctx->ran) { /* filter never ran before */
892 if (!ctx->noop && !ctx->is_sb) {
893 /* We're not converting between two single-byte charsets, so unset
894 * Content-Length since it is unlikely to remain the same.
896 apr_table_unset(f->r->headers_out, "Content-Length");
901 return ap_pass_brigade(f->next, bb);
904 dptr = APR_BRIGADE_FIRST(bb);
907 space_avail = sizeof(tmp);
908 consumed_bucket = NULL;
910 if (!cur_len) { /* no bytes left to process in the current bucket... */
911 if (consumed_bucket) {
912 apr_bucket_delete(consumed_bucket);
913 consumed_bucket = NULL;
915 if (dptr == APR_BRIGADE_SENTINEL(bb)) {
919 if (APR_BUCKET_IS_EOS(dptr)) {
921 cur_len = -1; /* XXX yuck, but that tells us to send
922 * eos down; when we minimize our bb construction
923 * we'll fix this crap */
925 /* Oops... we have a partial char from the previous bucket
926 * that won't be completed because there's no more data.
929 ctx->ees = EES_INCOMPLETE_CHAR;
933 rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ);
934 if (rv != APR_SUCCESS) {
936 ctx->ees = EES_BUCKET_READ;
939 consumed_bucket = dptr; /* for axing when we're done reading it */
940 dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the
943 /* Try to fill up our tmp buffer with translated data. */
946 if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */
948 /* Rats... we need to finish a partial character from the previous
953 tmp_tmp = tmp + sizeof(tmp) - space_avail;
954 rv = finish_partial_char(ctx,
956 &tmp_tmp, &space_avail);
959 rv = apr_xlate_conv_buffer(ctx->xlate,
961 tmp + sizeof(tmp) - space_avail, &space_avail);
963 /* Update input ptr and len after consuming some bytes */
964 cur_str += cur_len - cur_avail;
967 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
968 /* We need to save the final byte(s) for next time; we can't
969 * convert it until we look at the next bucket.
971 rv = set_aside_partial_char(ctx, cur_str, cur_len);
977 if (rv != APR_SUCCESS) {
978 /* bad input byte or partial char too big to store */
982 if (space_avail < XLATE_MIN_BUFF_LEFT) {
983 /* It is time to flush, as there is not enough space left in the
984 * current output buffer to bother with converting more data.
986 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
987 if (rv != APR_SUCCESS) {
991 /* tmp is now empty */
992 space_avail = sizeof(tmp);
996 if (rv == APR_SUCCESS) {
997 if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */
998 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
1001 if (rv == APR_SUCCESS) {
1002 if (cur_len == -1) {
1007 log_xlate_error(f, rv);
/* Input filter entry point (registered under XLATEIN_FILTER_NAME).
 *
 * f:         this filter instance
 * bb:        brigade to fill for the caller
 * mode, block, readbytes: passed unchanged to ap_get_brigade() on f->next
 *
 * Visible flow:
 *  - pick up the context prepared by find_code_page() from
 *    reqinfo->input_ctx, or allocate a zeroed context when none exists;
 *  - on the first run call chk_filter_chain() and, at DBGLVL_PMC, note
 *    that a non-single-byte translation may change the body length;
 *  - in the pass-through case (presumably when ctx->noop is set; the
 *    guarding line is not visible) return ap_get_brigade() directly;
 *  - otherwise fetch new data only when no leftovers are queued in
 *    ctx->bb, else put the leftovers at the front of bb;
 *  - translate into ctx->tmp with xlate_brigade(), move what was not
 *    consumed back into ctx->bb, and insert the translated bytes as a heap
 *    bucket at the head of bb so they precede any EOS bucket;
 *  - report failures through log_xlate_error().
 *
 * NOTE(review): the function is declared to return int while the values it
 * returns are status codes from ap_get_brigade() -- confirm this matches
 * the filter prototype the server expects.
 * NOTE(review): the "dc && ..." tests in the debug message come after dc
 * has already been dereferenced.
 * Several lines of this function are not visible in this excerpt.
 */
1013 static int xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb,
1014 ap_input_mode_t mode, apr_read_type_e block,
1015 apr_off_t readbytes)
1018 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
1019 &charset_lite_module);
1020 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
1021 &charset_lite_module);
1022 charset_filter_ctx_t *ctx = f->ctx;
1023 apr_size_t buffer_size;
1027 /* this is SetInputFilter path; grab the preallocated context,
1028 * if any; note that if we decided not to do anything in an earlier
1029 * handler, we won't even have a reqinfo
1032 ctx = f->ctx = reqinfo->input_ctx;
1033 reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice
1034 * in the filter chain; we can't have two
1035 * instances using the same context
1038 if (!ctx) { /* no idea how to translate; don't do anything */
1039 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
1045 if (dc->debug >= DBGLVL_GORY) {
1046 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
1047 "xlate_in_filter() - "
1048 "charset_source: %s charset_default: %s",
1049 dc && dc->charset_source ? dc->charset_source : "(none)",
1050 dc && dc->charset_default ? dc->charset_default : "(none)");
1053 if (!ctx->ran) { /* filter never ran before */
1054 chk_filter_chain(f);
1056 if (!ctx->noop && !ctx->is_sb) {
1057 /* We're not converting between two single-byte charsets, so note
1058 * that some handlers can't deal with it.
1059 * It doesn't help to unset Content-Length in the input header
1060 * table since in all likelihood the handler has already seen it.
1062 if (dc->debug >= DBGLVL_PMC) {
1063 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
1064 "Request body length may change, breaking some requests");
1070 return ap_get_brigade(f->next, bb, mode, block, readbytes);
1073 if (APR_BRIGADE_EMPTY(ctx->bb)) {
1074 if ((rv = ap_get_brigade(f->next, bb, mode, block,
1075 readbytes)) != APR_SUCCESS) {
1080 APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */
1083 buffer_size = INPUT_XLATE_BUF_SIZE;
1084 rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos);
1085 if (rv == APR_SUCCESS) {
1087 /* move anything leftover into our context for next time;
1088 * we don't currently "set aside" since the data came from
1089 * down below, but I suspect that for long-term we need to
1092 APR_BRIGADE_CONCAT(ctx->bb, bb);
1094 if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */
1097 e = apr_bucket_heap_create(ctx->tmp,
1098 INPUT_XLATE_BUF_SIZE - buffer_size,
1099 NULL, f->r->connection->bucket_alloc);
1100 /* make sure we insert at the head, because there may be
1101 * an eos bucket already there, and the eos bucket should
1102 * come after the data
1104 APR_BRIGADE_INSERT_HEAD(bb, e);
1107 /* XXX need to get some more data... what if the last brigade
1108 * we got had only the first byte of a multibyte char? we need
1109 * to grab more data from the network instead of returning an
1115 log_xlate_error(f, rv);
/* Configuration directives of this module: CharsetSourceEnc and
 * CharsetDefault each take one argument, CharsetOptions iterates over its
 * arguments. CharsetDefault and CharsetOptions name their handlers on the
 * visible lines; the remaining initializer fields (and the handler for
 * CharsetSourceEnc, presumably add_charset_source) are on lines that are
 * not visible in this excerpt.
 */
1121 static const command_rec cmds[] =
1123 AP_INIT_TAKE1("CharsetSourceEnc",
1127 "source (html,cgi,ssi) file charset"),
1128 AP_INIT_TAKE1("CharsetDefault",
1129 add_charset_default,
1132 "name of default charset"),
1133 AP_INIT_ITERATE("CharsetOptions",
1134 add_charset_options,
1137 "valid options: ImplicitAdd, NoImplicitAdd, DebugLevel=n"),
/* Register this module's hooks and filters.
 *
 * p: pool passed in by the server (not referenced in the visible lines)
 *
 * find_code_page() runs as a fixup at APR_HOOK_MIDDLE and
 * xlate_insert_filter() as an insert_filter hook at APR_HOOK_REALLY_LAST;
 * the two translation filters are registered under XLATEOUT_FILTER_NAME
 * and XLATEIN_FILTER_NAME. The trailing arguments of the two register
 * calls are on lines that are not visible in this excerpt.
 */
1141 static void charset_register_hooks(apr_pool_t *p)
1143 ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE);
1144 ap_hook_insert_filter(xlate_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST);
1145 ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL,
1147 ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL,
/* Module record. The visible slots are the per-directory create and merge
 * callbacks and the hook-registration function; the slots between them and
 * the closing of the initializer are on lines that are not visible in this
 * excerpt (presumably including the cmds table -- TODO confirm).
 */
1151 module AP_MODULE_DECLARE_DATA charset_lite_module =
1153 STANDARD20_MODULE_STUFF,
1154 create_charset_dir_conf,
1155 merge_charset_dir_conf,
1159 charset_register_hooks