From b84938b0436022abc80a81fe13305eaead5c5217 Mon Sep 17 00:00:00 2001 From: Graham Leggett Date: Sat, 29 Oct 2011 11:13:37 +0000 Subject: [PATCH] mod_include: Add support for application/x-www-form-urlencoded encoding and decoding. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1194870 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES | 3 ++ docs/manual/mod/mod_include.xml | 72 ++++++++++++++++++--------------- include/ap_mmn.h | 4 +- include/httpd.h | 23 +++++++++++ modules/filters/mod_include.c | 16 ++++++++ server/gen_test_char.c | 9 ++++- server/util.c | 44 ++++++++++++++++++++ 7 files changed, 137 insertions(+), 34 deletions(-) diff --git a/CHANGES b/CHANGES index 8a1f26b33f..095674d80e 100644 --- a/CHANGES +++ b/CHANGES @@ -12,6 +12,9 @@ Changes with Apache 2.3.15 PR 51714. [Stefan Fritsch, Jim Jagielski, Ruediger Pluem, Eric Covener, ] + *) mod_include: Add support for application/x-www-form-urlencoded encoding + and decoding. [Graham Leggett] + *) rotatelogs: Add -c option to force logfile creation in every rotation interval, even if empty. [Jan Kaluža ] diff --git a/docs/manual/mod/mod_include.xml b/docs/manual/mod/mod_include.xml index 7bf6c63bdd..586e3d19c7 100644 --- a/docs/manual/mod/mod_include.xml +++ b/docs/manual/mod/mod_include.xml @@ -186,13 +186,15 @@ is none, where no decoding will be done. If set to url, then URL decoding (also known as %-encoding; this is appropriate for use within URLs in links, etc.) will be - performed. If set to base64, base64 will be decoded, - and if set to entity, HTML entity encoding will be - stripped. Decoding is done prior to any further encoding on the - variable. Multiple encodings can be stripped by specifying more - than one comma separated encoding. The decoding setting will - remain in effect until the next decoding attribute is encountered, - or the element ends.

+ performed. If set to urlencoded, + application/x-www-form-urlencoded compatible encoding (found in + query strings) will be stripped. If set to base64, + base64 will be decoded, and if set to entity, HTML + entity encoding will be stripped. Decoding is done prior to any + further encoding on the variable. Multiple encodings can be + stripped by specifying more than one comma separated encoding. + The decoding setting will remain in effect until the next decoding + attribute is encountered, or the element ends.

The decoding attribute must precede the corresponding var attribute to be effective.

@@ -204,14 +206,17 @@ to none, no encoding will be done. If set to url, then URL encoding (also known as %-encoding; this is appropriate for use within URLs in links, etc.) will be - performed. If set to base64, base64 encoding will - be performed. At the start of an echo element, - the default is set to entity, resulting in entity - encoding (which is appropriate in the context of a block-level - HTML element, e.g. a paragraph of text). This can be - changed by adding an encoding attribute, which will - remain in effect until the next encoding attribute - is encountered or the element ends, whichever comes first.

+ performed. If set to urlencoded, + application/x-www-form-urlencoded compatible encoding will be + performed instead, and should be used with query strings. If set + to base64, base64 encoding will be performed. At + the start of an echo element, the default is set to + entity, resulting in entity encoding (which is + appropriate in the context of a block-level HTML element, + e.g. a paragraph of text). This can be changed by adding + an encoding attribute, which will remain in effect + until the next encoding attribute is encountered or + the element ends, whichever comes first.

The encoding attribute must precede the corresponding var attribute to be effective.

@@ -425,29 +430,32 @@

Specifies whether Apache should strip an encoding from the variable before processing the variable further. The default is none, where no decoding will be done. If set to - url, base64 or entity, - URL decoding, base64 decoding or HTML entity decoding will be - performed respectively. More than one decoding can be specified - by separating with commas. The decoding setting will remain in - effect until the next decoding attribute is encountered, or the - element ends. The decoding attribute must - precede the corresponding var attribute to - be effective.

+ url, urlencoded, base64 + or entity, URL decoding, + application/x-www-form-urlencoded decoding, base64 decoding or HTML + entity decoding will be performed respectively. More than one + decoding can be specified by separating with commas. The decoding + setting will remain in effect until the next decoding attribute + is encountered, or the element ends. The decoding + attribute must precede the corresponding + var attribute to be effective.

encoding

Specifies how Apache should encode special characters contained in the variable before setting them. The default is none, where no encoding will be done. If set to - url, base64 or entity, - URL encoding, base64 encoding or HTML entity encoding will be - performed respectively. More than one encoding can be specified - by separating with commas. The encoding setting will remain in - effect until the next encoding attribute is encountered, or the - element ends. The encoding attribute must - precede the corresponding var attribute - to be effective. Encodings are applied after all decodings have - been stripped.

+ url, urlencoded, base64 + or entity, URL encoding, + application/x-www-form-urlencoded encoding, base64 encoding or + HTML entity encoding will be performed respectively. More than + one encoding can be specified by separating with commas. The + encoding setting will remain in effect until the next encoding + attribute is encountered, or the element ends. The + encoding attribute must precede the + corresponding var attribute to be effective. + Encodings are applied after all decodings have been + stripped.

diff --git a/include/ap_mmn.h b/include/ap_mmn.h index ce20e76ab5..a9443888ac 100644 --- a/include/ap_mmn.h +++ b/include/ap_mmn.h @@ -362,6 +362,8 @@ * proxy_dir_conf * 20111025.0 (2.3.15-dev) Add return value and maxlen to ap_varbuf_regsub(), * add ap_pregsub_ex() + * 20111025.1 (2.3.15-dev) Add ap_escape_urlencoded(), ap_escape_urlencoded_buffer() + * and ap_unescape_urlencoded(). */ #define MODULE_MAGIC_COOKIE 0x41503234UL /* "AP24" */ @@ -369,7 +371,7 @@ #ifndef MODULE_MAGIC_NUMBER_MAJOR #define MODULE_MAGIC_NUMBER_MAJOR 20111025 #endif -#define MODULE_MAGIC_NUMBER_MINOR 0 /* 0...n */ +#define MODULE_MAGIC_NUMBER_MINOR 1 /* 0...n */ /** * Determine if the server's current MODULE_MAGIC_NUMBER is at least a diff --git a/include/httpd.h b/include/httpd.h index 28c9eb179e..89f91059bb 100644 --- a/include/httpd.h +++ b/include/httpd.h @@ -1539,6 +1539,13 @@ AP_DECLARE(int) ap_unescape_url(char *url); */ AP_DECLARE(int) ap_unescape_url_keep2f(char *url, int decode_slashes); +/** + * Unescape an application/x-www-form-urlencoded string + * @param query The query to unescape + * @return 0 on success, non-zero otherwise + */ +AP_DECLARE(int) ap_unescape_urlencoded(char *query); + /** * Convert all double slashes to single slashes * @param name The string to convert @@ -1581,6 +1588,22 @@ AP_DECLARE(char *) ap_os_escape_path(apr_pool_t *p, const char *path, int partia /** @see ap_os_escape_path */ #define ap_escape_uri(ppool,path) ap_os_escape_path(ppool,path,1) +/** + * Escape a string as application/x-www-form-urlencoded + * @param p The pool to allocate from + * @param s The string to escape + * @return The escaped string + */ +AP_DECLARE(char *) ap_escape_urlencoded(apr_pool_t *p, const char *s); + +/** + * Escape a string as application/x-www-form-urlencoded, to a preallocated buffer + * @param c The preallocated buffer to write to (at least 3 * strlen(s) + 1 bytes) + * @param s The string to escape + * @return The escaped string (c) + */ +AP_DECLARE(char *) ap_escape_urlencoded_buffer(char *c, const 
char *s); + /** * Escape an html string * @param p The pool to allocate from diff --git a/modules/filters/mod_include.c b/modules/filters/mod_include.c index e2563bb2d5..3fdcffee1f 100644 --- a/modules/filters/mod_include.c +++ b/modules/filters/mod_include.c @@ -1957,6 +1957,11 @@ static apr_status_t handle_echo(include_ctx_t *ctx, ap_filter_t *f, ap_unescape_url(buf); echo_text = buf; } + else if (!strcasecmp(token, "urlencoded")) { + char *buf = apr_pstrdup(ctx->pool, echo_text); + ap_unescape_urlencoded(buf); + echo_text = buf; + } else if (!strcasecmp(token, "entity")) { char *buf = apr_pstrdup(ctx->pool, echo_text); decodehtml(buf); @@ -1986,6 +1991,9 @@ static apr_status_t handle_echo(include_ctx_t *ctx, ap_filter_t *f, else if (!strcasecmp(token, "url")) { echo_text = ap_escape_uri(ctx->dpool, echo_text); } + else if (!strcasecmp(token, "urlencoded")) { + echo_text = ap_escape_urlencoded(ctx->dpool, echo_text); + } else if (!strcasecmp(token, "entity")) { echo_text = ap_escape_html2(ctx->dpool, echo_text, 0); } @@ -2576,6 +2584,11 @@ static apr_status_t handle_set(include_ctx_t *ctx, ap_filter_t *f, ap_unescape_url(buf); parsed_string = buf; } + else if (!strcasecmp(token, "urlencoded")) { + char *buf = apr_pstrdup(ctx->pool, parsed_string); + ap_unescape_urlencoded(buf); + parsed_string = buf; + } else if (!strcasecmp(token, "entity")) { char *buf = apr_pstrdup(ctx->pool, parsed_string); decodehtml(buf); @@ -2605,6 +2618,9 @@ static apr_status_t handle_set(include_ctx_t *ctx, ap_filter_t *f, else if (!strcasecmp(token, "url")) { parsed_string = ap_escape_uri(ctx->dpool, parsed_string); } + else if (!strcasecmp(token, "urlencoded")) { + parsed_string = ap_escape_urlencoded(ctx->dpool, parsed_string); + } else if (!strcasecmp(token, "entity")) { parsed_string = ap_escape_html2(ctx->dpool, parsed_string, 0); } diff --git a/server/gen_test_char.c b/server/gen_test_char.c index a0b55100f4..1c40bde93e 100644 --- a/server/gen_test_char.c +++ 
b/server/gen_test_char.c @@ -51,6 +51,7 @@ #define T_HTTP_TOKEN_STOP (0x08) #define T_ESCAPE_LOGITEM (0x10) #define T_ESCAPE_FORENSIC (0x20) +#define T_ESCAPE_URLENCODED (0x40) int main(int argc, char *argv[]) { @@ -65,6 +66,7 @@ int main(int argc, char *argv[]) "#define T_HTTP_TOKEN_STOP (%u)\n" "#define T_ESCAPE_LOGITEM (%u)\n" "#define T_ESCAPE_FORENSIC (%u)\n" + "#define T_ESCAPE_URLENCODED (%u)\n" "\n" "static const unsigned char test_char_table[256] = {", T_ESCAPE_SHELL_CMD, @@ -72,7 +74,8 @@ int main(int argc, char *argv[]) T_OS_ESCAPE_PATH, T_HTTP_TOKEN_STOP, T_ESCAPE_LOGITEM, - T_ESCAPE_FORENSIC); + T_ESCAPE_FORENSIC, + T_ESCAPE_URLENCODED); for (c = 0; c < 256; ++c) { flags = 0; @@ -108,6 +111,10 @@ int main(int argc, char *argv[]) flags |= T_OS_ESCAPE_PATH; } + if (!apr_isalnum(c) && !strchr(".-*_ ", c)) { + flags |= T_ESCAPE_URLENCODED; + } + /* these are the "tspecials" (RFC2068) or "separators" (RFC2616) */ if (c && (apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}", c))) { flags |= T_HTTP_TOKEN_STOP; diff --git a/server/util.c b/server/util.c index abd7a10e42..ca011a9e6b 100644 --- a/server/util.c +++ b/server/util.c @@ -1621,6 +1621,23 @@ AP_DECLARE(int) ap_unescape_url_reserved(char *url, const char *reserved) } #endif +AP_DECLARE(int) ap_unescape_urlencoded(char *query) +{ + char *slider; + + /* replace plus with a space */ + if (query) { + for (slider = query; *slider; slider++) { + if (*slider == '+') { + *slider = ' '; + } + } + } + + /* unescape everything else */ + return unescape_url(query, NULL, NULL); +} + AP_DECLARE(char *) ap_construct_server(apr_pool_t *p, const char *hostname, apr_port_t port, const request_rec *r) { @@ -1729,6 +1746,33 @@ AP_DECLARE(char *) ap_os_escape_path(apr_pool_t *p, const char *path, int partia return copy; } +AP_DECLARE(char *) ap_escape_urlencoded_buffer(char *copy, const char *buffer) +{ + const unsigned char *s = (const unsigned char *)buffer; + unsigned char *d = (unsigned char *)copy; + unsigned c; + + 
while ((c = *s)) { + if (TEST_CHAR(c, T_ESCAPE_URLENCODED)) { + d = c2x(c, '%', d); + } + else if (c == ' ') { + *d++ = '+'; + } + else { + *d++ = c; + } + ++s; + } + *d = '\0'; + return copy; +} + +AP_DECLARE(char *) ap_escape_urlencoded(apr_pool_t *p, const char *buffer) +{ + return ap_escape_urlencoded_buffer(apr_palloc(p, 3 * strlen(buffer) + 1), buffer); +} + /* ap_escape_uri is now a macro for os_escape_path */ AP_DECLARE(char *) ap_escape_html2(apr_pool_t *p, const char *s, int toasc) -- 2.40.0