From 0471dd1d9483d6b77258b40ffe3d5f54c03e6217 Mon Sep 17 00:00:00 2001 From: Ilia Alshanetsky Date: Mon, 18 Dec 2006 04:24:38 +0000 Subject: [PATCH] MFB: Filter fixes: Fixed possible double encoding problem with sanitizing filters Make use of space-strict strip_tags() function --- ext/filter/sanitizing_filters.c | 109 ++++++------- ext/filter/tests/025.phpt | 2 +- ext/filter/tests/042.phpt | 10 ++ ext/filter/tests/043.phpt | 267 ++++++++++++++++++++++++++++++++ ext/standard/string.c | 2 +- 5 files changed, 334 insertions(+), 56 deletions(-) create mode 100644 ext/filter/tests/042.phpt create mode 100644 ext/filter/tests/043.phpt diff --git a/ext/filter/sanitizing_filters.c b/ext/filter/sanitizing_filters.c index a390969b1c..3c2d9988f8 100644 --- a/ext/filter/sanitizing_filters.c +++ b/ext/filter/sanitizing_filters.c @@ -27,52 +27,29 @@ typedef unsigned long filter_map[256]; /* }}} */ /* {{{ HELPER FUNCTIONS */ -static void php_filter_encode_html(zval *value, const char* chars, int encode_nul) +static void php_filter_encode_html(zval *value, const unsigned char *chars) { - register int x, y; smart_str str = {0}; int len = Z_STRLEN_P(value); - char *s = Z_STRVAL_P(value); + unsigned char *s = (unsigned char *)Z_STRVAL_P(value); + unsigned char *e = s + len; if (Z_STRLEN_P(value) == 0) { return; } - for (x = 0, y = 0; len--; x++, y++) { - if (strchr(chars, s[x]) || (encode_nul && s[x] == 0)) { + while (s < e) { + if (chars[*s]) { smart_str_appendl(&str, "&#", 2); - smart_str_append_long(&str, s[x]); + smart_str_append_unsigned(&str, (unsigned long)*s); smart_str_appendc(&str, ';'); } else { - smart_str_appendc(&str, s[x]); + /* XXX: this needs to be optimized to work with blocks of 'safe' chars */ + smart_str_appendc(&str, *s); } + s++; } - smart_str_0(&str); - efree(Z_STRVAL_P(value)); - Z_STRVAL_P(value) = str.c; - Z_STRLEN_P(value) = str.len; -} - -static void php_filter_encode_html_high_low(zval *value, long flags) -{ - register int x, y; - smart_str str = {0}; - 
int len = Z_STRLEN_P(value); - unsigned char *s = (unsigned char *)Z_STRVAL_P(value); - if (Z_STRLEN_P(value) == 0) { - return; - } - - for (x = 0, y = 0; len--; x++, y++) { - if (((flags & FILTER_FLAG_ENCODE_LOW) && (s[x] < 32)) || ((flags & FILTER_FLAG_ENCODE_HIGH) && (s[x] > 127))) { - smart_str_appendl(&str, "&#", 2); - smart_str_append_unsigned(&str, s[x]); - smart_str_appendc(&str, ';'); - } else { - smart_str_appendc(&str, s[x]); - } - } smart_str_0(&str); efree(Z_STRVAL_P(value)); Z_STRVAL_P(value) = str.c; @@ -181,9 +158,28 @@ static void filter_map_apply(zval *value, filter_map *map) void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL) { size_t new_len; - + unsigned char enc[256] = {0}; + + /* strip high/strip low ( see flags )*/ + php_filter_strip(value, flags); + + if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) { + enc['\''] = enc['"'] = 1; + } + if (flags & FILTER_FLAG_ENCODE_AMP) { + enc['&'] = 1; + } + if (flags & FILTER_FLAG_ENCODE_LOW) { + memset(enc, 1, 32); + } + if (flags & FILTER_FLAG_ENCODE_HIGH) { + memset(enc + 127, 1, sizeof(enc) - 127); + } + + php_filter_encode_html(value, enc); + /* strip tags, implicitly also removes \0 chars */ - new_len = php_strip_tags(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0); + new_len = php_strip_tags(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, -1); Z_STRLEN_P(value) = new_len; if (new_len == 0) { @@ -191,21 +187,6 @@ void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL) ZVAL_EMPTY_STRING(value); return; } - - if (! 
(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) { - /* encode ' and " to numerical entity */ - php_filter_encode_html(value, "'\"", 0); - } - /* strip high/strip low ( see flags )*/ - php_filter_strip(value, flags); - - /* encode low/encode high flags */ - php_filter_encode_html_high_low(value, flags); - - /* also all the flags - & encode as %xx */ - if (flags & FILTER_FLAG_ENCODE_AMP) { - php_filter_encode_html(value, "&", 0); - } } /* }}} */ @@ -222,11 +203,21 @@ void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ php_filter_special_chars */ void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL) { + unsigned char enc[256] = {0}; + + php_filter_strip(value, flags); + /* encodes ' " < > & \0 to numerical entities */ - php_filter_encode_html(value, "'\"<>&", 1); + enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1; + /* if strip low is not set, then we encode them as &#xx; */ - php_filter_strip(value, flags); - php_filter_encode_html_high_low(value, FILTER_FLAG_ENCODE_LOW | flags); + memset(enc, 1, 32); + + if (flags & FILTER_FLAG_ENCODE_HIGH) { + memset(enc + 127, 1, sizeof(enc) - 127); + } + + php_filter_encode_html(value, enc); } /* }}} */ @@ -235,11 +226,21 @@ void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL) { /* Only if no flags are set (optimization) */ if (flags != 0 && Z_STRLEN_P(value) > 0) { + unsigned char enc[256] = {0}; + php_filter_strip(value, flags); + if (flags & FILTER_FLAG_ENCODE_AMP) { - php_filter_encode_html(value, "&", 0); + enc['&'] = 1; } - php_filter_encode_html_high_low(value, flags); + if (flags & FILTER_FLAG_ENCODE_LOW) { + memset(enc, 1, 32); + } + if (flags & FILTER_FLAG_ENCODE_HIGH) { + memset(enc + 127, 1, sizeof(enc) - 127); + } + + php_filter_encode_html(value, enc); } } /* }}} */ diff --git a/ext/filter/tests/025.phpt b/ext/filter/tests/025.phpt index d1e94abf02..bf85e36071 100644 --- a/ext/filter/tests/025.phpt +++ b/ext/filter/tests/025.phpt @@ -17,7 +17,7 @@ echo "Done\n"; string(0) "" string(0) "" 
string(12) "!@#$%^&*()'"" -string(32) "!@#$%^&#38;*()&#38;#39;&#38;#34;" +string(24) "!@#$%^&#38;*()&#39;&#34;" string(11) "`1234567890" string(5) "`123`" string(1) "." diff --git a/ext/filter/tests/042.phpt b/ext/filter/tests/042.phpt new file mode 100644 index 0000000000..b295e0698b --- /dev/null +++ b/ext/filter/tests/042.phpt @@ -0,0 +1,10 @@ +--TEST-- +Combination of strip & sanitize filters +--FILE-- +alert(/ext/filter+bypass/);< /script>ABC'; +$a = filter_var($var, FILTER_SANITIZE_STRING, array("flags" => FILTER_FLAG_STRIP_LOW)); +echo $a . "\n"; +?> +--EXPECT-- +XYZalert(/ext/filter+bypass/);ABC diff --git a/ext/filter/tests/043.phpt b/ext/filter/tests/043.phpt new file mode 100644 index 0000000000..b8d0eec7be --- /dev/null +++ b/ext/filter/tests/043.phpt @@ -0,0 +1,267 @@ +--TEST-- +Character encoding test +--FILE-- + $flags))); +} +?> +--EXPECT-- +string(4) "&#0;" +string(4) "&#1;" +string(4) "&#2;" +string(4) "&#3;" +string(4) "&#4;" +string(4) "&#5;" +string(4) "&#6;" +string(4) "&#7;" +string(4) "&#8;" +string(4) "&#9;" +string(5) "&#10;" +string(5) "&#11;" +string(5) "&#12;" +string(5) "&#13;" +string(5) "&#14;" +string(5) "&#15;" +string(5) "&#16;" +string(5) "&#17;" +string(5) "&#18;" +string(5) "&#19;" +string(5) "&#20;" +string(5) "&#21;" +string(5) "&#22;" +string(5) "&#23;" +string(5) "&#24;" +string(5) "&#25;" +string(5) "&#26;" +string(5) "&#27;" +string(5) "&#28;" +string(5) "&#29;" +string(5) "&#30;" +string(5) "&#31;" +string(1) " " +string(1) "!" +string(5) "&#34;" +string(1) "#" +string(1) "$" +string(1) "%" +string(5) "&#38;" +string(5) "&#39;" +string(1) "(" +string(1) ")" +string(1) "*" +string(1) "+" +string(1) "," +string(1) "-" +string(1) "." +string(1) "/" +string(1) "0" +string(1) "1" +string(1) "2" +string(1) "3" +string(1) "4" +string(1) "5" +string(1) "6" +string(1) "7" +string(1) "8" +string(1) "9" +string(1) ":" +string(1) ";" +string(0) "" +string(1) "=" +string(1) ">" +string(1) "?" 
+string(1) "@" +string(1) "A" +string(1) "B" +string(1) "C" +string(1) "D" +string(1) "E" +string(1) "F" +string(1) "G" +string(1) "H" +string(1) "I" +string(1) "J" +string(1) "K" +string(1) "L" +string(1) "M" +string(1) "N" +string(1) "O" +string(1) "P" +string(1) "Q" +string(1) "R" +string(1) "S" +string(1) "T" +string(1) "U" +string(1) "V" +string(1) "W" +string(1) "X" +string(1) "Y" +string(1) "Z" +string(1) "[" +string(1) "\" +string(1) "]" +string(1) "^" +string(1) "_" +string(1) "`" +string(1) "a" +string(1) "b" +string(1) "c" +string(1) "d" +string(1) "e" +string(1) "f" +string(1) "g" +string(1) "h" +string(1) "i" +string(1) "j" +string(1) "k" +string(1) "l" +string(1) "m" +string(1) "n" +string(1) "o" +string(1) "p" +string(1) "q" +string(1) "r" +string(1) "s" +string(1) "t" +string(1) "u" +string(1) "v" +string(1) "w" +string(1) "x" +string(1) "y" +string(1) "z" +string(1) "{" +string(1) "|" +string(1) "}" +string(1) "~" +string(6) "&#127;" +string(6) "&#128;" +string(6) "&#129;" +string(6) "&#130;" +string(6) "&#131;" +string(6) "&#132;" +string(6) "&#133;" +string(6) "&#134;" +string(6) "&#135;" +string(6) "&#136;" +string(6) "&#137;" +string(6) "&#138;" +string(6) "&#139;" +string(6) "&#140;" +string(6) "&#141;" +string(6) "&#142;" +string(6) "&#143;" +string(6) "&#144;" +string(6) "&#145;" +string(6) "&#146;" +string(6) "&#147;" +string(6) "&#148;" +string(6) "&#149;" +string(6) "&#150;" +string(6) "&#151;" +string(6) "&#152;" +string(6) "&#153;" +string(6) "&#154;" +string(6) "&#155;" +string(6) "&#156;" +string(6) "&#157;" +string(6) "&#158;" +string(6) "&#159;" +string(6) "&#160;" +string(6) "&#161;" +string(6) "&#162;" +string(6) "&#163;" +string(6) "&#164;" +string(6) "&#165;" +string(6) "&#166;" +string(6) "&#167;" +string(6) "&#168;" +string(6) "&#169;" +string(6) "&#170;" +string(6) "&#171;" +string(6) "&#172;" +string(6) "&#173;" +string(6) "&#174;" +string(6) "&#175;" +string(6) "&#176;" +string(6) "&#177;" +string(6) "&#178;" +string(6) "&#179;" +string(6) "&#180;" +string(6) "&#181;" +string(6) "&#182;" +string(6) "&#183;" +string(6) "&#184;" +string(6) "&#185;" +string(6) "&#186;" +string(6) "&#187;" +string(6) "&#188;" +string(6) "&#189;" +string(6) "&#190;" +string(6) "&#191;" +string(6) "&#192;" +string(6) "&#193;" +string(6) "&#194;" +string(6) "&#195;" +string(6) "&#196;" 
+string(6) "&#197;" +string(6) "&#198;" +string(6) "&#199;" +string(6) "&#200;" +string(6) "&#201;" +string(6) "&#202;" +string(6) "&#203;" +string(6) "&#204;" +string(6) "&#205;" +string(6) "&#206;" +string(6) "&#207;" +string(6) "&#208;" +string(6) "&#209;" +string(6) "&#210;" +string(6) "&#211;" +string(6) "&#212;" +string(6) "&#213;" +string(6) "&#214;" +string(6) "&#215;" +string(6) "&#216;" +string(6) "&#217;" +string(6) "&#218;" +string(6) "&#219;" +string(6) "&#220;" +string(6) "&#221;" +string(6) "&#222;" +string(6) "&#223;" +string(6) "&#224;" +string(6) "&#225;" +string(6) "&#226;" +string(6) "&#227;" +string(6) "&#228;" +string(6) "&#229;" +string(6) "&#230;" +string(6) "&#231;" +string(6) "&#232;" +string(6) "&#233;" +string(6) "&#234;" +string(6) "&#235;" +string(6) "&#236;" +string(6) "&#237;" +string(6) "&#238;" +string(6) "&#239;" +string(6) "&#240;" +string(6) "&#241;" +string(6) "&#242;" +string(6) "&#243;" +string(6) "&#244;" +string(6) "&#245;" +string(6) "&#246;" +string(6) "&#247;" +string(6) "&#248;" +string(6) "&#249;" +string(6) "&#250;" +string(6) "&#251;" +string(6) "&#252;" +string(6) "&#253;" +string(6) "&#254;" +string(6) "&#255;" \ No newline at end of file diff --git a/ext/standard/string.c b/ext/standard/string.c index e4a97393e9..f9c808efcc 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -6532,7 +6532,7 @@ PHPAPI size_t php_strip_tags(char *rbuf, int len, int *stateptr, char *allow, in case '\0': break; case '<': - if (isspace(*(p + 1))) { + if (isspace(*(p + 1)) && allow_len >=- 0) { goto reg_char; } if (state == 0) { -- 2.40.0