From: Rasmus Lerdorf Date: Mon, 20 Sep 1999 15:50:56 +0000 (+0000) Subject: Added optional allowable_tags arguments to strip_tags() and fgetss() to X-Git-Tag: before-sapi-split~112 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c0dcedcafc9882c12bde1d39ebbefdfdd9aa51dd;p=php Added optional allowable_tags arguments to strip_tags() and fgetss() to allow you to specify a string of tags that are not to be stripped Could have used flex for this, and Andrew sent me a flex file to do this, but I could do the same thing with only minor additions to the existing state machine and the resulting code is much smaller and tighter. --- diff --git a/ChangeLog b/ChangeLog index 64f9118095..4a5503e0dc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,8 @@ PHP 4.0 CHANGE LOG ChangeLog ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ?? 1999, Version 4.0 Beta 3 +- Added optional allowable_tags arguments to strip_tags() and fgetss() to + allow you to specify a string of tags that are not to be stripped (Rasmus) - Upgraded var_dump() to take multiple arguments (Andrey) - Resourcified XML (Thies) - Fixed a memory leak in the Apache per-directory directives handler (Zeev) diff --git a/ext/standard/file.c b/ext/standard/file.c index ecd913856d..78922925f3 100644 --- a/ext/standard/file.c +++ b/ext/standard/file.c @@ -919,19 +919,33 @@ PHP_FUNCTION(fgetc) { /* Strip any HTML tags while reading */ -/* {{{ proto string fgetss(int fp, int length) +/* {{{ proto string fgetss(int fp, int length [, allowable_tags]) Get a line from file pointer and strip HTML tags */ PHP_FUNCTION(fgetss) { - pval *fd, *bytes; + pval *fd, *bytes, *allow=NULL; FILE *fp; int id, len, type; char *buf; int issock=0; int *sock,socketd=0; - if (ARG_COUNT(ht) != 2 || getParameters(ht, 2, &fd, &bytes) == FAILURE) { - WRONG_PARAM_COUNT; + switch(ARG_COUNT(ht)) { + case 2: + if (getParameters(ht, 2, &fd, &bytes) == FAILURE) { + RETURN_FALSE; + } + break; + case 3: + if 
(getParameters(ht, 3, &fd, &bytes, &allow) == FAILURE) { + RETURN_FALSE; + } + convert_to_string(allow); + break; + default: + WRONG_PARAM_COUNT; + /* NOTREACHED */ + break; } convert_to_long(fd); @@ -959,7 +973,7 @@ PHP_FUNCTION(fgetss) RETURN_FALSE; } - _php3_strip_tags(buf, fgetss_state); + _php3_strip_tags(buf, len, fgetss_state, allow->value.str.val); RETURN_STRING(buf, 0); } /* }}} */ diff --git a/ext/standard/php3_string.h b/ext/standard/php3_string.h index 448a49daac..7d8bda4903 100644 --- a/ext/standard/php3_string.h +++ b/ext/standard/php3_string.h @@ -99,7 +99,7 @@ extern PHPAPI char *php3i_stristr(unsigned char *s, unsigned char *t); extern PHPAPI char *_php3_str_to_str(char *haystack, int length, char *needle, int needle_len, char *str, int str_len, int *_new_length); extern PHPAPI void _php3_trim(pval *str, pval *return_value, int mode); -extern PHPAPI void _php3_strip_tags(char *rbuf, int state); +extern PHPAPI void _php3_strip_tags(char *rbuf, int len, int state, char *allow); extern PHPAPI void _php3_char_to_str(char *str, uint len, char from, char *to, int to_len, pval *result); diff --git a/ext/standard/string.c b/ext/standard/string.c index 2e26fa46fd..066ccf4a00 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -1807,19 +1807,32 @@ PHP_FUNCTION(nl2br) } /* }}} */ -/* {{{ proto string strip_tags(string str) +/* {{{ proto string strip_tags(string str [, allowable_tags]) Strips HTML and PHP tags from a string */ PHP_FUNCTION(strip_tags) { char *buf; - pval *str; + pval *str, *allow=NULL; - if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &str) == FAILURE) { - WRONG_PARAM_COUNT; + switch(ARG_COUNT(ht)) { + case 1: + if(getParameters(ht, 1, &str)==FAILURE) { + RETURN_FALSE; + } + break; + case 2: + if(getParameters(ht, 2, &str, &allow)==FAILURE) { + RETURN_FALSE; + } + convert_to_string(allow); + break; + default: + WRONG_PARAM_COUNT; + break; } convert_to_string(str); buf = estrdup(str->value.str.val); - _php3_strip_tags(buf, 0); + 
_php3_strip_tags(buf, str->value.str.len, 0, allow->value.str.val); RETURN_STRING(buf, 0); } /* }}} */ @@ -1888,6 +1901,59 @@ PHP_FUNCTION(parse_str) } /* }}} */ +#define PHP_TAG_BUF_SIZE 1023 + +/* Check if tag is in a set of tags + * + * states: + * + * 0 start tag + * 1 first non-whitespace char seen + */ +int php_tag_find(char *tag, int len, char *set) { + char c, *n, *t; + int i=0, state=0, done=0; + char *norm = emalloc(len); + + n = norm; + t = tag; + c = tolower(*t); + /* + normalize the tag removing leading and trailing whitespace + and turn any <a whatever...> into just <a> and any </tag> + into <tag> + */ + while(i': done =1; + break; + default: + if(!isspace(c)) { + if(state==0) { + state=1; + if(c!='/') *(n++) = c; + } else { + *(n++) = c; + } + } else { + if(state==1) done=1; + } + break; + } + c = tolower(*(++t)); + } + *(n++) = '>'; + *n = '\0'; + if(strstr(set,norm)) done=1; + else done=0; + efree(norm); + return done; +} + /* A simple little state-machine to strip out html and php tags State 0 is the output state, State 1 means we are inside a @@ -1898,10 +1964,14 @@ PHP_FUNCTION(parse_str) lc holds the last significant character read and br is a bracket counter. + + When an allow string is passed in we keep track of the string + in state 1 and when the tag is closed check it against the + allow string to see if we should allow it. */ -void _php3_strip_tags(char *rbuf, int state) { - char *buf, *p, *rp, c, lc; - int br; +void _php3_strip_tags(char *rbuf, int len, int state, char *allow) { + char *tbuf, *buf, *p, *tp, *rp, c, lc; + int br, i=0; buf = estrdup(rbuf); c = *buf; @@ -1909,13 +1979,21 @@ void _php3_strip_tags(char *rbuf, int state) { p = buf; rp = rbuf; br = 0; + if(allow) { + _php3_strtolower(allow); + tbuf = emalloc(PHP_TAG_BUF_SIZE+1); + tp = tbuf; + } else tp=NULL; - while (c) { /* This is not binary-safe. 
Don't see why it should be */ + while(i=PHP_TAG_BUF_SIZE ) { /* no buffer overflows */ + tp = tbuf; + } + } break; } c = *(++p); + i++; } *rp = '\0'; efree(buf); + if(allow) efree(tbuf); } /*