/* {{{ php_escape_html_entities
*/
-PHPAPI char *php_escape_html_entities(char *orig, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
+PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC)
{
- unsigned char *old = (unsigned char *)orig;
int i, j, maxlen, len;
char *replaced;
enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
int is_basic = 0;
if (this_char == '&') {
- memcpy(replaced + len, "&", sizeof("&") - 1);
- len += sizeof("&") - 1;
+ if (double_encode) {
+encode_amp:
+ memcpy(replaced + len, "&", sizeof("&") - 1);
+ len += sizeof("&") - 1;
+ } else {
+ char *e = memchr(old + i, ';', oldlen - i);
+ char *s = (char*)old + i;
+
+ if (!e || (e - s) > 10) { /* minor optimization to avoid "entities" over 10 chars in length */
+ goto encode_amp;
+ } else {
+ if (*s == '#') { /* numeric entities */
+ s++;
+ while (s < e) {
+ if (!isdigit(*s++)) {
+ goto encode_amp;
+ }
+ }
+ } else { /* text entities */
+ while (s < e) {
+ if (!isalnum(*s++)) {
+ goto encode_amp;
+ }
+ }
+ }
+ replaced[len++] = '&';
+ }
+ }
is_basic = 1;
} else {
for (j = 0; basic_entities[j].charcode != 0; j++) {
}
/* }}} */
+PHPAPI char *php_escape_html_entities(char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
+{
+ return php_escape_html_entities_ex((unsigned char*)old, oldlen, newlen, all, quote_style, hint_charset, 1 TSRMLS_CC);
+}
+
/* {{{ php_html_entities
*/
static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
long quote_style = ENT_COMPAT;
zend_uchar type;
char *replaced;
+ zend_bool double_encode = 1;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls", &str, &str_len, &type, "e_style, &hint_charset, &hint_charset_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|ls!b", &str, &str_len, &type, "e_style, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) {
return;
}
hint_charset = "utf-8";
}
- replaced = php_escape_html_entities(str.s, str_len, &len, all, quote_style, hint_charset TSRMLS_CC);
+ replaced = php_escape_html_entities_ex((unsigned char*)str.s, str_len, &len, all, quote_style, hint_charset, double_encode TSRMLS_CC);
if (type == IS_UNICODE) {
RETVAL_U_STRINGL(UG(utf8_conv), replaced, len, ZSTR_AUTOFREE);
}
/* }}} */
-/* {{{ proto string htmlspecialchars(string string [, int quote_style][, string charset]) U
+/* {{{ proto string htmlspecialchars(string string [, int quote_style[, string charset[, bool double_encode]]])
Convert special characters to HTML entities */
PHP_FUNCTION(htmlspecialchars)
{
/* }}} */
-/* {{{ proto string htmlentities(string string [, int quote_style][, string charset]) U
+/* {{{ proto string htmlentities(string string [, int quote_style[, string charset[, bool double_encode]]])
Convert all applicable characters to HTML entities */
PHP_FUNCTION(htmlentities)
{