From: Antony Dovgal Date: Wed, 3 Oct 2007 18:38:35 +0000 (+0000) Subject: remove \u, \U and \C support in single quotes, as they are meant to contain binary... X-Git-Tag: RELEASE_2_0_0a1~1670 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c53602571de201341916a267e13080c1df5e5c2d;p=php remove \u, \U and \C support in single quotes, as they are meant to contain binary data only and no escape sequences except \' fixes bug #42746 --- diff --git a/README.UNICODE b/README.UNICODE index d2cce26426..14d415f7b4 100644 --- a/README.UNICODE +++ b/README.UNICODE @@ -460,11 +460,6 @@ PHP interprets the contents of strings as follows: - a new escape sequence allows specifying a character by its full Unicode name, e.g. \C{THAI CHARACTER PHO SAMPHAO} => U+0E20 -The single-quoted string is more restrictive than the other two types. So far -the only escape sequence allowed inside of it was \', which specifies a literal -single quote. However, single quoted strings now support the new Unicode -character escape sequences as well. - PHP allows variable interpolation inside the double-quoted and heredoc strings. However, the parser separates the string into literal and variable chunks during compilation, e.g. "abc $var def" -> "abc" . $var . "def". This means that PHP diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index fa1687c338..90e1133389 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -1187,64 +1187,6 @@ static int zend_scan_unicode_single_string(zval *zendlval TSRMLS_DC) *t++ = *s; Z_USTRLEN_P(zendlval)--; break; - case 0x43: /*'C'*/ - { - UChar *p = s+1; - if (p < end && zend_parse_charname_sequence(&p, end, &codepoint TSRMLS_CC)) { - Z_USTRLEN_P(zendlval) -= p - s + 1; - s = p; - if (U_IS_BMP(codepoint)) { - *t++ = (UChar) codepoint; - } else { - *t++ = (UChar) U16_LEAD(codepoint); - *t++ = (UChar) U16_TRAIL(codepoint); - Z_USTRLEN_P(zendlval)++; - } - } else { - zend_error(E_COMPILE_WARNING, "Invalid \\C{..} sequence"); - efree(Z_USTRVAL_P(zendlval)); - return 0; - } - break; - } - case 0x75 /*'u'*/: - { - codepoint = 0; - if (zend_udigits_to_codepoint(s+1, end, &codepoint, 4)) { - *t++ = (UChar) codepoint; - s += 4; - Z_USTRLEN_P(zendlval) -= 5; - } else { - zend_error(E_COMPILE_WARNING,"\\u escape sequence requires exactly 4 hexadecimal digits"); - efree(Z_USTRVAL_P(zendlval)); - return 0; - } - break; - } - case 0x55 /*'U'*/: - { - codepoint = 0; - if (zend_udigits_to_codepoint(s+1, end, &codepoint, 6)) { - if (U_IS_BMP(codepoint)) { - *t++ = (UChar) codepoint; - Z_USTRLEN_P(zendlval) -= 7; - } else if (codepoint <= 0x10FFFF) { - *t++ = (UChar) U16_LEAD(codepoint); - *t++ = (UChar) U16_TRAIL(codepoint); - Z_USTRLEN_P(zendlval) -= 6; - } else { - zend_error(E_COMPILE_WARNING,"\\U%06x is above the highest valid codepoint 0x10FFFF", codepoint); - efree(Z_USTRVAL_P(zendlval)); - return 0; - } - s += 6; - } else { - zend_error(E_COMPILE_WARNING,"\\U escape sequence requires exactly 6 hexadecimal digits"); - efree(Z_USTRVAL_P(zendlval)); - return 0; - } - break; - } default: *t++ = 0x5C; /*'\\'*/ *t++ = *s;