granicus.if.org Git - php/commitdiff
Convert nl2br() to support IS_UNICODE.
author Andrei Zmievski <andrei@php.net>
Tue, 8 Aug 2006 21:03:11 +0000 (21:03 +0000)
committer Andrei Zmievski <andrei@php.net>
Tue, 8 Aug 2006 21:03:11 +0000 (21:03 +0000)
# Hmm, it's a bit ugly..

ext/standard/string.c
unicode-progress.txt

index f0ada8c8a1c12a8d3dcdec940866a4f1c0a73f1e..2a7794fd89371709f20f946829c03ee20189ec7c 100644 (file)
@@ -5363,78 +5363,127 @@ PHP_FUNCTION(hebrevc)
 /* }}} */
 
 
-/* {{{ proto string nl2br(string str)
+/* {{{ proto string nl2br(string str) U
    Converts newlines to HTML line breaks */
 PHP_FUNCTION(nl2br)
 {
        /* in brief this inserts <br /> before matched regexp \n\r?|\r\n? */
-       zval    **zstr;
-       char    *tmp, *str;
-       int     new_length;
-       char    *end, *target;
-       int     repl_cnt = 0;
+       zstr    str;
+       int     str_len;
+       zend_uchar str_type;
+       zstr    p, end, tmp, target;
+       int             new_length;
+       int             repl_cnt = 0;
 
-       if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &zstr) == FAILURE) {
-               WRONG_PARAM_COUNT;
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t", &str, &str_len, &str_type) == FAILURE) {
+               return;
        }
 
-       convert_to_string_ex(zstr);
-
-       str = Z_STRVAL_PP(zstr);
-       end = str + Z_STRLEN_PP(zstr);
+       p = str;
 
        /* it is really faster to scan twice and allocate mem once insted scanning once
           and constantly reallocing */
-       while (str < end) {
-               if (*str == '\r') {
-                       if (*(str+1) == '\n') {
-                               str++;
-                       }
-                       repl_cnt++;
-               } else if (*str == '\n') {
-                       if (*(str+1) == '\r') {
-                               str++;
+       if (str_type == IS_UNICODE) {
+               end.u = p.u + str_len;
+               while (p.u < end.u) {
+                       if (*p.u == (UChar) 0x0d /*'\r'*/) {
+                               if (*(p.u+1) == (UChar) 0x0a /*'\n'*/) {
+                                       p.u++;
+                               }
+                               repl_cnt++;
+                       } else if (*p.u == (UChar) 0x0a /*'\n'*/) {
+                               if (*(p.u+1) == (UChar) 0x0d /*'\r'*/) {
+                                       p.u++;
+                               }
+                               repl_cnt++;
                        }
-                       repl_cnt++;
+
+                       p.u++;
                }
+       } else {
+               end.s = p.s + str_len;
+               while (p.s < end.s) {
+                       if (*p.s == '\r') {
+                               if (*(p.s+1) == '\n') {
+                                       p.s++;
+                               }
+                               repl_cnt++;
+                       } else if (*p.s == '\n') {
+                               if (*(p.s+1) == '\r') {
+                                       p.s++;
+                               }
+                               repl_cnt++;
+                       }
 
-               str++;
+                       p.s++;
+               }
        }
 
        if (repl_cnt == 0) {
-               RETURN_STRINGL(Z_STRVAL_PP(zstr), Z_STRLEN_PP(zstr), 1);
+               RETURN_ZSTRL(str, str_len, str_type, 1);
        }
 
-       new_length = Z_STRLEN_PP(zstr) + repl_cnt * (sizeof("<br />") - 1);
-       tmp = target = emalloc(new_length + 1);
+       new_length = str_len + repl_cnt * (sizeof("<br />") - 1);
+
+       if (str_type == IS_UNICODE) {
+               tmp.u = target.u = eumalloc(new_length + 1);
+               p = str;
+
+               while (p.u < end.u) {
+                       switch (*p.u) {
+                               case 0x0d /*'\r'*/:
+                               case 0x0a /*'\n'*/:
+                                       *target.u++ = (UChar) 0x3c /*'<'*/;
+                                       *target.u++ = (UChar) 0x62 /*'b'*/;
+                                       *target.u++ = (UChar) 0x72 /*'r'*/;
+                                       *target.u++ = (UChar) 0x20 /*' '*/;
+                                       *target.u++ = (UChar) 0x2f /*'/'*/;
+                                       *target.u++ = (UChar) 0x3e /*'>'*/;
+
+                                       if ((*p.u == (UChar) 0x0d /*'\r'*/ && *(p.u+1) == (UChar) 0x0a /*'\n'*/)
+                                               || (*p.u == (UChar) 0x0a /*'\n'*/ && *(p.u+1) == (UChar) 0x0d /*'\r'*/)) {
+                                               *target.u++ = *p.u++;
+                                       }
+                                       /* lack of a break; is intentional */
+                               default:
+                                       *target.u++ = *p.u;
+                       }
 
-       str = Z_STRVAL_PP(zstr);
+                       p.u++;
+               }
 
-       while (str < end) {
-               switch (*str) {
-                       case '\r':
-                       case '\n':
-                               *target++ = '<';
-                               *target++ = 'b';
-                               *target++ = 'r';
-                               *target++ = ' ';
-                               *target++ = '/';
-                               *target++ = '>';
+               *target.u = 0;
+       } else {
+               tmp.s = target.s = emalloc(new_length + 1);
+               p = str;
+
+               while (p.s < end.s) {
+                       switch (*p.s) {
+                               case '\r':
+                               case '\n':
+                                       *target.s++ = '<';
+                                       *target.s++ = 'b';
+                                       *target.s++ = 'r';
+                                       *target.s++ = ' ';
+                                       *target.s++ = '/';
+                                       *target.s++ = '>';
+
+                                       if ((*p.s == '\r' && *(p.s+1) == '\n') || (*p.s == '\n' && *(p.s+1) == '\r')) {
+                                               *target.s++ = *p.s++;
+                                       }
+                                       /* lack of a break; is intentional */
+                               default:
+                                       *target.s++ = *p.s;
+                       }
 
-                               if ((*str == '\r' && *(str+1) == '\n') || (*str == '\n' && *(str+1) == '\r')) {
-                                       *target++ = *str++;
-                               }
-                               /* lack of a break; is intentional */
-                       default:
-                               *target++ = *str;
+                       p.s++;
                }
 
-               str++;
+               *target.s = '\0';
        }
 
-       *target = '\0';
 
-       RETURN_STRINGL(tmp, new_length, 0);
+       RETURN_ZSTRL(tmp, new_length, str_type, 0);
 }
 /* }}} */
 
index 9ee93b9e66fc8f4427c59bc5965e2295827157fb..8ce7457b31cbf5e8c316b81ca16b8f8e822eef2e 100644 (file)
@@ -19,9 +19,6 @@ ext/standard
     addcslashes()
         Params API. Figure out how to escape characters > 255.
 
-    basename()
-        Create php_u_basename() without mbstring stuff
-
     chunk_split()
         Params API, Unicode upgrades. Split on codepoint level.
 
@@ -42,9 +39,6 @@ ext/standard
     nl_langinfo()
         Params API, otherwise leave alone
 
-    nl2br()
-        Params API, IS_UNICODE support
-
     parse_str()
         Params API. How do we deal with encoding of the data?
 
@@ -114,9 +108,6 @@ ext/standard
         Needs update so that it doesn't try to find half of a surrogate
         pair.
 
-    strrev()
-        Params API
-
     strtr()
         Check on Derick's progress.
 
@@ -196,12 +187,14 @@ ext/standard
   string.c
   --------
     addslashes()
+    basename()
     bin2hex()
     chr()
     dirname()
     explode()
     implode()
     levenshtein()
+    nl2br()
     ord()
     pathinfo()
     range()