]> granicus.if.org Git - php/commitdiff
- Fixed bug in ucfirst() implementation. If the tmp_len = 0 then it will not
authorDerick Rethans <derick@php.net>
Fri, 17 Mar 2006 14:29:05 +0000 (14:29 +0000)
committerDerick Rethans <derick@php.net>
Fri, 17 Mar 2006 14:29:05 +0000 (14:29 +0000)
  write the uppercased character to the buffer, but only returns the length of
  the uppercased letter as per
  http://icu.sourceforge.net/apiref/icu4c/ustring_8h.html#a50.
- Updated is_string():
  If Unicode semantics is turned on, return "true" for Unicode strings only.
  If Unicode semantics is turned off, return "true" for native strings only.
- Added is_binary() function that returns "true" for native strings only.
- Added first implementation of upgraded strtr function. It works except if
  combining sequences or surrogates are used in the non-array method of calling
  this function.

ext/standard/string.c
ext/standard/tests/strings/strtr2.phpt [new file with mode: 0644]
ext/standard/tests/strings/ucfirst.phpt [new file with mode: 0644]
ext/standard/type.c

index 523615d2c8fa2eedfe9bd434411a0c0bcc26fcb6..39a2629f5c1c460bf823856a771b7420bec35c0c 100644 (file)
@@ -3263,8 +3263,8 @@ PHP_FUNCTION(chr)
    Makes an Unicode string's first character uppercase */
 static void php_u_ucfirst(zval *ustr, zval *return_value TSRMLS_DC)
 {
-       UChar tmp[3] = { 0,}; /* UChar32 will be converted to upto 2 UChar units ? */
-       int tmp_len = 0;
+       UChar tmp[3] = { 0, 0, 0 }; /* UChar32 will be converted to upto 2 UChar units ? */
+       int tmp_len = 2;
        int pos = 0;
        UErrorCode status = U_ZERO_ERROR;
 
@@ -3427,6 +3427,181 @@ PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trl
 }
 /* }}} */
 
+/* {{{ php_u_strtr
+ */
+PHPAPI UChar *php_u_strtr(UChar *str, int len, UChar *str_from, int str_from_len, UChar *str_to, int str_to_len, int trlen)
+{
+       int i, j;
+       int can_optimize = 1;
+       
+       if ((trlen < 1) || (len < 1)) {
+               return str;
+       }
+
+       /* First loop to see if we can use the optimized version */
+       for (i = 0; i < trlen; i++)     {
+               if (str_from[i] > 255 || str_to[i] > 255) {
+                       can_optimize = 0;
+                       break;
+               }
+       }
+       if (can_optimize) {
+               for (i = trlen; i < str_from_len; i++) {
+                       if (str_from[i] > 255) {
+                               can_optimize = 0;
+                               break;
+                       }
+               }
+       }
+       if (can_optimize) {
+               for (i = trlen; i < str_to_len; i++) {
+                       if (str_from[i] > 255) {
+                               can_optimize = 0;
+                               break;
+                       }
+               }
+       }
+
+       if (can_optimize) {
+               UChar xlat[256];
+
+               for (i = 0; i < 256; xlat[i] = i, i++);
+
+               for (i = 0; i < trlen; i++) {
+                       xlat[str_from[i]] = str_to[i];
+               }
+
+               for (i = 0; i < len; i++) {
+                       str[i] = xlat[str[i]];
+               }
+
+               return str;
+       } else {
+               /* UTODO: We're quite fucked... this is *extremely* slow, better
+                * algorithm wanted here! It also doesn't handle combining sequences, I
+                * asked the icu-support list for good algorithms.  */
+               for (i = 0; i < len; i++) {
+                       for (j = 0; j < trlen; j++) {
+                               if (str[i] == str_from[j]) {
+                                       str[i] = str_to[j];
+                               }
+                       }
+               }
+               return str;
+       }
+}
+/* }}} */
+
+/* {{{ php_u_strtr_array
+ */
+static void php_u_strtr_array(zval *return_value, UChar *str, int slen, HashTable *hash)
+{
+       zval **entry;
+       zstr   string_key;
+       uint   string_key_len;
+       zval **trans;
+       zval   ctmp;
+       ulong num_key;
+       int minlen = 128*1024;
+       int maxlen = 0, pos, len, found;
+       UChar *key;
+       HashPosition hpos;
+       smart_str result = {0};
+       HashTable tmp_hash;
+       
+       zend_hash_init(&tmp_hash, 0, NULL, NULL, 0);
+       zend_hash_internal_pointer_reset_ex(hash, &hpos);
+       while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) {
+               switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) {
+                       case HASH_KEY_IS_UNICODE:
+                               len = string_key_len-1;
+                               if (len < 1) {
+                                       zend_hash_destroy(&tmp_hash);
+                                       RETURN_FALSE;
+                               }
+                               zend_u_hash_add(&tmp_hash, IS_UNICODE, string_key, string_key_len, entry, sizeof(zval*), NULL);
+                               if (len > maxlen) {
+                                       maxlen = len;
+                               }
+                               if (len < minlen) {
+                                       minlen = len;
+                               }
+                               break; 
+                       
+                       case HASH_KEY_IS_LONG:
+                               Z_TYPE(ctmp) = IS_LONG;
+                               Z_LVAL(ctmp) = num_key;
+                       
+                               convert_to_unicode(&ctmp);
+                               len = Z_USTRLEN(ctmp);
+                               zend_u_hash_add(&tmp_hash, IS_UNICODE, Z_UNIVAL(ctmp), len+1, entry, sizeof(zval*), NULL);
+                               zval_dtor(&ctmp);
+
+                               if (len > maxlen) {
+                                       maxlen = len;
+                               }
+                               if (len < minlen) {
+                                       minlen = len;
+                               }
+                               break;
+               }
+               zend_hash_move_forward_ex(hash, &hpos);
+       }
+
+       key = eumalloc(maxlen+1);
+       pos = 0;
+
+       while (pos < slen) {
+               if ((pos + maxlen) > slen) {
+                       maxlen = slen - pos;
+               }
+
+               found = 0;
+               memcpy(key, str+pos, UBYTES(maxlen));
+
+               for (len = maxlen; len >= minlen; len--) {
+                       key[len] = 0;
+                       
+                       if (zend_u_hash_find(&tmp_hash, IS_UNICODE, ZSTR(key), len+1, (void**)&trans) == SUCCESS) {
+                               UChar *tval;
+                               int tlen;
+                               zval tmp;
+
+                               if (Z_TYPE_PP(trans) != IS_UNICODE) {
+                                       tmp = **trans;
+                                       zval_copy_ctor(&tmp);
+                                       convert_to_string(&tmp);
+                                       tval = Z_USTRVAL(tmp);
+                                       tlen = Z_USTRLEN(tmp);
+                               } else {
+                                       tval = Z_USTRVAL_PP(trans);
+                                       tlen = Z_USTRLEN_PP(trans);
+                               }
+
+                               smart_str_appendl(&result, tval, UBYTES(tlen));
+                               pos += len;
+                               found = 1;
+
+                               if (Z_TYPE_PP(trans) != IS_UNICODE) {
+                                       zval_dtor(&tmp);
+                               }
+                               break;
+                       } 
+               }
+
+               if (! found) {
+                       smart_str_append2c(&result, str[pos]);
+                       pos++;
+               }
+       }
+
+       efree(key);
+       zend_hash_destroy(&tmp_hash);
+       smart_str_0(&result);
+       RETVAL_UNICODEL((UChar *) result.c, result.len >> 1, 0);
+}
+/* }}} */
+
 /* {{{ php_strtr_array
  */
 static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *hash)
@@ -3552,27 +3727,52 @@ PHP_FUNCTION(strtr)
                RETURN_FALSE;
        }
 
-       convert_to_string_ex(str);
+       if (Z_TYPE_PP(str) != IS_UNICODE && Z_TYPE_PP(str) != IS_STRING) {
+               convert_to_text_ex(str);
+       }
 
        /* shortcut for empty string */
-       if (Z_STRLEN_PP(str) == 0) {
+       if (Z_TYPE_PP(str) == IS_UNICODE && !Z_USTRLEN_PP(str)) {
+               RETURN_EMPTY_UNICODE();
+       } else if (!Z_STRLEN_PP(str)) {
                RETURN_EMPTY_STRING();
        }
 
-       if (ac == 2) {
-               php_strtr_array(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), HASH_OF(*from));
+       if (Z_TYPE_PP(str) == IS_UNICODE) {
+               if (ac == 2) {
+                       php_u_strtr_array(return_value, Z_USTRVAL_PP(str), Z_USTRLEN_PP(str), HASH_OF(*from));
+                       Z_TYPE_P(return_value) = IS_UNICODE;
+               } else {
+                       convert_to_unicode_ex(from);
+                       convert_to_unicode_ex(to);
+
+                       ZVAL_UNICODEL(return_value, Z_USTRVAL_PP(str), Z_USTRLEN_PP(str), 1);
+                       
+                       php_u_strtr(Z_USTRVAL_P(return_value),
+                                         Z_USTRLEN_P(return_value),
+                                         Z_USTRVAL_PP(from),
+                                         Z_USTRLEN_PP(from),
+                                         Z_USTRVAL_PP(to),
+                                         Z_USTRLEN_PP(to),
+                                         MIN(Z_USTRLEN_PP(from), Z_USTRLEN_PP(to)));
+                       Z_TYPE_P(return_value) = IS_UNICODE;
+               }
        } else {
-               convert_to_string_ex(from);
-               convert_to_string_ex(to);
+               if (ac == 2) {
+                       php_strtr_array(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), HASH_OF(*from));
+               } else {
+                       convert_to_string_ex(from);
+                       convert_to_string_ex(to);
 
-               ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
-               
-               php_strtr(Z_STRVAL_P(return_value),
-                                 Z_STRLEN_P(return_value),
-                                 Z_STRVAL_PP(from),
-                                 Z_STRVAL_PP(to),
-                                 MIN(Z_STRLEN_PP(from), 
-                                 Z_STRLEN_PP(to)));
+                       ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
+                       
+                       php_strtr(Z_STRVAL_P(return_value),
+                                         Z_STRLEN_P(return_value),
+                                         Z_STRVAL_PP(from),
+                                         Z_STRVAL_PP(to),
+                                         MIN(Z_STRLEN_PP(from), 
+                                         Z_STRLEN_PP(to)));
+               }
        }
 }
 /* }}} */
diff --git a/ext/standard/tests/strings/strtr2.phpt b/ext/standard/tests/strings/strtr2.phpt
new file mode 100644 (file)
index 0000000..be3f032
--- /dev/null
@@ -0,0 +1,29 @@
+--TEST--
+strtr() function (with unicode chars and combining sequences)
+--FILE--
+<?php
+       declare(encoding="utf8");
+       $from = "aåаиу";
+       $to   = "12356";
+       $string = "Dе åkаt krаpt de krуllen van de trap af";
+       var_dump( strtr( $string, $from, $to ) );
+
+       $from = "aeoui";
+       $to   = "12356";
+       $string = "De akat krapt de krullen van de trap af";
+       var_dump( strtr( $string, $from, $to ) );
+
+       $ar = array( "a" => 1, "e" => "2", "o" => 3, "u" => 5, "i" => 6 );
+       $string = "De akat krapt de krullen van de trap af";
+       var_dump( strtr( $string, $ar ) );
+
+       // Test with combining chars
+       $from = "åb";
+       $to   = "1";
+       $string = "xyzabc";
+       var_dump( strtr( $string, $from, $to ) );
+?>
+--EXPECT--
+unicode(39) "Dе 2k3t kr3pt de kr6llen v1n de tr1p 1f"
+unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f"
+unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f"
diff --git a/ext/standard/tests/strings/ucfirst.phpt b/ext/standard/tests/strings/ucfirst.phpt
new file mode 100644 (file)
index 0000000..6bb1402
--- /dev/null
@@ -0,0 +1,12 @@
+--TEST--
+ucfirst()
+--FILE--
+<?php
+echo ucfirst("peren"), "\n";
+echo ucfirst("appelen"), "\n";
+echo ucfirst("ßen"), "\n";
+?>
+--EXPECT--
+Peren
+Appelen
+SSen
index 2f52100f45db6667b1f971d5f0498dcb85a75076..4688c99172f24a049d340dbc30de910d6ebdd05a 100644 (file)
@@ -285,11 +285,19 @@ PHP_FUNCTION(is_float)
 }
 /* }}} */
 
+/* {{{ proto bool is_binary(mixed var)
+   Returns true if variable is a native (binary) string */
+PHP_FUNCTION(is_binary)
+{
+       php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, IS_STRING);
+}
+/* }}} */
+
 /* {{{ proto bool is_string(mixed var)
-   Returns true if variable is a native string */
+   Returns true if variable is a string */
 PHP_FUNCTION(is_string)
 {
-       php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, IS_STRING);
+       php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, UG(unicode) ? IS_UNICODE : IS_STRING);
 }
 /* }}} */