From 7c20dce5487357fb36f1caba32af516b465647dc Mon Sep 17 00:00:00 2001 From: Rui Hirokawa Date: Fri, 23 Dec 2005 13:50:29 +0000 Subject: [PATCH] fixed #29955 mb_strtoupper() / lower() broken with Turkish encoding.. --- ext/mbstring/config.m4 | 1 + ext/mbstring/php_unicode.c | 48 +++++++++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index 12970e30af..d2c7383e91 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -225,6 +225,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/nls/nls_uni.c libmbfl/nls/nls_zh.c libmbfl/nls/nls_hy.c + libmbfl/nls/nls_tr.c ]) PHP_MBSTRING_ADD_CFLAG([-DHAVE_CONFIG_H]) else diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c index 99eed1d8dc..f59322fb90 100644 --- a/ext/mbstring/php_unicode.c +++ b/ext/mbstring/php_unicode.c @@ -44,6 +44,8 @@ #include "php_unicode.h" #include "unicode_data.h" +ZEND_EXTERN_MODULE_GLOBALS(mbstring) + /* * A simple array of 32-bit masks for lookup. */ @@ -96,6 +98,7 @@ static int prop_lookup(unsigned long code, unsigned long n) } + MBSTRING_API int php_unicode_is_prop(unsigned long code, unsigned long mask1, unsigned long mask2) { @@ -142,7 +145,23 @@ static unsigned long case_lookup(unsigned long code, long l, long r, int field) return code; } -MBSTRING_API unsigned long php_unicode_toupper(unsigned long code) +MBSTRING_API unsigned long php_turkish_toupper(unsigned long code, long l, long r, int field) +{ + if (code == 0x0069L) { + return 0x0130L; + } + return case_lookup(code, l, r, field); +} + +MBSTRING_API unsigned long php_turkish_tolower(unsigned long code, long l, long r, int field) +{ + if (code == 0x0049L) { + return 0x0131L; + } + return case_lookup(code, l, r, field); +} + +MBSTRING_API unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC) { int field; long l, r; @@ -157,6 +176,12 @@ MBSTRING_API unsigned long php_unicode_toupper(unsigned long code) field = 2; l = _uccase_len[0]; r = (l + _uccase_len[1]) - 3; + + if (MBSTRG(current_language) == mbfl_no_language_turkish && + enc == mbfl_no_encoding_8859_9) { + return php_turkish_toupper(code, l, r, field); + } + } else { /* * The character is title case. @@ -168,7 +193,7 @@ MBSTRING_API unsigned long php_unicode_toupper(unsigned long code) return case_lookup(code, l, r, field); } -MBSTRING_API unsigned long php_unicode_tolower(unsigned long code) +MBSTRING_API unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC) { int field; long l, r; @@ -183,6 +208,12 @@ MBSTRING_API unsigned long php_unicode_tolower(unsigned long code) field = 1; l = 0; r = _uccase_len[0] - 3; + + if (MBSTRG(current_language) == mbfl_no_language_turkish && + enc == mbfl_no_encoding_8859_9) { + return php_turkish_tolower(code, l, r, field); + } + } else { /* * The character is title case. @@ -194,7 +225,7 @@ MBSTRING_API unsigned long php_unicode_tolower(unsigned long code) return case_lookup(code, l, r, field); } -MBSTRING_API unsigned long php_unicode_totitle(unsigned long code) +MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC) { int field; long l, r; @@ -246,25 +277,26 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, char *srcstr, size_t size_t unicode_len; unsigned char *unicode_ptr; size_t i; + enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding); unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len TSRMLS_CC); if (unicode == NULL) return NULL; - unicode_ptr = unicode; + unicode_ptr = (unsigned char *)unicode; switch(case_mode) { case PHP_UNICODE_CASE_UPPER: for (i = 0; i < unicode_len; i+=4) { UINT32_TO_BE_ARY(&unicode_ptr[i], - php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]))); + php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC)); } break; case PHP_UNICODE_CASE_LOWER: for (i = 0; i < unicode_len; i+=4) { UINT32_TO_BE_ARY(&unicode_ptr[i], - php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]))); + php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC)); } break; @@ -278,7 +310,7 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, char *srcstr, size_t if (mode) { if (res) { UINT32_TO_BE_ARY(&unicode_ptr[i], - php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]))); + php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC)); } else { mode = 0; } @@ -286,7 +318,7 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, char *srcstr, size_t if (res) { mode = 1; UINT32_TO_BE_ARY(&unicode_ptr[i], - php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]))); + php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC)); } } } -- 2.50.1