From: Rui Hirokawa Date: Fri, 23 Dec 2005 15:18:52 +0000 (+0000) Subject: MFH: fixed #29955 mb_strtoupper() / lower() broken with Turkish encoding.. X-Git-Tag: php-5.1.2RC2~98 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4628a41c002f7363d49589ec8f436839e010b580;p=php MFH: fixed #29955 mb_strtoupper() / lower() broken with Turkish encoding.. --- diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index 760891917b..1904ce06a5 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -237,6 +237,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/nls/nls_uni.c libmbfl/nls/nls_zh.c libmbfl/nls/nls_hy.c + libmbfl/nls/nls_tr.c ]) PHP_MBSTRING_ADD_CFLAG([-DHAVE_CONFIG_H]) else diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index b047cc7487..4a3a7f0a1c 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -40,7 +40,7 @@ if (PHP_MBSTRING == "yes") { ADD_SOURCES("ext/mbstring/libmbfl/nls", "nls_de.c nls_en.c nls_ja.c \ nls_kr.c nls_neutral.c nls_ru.c nls_uni.c nls_zh.c nls_hy.c \ - ", "mbstring"); + nls_tr.c", "mbstring"); AC_DEFINE('HAVE_MBSTRING', 1, 'Have mbstring support'); AC_DEFINE('HAVE_MBSTR_CN', 1, 'CN'); diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_language.c b/ext/mbstring/libmbfl/mbfl/mbfl_language.c index c5dab65502..aaeebbc8e8 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_language.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_language.c @@ -59,6 +59,7 @@ #include "nls/nls_ru.h" #include "nls/nls_en.h" #include "nls/nls_hy.h" +#include "nls/nls_tr.h" #include "nls/nls_neutral.h" #ifndef HAVE_STRCASECMP @@ -77,6 +78,7 @@ static const mbfl_language *mbfl_language_ptr_table[] = { &mbfl_language_german, &mbfl_language_russian, &mbfl_language_armenian, + &mbfl_language_turkish, &mbfl_language_neutral, NULL }; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_language.h b/ext/mbstring/libmbfl/mbfl/mbfl_language.h index f6b9ec2080..caf1d80940 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_language.h +++ 
b/ext/mbstring/libmbfl/mbfl/mbfl_language.h @@ -58,6 +58,7 @@ enum mbfl_no_language { mbfl_no_language_traditional_chinese, /* zh-tw */ mbfl_no_language_russian, /* ru */ mbfl_no_language_armenian, /* hy */ + mbfl_no_language_turkish, /* tr */ mbfl_no_language_max }; diff --git a/ext/mbstring/libmbfl/nls/nls_tr.c b/ext/mbstring/libmbfl/nls/nls_tr.c new file mode 100644 index 0000000000..c7044ff293 --- /dev/null +++ b/ext/mbstring/libmbfl/nls/nls_tr.c @@ -0,0 +1,21 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" +#include "nls_tr.h" + +const mbfl_language mbfl_language_turkish = { + mbfl_no_language_turkish, + "Turkish", + "tr", + NULL, + mbfl_no_encoding_8859_9, + mbfl_no_encoding_qprint, + mbfl_no_encoding_8bit +}; + diff --git a/ext/mbstring/libmbfl/nls/nls_tr.h b/ext/mbstring/libmbfl/nls/nls_tr.h new file mode 100644 index 0000000000..5c17f7daa5 --- /dev/null +++ b/ext/mbstring/libmbfl/nls/nls_tr.h @@ -0,0 +1,8 @@ +#ifndef MBFL_NLS_TR_H +#define MBFL_NLS_TR_H + +#include "mbfilter.h" + +extern const mbfl_language mbfl_language_turkish; + +#endif /* MBFL_NLS_TR_H */ diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c index 99eed1d8dc..15a54375ec 100644 --- a/ext/mbstring/php_unicode.c +++ b/ext/mbstring/php_unicode.c @@ -44,6 +44,8 @@ #include "php_unicode.h" #include "unicode_data.h" +ZEND_EXTERN_MODULE_GLOBALS(mbstring) + /* * A simple array of 32-bit masks for lookup. 
*/ @@ -142,7 +144,23 @@ static unsigned long case_lookup(unsigned long code, long l, long r, int field) return code; } -MBSTRING_API unsigned long php_unicode_toupper(unsigned long code) +MBSTRING_API unsigned long php_turkish_toupper(unsigned long code, long l, long r, int field) +{ + if (code == 0x0069L) { + return 0x0130L; + } + return case_lookup(code, l, r, field); +} + +MBSTRING_API unsigned long php_turkish_tolower(unsigned long code, long l, long r, int field) +{ + if (code == 0x0049L) { + return 0x0131L; + } + return case_lookup(code, l, r, field); +} + +MBSTRING_API unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC) { int field; long l, r; @@ -157,6 +175,12 @@ MBSTRING_API unsigned long php_unicode_toupper(unsigned long code) field = 2; l = _uccase_len[0]; r = (l + _uccase_len[1]) - 3; + + if (MBSTRG(current_language) == mbfl_no_language_turkish && + enc == mbfl_no_encoding_8859_9) { + return php_turkish_toupper(code, l, r, field); + } + } else { /* * The character is title case. @@ -168,7 +192,7 @@ MBSTRING_API unsigned long php_unicode_toupper(unsigned long code) return case_lookup(code, l, r, field); } -MBSTRING_API unsigned long php_unicode_tolower(unsigned long code) +MBSTRING_API unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC) { int field; long l, r; @@ -183,6 +207,12 @@ MBSTRING_API unsigned long php_unicode_tolower(unsigned long code) field = 1; l = 0; r = _uccase_len[0] - 3; + + if (MBSTRG(current_language) == mbfl_no_language_turkish && + enc == mbfl_no_encoding_8859_9) { + return php_turkish_tolower(code, l, r, field); + } + } else { /* * The character is title case. 
@@ -194,7 +224,7 @@ MBSTRING_API unsigned long php_unicode_tolower(unsigned long code) return case_lookup(code, l, r, field); } -MBSTRING_API unsigned long php_unicode_totitle(unsigned long code) +MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC) { int field; long l, r; @@ -246,25 +276,26 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, char *srcstr, size_t size_t unicode_len; unsigned char *unicode_ptr; size_t i; + enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding); unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len TSRMLS_CC); if (unicode == NULL) return NULL; - unicode_ptr = unicode; + unicode_ptr = (unsigned char *)unicode; switch(case_mode) { case PHP_UNICODE_CASE_UPPER: for (i = 0; i < unicode_len; i+=4) { UINT32_TO_BE_ARY(&unicode_ptr[i], - php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]))); + php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC)); } break; case PHP_UNICODE_CASE_LOWER: for (i = 0; i < unicode_len; i+=4) { UINT32_TO_BE_ARY(&unicode_ptr[i], - php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]))); + php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC)); } break; @@ -278,7 +309,7 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, char *srcstr, size_t if (mode) { if (res) { UINT32_TO_BE_ARY(&unicode_ptr[i], - php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]))); + php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC)); } else { mode = 0; } @@ -286,7 +317,7 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, char *srcstr, size_t if (res) { mode = 1; UINT32_TO_BE_ARY(&unicode_ptr[i], - php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]))); + php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC)); } } }