From a886a0cf930eeadcc9033355407e4a087ba0ebc9 Mon Sep 17 00:00:00 2001
From: Andrey Hristov
Date: Wed, 18 Jun 2014 21:53:34 +0300
Subject: [PATCH] Add support for gb18030/MySQL 5.7

---
 NEWS                          |  3 +++
 ext/mysqlnd/mysqlnd_charset.c | 50 ++++++++++++++++++++++++++++++++---
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index f496f90016..752c4ccd3f 100644
--- a/NEWS
+++ b/NEWS
@@ -31,6 +31,9 @@ PHP NEWS
   . Fixed bug #67413 (fileinfo: cdf_read_property_info insufficient boundary
     check). (Francisco Alonso, Jan Kaluza, Remi)
 
+- mysqlnd:
+  . Added support for gb18030 from MySQL 5.7. (Andrey)
+
 - Network:
   . Fixed bug #67432 (Fix potential segfault in dns_get_record()).
     (CVE-2014-4049). (Sara)
diff --git a/ext/mysqlnd/mysqlnd_charset.c b/ext/mysqlnd/mysqlnd_charset.c
index c2f2b8890e..dfa90db255 100644
--- a/ext/mysqlnd/mysqlnd_charset.c
+++ b/ext/mysqlnd/mysqlnd_charset.c
@@ -418,20 +418,60 @@ static uint mysqlnd_mbcharlen_utf16(unsigned int utf16)
 
 
 /* {{{ utf32 functions */
-static uint
-check_mb_utf32(const char *start __attribute((unused)), const char *end __attribute((unused)))
+static unsigned int check_mb_utf32(const char *start __attribute((unused)), const char *end __attribute((unused)))
 {
 	return 4;
 }
 
 
-static uint
-mysqlnd_mbcharlen_utf32(unsigned int utf32 __attribute((unused)))
+static unsigned int mysqlnd_mbcharlen_utf32(unsigned int utf32 __attribute((unused)))
 {
 	return 4;
 }
 /* }}} */
 
+
+/* {{{ gb18030 functions */
+#define is_gb18030_odd(c) (0x81 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0xFE)
+#define is_gb18030_even_2(c) ((0x40 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0x7E) || (0x80 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0xFE))
+#define is_gb18030_even_4(c) (0x30 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0x39)
+
+
+static unsigned int mysqlnd_mbcharlen_gb18030(unsigned int c)
+{
+	if (c <= 0xFF) {
+		return !is_gb18030_odd(c);
+	}
+	if (c > 0xFFFF || !is_gb18030_odd((c >> 8) & 0xFF)) {
+		return 0;
+	}
+	if (is_gb18030_even_2((c & 0xFF))) {
+		return 2;
+	}
+	if (is_gb18030_even_4((c & 0xFF))) {
+		return 4;
+	}
+
+	return 0;
+}
+
+
+static unsigned int my_ismbchar_gb18030(const char * start, const char * end)
+{
+	if (end - start <= 1 || !is_gb18030_odd(start[0])) {
+		return 0;
+	}
+
+	if (is_gb18030_even_2(start[1])) {
+		return 2;
+	} else if (end - start > 3 && is_gb18030_even_4(start[1]) && is_gb18030_odd(start[2]) && is_gb18030_even_4(start[3])) {
+		return 4;
+	}
+
+	return 0;
+}
+/* }}} */
+
 /*
   The server compiles sometimes the full utf-8 (the mb4) as utf8m4, and the old as utf8,
   for BC reasons. Sometimes, utf8mb4 is just utf8 but the old charsets are utf8mb3.
@@ -643,6 +683,8 @@ const MYSQLND_CHARSET mysqlnd_charsets[] =
 	{ 245, UTF8_MB4, UTF8_MB4"_croatian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
 	{ 246, UTF8_MB4, UTF8_MB4"_unicode_520_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
 	{ 247, UTF8_MB4, UTF8_MB4"_vietnamese_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
+	{ 248, "gb18030", "gb18030_chinese_ci", 1, 4, "", mysqlnd_mbcharlen_gb18030, my_ismbchar_gb18030},
+	{ 249, "gb18030", "gb18030_bin", 1, 4, "", mysqlnd_mbcharlen_gb18030, my_ismbchar_gb18030},
 
 	{ 254, UTF8_MB3, UTF8_MB3"_general_cs", 1, 3, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
 	{ 0, NULL, NULL, 0, 0, NULL, NULL, NULL}
-- 
2.40.0