granicus.if.org Git - php/commitdiff
Improved table search
author: Dmitry Stogov <dmitry@zend.com>
Wed, 11 Mar 2015 12:02:47 +0000 (15:02 +0300)
committer: Dmitry Stogov <dmitry@zend.com>
Wed, 11 Mar 2015 12:02:47 +0000 (15:02 +0300)
ext/standard/html.c
ext/standard/html_tables.h
ext/standard/html_tables/html_table_gen.php

index c8e36b8d45cea9c31e829c91ae616ecff02e69db..cce1b73be806c7ebfa25bed8fa4c9e1544342f6d 100644 (file)
@@ -451,8 +451,9 @@ det_charset:
                int found = 0;
 
                /* now walk the charset map and look for the codeset */
-               for (i = 0; charset_map[i].codeset; i++) {
-                       if (len == strlen(charset_map[i].codeset) && strncasecmp(charset_hint, charset_map[i].codeset, len) == 0) {
+               for (i = 0; i < sizeof(charset_map)/sizeof(charset_map[0]); i++) {
+                       if (len == charset_map[i].codeset_len &&
+                           zend_binary_strcasecmp(charset_hint, len, charset_map[i].codeset, len) == 0) {
                                charset = charset_map[i].charset;
                                found = 1;
                                break;
index 151cc21b6fde186ad22b340af14d6e63683965d0..378a801f7dcc223bd387959caedd1fb1fe49cd05 100644 (file)
@@ -39,42 +39,42 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
 
 static const struct {
        const char *codeset;
+       uint32_t codeset_len;
        enum entity_charset charset;
 } charset_map[] = {
-       { "ISO-8859-1",         cs_8859_1 },
-       { "ISO8859-1",          cs_8859_1 },
-       { "ISO-8859-15",        cs_8859_15 },
-       { "ISO8859-15",         cs_8859_15 },
-       { "utf-8",                      cs_utf_8 },
-       { "cp1252",             cs_cp1252 },
-       { "Windows-1252",       cs_cp1252 },
-       { "1252",                       cs_cp1252 },
-       { "BIG5",                       cs_big5 },
-       { "950",                        cs_big5 },
-       { "GB2312",                     cs_gb2312 },
-       { "936",                        cs_gb2312 },
-       { "BIG5-HKSCS",         cs_big5hkscs },
-       { "Shift_JIS",          cs_sjis },
-       { "SJIS",                       cs_sjis },
-       { "932",                        cs_sjis },
-       { "SJIS-win",           cs_sjis },
-       { "CP932",                      cs_sjis },
-       { "EUCJP",                      cs_eucjp },
-       { "EUC-JP",                     cs_eucjp },
-       { "eucJP-win",          cs_eucjp },
-       { "KOI8-R",                     cs_koi8r },
-       { "koi8-ru",            cs_koi8r },
-       { "koi8r",                      cs_koi8r },
-       { "cp1251",                     cs_cp1251 },
-       { "Windows-1251",       cs_cp1251 },
-       { "win-1251",           cs_cp1251 },
-       { "iso8859-5",          cs_8859_5 },
-       { "iso-8859-5",         cs_8859_5 },
-       { "cp866",                      cs_cp866 },
-       { "866",                        cs_cp866 },
-       { "ibm866",                     cs_cp866 },
-       { "MacRoman",           cs_macroman },
-       { NULL }
+       { "ISO-8859-1",         sizeof("ISO-8859-1")-1,         cs_8859_1 },
+       { "ISO8859-1",          sizeof("ISO8859-1")-1,          cs_8859_1 },
+       { "ISO-8859-15",        sizeof("ISO-8859-15")-1,        cs_8859_15 },
+       { "ISO8859-15",         sizeof("ISO8859-15")-1,         cs_8859_15 },
+       { "utf-8",                      sizeof("utf-8")-1,                      cs_utf_8 },
+       { "cp1252",             sizeof("cp1252")-1,             cs_cp1252 },
+       { "Windows-1252",       sizeof("Windows-1252")-1,       cs_cp1252 },
+       { "1252",                       sizeof("1252")-1,                       cs_cp1252 },
+       { "BIG5",                       sizeof("BIG5")-1,                       cs_big5 },
+       { "950",                        sizeof("950")-1,                        cs_big5 },
+       { "GB2312",                     sizeof("GB2312")-1,                     cs_gb2312 },
+       { "936",                        sizeof("936")-1,                        cs_gb2312 },
+       { "BIG5-HKSCS",         sizeof("BIG5-HKSCS")-1,         cs_big5hkscs },
+       { "Shift_JIS",          sizeof("Shift_JIS")-1,          cs_sjis },
+       { "SJIS",                       sizeof("SJIS")-1,                       cs_sjis },
+       { "932",                        sizeof("932")-1,                        cs_sjis },
+       { "SJIS-win",           sizeof("SJIS-win")-1,           cs_sjis },
+       { "CP932",                      sizeof("CP932")-1,                      cs_sjis },
+       { "EUCJP",                      sizeof("EUCJP")-1,                      cs_eucjp },
+       { "EUC-JP",                     sizeof("EUC-JP")-1,                     cs_eucjp },
+       { "eucJP-win",          sizeof("eucJP-win")-1,          cs_eucjp },
+       { "KOI8-R",                     sizeof("KOI8-R")-1,                     cs_koi8r },
+       { "koi8-ru",            sizeof("koi8-ru")-1,            cs_koi8r },
+       { "koi8r",                      sizeof("koi8r")-1,                      cs_koi8r },
+       { "cp1251",                     sizeof("cp1251")-1,                     cs_cp1251 },
+       { "Windows-1251",       sizeof("Windows-1251")-1,       cs_cp1251 },
+       { "win-1251",           sizeof("win-1251")-1,           cs_cp1251 },
+       { "iso8859-5",          sizeof("iso8859-5")-1,          cs_8859_5 },
+       { "iso-8859-5",         sizeof("iso-8859-5")-1,         cs_8859_5 },
+       { "cp866",                      sizeof("cp866")-1,                      cs_cp866 },
+       { "866",                        sizeof("866")-1,                        cs_cp866 },
+       { "ibm866",                     sizeof("ibm866")-1,                     cs_cp866 },
+       { "MacRoman",           sizeof("MacRoman")-1,           cs_macroman }
 };
 
 /* longest entity name length excluding & and ; */
index 45ae5f832b45cf3eb4a44293d787e3a2a19fc774..d047b9df13419dbb540472e2d40cd7b8e30c9396 100644 (file)
@@ -62,42 +62,42 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
 
 static const struct {
        const char *codeset;
+       uint32_t codeset_len;
        enum entity_charset charset;
 } charset_map[] = {
-       { "ISO-8859-1",         cs_8859_1 },
-       { "ISO8859-1",          cs_8859_1 },
-       { "ISO-8859-15",        cs_8859_15 },
-       { "ISO8859-15",         cs_8859_15 },
-       { "utf-8",                      cs_utf_8 },
-       { "cp1252",             cs_cp1252 },
-       { "Windows-1252",       cs_cp1252 },
-       { "1252",                       cs_cp1252 }, 
-       { "BIG5",                       cs_big5 },
-       { "950",                        cs_big5 },
-       { "GB2312",                     cs_gb2312 },
-       { "936",                        cs_gb2312 },
-       { "BIG5-HKSCS",         cs_big5hkscs },
-       { "Shift_JIS",          cs_sjis },
-       { "SJIS",                       cs_sjis },
-       { "932",                        cs_sjis },
-       { "SJIS-win",           cs_sjis },
-       { "CP932",                      cs_sjis },
-       { "EUCJP",                      cs_eucjp },
-       { "EUC-JP",                     cs_eucjp },
-       { "eucJP-win",          cs_eucjp },
-       { "KOI8-R",                     cs_koi8r },
-       { "koi8-ru",            cs_koi8r },
-       { "koi8r",                      cs_koi8r },
-       { "cp1251",                     cs_cp1251 },
-       { "Windows-1251",       cs_cp1251 },
-       { "win-1251",           cs_cp1251 },
-       { "iso8859-5",          cs_8859_5 },
-       { "iso-8859-5",         cs_8859_5 },
-       { "cp866",                      cs_cp866 },
-       { "866",                        cs_cp866 },    
-       { "ibm866",                     cs_cp866 },
-       { "MacRoman",           cs_macroman },
-       { NULL }
+       { "ISO-8859-1",         sizeof("ISO-8859-1")-1,         cs_8859_1 },
+       { "ISO8859-1",          sizeof("ISO8859-1")-1,          cs_8859_1 },
+       { "ISO-8859-15",        sizeof("ISO-8859-15")-1,        cs_8859_15 },
+       { "ISO8859-15",         sizeof("ISO8859-15")-1,         cs_8859_15 },
+       { "utf-8",                      sizeof("utf-8")-1,                      cs_utf_8 },
+       { "cp1252",             sizeof("cp1252")-1,             cs_cp1252 },
+       { "Windows-1252",       sizeof("Windows-1252")-1,       cs_cp1252 },
+       { "1252",                       sizeof("1252")-1,                       cs_cp1252 },
+       { "BIG5",                       sizeof("BIG5")-1,                       cs_big5 },
+       { "950",                        sizeof("950")-1,                        cs_big5 },
+       { "GB2312",                     sizeof("GB2312")-1,                     cs_gb2312 },
+       { "936",                        sizeof("936")-1,                        cs_gb2312 },
+       { "BIG5-HKSCS",         sizeof("BIG5-HKSCS")-1,         cs_big5hkscs },
+       { "Shift_JIS",          sizeof("Shift_JIS")-1,          cs_sjis },
+       { "SJIS",                       sizeof("SJIS")-1,                       cs_sjis },
+       { "932",                        sizeof("932")-1,                        cs_sjis },
+       { "SJIS-win",           sizeof("SJIS-win")-1,           cs_sjis },
+       { "CP932",                      sizeof("CP932")-1,                      cs_sjis },
+       { "EUCJP",                      sizeof("EUCJP")-1,                      cs_eucjp },
+       { "EUC-JP",                     sizeof("EUC-JP")-1,                     cs_eucjp },
+       { "eucJP-win",          sizeof("eucJP-win")-1,          cs_eucjp },
+       { "KOI8-R",                     sizeof("KOI8-R")-1,                     cs_koi8r },
+       { "koi8-ru",            sizeof("koi8-ru")-1,            cs_koi8r },
+       { "koi8r",                      sizeof("koi8r")-1,                      cs_koi8r },
+       { "cp1251",                     sizeof("cp1251")-1,                     cs_cp1251 },
+       { "Windows-1251",       sizeof("Windows-1251")-1,       cs_cp1251 },
+       { "win-1251",           sizeof("win-1251")-1,           cs_cp1251 },
+       { "iso8859-5",          sizeof("iso8859-5")-1,          cs_8859_5 },
+       { "iso-8859-5",         sizeof("iso-8859-5")-1,         cs_8859_5 },
+       { "cp866",                      sizeof("cp866")-1,                      cs_cp866 },
+       { "866",                        sizeof("866")-1,                        cs_cp866 },
+       { "ibm866",                     sizeof("ibm866")-1,                     cs_cp866 },
+       { "MacRoman",           sizeof("MacRoman")-1,           cs_macroman }
 };
 
 /* longest entity name length excluding & and ; */
@@ -406,7 +406,7 @@ typedef const entity_stage3_row *const *entity_stage1_row; /* 64 elements */
 
 /* For stage 1, Calculate k & 0xFFF000 >> 3*4.
  * If larger than 1D, we have no mapping. Otherwise lookup that index */
+
 typedef struct {
        const entity_stage1_row *ms_table;
        /* for tables with only basic entities, this member is to be accessed