granicus.if.org Git - php/commitdiff
added numeric entities encode/decode in hex format.
author: Rui Hirokawa <hirokawa@php.net>
Mon, 18 Jul 2011 08:36:17 +0000 (08:36 +0000)
committer: Rui Hirokawa <hirokawa@php.net>
Mon, 18 Jul 2011 08:36:17 +0000 (08:36 +0000)
ext/mbstring/libmbfl/mbfl/mbfilter.c
ext/mbstring/mbstring.c

index 9eec3b4afc9cda963ac019aa047c1ea95c18501e..9f929cc3b2d63137e31da082edbb089655a9c848 100644 (file)
@@ -2746,7 +2746,9 @@ collector_decode_htmlnumericentity(int c, void *data)
                }
                break;
        case 2:
-               if (c >= 0x30 && c <= 0x39) {   /* '0' - '9' */
+               if (c == 0x78) {        /* 'x' */
+                       pc->status = 4;
+               } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
                        pc->cache = c - 0x30;
                        pc->status = 3;
                        pc->digit = 1;
@@ -2810,6 +2812,89 @@ collector_decode_htmlnumericentity(int c, void *data)
                        (*pc->decoder->filter_function)(c, pc->decoder);
                }
                break;
+       case 4:
+               if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
+                       pc->cache = c - 0x30;
+                       pc->status = 5;
+                       pc->digit = 1;
+               } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F'  */
+                       pc->cache = c - 0x41 + 10;
+                       pc->status = 5;
+                       pc->digit = 1;
+               } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f'  */
+                       pc->cache = c - 0x61 + 10;
+                       pc->status = 5;
+                       pc->digit = 1;
+               } else {
+                       pc->status = 0;
+                       (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
+                       (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
+                       (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
+                       (*pc->decoder->filter_function)(c, pc->decoder);
+               }
+               break;
+       case 5:
+               s = 0;
+               f = 0;
+               if ((c >= 0x30 && c <= 0x39) ||
+                       (c >= 0x41 && c <= 0x46) ||
+                       (c >= 0x61 && c <= 0x66)) {     /* '0' - '9' or 'a' - 'f'  */
+                       if (pc->digit > 9) {
+                               pc->status = 0;
+                               s = pc->cache;
+                               f = 1;
+                       } else {
+                               if (c >= 0x30 && c <= 0x39) {
+                                       s = pc->cache*16 + (c - 0x30);
+                               } else if (c >= 0x41 && c <= 0x46)  {
+                                       s = pc->cache*16 + (c - 0x41 + 10);
+                               } else {
+                                       s = pc->cache*16 + (c - 0x61 + 10);
+                               }
+                               pc->cache = s;
+                               pc->digit++;
+                       }
+               } else {
+                       pc->status = 0;
+                       s = pc->cache;
+                       f = 1;
+                       n = 0;
+                       size = pc->mapsize;
+                       while (n < size) {
+                               mapelm = &(pc->convmap[n*4]);
+                               d = s - mapelm[2];
+                               if (d >= mapelm[0] && d <= mapelm[1]) {
+                                       f = 0;
+                                       (*pc->decoder->filter_function)(d, pc->decoder);
+                                       if (c != 0x3b) {        /* ';' */
+                                               (*pc->decoder->filter_function)(c, pc->decoder);
+                                       }
+                                       break;
+                               }
+                               n++;
+                       }
+               }
+               if (f) {
+                       (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
+                       (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
+                       (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
+                       r = 1;
+                       n = pc->digit;
+                       while (n > 0) {
+                               r *= 16;
+                               n--;
+                       }
+                       s %= r;
+                       r /= 16;
+                       while (r > 0) {
+                               d = s/r;
+                               s %= r;
+                               r /= 16;
+                               (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
+                       }
+                       (*pc->decoder->filter_function)(c, pc->decoder);
+               }
+               break;
        default:
                if (c == 0x26) {        /* '&' */
                        pc->status = 1;
@@ -2822,6 +2907,53 @@ collector_decode_htmlnumericentity(int c, void *data)
        return c;
 }
 
+static int /* Collector callback: writes code point c as "&#x<hex>;" when a conversion-map entry covers it, otherwise forwards c unchanged. */
+collector_encode_hex_htmlnumericentity(int c, void *data) /* data is cast to struct collector_htmlnumericentity_data *; the function always returns c */
+{
+       struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
+       int f, n, s, r, d, size, *mapelm;
+
+       size = pc->mapsize;
+       f = 0; /* becomes 1 once an entity has been emitted; also gates leading-zero suppression below */
+       n = 0;
+       while (n < size) { /* scan map entries in order; the first one that yields s >= 0 wins */
+               mapelm = &(pc->convmap[n*4]); /* entry n is four ints: [0]=range low, [1]=range high, [2]=offset, [3]=mask */
+               if (c >= mapelm[0] && c <= mapelm[1]) {
+                       s = (c + mapelm[2]) & mapelm[3]; /* value to print: offset added, then masked */
+                       if (s >= 0) { /* a negative result emits nothing and the scan moves on to the next entry */
+                               (*pc->decoder->filter_function)(0x26, pc->decoder);     /* '&' */
+                               (*pc->decoder->filter_function)(0x23, pc->decoder);     /* '#' */
+                               (*pc->decoder->filter_function)(0x78, pc->decoder);     /* 'x' */
+                               r = 0x1000000; /* 16^6 */
+                               s %= r; /* reduce modulo 16^6, so at most six hex digits follow */
+                               while (r > 0) { /* walk place values high to low; the first pass (r = 16^6) always gives d == 0 */
+                                       d = s/r;
+                                       if (d || f) { /* skip leading zeros until the first nonzero digit */
+                                               f = 1;
+                                               s %= r;
+                                               (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); /* presumably the hex digit character for d; table is defined elsewhere */
+                                       }
+                                       r /= 16;
+                               }
+                               if (!f) { /* every digit was zero: print a single digit for 0 */
+                                       f = 1;
+                                       (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
+                               }
+                               (*pc->decoder->filter_function)(0x3b, pc->decoder);             /* ';' */
+                       }
+               }
+               if (f) { /* entity written; stop scanning */
+                       break;
+               }
+               n++;
+       }
+       if (!f) { /* no entry produced an entity: forward c unchanged */
+               (*pc->decoder->filter_function)(c, pc->decoder);
+       }
+
+       return c;
+}
+
 mbfl_string *
 mbfl_html_numeric_entity(
     mbfl_string *string,
@@ -2850,12 +2982,17 @@ mbfl_html_numeric_entity(
            string->no_encoding,
            mbfl_memory_device_output, 0, &device);
        /* wchar filter */
-       if (type == 0) {
+       if (type == 0) { /* decimal output */
                encoder = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
                    collector_encode_htmlnumericentity, 0, &pc);
-       } else {
+       } else if (type == 2) { /* hex output */
+               encoder = mbfl_convert_filter_new(
+                   string->no_encoding,
+                   mbfl_no_encoding_wchar,
+                   collector_encode_hex_htmlnumericentity, 0, &pc);
+       } else { /* type == 1: decimal/hex input */
                encoder = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
index 34644a553bc61cf6bf04eb1bbb62afa1413419e4..f87b51abf7e90a1c38dd251b89d428604d254840 100644 (file)
@@ -412,6 +412,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
        ZEND_ARG_INFO(0, string)
        ZEND_ARG_INFO(0, convmap)
        ZEND_ARG_INFO(0, encoding)
+       ZEND_ARG_INFO(0, is_hex)
 ZEND_END_ARG_INFO()
 
 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
@@ -3682,10 +3683,11 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
        HashTable *target_hash;
        size_t argc = ZEND_NUM_ARGS();
        int i, *convmap, *mapelm, mapsize=0;
+       zend_bool is_hex = 0;
        mbfl_string string, result, *ret;
        enum mbfl_no_encoding no_encoding;
 
-       if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
+       if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
                return;
        }
 
@@ -3696,7 +3698,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
        string.len = str_len;
 
        /* encoding */
-       if (argc == 3) {
+       if ((argc == 3 || argc == 4) && encoding_len > 0) {
                no_encoding = mbfl_name2no_encoding(encoding);
                if (no_encoding == mbfl_no_encoding_invalid) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
@@ -3706,6 +3708,12 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
                }
        }
 
+       if (argc == 4) {
+               if (type == 0 && is_hex) {
+                       type = 2; /* output in hex format */
+               }
+       }
+
        /* conversion map */
        convmap = NULL;
        if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
@@ -3743,7 +3751,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
 }
 /* }}} */
 
-/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
+/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
    Converts specified characters to HTML numeric entities */
 PHP_FUNCTION(mb_encode_numericentity)
 {