granicus.if.org Git - php/commitdiff
MFH: speed improvement of jisx0213 conversion.
author Rui Hirokawa <hirokawa@php.net>
Sun, 21 Aug 2011 02:23:33 +0000 (02:23 +0000)
committer Rui Hirokawa <hirokawa@php.net>
Sun, 21 Aug 2011 02:23:33 +0000 (02:23 +0000)
ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c
ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
ext/mbstring/libmbfl/filters/unicode_table_jis2004.h

index fc807c5139fbb18de750db83a663ec2041e43ed1..d855374bf078e6569ce094fd4bd0ff3ab87d4603 100644 (file)
@@ -39,8 +39,6 @@
 #include "unicode_table_jis.h"
 
 extern int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter);
-
-static int mbfl_filt_conv_2022jp_2004_flush(mbfl_convert_filter *filter);
 static int mbfl_filt_ident_2022jp_2004(int c, mbfl_identify_filter *filter);
 
 const mbfl_encoding mbfl_encoding_2022jp_2004 = {
@@ -74,51 +72,9 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = {
        mbfl_filt_conv_common_ctor,
        mbfl_filt_conv_common_dtor,
        mbfl_filt_conv_wchar_jis2004,
-       mbfl_filt_conv_2022jp_2004_flush
+       mbfl_filt_conv_jis2004_flush
 };
 
-#define CK(statement)  do { if ((statement) < 0) return (-1); } while (0)
-
-static int
-mbfl_filt_conv_2022jp_2004_flush(mbfl_convert_filter *filter)
-{
-       int k, c1, c2, s1, s2;
-
-       k = filter->cache;
-
-       if ((filter->status & 0xf) == 1 && k >= 0 && k <= jisx0213_u2_tbl_len) {
-               s1 = jisx0213_u2_fb_tbl[k];     
-               c1 = (s1 >> 8) & 0x7f;
-               c2 = s1 & 0x7f;
-
-               if ((filter->status & 0xff00) != 0x200) {
-                       CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
-                       CK((*filter->output_function)(0x24, filter->data));             /* '$' */
-                       CK((*filter->output_function)(0x28, filter->data));             /* '(' */
-                       CK((*filter->output_function)(0x51, filter->data));             /* 'Q' */
-               }
-               filter->status = 0x200;
-               CK((*filter->output_function)(c1, filter->data));
-               CK((*filter->output_function)(c2, filter->data));               
-       }
-       filter->cache = 0;
-
-       /* back to latin */
-       if ((filter->status & 0xff00) != 0) {
-               CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
-               CK((*filter->output_function)(0x28, filter->data));             /* '(' */
-               CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
-       }       
-
-       filter->status &= 0xff;
-
-       if (filter->flush_function != NULL) {
-               return (*filter->flush_function)(filter->data);
-       }
-
-       return 0;
-}
-
 static int mbfl_filt_ident_2022jp_2004(int c, mbfl_identify_filter *filter)
 {
 retry:
index 58d8788629dfb37f233a1e408cb19f94acf27b28..7cf9452f77a09797b11ffbf0c6086bf27d36452e 100644 (file)
@@ -34,8 +34,6 @@
 #include "mbfilter.h"
 #include "mbfilter_sjis_2004.h"
 
-#define UNICODE_TABLE_JIS2004_DEF
-
 #include "unicode_table_jis2004.h"
 #include "unicode_table_jis.h"
 
@@ -43,20 +41,6 @@ extern const unsigned char mblen_table_sjis[];
 
 static int mbfl_filt_ident_sjis2004(int c, mbfl_identify_filter *filter);
 
-static const int uni2sjis_tbl_range[][2] = {
-       {0x0000, 0x045f},
-       {0x4e00, 0x9fff},
-       {0xff00, 0xffe5},
-       {0xfa0f, 0xfa6a},
-};
-
-static const unsigned short *uni2sjis_tbl[] = {
-       ucs_a1_jisx0213_table,
-       ucs_i_jisx0213_table,
-       ucs_r_jisx0213_table,
-       ucs_r2_jisx0213_table,
-};
-
 extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
 extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n);
 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
@@ -236,7 +220,7 @@ retry:
                w1 = (s1 << 8) | s2;
 
                if (w1 >= 0x2121) {
-
+                       /* conversion for combining characters */
                        if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) ||
                                (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 ||
                                (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) {
@@ -248,6 +232,7 @@ retry:
                                }
                        } 
 
+                       /* conversion for BMP  */
                        if (w <= 0) {
                                w1 = (s1 - 0x21)*94 + s2 - 0x21;
                                if (w1 >= 0 && w1 < jisx0213_ucs_table_size) {
@@ -255,6 +240,7 @@ retry:
                                }
                        }
 
+                       /* conversion for CJK Unified Ideographs ext.B (U+2XXXX) */
                        if (w <= 0) {
                                w1 = (s1 << 8) | s2;
                                k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len);
@@ -285,7 +271,7 @@ retry:
                }
                break;
 
-       case 2: /* got 0x8e : EUC-JP-2004 */
+       case 2: /* got 0x8e : EUC-JP-2004 kana */
                filter->status = 0;
                if (c > 0xa0 && c < 0xe0) {
                        w = 0xfec0 + c;
@@ -300,41 +286,65 @@ retry:
                }
                break;
 
-       case 3: /* got 0x8f,  X 0213 plane 2 first char : EUC-JP-2004 */
+       case 3: /* X 0213 plane 2 first char : EUC-JP-2004 (0x8f), ISO-2022-JP-2004 */
                if ((c >= 0 && c < 0x21) || c == 0x7f) {                /* CTLs */
                        CK((*filter->output_function)(c, filter->data));
                        filter->status = 0;
                } else {
-                       filter->status++;
-                       filter->cache = c;
+                       if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
+                               s1 = c - 0x80;
+                       } else {
+                               s1 = c;
+                       }
+                       if (s1 > 0x20 && s1 < 0x80) {
+                               filter->cache = s1;
+                               filter->status++;
+                       } else {
+                               if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
+                                       w = c | 0x8f00;
+                                       w &= MBFL_WCSGROUP_MASK;
+                                       w |= MBFL_WCSGROUP_THROUGH;
+                               } else {
+                                       w = c & 0x7f;
+                                       w &= MBFL_WCSPLANE_MASK;
+                                       w |= MBFL_WCSPLANE_JIS0213;                             
+                               }
+                               CK((*filter->output_function)(w, filter->data));
+                       }
                }
                break;
 
-       case 4: /* got 0x8f,  X 0213 plane 2 second char */
+       case 4: /* X 0213 plane 2 second char : EUC-JP-2004, ISO-2022-JP-2004 */
 
                filter->status &= ~0xf;
                c1 = filter->cache;
-               c2 = c;
                if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
-                       c1 -= 0x80;
-                       c2 -= 0x80;
+                       c2 = c - 0x80;
+               } else {
+                       c2 = c;
                }
                s1 = c1 - 0x21;
                s2 = c2 - 0x21;
 
-               if (s1 >= 0 && s1 < 94 && s2 >= 0 && s2 < 94) {
+               if (((s1 >= 0 && s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || 
+                       (s1 >= 77 && s1 < 94)) && s2 >= 0 && s2 < 94) {
+                       /* calc offset from ku */
                        for (k = 0; k < jisx0213_p2_ofst_len; k++) {
                                if (s1 == jisx0213_p2_ofst[k]-1) {
                                        break;
                                } 
                        }                       
                        k = k - (jisx0213_p2_ofst[k]-1);
+
+                       /* check for japanese chars in BMP */
                        s = (s1 + 94 + k)*94 + s2;
                        if (s >= 0 && s < jisx0213_ucs_table_size) {
                                w = jisx0213_ucs_table[s];
                        } else {
                                w = 0;
                        }
+
+                       /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */
                        if (w <= 0) {
                                w1 = ((c1 + k + 94) << 8) | c2;
                                k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len);
@@ -342,11 +352,13 @@ retry:
                                        w = jisx0213_jis_u5_tbl[k] + 0x20000;
                                }
                        }
+
                        if (w <= 0) {
                                w = ((c1 & 0x7f) << 8) | (c2 & 0x7f);
                                w &= MBFL_WCSPLANE_MASK;
                                w |= MBFL_WCSPLANE_JIS0213;
                        }
+
                        CK((*filter->output_function)(w, filter->data));                        
                } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
                        CK((*filter->output_function)(c, filter->data));
@@ -365,7 +377,7 @@ retry:
 
                break;
 
-       case 5: /* X 0212 */
+       case 5: /* X 0208 : ISO-2022-JP-2004 */
                filter->status &= ~0xf;
                c1 = filter->cache;
                if (c > 0x20 && c < 0x7f) {
@@ -382,7 +394,7 @@ retry:
                CK((*filter->output_function)(w, filter->data));
                break;
 
-       /* ESC */
+       /* ESC : ISO-2022-JP-2004 */
 /*     case 0x06:      */
 /*     case 0x16:      */
 /*     case 0x26:      */
@@ -401,7 +413,7 @@ retry:
                }
                break;
 
-       /* ESC $ */
+       /* ESC $ : ISO-2022-JP-2004 */
 /*     case 0x07:      */
 /*     case 0x17:      */
 /*     case 0x27:      */
@@ -423,7 +435,7 @@ retry:
 
                break;
 
-       /* ESC $ ( */
+       /* ESC $ ( : ISO-2022-JP-2004 */
 /*     case 0x08:      */
 /*     case 0x18:      */
 /*     case 0x28:      */
@@ -444,7 +456,7 @@ retry:
                }
                break;
 
-       /* ESC ( */
+       /* ESC ( : ISO-2022-JP-2004 */
 /*     case 0x09:      */
 /*     case 0x19:      */
 /*     case 0x29:      */
@@ -476,8 +488,13 @@ mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
 
 retry:
 
-       if ((filter->status & 0xf)== 0 && ( c == 0x00E6 ||
-                                       (c >= 0x0254 && c <= 0x02E9) || (c >= 0x304B && c <= 0x31F7))) {
+       /* check for 1st char of combining characters */
+       if ((filter->status & 0xf)== 0 && ( 
+                       c == 0x00E6 ||
+                       (c >= 0x0254 && c <= 0x02E9) || 
+                       (c >= 0x304B && c <= 0x3053) ||
+                       (c >= 0x30AB && c <= 0x30C8) ||
+                       c == 0x31F7)) {
                for (k=0;k<jisx0213_u2_tbl_len;k++) {
                        if (c == jisx0213_u2_tbl[2*k]) {
                                filter->status++;
@@ -487,6 +504,7 @@ retry:
                }
        }       
 
+       /* check for 2nd char of combining characters */
        if ((filter->status & 0xf) == 1 && 
                filter->cache >= 0 && filter->cache <= jisx0213_u2_tbl_len) {
                k = filter->cache;
@@ -529,23 +547,26 @@ retry:
                }
        }
 
+       /* check for major japanese chars */
        if (s1 <= 0) {
-               for (k=0; k<sizeof(uni2sjis_tbl_range)/(sizeof(int)*2);k++) {
-                       if (c >= uni2sjis_tbl_range[k][0] && c <= uni2sjis_tbl_range[k][1]) {
-                               s1 = uni2sjis_tbl[k][c-uni2sjis_tbl_range[k][0]];
+               for (k=0; k < uni2jis_tbl_len ;k++) {
+                       if (c >= uni2jis_tbl_range[k][0] && c <= uni2jis_tbl_range[k][1]) {
+                               s1 = uni2jis_tbl[k][c-uni2jis_tbl_range[k][0]];
                                break;
                        }
                }
        }
        
-       if (c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) {
+       /* check for japanese chars in compressed area */
+       if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) {
                k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len);
                if (k >= 0) {
                        s1 = ucs_c1_jisx0213_ofst[k] + c - ucs_c1_jisx0213_tbl[2*k];
                }
        }
        
-       if (c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) {
+       /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */
+       if (s1 <= 0 && c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) {
                k = mbfl_bisec_srch2(c - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len);
                if (k >= 0) {
                        s1 = jisx0213_u5_jis_tbl[k];
@@ -649,6 +670,7 @@ mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
        int k, c1, c2, s1, s2;
 
        k = filter->cache;
+       filter->cache = 0;
 
        if (filter->status == 1 && k >= 0 && k <= jisx0213_u2_tbl_len) {
                s1 = jisx0213_u2_fb_tbl[k];     
@@ -660,12 +682,29 @@ mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
                } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
                        s2 = (s1 & 0xff) | 0x80;                        
                        s1 = ((s1 >> 8) & 0xff) | 0x80;
+               } else {
+                       s2 = s1 & 0x7f;                 
+                       s1 = (s1 >> 8) & 0x7f;
+                       if ((filter->status & 0xff00) != 0x200) {
+                               CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
+                               CK((*filter->output_function)(0x24, filter->data));             /* '$' */
+                               CK((*filter->output_function)(0x28, filter->data));             /* '(' */
+                               CK((*filter->output_function)(0x51, filter->data));             /* 'Q' */
+                       }
+                       filter->status = 0x200;
                }
 
                CK((*filter->output_function)(s1, filter->data));
                CK((*filter->output_function)(s2, filter->data));               
        }
-       filter->cache = 0;
+
+       /* back to latin */
+       if ((filter->status & 0xff00) != 0) {
+               CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
+               CK((*filter->output_function)(0x28, filter->data));             /* '(' */
+               CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
+       }       
+
        filter->status = 0;
 
        if (filter->flush_function != NULL) {
index ddb161e79b51f312851ba4c5f7d82b64b612cddb..0039a6f92dfcc40368094358e83597facdd55650 100644 (file)
 #ifndef UNICODE_TABLE_JIS2004_H
 #define UNICODE_TABLE_JIS2004_H
 
-#ifdef UNICODE_TABLE_JIS2004_DEF
-
 /*
  * Unicode table
  */
 
-const unsigned short jisx0213_ucs_table[] = { // 0x0000 - 0x2C0F
+static const unsigned short jisx0213_ucs_table[] = { // 0x0000 - 0x2C0F
 
 /* plane 1 ku 1 */
 0x3000,0x3001,0x3002,0xFF0C,0xFF0E,0x30FB,0xFF1A,0xFF1B,
@@ -1593,10 +1591,10 @@ const unsigned short jisx0213_ucs_table[] = { // 0x0000 - 0x2C0F
 0x9F69,0x0000,0x9F6D,0x9F70,0x9F75,0x0000,0x0000,0x0000,
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,};
 
-const int jisx0213_ucs_table_size = (sizeof(jisx0213_ucs_table)/sizeof(unsigned short));
+static const int jisx0213_ucs_table_size = (sizeof(jisx0213_ucs_table)/sizeof(unsigned short));
 
 
-const unsigned short ucs_a1_jisx0213_table[] = { // 0x0000 - 0x045f
+static const unsigned short ucs_a1_jisx0213_table[] = { // 0x0000 - 0x045f
 
 /* 0000h */
 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
@@ -1749,11 +1747,50 @@ const unsigned short ucs_a1_jisx0213_table[] = { // 0x0000 - 0x045f
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
 };
 
-const int ucs_a1_jisx0213_table_min = 0x0000;
-const int ucs_a1_jisx0213_table_max = 0x0000 + 
+static const int ucs_a1_jisx0213_table_min = 0x0000;
+static const int ucs_a1_jisx0213_table_max = 0x0000 + 
        (sizeof(ucs_a1_jisx0213_table)/sizeof(unsigned short));
 
-const unsigned short ucs_i_jisx0213_table[] = { // 0x4e00 - 0x9fff
+static const unsigned short ucs_hk_jisx0213_table[] = { // 0x3000 - 0x30ff
+/* 3000h */
+0x2121,0x2122,0x2123,0x2137,0x0000,0x2139,0x213A,0x213B,
+0x2152,0x2153,0x2154,0x2155,0x2156,0x2157,0x2158,0x2159,
+0x215A,0x215B,0x2229,0x222E,0x214C,0x214D,0x225A,0x225B,
+0x2258,0x2259,0x0000,0x0000,0x2141,0x2D60,0x0000,0x2D61,
+0x2666,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x2233,0x2234,0x2235,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x2236,0x2237,0x233C,0x0000,0x0000,
+0x0000,0x2421,0x2422,0x2423,0x2424,0x2425,0x2426,0x2427,
+0x2428,0x2429,0x242A,0x242B,0x242C,0x242D,0x242E,0x242F,
+0x2430,0x2431,0x2432,0x2433,0x2434,0x2435,0x2436,0x2437,
+0x2438,0x2439,0x243A,0x243B,0x243C,0x243D,0x243E,0x243F,
+0x2440,0x2441,0x2442,0x2443,0x2444,0x2445,0x2446,0x2447,
+0x2448,0x2449,0x244A,0x244B,0x244C,0x244D,0x244E,0x244F,
+0x2450,0x2451,0x2452,0x2453,0x2454,0x2455,0x2456,0x2457,
+0x2458,0x2459,0x245A,0x245B,0x245C,0x245D,0x245E,0x245F,
+0x2460,0x2461,0x2462,0x2463,0x2464,0x2465,0x2466,0x2467,
+0x2468,0x2469,0x246A,0x246B,0x246C,0x246D,0x246E,0x246F,
+0x2470,0x2471,0x2472,0x2473,0x2474,0x2475,0x2476,0x0000,
+0x0000,0x0000,0x0000,0x212B,0x212C,0x2135,0x2136,0x2239,
+0x237B,0x2521,0x2522,0x2523,0x2524,0x2525,0x2526,0x2527,
+0x2528,0x2529,0x252A,0x252B,0x252C,0x252D,0x252E,0x252F,
+0x2530,0x2531,0x2532,0x2533,0x2534,0x2535,0x2536,0x2537,
+0x2538,0x2539,0x253A,0x253B,0x253C,0x253D,0x253E,0x253F,
+0x2540,0x2541,0x2542,0x2543,0x2544,0x2545,0x2546,0x2547,
+0x2548,0x2549,0x254A,0x254B,0x254C,0x254D,0x254E,0x254F,
+0x2550,0x2551,0x2552,0x2553,0x2554,0x2555,0x2556,0x2557,
+0x2558,0x2559,0x255A,0x255B,0x255C,0x255D,0x255E,0x255F,
+0x2560,0x2561,0x2562,0x2563,0x2564,0x2565,0x2566,0x2567,
+0x2568,0x2569,0x256A,0x256B,0x256C,0x256D,0x256E,0x256F,
+0x2570,0x2571,0x2572,0x2573,0x2574,0x2575,0x2576,0x2772,
+0x2773,0x2774,0x2775,0x2126,0x213C,0x2133,0x2134,0x2238,
+};
+
+static const int ucs_hk_jisx0213_min = 0x3000;
+static const int ucs_hk_jisx0213_max = 0x30FF;
+
+static const unsigned short ucs_i_jisx0213_table[] = { // 0x4e00 - 0x9fff
 
 /* 4E00h */
 0x306C,0x437A,0x7F22,0x3C37,0x0000,0x0000,0x0000,0x4B7C,
@@ -4544,11 +4581,11 @@ const unsigned short ucs_i_jisx0213_table[] = { // 0x4e00 - 0x9fff
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
 };
 
-const int ucs_i_jisx0213_table_min = 0x4E00;
-const int ucs_i_jisx0213_table_max = 0x4E00 + (sizeof(ucs_i_jisx0213_table)/
+static const int ucs_i_jisx0213_table_min = 0x4E00;
+static const int ucs_i_jisx0213_table_max = 0x4E00 + (sizeof(ucs_i_jisx0213_table)/
                                                                                           sizeof(unsigned short));
 
-const unsigned short ucs_r_jisx0213_table[] = { // 0xff00 - 0xffe5
+static const unsigned short ucs_r_jisx0213_table[] = { // 0xff00 - 0xffe5
 
 /* FF00h */
 0x0000,0x212A,0x2230,0x2174,0x2170,0x2173,0x2175,0x222F,
@@ -4581,11 +4618,11 @@ const unsigned short ucs_r_jisx0213_table[] = { // 0xff00 - 0xffe5
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
 0x0000,0x0000,0x0000,0x2131,0x0000,0x216F,};
 
-const int ucs_r_jisx0213_table_min = 0xFF00;
-const int ucs_r_jisx0213_table_max = 0xFF00 + (sizeof(ucs_r_jisx0213_table)/
-                                                                                          sizeof(unsigned short));
+static const int ucs_r_jisx0213_table_min = 0xFF00;
+static const int ucs_r_jisx0213_table_max = 0xFF00 + (sizeof(ucs_r_jisx0213_table)/
+                                                                                                         sizeof(unsigned short));
 
-const unsigned short ucs_r2_jisx0213_table[] = { // 0xfa0f - 0xfa6a
+static const unsigned short ucs_r2_jisx0213_table[] = { // 0xfa0f - 0xfa6a
 0x2F4B,
 0x2F57,0x4F72,0x0000,0x8679,0x757A,0x775A,0x776F,0x0000,
 0x0000,0x793C,0x793D,0x7941,0x0000,0x0000,0x0000,0x7B3A,
@@ -4600,10 +4637,10 @@ const unsigned short ucs_r2_jisx0213_table[] = { // 0xfa0f - 0xfa6a
 0x7B6F,0x7B79,0x7C2F,0x7C30,0x7C38,0x7C3D,0x9369,0x7C59,
 0x7D63,0x7D76,0x7D7B,};
 
-const int ucs_r2_jisx0213_min = 0xFA0F;
-const int ucs_r2_jisx0213_max = 0xFA6A;
+static const int ucs_r2_jisx0213_min = 0xFA0F;
+static const int ucs_r2_jisx0213_max = 0xFA6A;
 
-const unsigned short ucs_c1_jisx0213_tbl[] = {
+static const unsigned short ucs_c1_jisx0213_tbl[] = {
  // 0x1e00 - 0x4dff
 0x1E3E,0x1E3F,0x1F70,0x1F71,0x1F72,0x1F73,0x2010,0x2010,
 0x2013,0x2013,0x2014,0x2014,0x2016,0x2016,0x2018,0x2019,
@@ -4709,7 +4746,7 @@ const unsigned short ucs_c1_jisx0213_tbl[] = {
 0x4C20,0x4C20,0x4CC4,0x4CC4,0x4CD1,0x4CD1,0x4D07,0x4D07,
 0x4D77,0x4D77,};
 
-const unsigned short ucs_c1_jisx0213_ofst[] = {
+static const unsigned short ucs_c1_jisx0213_ofst[] = {
 0x2872,0x2B46,0x2B50,0x213E,0x237C,0x213D,0x2142,0x2146,
 0x2148,0x2277,0x2340,0x2145,0x2144,0x2273,0x216C,0x2228,
 0x286B,0x007E,0x2B58,0x2C7E,0x286C,0x2C7D,0x2921,0x216E,
@@ -4763,12 +4800,13 @@ const unsigned short ucs_c1_jisx0213_ofst[] = {
 0x9728,0x972A,0x9731,0x7E3E,0x973F,0x982A,0x982D,0x984B,
 0x9860,};
 
-const int ucs_c1_jisx0213_tbl_len = sizeof(ucs_c1_jisx0213_ofst)/sizeof(unsigned short);
+static const int ucs_c1_jisx0213_tbl_len = sizeof(ucs_c1_jisx0213_ofst)/sizeof(unsigned short);
 
-const int ucs_c1_jisx0213_min = 0x1E00;
-const int ucs_c1_jisx0213_max = 0x4DFF;
+static const int ucs_c1_jisx0213_min = 0x1E00;
+static const int ucs_c1_jisx0213_max = 0x4DFF;
 
-const unsigned short jisx0213_jis_u5_tbl[] = {
+/* CJK Unified ideographs Extension B:  U+2XXXX */
+static const unsigned short jisx0213_jis_u5_tbl[] = {
 0x000B,0x123D,0x131B,0x146E,0x18BD,0x0B9F,0x16B4,0x1E34,
 0x31C4,0x35C4,0x373F,0x3763,0x3CFE,0x47F1,0x548E,0x550E,
 0x5771,0x59C4,0x5DA1,0x6AFF,0x6E40,0x70F4,0x7684,0x8277,
@@ -4808,7 +4846,7 @@ const unsigned short jisx0213_jis_u5_tbl[] = {
 0x9EC4,0x9EE9,0x9EDB,0x9FCE,0xA02F,0xA01A,0xA0F9,0xA082,
 0x2218,0xA38C,0xA437,0xA5F1,0xA602,0xA61A,0xA6B2,};
 
-const unsigned short jisx0213_jis_u5_key[] = {
+static const unsigned short jisx0213_jis_u5_key[] = {
 0x2E22,0x2F42,0x2F4C,0x2F60,0x2F7B,0x4F54,0x4F63,0x4F6E,
 0x753A,0x7572,0x7629,0x7632,0x7660,0x776C,0x787E,0x7929,
 0x7947,0x7954,0x796E,0x7A5D,0x7B33,0x7B49,0x7B6C,0x7C49,
@@ -4848,7 +4886,7 @@ const unsigned short jisx0213_jis_u5_key[] = {
 0x9767,0x976D,0x9770,0x9825,0x9829,0x982B,0x9832,0x9835,
 0x9853,0x9858,0x985A,0x986E,0x9870,0x9872,0x9876,};
 
-const unsigned short jisx0213_u5_jis_tbl[] = {
+static const unsigned short jisx0213_u5_jis_tbl[] = {
 0x2E22,0x7F21,0x7F2B,0x7F2E,0x7F36,0x7F46,0x7F70,0x7F79,
 0x7F77,0x8122,0x8125,0x8127,0x8131,0x8132,0x8138,0x813F,
 0x8141,0x814A,0x8152,0x8153,0x8159,0x815C,0x4F54,0x8177,
@@ -4888,7 +4926,7 @@ const unsigned short jisx0213_u5_jis_tbl[] = {
 0x9767,0x9770,0x976D,0x9825,0x982B,0x9829,0x9835,0x9832,
 0x7E66,0x9858,0x985A,0x986E,0x9870,0x9872,0x9876,};
 
-const unsigned short jisx0213_u5_jis_key[] = {
+static const unsigned short jisx0213_u5_jis_key[] = {
 0x000B,0x0089,0x00A2,0x00A4,0x01A2,0x0213,0x032B,0x0371,
 0x0381,0x03F9,0x044A,0x0509,0x05D6,0x0628,0x074F,0x0807,
 0x083A,0x08B9,0x097C,0x099D,0x0AD3,0x0B1D,0x0B9F,0x0D45,
@@ -4928,17 +4966,17 @@ const unsigned short jisx0213_u5_jis_key[] = {
 0x9EC4,0x9EDB,0x9EE9,0x9FCE,0xA01A,0xA02F,0xA082,0xA0F9,
 0xA190,0xA38C,0xA437,0xA5F1,0xA602,0xA61A,0xA6B2,};
 
-const int jisx0213_u5_tbl_min = 0x2000B;
-const int jisx0213_u5_tbl_max = 0x2A6B2;
-const int jisx0213_u5_tbl_len = sizeof(jisx0213_u5_jis_key)/sizeof(unsigned short);
+static const int jisx0213_u5_tbl_min = 0x2000B;
+static const int jisx0213_u5_tbl_max = 0x2A6B2;
+static const int jisx0213_u5_tbl_len = sizeof(jisx0213_u5_jis_key)/sizeof(unsigned short);
 
-const unsigned short jisx0213_u2_key[] = {
+static const unsigned short jisx0213_u2_key[] = {
        0x2477,0x2478,0x2479,0x247A,0x247B,0x2577,0x2578,0x2579,
        0x257A,0x257B,0x257C,0x257D,0x257E,0x2678,0x2B44,0x2B48,
        0x2B49,0x2B4A,0x2B4B,0x2B4C,0x2B4D,0x2B4E,0x2B4F,0x2B65,
        0x2B66};
 
-const unsigned short jisx0213_u2_tbl[] = {
+static const unsigned short jisx0213_u2_tbl[] = {
        0x304B,0x309A,0x304D,0x309A,0x304F,0x309A,0x3051,0x309A,
        0x3053,0x309A,0x30AB,0x309A,0x30AD,0x309A,0x30AF,0x309A,
        0x30B1,0x309A,0x30B3,0x309A,0x30BB,0x309A,0x30C4,0x309A,
@@ -4947,80 +4985,52 @@ const unsigned short jisx0213_u2_tbl[] = {
        0x0259,0x0301,0x025A,0x0300,0x025A,0x0301,0x02E9,0x02E5,
        0x02E5,0x02E9};
 
-const unsigned short jisx0213_u2_fb_tbl[] = {
+static const unsigned short jisx0213_u2_fb_tbl[] = {
        0x242B,0x242D,0x242F,0x2431,0x2433,0x252B,0x252D,0x252F,
        0x2531,0x2533,0x253B,0x2544,0x2548,0x2675,0x295C,0x2B38,
        0x2B38,0x2B37,0x2B37,0x2B30,0x2B30,0x2B43,0x2B43,0x2B64,
        0x2B60};
 
-const int jisx0213_u2_tbl_len = sizeof(jisx0213_u2_key)/sizeof(unsigned short);
+static const int jisx0213_u2_tbl_len = sizeof(jisx0213_u2_key)/sizeof(unsigned short);
 
-const unsigned short jisx0213_uni2sjis_cmap_key[] = {
+static const unsigned short jisx0213_uni2sjis_cmap_key[] = {
        0xf91d,0xf928,0xf929,0xf936,0xf970,0xf9d0,0xf9dc,
        0xfe45,0xfe46,0xffe5, 
 };
 
-const unsigned short jisx0213_uni2sjis_cmap_val[] = {
+static const unsigned short jisx0213_uni2sjis_cmap_val[] = {
        0x763b,0x742e,0x754e,0x7b4f,0x7649,0x7e24,0x7d5d,
        0x233e,0x233d,0x216f,
 };
 
-const int jisx0213_uni2sjis_cmap_len = 
+static const int jisx0213_uni2sjis_cmap_len = 
        sizeof(jisx0213_uni2sjis_cmap_key)/sizeof(unsigned short);
 
 
-const unsigned short jisx0213_p2_ofst[] = {
+static const unsigned short jisx0213_p2_ofst[] = {
        1, 8, 3, 4, 5, 12, 13, 14, 15, 78, 79, 80, 81, 82,
        83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94};
 
-const int jisx0213_p2_ofst_len = 
+static const int jisx0213_p2_ofst_len = 
        sizeof(jisx0213_p2_ofst)/sizeof(unsigned short);
 
-#else
-
-extern const unsigned short jisx0213_ucs_table[];
-extern const unsigned short ucs_a1_jisx0213_table[];
-extern const unsigned short ucs_i_jisx0213_table[];
-extern const unsigned short ucs_r_jisx0213_table[];
-extern const unsigned short ucs_r2_jisx0213_table[];
-
-extern const unsigned short ucs_c1_jisx0213_table[];
-extern const unsigned short ucs_c1_jisx0213_ofst[];
-extern const unsigned short jisx0213_jis_u5_tbl[];
-extern const unsigned short jisx0213_jis_u5_key[];
-extern const unsigned short jisx0213_u5_jis_tbl[];
-extern const unsigned short jisx0213_u5_jis_key[];
-extern const unsigned short jisx0213_u2_key[];
-extern const unsigned short jisx0213_u2_tbl[];
-extern const unsigned short jisx0213_u2_fb_tbl[];
-extern const unsigned short jisx0213_uni2sjis_cmap_key[];
-extern const unsigned short jisx0213_uni2sjis_cmap_val[];
-
-extern const int jisx0213_ucs_table_size;
-extern const int ucs_a1_jisx0213_table_min;
-extern const int ucs_a1_jisx0213_table_max;
-extern const int ucs_i_jisx0213_table_min;
-extern const int ucs_i_jisx0213_table_max;
-extern int ucs_r_jisx0213_table_min;
-extern int ucs_r_jisx0213_table_max;
-extern int ucs_r2_jisx0213_table_min;
-extern int ucs_r2_jisx0213_table_max;
-
-extern const int ucs_c1_jisx0213_tbl_len;
-extern const int jisx0213_u5_tbl_min;
-extern const int jisx0213_u5_tbl_max;
-extern const int jisx0213_u5_tbl_len;
-extern const int jisx0213_u2_tbl_len;
-extern const int jisx0213_uni2sjis_cmap_len;
-
-extern const unsigned short ucs_c1_jisx0213_tbl[];
-
-extern const int ucs_c1_jisx0213_min;
-extern const int ucs_c1_jisx0213_max;
-
-extern const unsigned short jisx0213_p2_ofst[];
-extern const int jisx0213_p2_ofst_len;
-
-#endif /* UNICODE_TABLE_JIS2004_DEF */
+static const int uni2jis_tbl_range[][2] = {
+       {0x0000, 0x045f},
+       {0x3000, 0x30ff},
+       {0x4e00, 0x9fff},
+       {0xff00, 0xffe5},
+       {0xfa0f, 0xfa6a},
+};
+
+static const unsigned short *uni2jis_tbl[] = {
+       ucs_a1_jisx0213_table,
+       ucs_hk_jisx0213_table,
+       ucs_i_jisx0213_table,
+       ucs_r_jisx0213_table,
+       ucs_r2_jisx0213_table,
+};
+
+static const int uni2jis_tbl_len = sizeof(uni2jis_tbl_range)/(sizeof(int)*2);
+
 #endif /* UNICODE_TABLE_JIS2004_H */