]> granicus.if.org Git - php/commitdiff
added ISO-2022-KR support in mbstring.
author Rui Hirokawa <hirokawa@php.net>
Wed, 15 May 2002 12:13:56 +0000 (12:13 +0000)
committer Rui Hirokawa <hirokawa@php.net>
Wed, 15 May 2002 12:13:56 +0000 (12:13 +0000)
ext/mbstring/mbfilter.c
ext/mbstring/mbfilter_cn.c
ext/mbstring/mbfilter_kr.c
ext/mbstring/mbfilter_kr.h
ext/mbstring/mbstring.c

index 0968b44fc9cefff3de240ee8f8d062ac7c73602a..678dc38d291dae9a70b9ebe8eb1b7f2ad21281d7 100644 (file)
@@ -788,6 +788,15 @@ static mbfl_encoding mbfl_encoding_uhc = {
        MBFL_ENCTYPE_MBCS
 };
 
+/*
+ * Encoding descriptor for ISO-2022-KR.
+ * NOTE(review): the initializer is positional; the field meanings noted
+ * below are inferred from the values only -- confirm against the
+ * mbfl_encoding declaration.
+ */
+static mbfl_encoding mbfl_encoding_2022kr = {
+       mbfl_no_encoding_2022kr,
+       "ISO-2022-KR",          /* presumably the encoding name */
+       "ISO-2022-KR",          /* presumably the MIME name */
+       NULL,
+       NULL,
+       /* multibyte encoding that also uses shift codes, going by the flag names */
+       MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
+};
+
 #endif /* HAVE_MBSTR_KR */
 
 static const char *mbfl_encoding_cp1252_aliases[] = {"cp1252", NULL};
@@ -1007,6 +1016,7 @@ static mbfl_encoding *mbfl_encoding_ptr_list[] = {
 #if defined(HAVE_MBSTR_KR)
        &mbfl_encoding_euc_kr,
        &mbfl_encoding_uhc,
+       &mbfl_encoding_2022kr,
 #endif
        NULL
 };
@@ -1115,6 +1125,7 @@ static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter TSRMLS_DC);
 #if defined(HAVE_MBSTR_KR)
 static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter TSRMLS_DC);
 static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter TSRMLS_DC);
+static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter TSRMLS_DC);
 #endif /* HAVE_MBSTR_KR */
 
 static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC);
@@ -1723,6 +1734,23 @@ static struct mbfl_convert_vtbl vtbl_wchar_uhc = {
        mbfl_filt_conv_common_dtor,
        mbfl_filt_conv_wchar_uhc,
        mbfl_filt_conv_common_flush };
+
+/*
+ * Conversion filter table for wchar => ISO-2022-KR.
+ * Uses the common constructor/destructor, mbfl_filt_conv_wchar_2022kr as the
+ * per-character filter and a dedicated flush (mbfl_filt_conv_any_2022kr_flush)
+ * instead of the common one.
+ */
+static struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
+       mbfl_no_encoding_wchar,
+       mbfl_no_encoding_2022kr,
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_wchar_2022kr,
+       mbfl_filt_conv_any_2022kr_flush };
+
+/*
+ * Conversion filter table for ISO-2022-KR => wchar.
+ * Uses the common constructor, destructor and flush; the per-byte work is
+ * done by mbfl_filt_conv_2022kr_wchar.
+ */
+static struct mbfl_convert_vtbl vtbl_2022kr_wchar = {
+       mbfl_no_encoding_2022kr,
+       mbfl_no_encoding_wchar,
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_2022kr_wchar,
+       mbfl_filt_conv_common_flush };
+
 #endif /* HAVE_MBSTR_KR */
 
 static struct mbfl_convert_vtbl vtbl_cp1252_wchar = {
@@ -1987,6 +2015,8 @@ static struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
        &vtbl_wchar_euckr,
        &vtbl_uhc_wchar,
        &vtbl_wchar_uhc,
+       &vtbl_2022kr_wchar,
+       &vtbl_wchar_2022kr,
 #endif
        &vtbl_cp1252_wchar,
        &vtbl_wchar_cp1252,
@@ -2170,6 +2200,13 @@ static struct mbfl_identify_vtbl vtbl_identify_uhc = {
        mbfl_filt_ident_common_ctor,
        mbfl_filt_ident_common_dtor,
        mbfl_filt_ident_uhc };
+
+/*
+ * Identification filter table for ISO-2022-KR: common constructor and
+ * destructor, with mbfl_filt_ident_2022kr as the per-byte filter.
+ */
+static struct mbfl_identify_vtbl vtbl_identify_2022kr = {
+       mbfl_no_encoding_2022kr,
+       mbfl_filt_ident_common_ctor,
+       mbfl_filt_ident_common_dtor,
+       mbfl_filt_ident_2022kr };
+
 #endif /* HAVE_MBSTR_KR */
 
 static struct mbfl_identify_vtbl vtbl_identify_cp1252 = {
@@ -2286,6 +2323,7 @@ static struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
 #if defined(HAVE_MBSTR_KR)
        &vtbl_identify_euckr,
        &vtbl_identify_uhc,
+       &vtbl_identify_2022kr,
 #endif
        &vtbl_identify_cp1252,
        &vtbl_identify_8859_1,
@@ -6011,6 +6049,77 @@ mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter TSRMLS_DC)
        return c;
 }
 
+/*
+ * Identification filter for ISO-2022-KR.
+ *
+ * Called once per input byte `c`. Sets filter->flag to 1 when the byte
+ * stream stops looking like ISO-2022-KR and always returns `c`.
+ *
+ * The low nibble of filter->status tracks progress through the
+ * "ESC $ ) C" designator:
+ *   0 = idle, 2 = ESC seen, 3 = "ESC $" seen, 4 = "ESC $ )" seen.
+ * filter->status is set to 0x10 once the complete designator has been read.
+ */
+static int
+mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter TSRMLS_DC)
+{
+retry:
+       switch (filter->status & 0xf) {
+/*     case 0x00:       ASCII */
+/*     case 0x10:       KSC5601 mode */
+/*     case 0x20:       KSC5601 DBCS */
+/*     case 0x40:       KSC5601 SBCS */
+       case 0:
+               if (!(filter->status & 0x10)) {
+                       /* designator not seen yet: only an ESC is of interest */
+                       if (c == 0x1b)
+                               filter->status += 2;
+               /*
+                * NOTE(review): this function only ever stores 0x10 (never 0x20)
+                * in the upper status bits, so the DBCS branch below and case 1
+                * are not reached -- confirm whether SO/SI tracking was intended.
+                */
+               } else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) {            /* kanji first char */
+                       filter->status += 1;
+               } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
+                       ;
+               } else {
+                       filter->flag = 1;       /* bad */
+               }
+               break;
+
+/*     case 0x21:       KSC5601 second char */
+       case 1:
+               filter->status &= ~0xf;
+               if (c < 0x21 || c > 0x7e) {             /* bad */
+                       filter->flag = 1;
+               }
+               break;
+
+       /* ESC */
+       case 2:
+               if (c == 0x24) {                /* '$' */
+                       filter->status++;
+               } else {
+                       filter->flag = 1;       /* bad */
+                       filter->status &= ~0xf;
+                       goto retry;
+               }
+               break;
+
+       /* ESC $ */
+       case 3:
+               if (c == 0x29) {                /* ')' */
+                       filter->status++;
+               } else {
+                       filter->flag = 1;       /* bad */
+                       filter->status &= ~0xf;
+                       goto retry;
+               }
+               break;
+
+       /* ESC $) */
+       /* state 3 increments to 4, so the final designator byte is handled here */
+       case 4:
+               if (c == 0x43) {                /* 'C' */
+                       filter->status = 0x10;
+               } else {
+                       filter->flag = 1;       /* bad */
+                       filter->status &= ~0xf;
+                       goto retry;
+               }
+               break;
+
+       default:
+               filter->status = 0;
+               break;
+       }
+
+       return c;
+}
+
 #endif /* HAVE_MBSTR_KR */
 
 
index 9dcdd7dfdb8c1cad03cf7cd35595cec2d3e95fc9..6feab182ddc40fac39d4d0004b7a92849a047498 100644 (file)
@@ -353,7 +353,7 @@ mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter TSRMLS_DC)
        } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
                s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
        }
-       if (s >= 0x0080) {
+       if (s & 0x8000) {
                s -= 0x8080;
        }
 
index 93f44274f067f34986cadbd66987818d74f56e9b..da4157d5711ec487c1c71ebdf9ad7b9a28f537dd 100644 (file)
@@ -172,6 +172,205 @@ mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter TSRMLS_DC)
        return c;
 }
 
+
+/*
+ * ISO-2022-KR => wchar
+ *
+ * Consumes one input byte `c` per call and forwards decoded characters to
+ * filter->output_function. Returns `c`; output calls are wrapped in CK
+ * (presumably an early return on output failure -- defined elsewhere).
+ *
+ * filter->status layout:
+ *   low nibble: 0 = idle, 1 = DBCS lead byte cached in filter->cache,
+ *               2 = ESC seen, 3 = "ESC $" seen, 4 = "ESC $ )" seen
+ *   0x10:       shifted out to KSC5601 (set by SO, cleared by SI)
+ *   0x100:      the "ESC $ ) C" designator has been seen
+ *
+ * Bytes that cannot be interpreted are passed on tagged with
+ * MBFL_WCSGROUP_THROUGH; DBCS pairs with no table entry are tagged with
+ * MBFL_WCSPLANE_KSC5601.
+ */
+int
+mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
+{
+       int c1, w, flag;
+
+retry:
+       switch (filter->status & 0xf) {
+               /* case 0x00: ASCII */
+               /* case 0x10: KSC5601 */
+       case 0:
+               if (c == 0x1b) { /* ESC */
+                       filter->status += 2;
+               } else if (c == 0x0f) { /* SI (ASCII) */
+                       filter->status &= ~0xff;
+               } else if (c == 0x0e) { /* SO (KSC5601) */
+                       filter->status |= 0x10;
+               } else if ((filter->status & 0x10) != 0  && c > 0x20 && c < 0x7f) {
+                       /* KSC5601 lead byte */
+                       filter->cache = c;
+                       filter->status += 1;
+               } else if ((filter->status & 0x10) == 0 &&  c >= 0 && c < 0x80) {
+                       /* latin, CTLs */
+                       CK((*filter->output_function)(c, filter->data TSRMLS_CC));
+               } else {
+                       w = c & MBFL_WCSGROUP_MASK;
+                       w |= MBFL_WCSGROUP_THROUGH;
+                       CK((*filter->output_function)(w, filter->data TSRMLS_CC));
+               }
+               break;
+
+       case 1:         /* dbcs second byte */
+               filter->status &= ~0xf;
+               c1 = filter->cache;
+               /* flag selects the lookup table: 1 = uhc2_ucs_table, 2 = uhc3_ucs_table */
+               flag = 0;
+               if (c1 > 0x20 && c1 < 0x47) {
+                       flag = 1;
+               } else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) {
+                       flag = 2;
+               }
+               if (flag > 0 && c > 0x20 && c < 0x7f) {
+                       if (flag == 1){
+                               w = (c1 - 0x21)*178 + (c - 0x21) + 0x54;
+                               if (w >= 0 && w < uhc2_ucs_table_size) {
+                                       w = uhc2_ucs_table[w];
+                               } else {
+                                       w = 0;
+                               }
+                       } else {
+                               if (c1 < 0x49){
+                                       w = (c1 - 0x47)*94 + c - 0x21;
+                               } else {
+                                       w = (c1 - 0x48)*94 + c - 0x21;
+                               }
+                               if (w >= 0 && w < uhc3_ucs_table_size) {
+                                       w = uhc3_ucs_table[w];
+                               } else {
+                                       w = 0;
+                               }
+                       }
+
+                       if (w <= 0) {
+                               /* no mapping: pass the raw pair through in the KSC5601 plane */
+                               w = (c1 << 8) | c;
+                               w &= MBFL_WCSPLANE_MASK;
+                               w |= MBFL_WCSPLANE_KSC5601;
+                       }
+                       CK((*filter->output_function)(w, filter->data TSRMLS_CC));
+               } else if (c == 0x1b) {  /* ESC */
+                       /*
+                        * The low nibble was cleared above, so move to state 2 (ESC seen).
+                        * A plain increment would re-enter this DBCS state with a stale
+                        * lead byte in filter->cache.
+                        */
+                       filter->status += 2;
+               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
+                       CK((*filter->output_function)(c, filter->data TSRMLS_CC));
+               } else {
+                       w = (c1 << 8) | c;
+                       w &= MBFL_WCSGROUP_MASK;
+                       w |= MBFL_WCSGROUP_THROUGH;
+                       CK((*filter->output_function)(w, filter->data TSRMLS_CC));
+               }
+               break;
+
+       case 2:                 /* ESC */
+               if (c == 0x24) { /* '$' */
+                       filter->status++;
+               } else {
+                       /* not a designator: replay the swallowed ESC, then reprocess c */
+                       filter->status &= ~0xf;
+                       CK((*filter->output_function)(0x1b, filter->data TSRMLS_CC));
+                       goto retry;
+               }
+               break;
+       case 3:         /* ESC $ */
+               if (c == 0x29) { /* ')' */
+                       filter->status++;
+               } else {
+                       filter->status &= ~0xf;
+                       CK((*filter->output_function)(0x1b, filter->data TSRMLS_CC));
+                       CK((*filter->output_function)(0x24, filter->data TSRMLS_CC));
+                       goto retry;
+               }
+               break;
+       case 4:         /* ESC $ )  */
+               if (c == 0x43) { /* 'C' */
+                       filter->status &= ~0xf;
+                       filter->status |= 0x100;
+               } else {
+                       filter->status &= ~0xf;
+                       CK((*filter->output_function)(0x1b, filter->data TSRMLS_CC));
+                       CK((*filter->output_function)(0x24, filter->data TSRMLS_CC));
+                       CK((*filter->output_function)(0x29, filter->data TSRMLS_CC));
+                       goto retry;
+               }
+               break;
+       default:
+               filter->status = 0;
+               break;
+       }
+
+       return c;
+}
+
+/*
+ * wchar => ISO-2022-KR
+ *
+ * Encodes one wide character `c` and writes the resulting bytes through
+ * filter->output_function. Returns `c`; output calls are wrapped in CK
+ * (presumably an early return on output failure -- defined elsewhere).
+ *
+ * filter->status bits used here:
+ *   0x10:  SO is in effect (output is currently in KSC5601 mode)
+ *   0x100: the "ESC $ ) C" designator has already been written
+ *
+ * Characters that cannot be encoded are handed to
+ * mbfl_filt_conv_illegal_output unless illegal_mode is
+ * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
+ */
+int
+mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter TSRMLS_DC)
+{
+       int c1, c2, s;
+
+       s = 0;
+
+       if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
+               s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
+       } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
+               s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
+       } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
+               s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
+       } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
+               s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
+       } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
+               s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
+       } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
+               s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
+       }
+
+       c1 = (s >> 8) & 0xff;
+       c2 = s & 0xff;
+       /* exclude UHC extension area */
+       /*
+        * NOTE(review): this also fires when no table matched (s == 0), so s
+        * becomes the raw code point and may then take the 0x8080 adjustment
+        * below -- confirm this is intended for non-ASCII c.
+        */
+       if (c1 < 0xa1 || c2 < 0xa1){
+               s = c;
+       }
+       /* fold the 8-bit double-byte form (both bytes >= 0xa1) down to 7-bit */
+       if (s & 0x8000) {
+               s -= 0x8080;
+       }
+
+       if (s <= 0) {
+               c1 = c & ~MBFL_WCSPLANE_MASK;
+               if (c1 == MBFL_WCSPLANE_KSC5601) {
+                       s = c & MBFL_WCSPLANE_MASK;
+               }
+               if (c == 0) {
+                       s = 0;
+               } else if (s <= 0) {
+                       s = -1;
+               }
+       } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
+               s = -1;
+       }
+       if (s >= 0) {
+               if (s < 0x80) { /* ASCII */
+                       if ((filter->status & 0x10) != 0) {
+                               CK((*filter->output_function)(0x0f, filter->data TSRMLS_CC));           /* SI */
+                               filter->status &= ~0x10;
+                       }
+                       CK((*filter->output_function)(s, filter->data TSRMLS_CC));
+               } else {
+                       /*
+                        * The designator has to be written before the first SO,
+                        * so emit it (once) ahead of the shift.
+                        */
+                       if ( (filter->status & 0x100) == 0) {
+                               CK((*filter->output_function)(0x1b, filter->data TSRMLS_CC));           /* ESC */
+                               CK((*filter->output_function)(0x24, filter->data TSRMLS_CC));           /* '$' */
+                               CK((*filter->output_function)(0x29, filter->data TSRMLS_CC));           /* ')' */
+                               CK((*filter->output_function)(0x43, filter->data TSRMLS_CC));           /* 'C' */
+                               filter->status |= 0x100;
+                       }
+                       if ((filter->status & 0x10) == 0) {
+                               CK((*filter->output_function)(0x0e, filter->data TSRMLS_CC));           /* SO */
+                               filter->status |= 0x10;
+                       }
+                       CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
+                       CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
+               }
+       } else {
+               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+                       CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
+               }
+       }
+
+       return c;
+}
+
 /*
  * UHC => wchar
  */
@@ -314,6 +513,17 @@ mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter TSRMLS_DC)
        return c;
 }
 
+/*
+ * Flush handler for the ISO-2022-KR output filter.
+ *
+ * If the output is still shifted out (status bit 0x10, set when SO was
+ * written), writes SI so the stream ends in ASCII mode. Then clears the
+ * shift flag and every status bit above the low byte, which includes the
+ * designator-written flag 0x100. Returns 0.
+ */
+int
+mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter TSRMLS_DC)
+{
+       /* back to ascii: SI is only needed while SO is still in effect */
+       if ((filter->status & 0x10) != 0) {
+               CK((*filter->output_function)(0x0f, filter->data TSRMLS_CC));           /* SI */
+       }
+       /* also drop 0x10, otherwise a reused filter would skip the next SO */
+       filter->status &= 0xff & ~0x10;
+       return 0;
+}
+
 #endif /* HAVE_MBSTR_KR */
 
 /*
index 22b7f5ed4b7e617f25dc0a4941e096f4c2e8cae1..34d0d54309ba6b10862df45e13a4ae4d9f682243 100644 (file)
@@ -26,5 +26,8 @@ int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
 int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter TSRMLS_DC);
 int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
 int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter TSRMLS_DC);
+int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
+int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter TSRMLS_DC);
+int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter TSRMLS_DC);
 
 #endif /* MBFL_MBFILTER_KR_H */
index 5b40ff3f890df3cd834edb1777a554c7b3b15844..f5c1e02ebbeafdade1d38703b11768339a29ef0b 100644 (file)
@@ -65,7 +65,7 @@
 #include "php_content_types.h"
 #include "SAPI.h"
 
-#ifdef ZEND_MULTIBYTE
+#if ZEND_MULTIBYTE
 #include "zend_multibyte.h"
 #endif /* ZEND_MULTIBYTE */