From 4f247de89b3ae17294f48e8e1e2e1499e5eb1c01 Mon Sep 17 00:00:00 2001
From: Moriyoshi Koizumi
Date: Fri, 8 Oct 2004 14:40:46 +0000
Subject: [PATCH] - Sync with the latest codebase (fixes bug #28220).

---
 ext/mbstring/libmbfl/configure.in        |  6 ++
 ext/mbstring/libmbfl/mbfl/Makefile.am    | 12 +++-
 ext/mbstring/libmbfl/mbfl/eaw_table.h    | 36 +++++++++++
 ext/mbstring/libmbfl/mbfl/mbfilter.c     | 35 ++++++-----
 ext/mbstring/libmbfl/mbfl/mk_eaw_tbl.awk | 80 ++++++++++++++++++++++++
 5 files changed, 153 insertions(+), 16 deletions(-)
 create mode 100644 ext/mbstring/libmbfl/mbfl/eaw_table.h
 create mode 100644 ext/mbstring/libmbfl/mbfl/mk_eaw_tbl.awk

diff --git a/ext/mbstring/libmbfl/configure.in b/ext/mbstring/libmbfl/configure.in
index b6e5ab581a..25d2e59373 100644
--- a/ext/mbstring/libmbfl/configure.in
+++ b/ext/mbstring/libmbfl/configure.in
@@ -28,5 +28,11 @@ AC_FUNC_MALLOC
 AC_FUNC_REALLOC
 AC_CHECK_FUNCS([strcasecmp strchr])
 
+AC_CHECK_PROGS([FETCH_VIA_FTP], [wget curl ncftpget])
+
+if test "$FETCH_VIA_FTP" = "curl"; then
+  FETCH_VIA_FTP="curl -O"
+fi
+
 AC_CONFIG_FILES([Makefile mbfl/Makefile filters/Makefile nls/Makefile])
 AC_OUTPUT
diff --git a/ext/mbstring/libmbfl/mbfl/Makefile.am b/ext/mbstring/libmbfl/mbfl/Makefile.am
index 6f817feba2..25d6734541 100644
--- a/ext/mbstring/libmbfl/mbfl/Makefile.am
+++ b/ext/mbstring/libmbfl/mbfl/Makefile.am
@@ -1,6 +1,6 @@
-EXTRA_DIST=Makefile.bcc32
+EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk
 lib_LTLIBRARIES=libmbfl.la
-libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c
+libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c eaw_table.h
 libmbfl_filters_la=../filters/libmbfl_filters.la
 libmbfl_nls_la=../nls/libmbfl_nls.la
 libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la)
@@ -8,6 +8,14 @@ libmbfl_la_LDFLAGS=-version-info $(SHLIB_VERSION)
 libmbfl_includedir=$(includedir)/mbfl
 libmbfl_include_HEADERS=mbfilter.h mbfl_consts.h mbfl_encoding.h mbfl_language.h mbfl_string.h mbfl_convert.h mbfl_ident.h mbfl_memory_device.h mbfl_allocators.h mbfl_defs.h mbfl_filter_output.h mbfilter_pass.h mbfilter_wchar.h mbfilter_8bit.h
 
+mbfilter.c: eaw_table.h
+
+eaw_table.h: mk_eaw_tbl.awk
+	$(AWK) -v TABLE_NAME=mbfl_eaw_table -f mk_eaw_tbl.awk EastAsianWidth.txt > $@
+
+EastAsianWidth.txt:
+	$(FETCH_VIA_FTP) ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
+
 $(libmbfl_filters_la):
 	$(MAKE) -C `dirname $(libmbfl_filters_la)`
 
diff --git a/ext/mbstring/libmbfl/mbfl/eaw_table.h b/ext/mbstring/libmbfl/mbfl/eaw_table.h
new file mode 100644
index 0000000000..95c895df1b
--- /dev/null
+++ b/ext/mbstring/libmbfl/mbfl/eaw_table.h
@@ -0,0 +1,36 @@
+static const struct {
+	int begin;
+	int end;
+} mbfl_eaw_table[] = {
+	{ 0x1100, 0x1159 },
+	{ 0x115f, 0x115f },
+	{ 0x2329, 0x232a },
+	{ 0x2e80, 0x2e99 },
+	{ 0x2e9b, 0x2ef3 },
+	{ 0x2f00, 0x2fd5 },
+	{ 0x2ff0, 0x2ffb },
+	{ 0x3000, 0x303e },
+	{ 0x3041, 0x3096 },
+	{ 0x3099, 0x30ff },
+	{ 0x3105, 0x312c },
+	{ 0x3131, 0x318e },
+	{ 0x3190, 0x31b7 },
+	{ 0x31f0, 0x321e },
+	{ 0x3220, 0x3243 },
+	{ 0x3250, 0x327d },
+	{ 0x327f, 0x32fe },
+	{ 0x3300, 0x4db5 },
+	{ 0x4e00, 0x9fa5 },
+	{ 0xa000, 0xa48c },
+	{ 0xa490, 0xa4c6 },
+	{ 0xac00, 0xd7a3 },
+	{ 0xf900, 0xfa2d },
+	{ 0xfa30, 0xfa6a },
+	{ 0xfe30, 0xfe52 },
+	{ 0xfe54, 0xfe66 },
+	{ 0xfe68, 0xfe6b },
+	{ 0xff01, 0xff60 },
+	{ 0xffe0, 0xffe6 },
+	{ 0x20000, 0x2fffd },
+	{ 0x30000, 0x3fffd }
+};
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c
index 7984f30931..01e4787a96 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c
@@ -103,6 +103,8 @@
 #include "mbfl_filter_output.h"
 #include "mbfilter_pass.h"
 
+#include "eaw_table.h"
+
 /* hex character table "0123456789ABCDEF" */
 static char mbfl_hexchar_table[] = {
 	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46
@@ -1343,20 +1345,31 @@ mbfl_strcut(
 }
 
 
+#include
 /*
  * strwidth
  */
-static int
-filter_count_width(int c, void* data)
+static int is_fullwidth(int c)
 {
-	if (c >= 0x20) {
-		if (c < 0x2000 || (c > 0xff60 && c < 0xffa0)) {
-			(*(int *)data)++;
-		} else {
-			(*(int *)data) += 2;
+	int i;
+
+	if (c < mbfl_eaw_table[0].begin) {
+		return 0;
+	}
+
+	for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
+		if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
+			return 1;
 		}
 	}
 
+	return 0;
+}
+
+static int
+filter_count_width(int c, void* data)
+{
+	(*(int *)data) += (is_fullwidth(c) ? 2: 1);
 	return c;
 }
 
@@ -1421,13 +1434,7 @@ collector_strimwidth(int c, void* data)
 		break;
 	default:
 		if (pc->outchar >= pc->from) {
-			if (c >= 0x20) {
-				if (c < 0x2000 || (c > 0xff60 && c < 0xffa0)) {
-					pc->outwidth++;
-				} else {
-					pc->outwidth += 2;
-				}
-			}
+			pc->outwidth += (is_fullwidth(c) ? 2: 1);
 			if (pc->outwidth > pc->width) {
 				if (pc->status == 0) {
 					pc->endpos = pc->device.pos;
diff --git a/ext/mbstring/libmbfl/mbfl/mk_eaw_tbl.awk b/ext/mbstring/libmbfl/mbfl/mk_eaw_tbl.awk
new file mode 100644
index 0000000000..c7deb4cdf5
--- /dev/null
+++ b/ext/mbstring/libmbfl/mbfl/mk_eaw_tbl.awk
@@ -0,0 +1,80 @@
+#!/usr/bin/awk -f
+#
+# $Id$
+#
+# Description: a script to generate east asian width table.
+#
+
+BEGIN {
+	prev = -1
+	comma = 0
+	ORS = ""
+	FS = "[;.|# ]"
+	print "static const struct {\n\tint begin;\n\tint end;\n} " TABLE_NAME "[] = {\n\t"
+}
+
+/^#/ {
+}
+
+/^[0-9a-fA-F]+;/ {
+	if ($2 == "W" || $2 == "F") {
+		v = ( "0x" $1 ) + 0
+		if (prev < 0) {
+			first = v
+		} else if (v - prev > 1) {
+			if (comma) {
+				print ",\n\t"
+			}
+			printf("{ 0x%04x, 0x%04x }", first, prev)
+			first = v
+			comma = 1
+		}
+		prev = v
+	} else {
+		if (prev >= 0) {
+			if (comma) {
+				print ",\n\t"
+			}
+			printf("{ 0x%04x, 0x%04x }", first, prev)
+			prev = -1
+			comma = 1
+		}
+	}
+}
+
+/^[0-9a-fA-F]+\.\./ {
+	if ($4 == "W" || $4 == "F") {
+		vs = ( "0x" $1 ) + 0
+		ve = ( "0x" $3 ) + 0
+		if (prev < 0) {
+			first = vs
+		} else if (vs - prev > 1) {
+			if (comma) {
+				print ",\n\t"
+			}
+			printf("{ 0x%04x, 0x%04x }", first, prev)
+			first = vs
+			comma = 1
+		}
+		prev = ve
+	} else {
+		if (prev >= 0) {
+			if (comma) {
+				print ",\n\t"
+			}
+			printf("{ 0x%04x, 0x%04x }", first, prev)
+			prev = -1
+			comma = 1
+		}
+	}
+}
+
+END {
+	if (prev >= 0) {
+		if (comma) {
+			print ",\n\t"
+		}
+		printf("{ 0x%04x, 0x%04x }", first, prev)
+	}
+	print "\n};\n"
+}
-- 
2.40.0