From: Reuben Thomas Date: Thu, 18 Jan 2018 23:14:22 +0000 (+0000) Subject: Add ANSEL and ISO 5426 codecs from Wolfram Schneider X-Git-Tag: v3.7~92 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cbd1350faf9171eb3e2eb421f646ee4c8b159a01;p=recode Add ANSEL and ISO 5426 codecs from Wolfram Schneider Fixes Debian bug #277571. --- diff --git a/.gitignore b/.gitignore index c7113cb..5b4cb5d 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,8 @@ html/ /src/fr-charname.h /src/iconvdecl.h /src/inisteps.h +/src/lat1ansel.h +/src/lat1iso5426.h /src/merged.c /src/recode /src/recode.1 diff --git a/src/Makefile.am b/src/Makefile.am index 2f3a3aa..ab700c8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -27,19 +27,20 @@ include_HEADERS = recode.h recodext.h H_STEPS = common.h decsteps.h inisteps.h tersteps.h \ $(H_FALLBACKS) $(H_SURFACES) -noinst_HEADERS = $(H_STEPS) cleaner.h charname.h fr-charname.h iconvdecl.h rfc1345.h +noinst_HEADERS = $(H_STEPS) cleaner.h charname.h fr-charname.h iconvdecl.h \ +rfc1345.h lat1iso5426.h lat1ansel.h EXTRA_DIST = stamp-steps stamp-strip $(L_STEPS) mergelex.py $(MANS) CLEANFILES = iconvdecl.h C_STEPS = african.c afrtran.c applemac.c atarist.c bangbang.c cdcnos.c \ -ebcdic.c ibmpc.c iconqnx.c lat1asci.c java.c mule.c strip-data.c \ -testdump.c ucs.c utf16.c utf7.c utf8.c varia.c vn.c +ebcdic.c ibmpc.c iconqnx.c lat1asci.c lat1iso5426.c lat1ansel.c \ +java.c mule.c strip-data.c testdump.c ucs.c utf16.c utf7.c utf8.c varia.c vn.c ALL_STEPS = $(C_STEPS) $(C_FALLBACKS) $(C_SURFACES) -L_STEPS = ascilat1.l $(L_FALLBACKS) $(L_SURFACES) +L_STEPS = ascilat1.l iso5426lat1.l ansellat1.l $(L_FALLBACKS) $(L_SURFACES) H_FALLBACKS = C_FALLBACKS = flat.c html.c lat1ltex.c lat1txte.c rfc1345.c texinfo.c @@ -86,11 +87,11 @@ LEX = @LEX@ PYTHON = @PYTHON@ TABLES_PY = $(PYTHON) $(top_srcdir)/tables.py -$(srcdir)/merged.c: mergelex.py $(L_STEPS) +merged.c: mergelex.py $(L_STEPS) (cd $(srcdir) && cat $(L_STEPS)) \ | $(PYTHON) 
$(srcdir)/mergelex.py > merged.tm1 $(LEX) -t -8 -Plibrecode_yy merged.tm1 > merged.tm2 - grep -v '^# *line [0-9]' merged.tm2 > $(srcdir)/merged.c + grep -av '^# *line [0-9]' merged.tm2 > $(srcdir)/merged.c rm merged.tm1 merged.tm2 recode.1: main.c $(top_srcdir)/configure.ac @@ -115,11 +116,17 @@ iconv.lo: iconvdecl.h iconvdecl.h: ../tables.py $(TABLES_PY) -i +# lat1ansel.h and lat1iso5426.h +.l.h: + grep '^\\' $< | \ + perl -npe 's/{ put_byte \(//;s /, subtask\); }//;s%^\\%%' | \ + perl -ne '($$a,$$b)=split; $$c=substr($$a,0,3);$$d=substr($$a,3);$$e = eval "0$$c"; next if $$b < 0200 ;printf("\t{%d, \"%c%s\"}, %s* %c *%s\n", $$b, $$e, $$d, "/", $$b, "/")' > $@ + african.lo afrtran.lo applemac.lo atarist.lo bangbang.lo cdcnos.lo \ -ebcdic.lo ibmpc.lo iconqnx.lo lat1asci.lo java.lo mule.lo strip-data.lo \ -testdump.lo ucs.lo utf16.lo utf7.lo utf8.lo varia.lo vn.lo \ -flat.lo html.lo lat1ltex.lo lat1txte.lo rfc1345.lo texinfo.lo \ -base64.lo dump.lo endline.lo permut.lo quoted.lo \ +ebcdic.lo ibmpc.lo iconqnx.lo lat1asci.lo lat1iso5426.lo lat1ansel.lo \ +java.lo mule.lo strip-data.lo testdump.lo ucs.lo utf16.lo utf7.lo utf8.lo \ +varia.lo vn.lo flat.lo html.lo lat1ltex.lo lat1txte.lo rfc1345.lo \ +texinfo.lo base64.lo dump.lo endline.lo permut.lo quoted.lo \ iconv.lo outer.lo decsteps.h inisteps.h tersteps.h: stamp-steps stamp-steps: $(ALL_STEPS) merged.c @echo "Updating {dec,ini,ter}steps.h" diff --git a/src/ansellat1.l b/src/ansellat1.l new file mode 100644 index 0000000..52f62e0 --- /dev/null +++ b/src/ansellat1.l @@ -0,0 +1,205 @@ +/* + * Copyright (c) 1998, Wolfram Schneider + * Konrad Zuse Zentrum für Informationstechnik Berlin. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: ansellat1.l,v 1.1 1998/03/25 11:34:53 wolfram Exp wolfram $ + * + * + * 8-Bit ANSEL to ISO8859-1 conversion. This file supports only a subset + * of Z39.47-1993 (ANSEL). It is incomplete due to the lack of freely + * available documentation ;-( + * + * http://www.niso.org/pccodes.htm + * Z39.47-1993 Extended Latin Alphabet Coded Character Set for + * Bibliographic Use (ANSEL) + * + * Character sets are a basic building block of automated information + * systems. Z39.47 provides a table of coded values for the + * representation of characters of the extended Latin alphabet in + * machine-readable form for thirty-five languages written in the Latin + * alphabet and for fifty-one romanized languages. + * + * 27 pp. 
ISBN: 1-880124-02-5 Price: $40.00 + * + * + * http://wwnet.com/~krugman1/gentext/gedstand.txt + * 8-Bit ANSEL + * + * The 8-Bit ANSEL (American National Standard for Extended Latin + * Alphabet Coded Character Set for Bibliographic Use, Z39.47, 1985 + * copyright) is the default character set for GEDCOM. [...] + * The ANSEL standard + * specifies an extended 8-bit configuration (above 128) to represent the + * spacing and non-spacing graphic characters that make up most of the + * Latin based languages. ANSEL is a super-set of ASCII. The standard + * ASCII characters including the control characters are preserved. + * + * ANSEL is known by two other names: (1) ANSI Z39.47-1985) and (2) the + * American Library Association character set, used in library systems + * worldwide, including the MARC (MAchine- Readable Catalog) format. + * + * A description of the codes for the ANSEL character set has been + * reproduced with permission and is included with the printed version of + * The GEDCOM Standard. The description of ANSEL codes is not included in + * the electronic version. This description may be purchased from the + * American National Standards Institute at 1430 Broadway, New York, + * N.Y. 10018. The description of the ANSEL character set standard + * includes the following: + * + * * An 8-Bit Code Table showing the ASCII and extended ANSEL codes + * * An explanation or legend of these codes + * * A chart that identifies the ANSEL Non-spacing Graphic Characters + * * A chart that identifies the ASCII Control Characters + * * A chart that identifies the ASCII Graphic Characters + * + * Character-set codes 0 through 127 are the same for 8-Bit ANSEL and + * 8-Bit ASCII (USA version-- ANSI 8-Bit). Character-set codes 128 + * through 255 are unique to the ANSEL character set. 
+ *
+ * Changes
+ * ========
+ * Fix ansel characters
+ *
+ * Capital C with cedilla
+ * Lower case C with cedilla
+ * Capital ETH
+ * Lower case eth
+ * Capital O with slash
+ * Capital Y with acute accent
+ * Lower case Icelandic thorn
+ * capital OE ligature
+ *
+ * Thanks to Larry E. Dixson
+ * NOTE: keep every rule in the form `\NNN[letter] { put_byte (N, subtask); }'; the .l.h rule in Makefile.am greps and rewrites lines of exactly this shape.
+ */
+
+
+/* Step name: ansel_latin1. */
+
+%%
+
+\241 { put_byte (76, subtask); }
+\242 { put_byte (216, subtask); }
+\243 { put_byte (208, subtask); }
+\244 { put_byte (222, subtask); }
+\245 { put_byte (198, subtask); }
+
+\253 { put_byte (177, subtask); }
+
+\261 { put_byte (108, subtask); }
+\262 { put_byte (248, subtask); }
+\263 { put_byte (240, subtask); }
+\264 { put_byte (254, subtask); }
+\265 { put_byte (230, subtask); }
+\266 { put_byte (111, subtask); }
+
+\271 { put_byte (163, subtask); }
+
+\303 { put_byte (169, subtask); }
+\340 { put_byte (191, subtask); }
+\341A { put_byte (192, subtask); }
+\341E { put_byte (200, subtask); }
+\341I { put_byte (204, subtask); }
+\341O { put_byte (210, subtask); }
+\341U { put_byte (217, subtask); }
+\341a { put_byte (224, subtask); }
+\341e { put_byte (232, subtask); }
+\341i { put_byte (236, subtask); }
+\341o { put_byte (242, subtask); }
+\341u { put_byte (249, subtask); }
+
+\342A { put_byte (193, subtask); }
+\342E { put_byte (201, subtask); }
+\342I { put_byte (205, subtask); }
+\342O { put_byte (211, subtask); }
+\342U { put_byte (218, subtask); }
+\342Y { put_byte (221, subtask); }
+
+\342a { put_byte (225, subtask); }
+\342e { put_byte (233, subtask); }
+\342i { put_byte (237, subtask); }
+\342o { put_byte (243, subtask); }
+\342u { put_byte (250, subtask); }
+\342y { put_byte (253, subtask); }
+
+\343A { put_byte (194, subtask); }
+\343E { put_byte (202, subtask); }
+\343I { put_byte (206, subtask); }
+\343O { put_byte (212, subtask); }
+\343U { put_byte (219, subtask); }
+
+\343a { put_byte (226, subtask); }
+\343e { put_byte (234, subtask); }
+\343i { put_byte (238, subtask); }
+\343o { put_byte (244, subtask); }
+\343u { put_byte (251, subtask); }
+
+\344A { put_byte (195, subtask); }
+\344O { put_byte (213, subtask); }
+\344N { put_byte (209, subtask); }
+\344a { put_byte (227, subtask); }
+\344o { put_byte (245, subtask); }
+\344n { put_byte (241, subtask); }
+
+\345 { put_byte (175, subtask); }
+
+\350A { put_byte (196, subtask); }
+\350E { put_byte (203, subtask); }
+\350I { put_byte (207, subtask); }
+\350O { put_byte (214, subtask); }
+\350U { put_byte (220, subtask); }
+
+\350a { put_byte (228, subtask); }
+\350e { put_byte (235, subtask); }
+\350i { put_byte (239, subtask); }
+\350o { put_byte (246, subtask); }
+\350u { put_byte (252, subtask); }
+\350y { put_byte (255, subtask); }
+
+\352A { put_byte (197, subtask); }
+\352a { put_byte (229, subtask); }
+
+\360C { put_byte (199, subtask); }
+\360c { put_byte (231, subtask); }
+%%
+/* Register the Z39.47:1993 -> Latin-1 step (no init routine) and its aliases; false as soon as one declare_* call returns false.  NOTE(review): transform_ansel_latin1 is not defined here, presumably generated from the rules above -- confirm.  */
+bool
+module_ansel_latin1 (RECODE_OUTER outer)
+{
+  return declare_single (outer, "Z39.47:1993", "Latin-1",
+                         outer->quality_variable_to_byte, NULL,
+                         transform_ansel_latin1)
+    && declare_alias (outer, "8bitANSEL", "Z39.47:1993")
+    && declare_alias (outer, "Z39.47", "Z39.47:1993")
+    && declare_alias (outer, "ANSEL", "Z39.47:1993")
+    && declare_alias (outer, "USMARC", "Z39.47:1993")
+    && declare_alias (outer, "MARC", "Z39.47:1993")
+    && declare_alias (outer, "LOC", "Z39.47:1993"); /* Library of Congress */
+}
+/* Nothing to release for this module; the parameter is deliberately unused.  */
+void
+delmodule_ansel_latin1 (RECODE_OUTER outer _GL_UNUSED_PARAMETER)
+{
+}
diff --git a/src/iso5426lat1.l b/src/iso5426lat1.l
new file mode 100644
index 0000000..1c17062
--- /dev/null
+++ b/src/iso5426lat1.l
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 1998, Wolfram Schneider
+ * Konrad Zuse Zentrum für Informationstechnik Berlin.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: iso5426lat1.l,v 1.4 1998/02/26 17:57:38 wolfram Exp $ + * + * + * iso5426 to iso8859-1 conversion. This file supports only a subset + * of ISO 5426:1983. It is incomplete due to the lack of freely + * available documentation ;-( + * + * The character conversion is based on the German MAB2 (Maschinelles + * Austauschformat für Bibliotheken) documentation, October 1996, + * Die Deutsche Bibliothek, Frankfurt am Main. 
+ * URL: http://www.ddb.de, ftp://ftp.ddb.de/pub/mab/ + * + * + * http://lcweb.loc.gov/loc/standards/isotc46/sc4standards.html + * Character sets: + * ISO 5426:1983 - Extension of the Latin alphabet coded character + * set for bibliographic information interchange + * ISO 5426-2:1996 - Extension of the Latin alphabet coded character set + * for bibliographic information interchange + * Part 2: Latin characters used in minor European + * languages and obsolete typography + * + * + * ISO 5426:1983 + * + * Extension of the Latin alphabet coded character set for bibliographic + * information interchange + * + * Contains a set of 76 graphic characters with their coded + * representations. It includes a code table and a legend showing each + * graphic, its name and use, and explanatory notes. Primarily intended + * for information interchange among data processing systems and within + * message transmission systems, this character set is designed to handle + * information in 39 specified languages, as well as transliterated or + * romanized forms of an additional 32 languages. These characters, + * together with the characters in the international reference version of + * ISO 646 (ISO escape sequence ESC 2/8 4/0), constitute a character set + * for the international interchange of bibliographic citations, + * including their annotations, in the Latin alphabet. + * + * ISO 5426-2:1996 + * + * Extension of the Latin alphabet coded character set for bibliographic + * information interchange Part 2: Latin characters used in minor + * European languages and obsolete typography + * + * Contains a set of 70 graphic characters, and their coded + * representations. These characters form a supplement to those provided + * in ISO 5426 by addressing less common and obsolete languages which use + * the Latin script and obsolete printing conventions. Included is a code + * table and a legend showing each graphic, its name and use, and + * explanatory notes. 
This character set is primarily intended for
+ * information interchange among data processing systems and within
+ * message transmission systems. These characters, together with the
+ * characters from ISO 646/IEC and ISO 5426, is intended to handle
+ * information in the following languages: Anglo-Saxon, Greenlandic,
+ * Lappish, Latin, Latvian (older forms), and Maltese. It is also
+ * intended to cover printing conventions associated with older books, in
+ * particular marks associated with binding signatures.
+ * NOTE: keep every rule in the form `\NNN[letter] { put_byte (N, subtask); }'; the .l.h rule in Makefile.am greps and rewrites lines of exactly this shape.
+ */
+
+
+/* Step name: iso5426_latin1. */
+
+
+%%
+\173 { put_byte (123, subtask); }
+\174 { put_byte (124, subtask); }
+\175 { put_byte (125, subtask); }
+\37 { put_byte (36, subtask); }
+\266 { put_byte (158, subtask); }
+\241 { put_byte (161, subtask); }
+\243 { put_byte (163, subtask); }
+\244 { put_byte (36, subtask); }
+\247 { put_byte (167, subtask); }
+
+\210 { put_byte (172, subtask); }
+\211 { put_byte (172, subtask); }
+
+\253 { put_byte (171, subtask); }
+\273 { put_byte (187, subtask); }
+\277 { put_byte (191, subtask); }
+
+\341 { put_byte (198, subtask); }
+\342 { put_byte (208, subtask); }
+
+\350 { put_byte (76, subtask); }
+\351 { put_byte (216, subtask); }
+\354 { put_byte (254, subtask); }
+\361 { put_byte (230, subtask); }
+\362 { put_byte (100, subtask); }
+\363 { put_byte (240, subtask); }
+\365 { put_byte (134, subtask); }
+\370 { put_byte (108, subtask); }
+\371 { put_byte (248, subtask); }
+\373 { put_byte (223, subtask); }
+\374 { put_byte (222, subtask); }
+
+\301a { put_byte (224, subtask); }
+\301e { put_byte (232, subtask); }
+\301i { put_byte (236, subtask); }
+\301o { put_byte (242, subtask); }
+\301u { put_byte (249, subtask); }
+
+\301A { put_byte (192, subtask); }
+\301E { put_byte (200, subtask); }
+\301I { put_byte (204, subtask); }
+\301O { put_byte (210, subtask); }
+\301U { put_byte (217, subtask); }
+
+\302a { put_byte (225, subtask); }
+\302e { put_byte (233, subtask); }
+\302i { put_byte (237, subtask); }
+\302o { put_byte (243, subtask); }
+\302u { put_byte (250, subtask); }
+\302y { put_byte (253, subtask); }
+
+\302A { put_byte (193, subtask); }
+\302E { put_byte (201, subtask); }
+\302I { put_byte (205, subtask); }
+\302O { put_byte (211, subtask); }
+\302U { put_byte (218, subtask); }
+\302Y { put_byte (221, subtask); }
+
+\303a { put_byte (226, subtask); }
+\303e { put_byte (234, subtask); }
+\303i { put_byte (238, subtask); }
+\303o { put_byte (244, subtask); }
+\303u { put_byte (251, subtask); }
+
+\303A { put_byte (194, subtask); }
+\303E { put_byte (202, subtask); }
+\303I { put_byte (206, subtask); }
+\303O { put_byte (212, subtask); }
+\303U { put_byte (219, subtask); }
+
+\304a { put_byte (227, subtask); }
+\304o { put_byte (245, subtask); }
+\304n { put_byte (241, subtask); }
+
+\304A { put_byte (195, subtask); }
+\304O { put_byte (213, subtask); }
+\304N { put_byte (209, subtask); }
+
+\310e { put_byte (235, subtask); }
+\310i { put_byte (239, subtask); }
+\310y { put_byte (255, subtask); }
+
+\310E { put_byte (203, subtask); }
+\310I { put_byte (207, subtask); }
+
+\312a { put_byte (229, subtask); }
+\312A { put_byte (197, subtask); }
+
+\320c { put_byte (231, subtask); }
+\320C { put_byte (199, subtask); }
+
+\311a { put_byte (228, subtask); }
+\311o { put_byte (246, subtask); }
+\311u { put_byte (252, subtask); }
+\311A { put_byte (196, subtask); }
+\311O { put_byte (214, subtask); }
+\311U { put_byte (220, subtask); }
+%%
+/* Register the ISO_5426:1983-DB-MAB2 -> Latin-1 step (no init routine) and its aliases; false as soon as one declare_* call returns false.  NOTE(review): transform_iso5426_latin1 is not defined here, presumably generated from the rules above -- confirm.  */
+bool
+module_iso5426_latin1 (RECODE_OUTER outer)
+{
+  return declare_single (outer, "ISO_5426:1983-DB-MAB2", "Latin-1",
+                         outer->quality_variable_to_byte, NULL,
+                         transform_iso5426_latin1)
+    && declare_alias (outer, "ISO_5426", "ISO_5426:1983-DB-MAB2")
+    && declare_alias (outer, "DB-MAB2", "ISO_5426:1983-DB-MAB2")
+    && declare_alias (outer, "MAB2", "ISO_5426:1983-DB-MAB2")
+    && declare_alias (outer, "MAB", "ISO_5426:1983-DB-MAB2")
+    && declare_alias (outer, "DDB", "ISO_5426:1983-DB-MAB2"); /* Die Deutsche Bibliothek */
+}
+/* Nothing to release for this module; the parameter is deliberately unused.  */
+void
+delmodule_iso5426_latin1 (RECODE_OUTER outer _GL_UNUSED_PARAMETER)
+{
+}
diff --git a/src/lat1ansel.c b/src/lat1ansel.c
new file mode 100644
index 0000000..e14f599
--- /dev/null
+++ b/src/lat1ansel.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1998, Wolfram Schneider
+ * Konrad Zuse Zentrum für Informationstechnik Berlin.
+ * All rights reserved.
+ *
+ * Read the file ansellat1.l for more information about Z39.47-1993.
+ *
+ * $Id: lat1ansel.c,v 1.1 1998/02/19 15:51:31 wolfram Exp $
+ *
+ */
+
+/* Conversion of files between different charsets and usages.
+   Copyright (C) 1990, 1993 Free Software Foundation, Inc.
+   Francois Pinard , 1988.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+#include "common.h"
+#include "decsteps.h"
+/* One Latin-1 code and the Z39.47 (ANSEL) string that replaces it.  */
+struct translation
+  {
+    int code;                   /* code being translated */
+    const char *string;         /* translation string */
+  };
+/* Entries come from lat1ansel.h; the {0, NULL} sentinel ends the list.  */
+static const struct translation diacritic_translations [] =
+  {
+#include "lat1ansel.h"
+    {0, NULL},
+  };
+/* Step initializer: build the 256-entry string table stored in step->step_table; returns false if allocation fails.  */
+static bool
+init_latin1_ansel (RECODE_STEP step,
+                   const struct recode_request *request,
+                   RECODE_CONST_OPTION_LIST before_options _GL_UNUSED_PARAMETER,
+                   RECODE_CONST_OPTION_LIST after_options _GL_UNUSED_PARAMETER)
+{
+  RECODE_OUTER outer = request->outer; /* NOTE(review): presumably read by the ALLOC_SIZE macro -- confirm */
+
+  char *pool;
+  const char **table;
+  unsigned counter;
+  struct translation const *cursor;
+  /* One block: 256 string pointers, then a 256-byte pool holding 128 two-byte strings.  */
+  if (!ALLOC_SIZE (table, 256 * sizeof (char *) + 256, const char *))
+    return false;
+  pool = (char *) (table + 256);
+  /* Codes 0..127 translate to themselves; codes 128..255 start out as NULL.  */
+  for (counter = 0; counter < 128; counter++)
+    {
+      pool[2 * counter] = counter;
+      pool[2 * counter + 1] = '\0';
+      table[counter] = pool + 2 * counter;
+    }
+  for (counter = 128; counter < 256; counter++)
+    table[counter] = NULL;
+  for (cursor = diacritic_translations; cursor->code; cursor++) /* codes listed in lat1ansel.h */
+    table[cursor->code] = cursor->string;
+
+  step->step_table = table;
+
+  return true;
+}
+/* Register the Latin-1 -> Z39.47:1993 step.  It reads single bytes and writes strings (transform_byte_to_variable), hence the byte_to_variable quality.  */
+bool
+module_latin1_ansel (RECODE_OUTER outer)
+{
+  return declare_single (outer, "Latin-1", "Z39.47:1993",
+                         outer->quality_byte_to_variable, init_latin1_ansel,
+                         transform_byte_to_variable);
+}
+/* Nothing to release for this module.  */
+_GL_ATTRIBUTE_CONST void
+delmodule_latin1_ansel (RECODE_OUTER outer _GL_UNUSED_PARAMETER)
+{
+}
diff --git a/src/lat1iso5426.c b/src/lat1iso5426.c
new file mode 100644
index 0000000..fa50a3a
--- /dev/null
+++ b/src/lat1iso5426.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 1998, Wolfram Schneider
+ * Konrad Zuse Zentrum für Informationstechnik Berlin.
+ * All rights reserved.
+ *
+ * Read the file iso5426lat1.l for more information about iso5426.
+ *
+ */
+
+/* Conversion of files between different charsets and usages.
+   Copyright (C) 1990, 1993 Free Software Foundation, Inc.
+   Francois Pinard , 1988.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+#include "common.h"
+#include "decsteps.h"
+/* One Latin-1 code and the ISO 5426 string that replaces it.  */
+struct translation
+  {
+    int code;                   /* code being translated */
+    const char *string;         /* translation string */
+  };
+/* Entries come from lat1iso5426.h; the {0, NULL} sentinel ends the list.  */
+static const struct translation diacritic_translations [] =
+  {
+#include "lat1iso5426.h"
+    {0, NULL},
+  };
+/* Step initializer: build the 256-entry string table stored in step->step_table; returns false if allocation fails.  */
+static bool
+init_latin1_iso5426 (RECODE_STEP step,
+                     const struct recode_request *request,
+                     RECODE_CONST_OPTION_LIST before_options _GL_UNUSED_PARAMETER,
+                     RECODE_CONST_OPTION_LIST after_options _GL_UNUSED_PARAMETER)
+{
+  RECODE_OUTER outer = request->outer; /* NOTE(review): presumably read by the ALLOC_SIZE macro -- confirm */
+
+  char *pool;
+  const char **table;
+  unsigned counter;
+  struct translation const *cursor;
+  /* One block: 256 string pointers, then a 256-byte pool holding 128 two-byte strings.  */
+  if (!ALLOC_SIZE (table, 256 * sizeof (char *) + 256, const char *))
+    return false;
+  pool = (char *) (table + 256);
+  /* Codes 0..127 translate to themselves; codes 128..255 start out as NULL.  */
+  for (counter = 0; counter < 128; counter++)
+    {
+      pool[2 * counter] = counter;
+      pool[2 * counter + 1] = '\0';
+      table[counter] = pool + 2 * counter;
+    }
+  for (counter = 128; counter < 256; counter++)
+    table[counter] = NULL;
+  for (cursor = diacritic_translations; cursor->code; cursor++) /* codes listed in lat1iso5426.h */
+    table[cursor->code] = cursor->string;
+
+  step->step_table = table;
+
+  return true;
+}
+/* Register the Latin-1 -> ISO_5426:1983-DB-MAB2 step.  It reads single bytes and writes strings (transform_byte_to_variable), hence the byte_to_variable quality.  */
+bool
+module_latin1_iso5426 (RECODE_OUTER outer)
+{
+  return declare_single (outer, "Latin-1", "ISO_5426:1983-DB-MAB2",
+                         outer->quality_byte_to_variable, init_latin1_iso5426,
+                         transform_byte_to_variable);
+}
+/* Nothing to release for this module.  */
+_GL_ATTRIBUTE_CONST void
+delmodule_latin1_iso5426 (RECODE_OUTER outer _GL_UNUSED_PARAMETER)
+{
+}
diff --git a/tests/t21_names.py b/tests/t21_names.py
index 61a3435..55d9845 100644
--- a/tests/t21_names.py
+++ b/tests/t21_names.py
@@ -163,6 +163,7 @@ ISO-10646-UCS-4 10646 ISO_10646 u4 UCS UCS-4
 ISO_646.basic ISO_646.basic:1983 ref
 ISO_646.irv irv iso-ir-2 ISO_646.irv:1983
 ISO_2033-1983 e13b iso-ir-98
+ISO_5426:1983-DB-MAB2 DB-MAB2 DDB ISO_5426 MAB MAB2
 ISO_5427 iso-ir-37
 ISO_5427-ext iso-ir-54 ISO_5427:1981
 ISO_5428 iso-ir-55 ISO_5428:1980
@@ -228,6 +229,7 @@ VISCII
 VNI
 VPS
 XML-standalone h0
+Z39.47:1993 8bitANSEL ANSEL LOC MARC USMARC Z39.47
 '''
 
 def test_1():