]> granicus.if.org Git - recode/commitdiff
Java charset
authorFrançois Pinard <pinard@iro.umontreal.ca>
Thu, 13 Mar 2008 17:15:34 +0000 (13:15 -0400)
committerFrançois Pinard <pinard@iro.umontreal.ca>
Thu, 13 Mar 2008 17:16:40 +0000 (13:16 -0400)
13 files changed:
doc/recode.info
doc/recode.texi
doc/stamp-vti
doc/version.texi
src/ChangeLog
src/Makefile.am
src/Makefile.in
src/decsteps.h
src/inisteps.h
src/java.c [new file with mode: 0644]
src/tersteps.h
tests/ChangeLog
tests/t40_java.py [new file with mode: 0644]

index cc494143040f21e40e8ff44802a14cfbd78dc5f6..f32cf2e5a15728dddcf3e80bd0c8598c442eb8c6 100644 (file)
@@ -134,6 +134,7 @@ Various other charsets
 * Vietnamese::          Vietnamese charsets
 * African::             African charsets
 * Others::              Cyrillic and other charsets
+* Java::                Java code
 * Texte::               Easy French conventions
 * Mule::                Mule as a multiplexed charset
 
@@ -3896,9 +3897,10 @@ to handle these particularly well for French texts.
 * HTML::                World Wide Web representations
 * LaTeX::               LaTeX macro calls
 * Texinfo::             GNU project documentation files
-* Vietnamese::
+* Vietnamese::          Vietnamese charsets
 * African::             African charsets
-* Others::
+* Others::              Cyrillic and other charsets
+* Java::                Java code
 * Texte::               Easy French conventions
 * Mule::                Mule as a multiplexed charset
 
@@ -4124,7 +4126,7 @@ Recode under the name `AFRL1-101-BPI_OCIL'.  Accepted aliases are
 `t-fra' and `t-francais'.
 
 \1f
-File: recode.info,  Node: Others,  Next: Texte,  Prev: African,  Up: Miscellaneous
+File: recode.info,  Node: Others,  Next: Java,  Prev: African,  Up: Miscellaneous
 
 12.6 Cyrillic and other charsets
 ================================
@@ -4159,9 +4161,27 @@ everybody, and this section will merely disappear.
      This charset is available under the name `KOI-8_CS2'.
 
 \1f
-File: recode.info,  Node: Texte,  Next: Mule,  Prev: Others,  Up: Miscellaneous
+File: recode.info,  Node: Java,  Next: Texte,  Prev: Others,  Up: Miscellaneous
+
+12.7 Java code
+==============
+
+This charset is available under the name `Java', and should be
+considered experimental for now.
+
+   ASCII characters represent themselves.  Characters outside ASCII are
+coded as `\uNNNN', where `NNNN' stands for the four-digit hexadecimal
+value of the character within Unicode.  The canonical representation
+uses lower case for the `u' prefix and for the hexadecimal digits, yet
+Recode also accepts upper case.
+
+   There is currently no attempt to distinguish Java comments from Java
+strings while the recoding goes, and this may be corrected some day.
+
+\1f
+File: recode.info,  Node: Texte,  Next: Mule,  Prev: Java,  Up: Miscellaneous
 
-12.7 Easy French conventions
+12.8 Easy French conventions
 ============================
 
 This charset is available in Recode under the name `Texte' and has
@@ -4284,7 +4304,7 @@ as being fairly evident.
 \1f
 File: recode.info,  Node: Mule,  Prev: Texte,  Up: Miscellaneous
 
-12.8 Mule as a multiplexed charset
+12.9 Mule as a multiplexed charset
 ==================================
 
 This version of Recode barely starts supporting multiplexed or
@@ -5872,6 +5892,7 @@ by Recode, and their aliases.
 * isoir91:                               Tabular.             (line  27)
 * isoir92:                               Tabular.             (line  27)
 * IT, aliases and source:                Tabular.             (line 520)
+* Java:                                  Java.                (line   6)
 * JIS_C6220-1969:                        Tabular.             (line 524)
 * JIS_C6220-1969-jp, aliases and source: Tabular.             (line 524)
 * JIS_C6220-1969-ro, aliases and source: Tabular.             (line 528)
@@ -6083,95 +6104,96 @@ by Recode, and their aliases.
 \1f
 Tag Table:
 Node: Top\7f1148
-Node: Tutorial\7f5575
-Node: Introduction\7f9803
-Node: Charset overview\7f14037
-Node: Surface overview\7f15842
-Node: Contributing\7f17310
-Ref: Contributing-Footnote-1\7f19544
-Node: Invoking recode\7f19678
-Node: Synopsis\7f20633
-Ref: Synopsis-Footnote-1\7f23073
-Node: Requests\7f23370
-Ref: Requests-Footnote-1\7f29260
-Ref: Requests-Footnote-2\7f29327
-Ref: Requests-Footnote-3\7f29505
-Node: Listings\7f29964
-Ref: Listings-Footnote-1\7f41113
-Node: Recoding\7f41436
-Node: Reversibility\7f44257
-Ref: Reversibility-Footnote-1\7f52712
-Node: Sequencing\7f52849
-Node: Mixed\7f55293
-Node: Emacs\7f58661
-Node: Debugging\7f59695
-Node: Library\7f63965
-Node: Outer level\7f65319
-Node: Request level\7f72193
-Node: Task level\7f83140
-Node: Charset level\7f93562
-Node: Errors\7f94404
-Ref: Errors-Footnote-1\7f99250
-Ref: Errors-Footnote-2\7f99364
-Node: Universal\7f99725
-Ref: Universal-Footnote-1\7f102837
-Ref: Universal-Footnote-2\7f102903
-Node: UCS-2\7f103116
-Node: UCS-4\7f105642
-Node: UTF-7\7f106182
-Node: UTF-8\7f106777
-Node: UTF-16\7f111082
-Node: count-characters\7f112230
-Node: dump-with-names\7f112901
-Node: iconv\7f115450
-Node: Tabular\7f118881
-Node: ASCII misc\7f141094
-Node: ASCII\7f141460
-Node: ISO 8859\7f142276
-Node: ASCII-BS\7f144570
-Node: flat\7f146407
-Node: IBM and MS\7f147078
-Node: EBCDIC\7f147622
-Node: IBM-PC\7f149718
-Ref: IBM-PC-Footnote-1\7f151832
-Node: Icon-QNX\7f151991
-Node: CDC\7f152416
-Node: Display Code\7f154097
-Ref: Display Code-Footnote-1\7f156378
-Node: CDC-NOS\7f156583
-Node: Bang-Bang\7f158545
-Node: Micros\7f160474
-Node: Apple-Mac\7f160857
-Node: AtariST\7f162891
-Node: Miscellaneous\7f163877
-Node: HTML\7f164610
-Node: LaTeX\7f170599
-Node: Texinfo\7f171373
-Node: Vietnamese\7f172145
-Node: African\7f173121
-Node: Others\7f174471
-Node: Texte\7f175925
-Ref: Texte-Footnote-1\7f180475
-Ref: Texte-Footnote-2\7f180555
-Ref: Texte-Footnote-3\7f181030
-Node: Mule\7f181127
-Ref: Mule-Footnote-1\7f182908
-Node: Surfaces\7f183427
-Ref: Surfaces-Footnote-1\7f186846
-Node: Permutations\7f186950
-Node: End lines\7f187791
-Node: MIME\7f189992
-Node: Dump\7f191179
-Node: Test\7f195349
-Node: Internals\7f197827
-Node: Main flow\7f199055
-Node: New charsets\7f202158
-Node: New surfaces\7f206696
-Node: Design\7f207422
-Ref: Design-Footnote-1\7f216588
-Node: Concept Index\7f216692
-Node: Option Index\7f231727
-Node: Library Index\7f234580
-Node: Charset and Surface Index\7f239155
+Node: Tutorial\7f5609
+Node: Introduction\7f9837
+Node: Charset overview\7f14071
+Node: Surface overview\7f15876
+Node: Contributing\7f17344
+Ref: Contributing-Footnote-1\7f19578
+Node: Invoking recode\7f19712
+Node: Synopsis\7f20667
+Ref: Synopsis-Footnote-1\7f23107
+Node: Requests\7f23404
+Ref: Requests-Footnote-1\7f29294
+Ref: Requests-Footnote-2\7f29361
+Ref: Requests-Footnote-3\7f29539
+Node: Listings\7f29998
+Ref: Listings-Footnote-1\7f41147
+Node: Recoding\7f41470
+Node: Reversibility\7f44291
+Ref: Reversibility-Footnote-1\7f52746
+Node: Sequencing\7f52883
+Node: Mixed\7f55327
+Node: Emacs\7f58695
+Node: Debugging\7f59729
+Node: Library\7f63999
+Node: Outer level\7f65353
+Node: Request level\7f72227
+Node: Task level\7f83174
+Node: Charset level\7f93596
+Node: Errors\7f94438
+Ref: Errors-Footnote-1\7f99284
+Ref: Errors-Footnote-2\7f99398
+Node: Universal\7f99759
+Ref: Universal-Footnote-1\7f102871
+Ref: Universal-Footnote-2\7f102937
+Node: UCS-2\7f103150
+Node: UCS-4\7f105676
+Node: UTF-7\7f106216
+Node: UTF-8\7f106811
+Node: UTF-16\7f111116
+Node: count-characters\7f112264
+Node: dump-with-names\7f112935
+Node: iconv\7f115484
+Node: Tabular\7f118915
+Node: ASCII misc\7f141128
+Node: ASCII\7f141494
+Node: ISO 8859\7f142310
+Node: ASCII-BS\7f144604
+Node: flat\7f146441
+Node: IBM and MS\7f147112
+Node: EBCDIC\7f147656
+Node: IBM-PC\7f149752
+Ref: IBM-PC-Footnote-1\7f151866
+Node: Icon-QNX\7f152025
+Node: CDC\7f152450
+Node: Display Code\7f154131
+Ref: Display Code-Footnote-1\7f156412
+Node: CDC-NOS\7f156617
+Node: Bang-Bang\7f158579
+Node: Micros\7f160508
+Node: Apple-Mac\7f160891
+Node: AtariST\7f162925
+Node: Miscellaneous\7f163911
+Node: HTML\7f164748
+Node: LaTeX\7f170737
+Node: Texinfo\7f171511
+Node: Vietnamese\7f172283
+Node: African\7f173259
+Node: Others\7f174609
+Node: Java\7f176062
+Node: Texte\7f176729
+Ref: Texte-Footnote-1\7f181277
+Ref: Texte-Footnote-2\7f181357
+Ref: Texte-Footnote-3\7f181832
+Node: Mule\7f181929
+Ref: Mule-Footnote-1\7f183710
+Node: Surfaces\7f184229
+Ref: Surfaces-Footnote-1\7f187648
+Node: Permutations\7f187752
+Node: End lines\7f188593
+Node: MIME\7f190794
+Node: Dump\7f191981
+Node: Test\7f196151
+Node: Internals\7f198629
+Node: Main flow\7f199857
+Node: New charsets\7f202960
+Node: New surfaces\7f207498
+Node: Design\7f208224
+Ref: Design-Footnote-1\7f217390
+Node: Concept Index\7f217494
+Node: Option Index\7f232529
+Node: Library Index\7f235382
+Node: Charset and Surface Index\7f239957
 \1f
 End Tag Table
index 0fe7eec691eb5f911fbde9e2227da003dd9283b5..2e8e1cf7c0df1fd5b18fe57689e0e2c8f806c945 100644 (file)
@@ -178,6 +178,7 @@ Various other charsets
 * Vietnamese::          Vietnamese charsets
 * African::             African charsets
 * Others::              Cyrillic and other charsets
+* Java::                Java code
 * Texte::               Easy French conventions
 * Mule::                Mule as a multiplexed charset
 
@@ -3760,9 +3761,10 @@ knows how to handle these particularly well for French texts.
 * HTML::                World Wide Web representations
 * LaTeX::               LaTeX macro calls
 * Texinfo::             GNU project documentation files
-* Vietnamese::
+* Vietnamese::          Vietnamese charsets
 * African::             African charsets
-* Others::
+* Others::              Cyrillic and other charsets
+* Java::                Java code
 * Texte::               Easy French conventions
 * Mule::                Mule as a multiplexed charset
 @end menu
@@ -4056,7 +4058,7 @@ African charsets in this series.  This charset is available in Recode
 under the name @code{AFRL1-101-BPI_OCIL}.  Accepted aliases are @code{t-fra}
 and @code{t-francais}.
 
-@node Others, Texte, African, Miscellaneous
+@node Others, Java, African, Miscellaneous
 @section Cyrillic and other charsets
 
 @cindex Cyrillic charsets
@@ -4097,7 +4099,23 @@ as an accepted alias.
 This charset is available under the name @code{KOI-8_CS2}.
 @end table
 
-@node Texte, Mule, Others, Miscellaneous
+@node Java, Texte, Others, Miscellaneous
+@section Java code
+
+@tindex Java
+This charset is available under the name @code{Java}, and should be
+considered experimental for now.
+
+ASCII characters represent themselves.  Characters outside ASCII are
+coded as @samp{\uNNNN}, where @samp{NNNN} stands for the four-digit
+hexadecimal value of the character within Unicode.  The canonical
+representation uses lower case for the @samp{u} prefix and for the
+hexadecimal digits, yet Recode also accepts upper case.
+
+There is currently no attempt to distinguish Java comments from Java
+strings while the recoding goes, and this may be corrected some day.
+
+@node Texte, Mule, Java, Miscellaneous
 @section Easy French conventions
 
 @tindex Texte
index 58f7fecce0cf9fb13c55b577940084a9e05ebaf3..265e4b23ef9bdb1c37fd36af3b08758fe26e0e74 100644 (file)
@@ -1,4 +1,4 @@
-@set UPDATED 9 March 2008
+@set UPDATED 12 March 2008
 @set UPDATED-MONTH March 2008
 @set EDITION 3.7-beta2
 @set VERSION 3.7-beta2
index 58f7fecce0cf9fb13c55b577940084a9e05ebaf3..265e4b23ef9bdb1c37fd36af3b08758fe26e0e74 100644 (file)
@@ -1,4 +1,4 @@
-@set UPDATED 9 March 2008
+@set UPDATED 12 March 2008
 @set UPDATED-MONTH March 2008
 @set EDITION 3.7-beta2
 @set VERSION 3.7-beta2
index 3f9fce61c89187eb770806aea28094cfa2858378..3bc492c834bd08bb4f19f58b6cb4a8ce1f4d2aa6 100644 (file)
@@ -1,3 +1,8 @@
+2008-03-13  François Pinard  <pinard@iro.umontreal.ca>
+
+       * java.c: New.
+       * Makefile.am: Adjusted.
+
 2008-03-12  François Pinard  <pinard@iro.umontreal.ca>
 
        * recodext.h: Set bit field with, for ignore, from 2 to 1.
index 48a0ca0c8f1c432b54f6ad36a060fb96328cd444..1f6f1abb97f4c20659f26bed9ed64e673d446628 100644 (file)
@@ -31,10 +31,12 @@ EXTRA_DIST = stamp-steps stamp-strip $(L_STEPS) mergelex.py $(MANS)
 CLEANFILES = iconvdecl.h
 
 C_STEPS = african.c afrtran.c applemac.c atarist.c bangbang.c cdcnos.c \
-ebcdic.c ibmpc.c iconqnx.c lat1asci.c mule.c strip-data.c testdump.c \
-ucs.c utf16.c utf7.c utf8.c varia.c vn.c $(C_FALLBACKS) $(C_SURFACES) \
-merged.c
+ebcdic.c ibmpc.c iconqnx.c lat1asci.c java.c mule.c strip-data.c \
+testdump.c ucs.c utf16.c utf7.c utf8.c varia.c vn.c \
+$(C_FALLBACKS) $(C_SURFACES) merged.c
+
 OLD_C_STEPS = next.c
+
 L_STEPS = ascilat1.l $(L_FALLBACKS) $(L_SURFACES)
 
 H_FALLBACKS =
index a5b80901ce92a7a2dc85025f44d3108be7aad74a..690019bcafba4599f53d2503d77797c00e1b634d 100644 (file)
@@ -90,9 +90,10 @@ am__objects_2 = base64$U.lo dump$U.lo endline$U.lo permut$U.lo \
        quoted$U.lo
 am__objects_3 = african$U.lo afrtran$U.lo applemac$U.lo atarist$U.lo \
        bangbang$U.lo cdcnos$U.lo ebcdic$U.lo ibmpc$U.lo iconqnx$U.lo \
-       lat1asci$U.lo mule$U.lo strip-data$U.lo testdump$U.lo ucs$U.lo \
-       utf16$U.lo utf7$U.lo utf8$U.lo varia$U.lo vn$U.lo \
-       $(am__objects_1) $(am__objects_2) merged$U.lo
+       lat1asci$U.lo java$U.lo mule$U.lo strip-data$U.lo \
+       testdump$U.lo ucs$U.lo utf16$U.lo utf7$U.lo utf8$U.lo \
+       varia$U.lo vn$U.lo $(am__objects_1) $(am__objects_2) \
+       merged$U.lo
 am_librecode_la_OBJECTS = argmatch$U.lo charname$U.lo combine$U.lo \
        exitfail$U.lo fr-charname$U.lo hash$U.lo iconv$U.lo \
        localcharset$U.lo names$U.lo outer$U.lo quotearg$U.lo \
@@ -272,9 +273,9 @@ xalloc.h $(H_FALLBACKS) $(H_SURFACES)
 EXTRA_DIST = stamp-steps stamp-strip $(L_STEPS) mergelex.py $(MANS)
 CLEANFILES = iconvdecl.h
 C_STEPS = african.c afrtran.c applemac.c atarist.c bangbang.c cdcnos.c \
-ebcdic.c ibmpc.c iconqnx.c lat1asci.c mule.c strip-data.c testdump.c \
-ucs.c utf16.c utf7.c utf8.c varia.c vn.c $(C_FALLBACKS) $(C_SURFACES) \
-merged.c
+ebcdic.c ibmpc.c iconqnx.c lat1asci.c java.c mule.c strip-data.c \
+testdump.c ucs.c utf16.c utf7.c utf8.c varia.c vn.c \
+$(C_FALLBACKS) $(C_SURFACES) merged.c
 
 OLD_C_STEPS = next.c
 L_STEPS = ascilat1.l $(L_FALLBACKS) $(L_SURFACES)
@@ -452,6 +453,7 @@ mostlyclean-kr:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ibmpc$U.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iconqnx$U.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iconv$U.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/java$U.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lat1asci$U.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lat1ltex$U.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lat1txte$U.Plo@am__quote@
@@ -546,6 +548,8 @@ iconqnx_.c: iconqnx.c $(ANSI2KNR)
        $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iconqnx.c; then echo $(srcdir)/iconqnx.c; else echo iconqnx.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
 iconv_.c: iconv.c $(ANSI2KNR)
        $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iconv.c; then echo $(srcdir)/iconv.c; else echo iconv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+java_.c: java.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/java.c; then echo $(srcdir)/java.c; else echo java.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
 lat1asci_.c: lat1asci.c $(ANSI2KNR)
        $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lat1asci.c; then echo $(srcdir)/lat1asci.c; else echo lat1asci.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
 lat1ltex_.c: lat1ltex.c $(ANSI2KNR)
@@ -614,21 +618,21 @@ endline_.$(OBJEXT) endline_.lo exitfail_.$(OBJEXT) exitfail_.lo \
 flat_.$(OBJEXT) flat_.lo fr-charname_.$(OBJEXT) fr-charname_.lo \
 freeze_.$(OBJEXT) freeze_.lo hash_.$(OBJEXT) hash_.lo html_.$(OBJEXT) \
 html_.lo ibmpc_.$(OBJEXT) ibmpc_.lo iconqnx_.$(OBJEXT) iconqnx_.lo \
-iconv_.$(OBJEXT) iconv_.lo lat1asci_.$(OBJEXT) lat1asci_.lo \
-lat1ltex_.$(OBJEXT) lat1ltex_.lo lat1txte_.$(OBJEXT) lat1txte_.lo \
-localcharset_.$(OBJEXT) localcharset_.lo main_.$(OBJEXT) main_.lo \
-merged_.$(OBJEXT) merged_.lo mixed_.$(OBJEXT) mixed_.lo \
-mule_.$(OBJEXT) mule_.lo names_.$(OBJEXT) names_.lo outer_.$(OBJEXT) \
-outer_.lo permut_.$(OBJEXT) permut_.lo quotearg_.$(OBJEXT) \
-quotearg_.lo quoted_.$(OBJEXT) quoted_.lo recode_.$(OBJEXT) recode_.lo \
-request_.$(OBJEXT) request_.lo rfc1345_.$(OBJEXT) rfc1345_.lo \
-strip-data_.$(OBJEXT) strip-data_.lo strip-pool_.$(OBJEXT) \
-strip-pool_.lo task_.$(OBJEXT) task_.lo testdump_.$(OBJEXT) \
-testdump_.lo texinfo_.$(OBJEXT) texinfo_.lo ucs_.$(OBJEXT) ucs_.lo \
-utf16_.$(OBJEXT) utf16_.lo utf7_.$(OBJEXT) utf7_.lo utf8_.$(OBJEXT) \
-utf8_.lo varia_.$(OBJEXT) varia_.lo vn_.$(OBJEXT) vn_.lo \
-xalloc-die_.$(OBJEXT) xalloc-die_.lo xmalloc_.$(OBJEXT) xmalloc_.lo : \
-$(ANSI2KNR)
+iconv_.$(OBJEXT) iconv_.lo java_.$(OBJEXT) java_.lo \
+lat1asci_.$(OBJEXT) lat1asci_.lo lat1ltex_.$(OBJEXT) lat1ltex_.lo \
+lat1txte_.$(OBJEXT) lat1txte_.lo localcharset_.$(OBJEXT) \
+localcharset_.lo main_.$(OBJEXT) main_.lo merged_.$(OBJEXT) merged_.lo \
+mixed_.$(OBJEXT) mixed_.lo mule_.$(OBJEXT) mule_.lo names_.$(OBJEXT) \
+names_.lo outer_.$(OBJEXT) outer_.lo permut_.$(OBJEXT) permut_.lo \
+quotearg_.$(OBJEXT) quotearg_.lo quoted_.$(OBJEXT) quoted_.lo \
+recode_.$(OBJEXT) recode_.lo request_.$(OBJEXT) request_.lo \
+rfc1345_.$(OBJEXT) rfc1345_.lo strip-data_.$(OBJEXT) strip-data_.lo \
+strip-pool_.$(OBJEXT) strip-pool_.lo task_.$(OBJEXT) task_.lo \
+testdump_.$(OBJEXT) testdump_.lo texinfo_.$(OBJEXT) texinfo_.lo \
+ucs_.$(OBJEXT) ucs_.lo utf16_.$(OBJEXT) utf16_.lo utf7_.$(OBJEXT) \
+utf7_.lo utf8_.$(OBJEXT) utf8_.lo varia_.$(OBJEXT) varia_.lo \
+vn_.$(OBJEXT) vn_.lo xalloc-die_.$(OBJEXT) xalloc-die_.lo \
+xmalloc_.$(OBJEXT) xmalloc_.lo : $(ANSI2KNR)
 
 mostlyclean-libtool:
        -rm -f *.lo
index d400e7e0267281f3d7fe693c37150e6a86647519..222c034f8d87950d77f90e72531da991a6b41301 100644 (file)
@@ -8,6 +8,7 @@ bool module_ebcdic PARAMS ((struct recode_outer *));
 bool module_ibmpc PARAMS ((struct recode_outer *));
 bool module_iconqnx PARAMS ((struct recode_outer *));
 bool module_latin1_ascii PARAMS ((struct recode_outer *));
+bool module_java PARAMS ((struct recode_outer *));
 bool module_mule PARAMS ((struct recode_outer *));
 bool module_strips PARAMS ((struct recode_outer *));
 bool module_testdump PARAMS ((struct recode_outer *));
@@ -41,6 +42,7 @@ void delmodule_ebcdic PARAMS ((struct recode_outer *));
 void delmodule_ibmpc PARAMS ((struct recode_outer *));
 void delmodule_iconqnx PARAMS ((struct recode_outer *));
 void delmodule_latin1_ascii PARAMS ((struct recode_outer *));
+void delmodule_java PARAMS ((struct recode_outer *));
 void delmodule_mule PARAMS ((struct recode_outer *));
 void delmodule_strips PARAMS ((struct recode_outer *));
 void delmodule_testdump PARAMS ((struct recode_outer *));
index a365a01ffbacc463623bde8503265ebbb474316a..b2768034e6bad39580e48d0b481763038945bb2e 100644 (file)
@@ -8,6 +8,7 @@
   if (!module_ibmpc (outer)) return false;
   if (!module_iconqnx (outer)) return false;
   if (!module_latin1_ascii (outer)) return false;
+  if (!module_java (outer)) return false;
   if (!module_mule (outer)) return false;
   if (!module_strips (outer)) return false;
   if (!module_testdump (outer)) return false;
diff --git a/src/java.c b/src/java.c
new file mode 100644 (file)
index 0000000..f90dc0e
--- /dev/null
@@ -0,0 +1,116 @@
+/* Conversion of files between different charsets and surfaces.
+   Copyright © 2008 Free Software Foundation, Inc.
+   Contributed by François Pinard <pinard@iro.umontreal.ca>, 2008.
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License
+   as published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the Recode Library; see the file `COPYING.LIB'.
+   If not, write to the Free Software Foundation, Inc., 59 Temple Place -
+   Suite 330, Boston, MA 02111-1307, USA.  */
+
+#include "common.h"
+
+static bool
+transform_java_utf16 (RECODE_SUBTASK subtask)
+{
+  int character = get_byte (subtask);
+
+  while (character != EOF)
+    if (character == '\\')
+      {
+        char buffer[6];
+        char *cursor = buffer;
+        unsigned value = 0;
+        bool canonical = true;
+
+        *cursor++ = character;
+        character = get_byte (subtask);
+        if (character == 'u' || character == 'U')
+          {
+            if (character == 'U')
+              canonical = false;
+            *cursor++ = character;
+            character = get_byte (subtask);
+            while (cursor < buffer + 6)
+              {
+                if (character >= '0' && character <= '9')
+                  value = (value << 4) | (character - '0');
+                else if (character >= 'A' && character <= 'F')
+                  {
+                    value = (value << 4) | (character - 'A' + 10);
+                    canonical = false;
+                  }
+                else if (character >= 'a' && character <= 'f')
+                  value = (value << 4) | (character - 'a' + 10);
+                else
+                  break;
+                *cursor++ = character;
+                character = get_byte (subtask);
+              }
+            if (cursor == buffer + 6)
+              {
+                if (!canonical)
+                  RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
+                put_ucs2 (value, subtask);
+                continue;
+              }
+          }
+        *cursor = '\0';
+        for (cursor = buffer; *cursor; cursor++)
+          put_ucs2 (*cursor, subtask);
+      }
+    else
+      {
+        put_ucs2 (character, subtask);
+        character = get_byte (subtask);
+      }
+
+  SUBTASK_RETURN (subtask);
+}
+
+static bool
+transform_utf16_java (RECODE_SUBTASK subtask)
+{
+  unsigned value;
+
+  while (get_ucs2 (&value, subtask))
+    if (value < 128)
+      put_byte (value, subtask);
+    else
+      {
+        char buffer[7];
+        char *cursor;
+
+        sprintf (buffer, "\\u%04x", value);
+        for (cursor = buffer; *cursor; cursor++)
+          put_byte (*cursor, subtask);
+      }
+
+  SUBTASK_RETURN (subtask);
+}
+
+bool
+module_java (RECODE_OUTER outer)
+{
+  return
+    declare_single (outer, "UTF-16", "Java",
+                      outer->quality_ucs2_to_variable,
+                      NULL, transform_utf16_java)
+    && declare_single (outer, "Java", "UTF-16",
+                      outer->quality_variable_to_ucs2,
+                      NULL, transform_java_utf16);
+}
+
+void
+delmodule_java (RECODE_OUTER outer)
+{
+}
index 49058db1e2f73bbf29a5b017aabbc8687c883e9e..06baece77847366ae7cbbf74c300fa4ae36fcffe 100644 (file)
@@ -8,6 +8,7 @@
   delmodule_ibmpc (outer);
   delmodule_iconqnx (outer);
   delmodule_latin1_ascii (outer);
+  delmodule_java (outer);
   delmodule_mule (outer);
   delmodule_strips (outer);
   delmodule_testdump (outer);
index 99b8f2b7645aa9b17716b690b7aaa5c6e06c38e7..71bfdc6b14548ca11d88fbebad526bd1ea6bac1f 100644 (file)
@@ -1,3 +1,7 @@
+2008-03-13  François Pinard  <pinard@iro.umontreal.ca>
+
+       * t40_java.py: New.
+
 2008-03-11  François Pinard  <pinard@iro.umontreal.ca>
 
        * Recode.pyx: Adjusted for iconv_name.
diff --git a/tests/t40_java.py b/tests/t40_java.py
new file mode 100644 (file)
index 0000000..1b671ab
--- /dev/null
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+import common
+from common import setup_module, teardown_module
+
+input = '''\
+Dear =DEorvard=F0ur,
+
+=AB O=F9 qu'il r=E9side, =E0 N=EEmes ou m=EAme Capharna=FCm, tout Fran=E7ai=
+s inscrit
+au r=F4le payera son d=FB d=E8s avant No=EBl, qu'il soit na=EFf ou r=E2leur=
+. =BB
+'''
+
+output = '''\
+Dear \u00deorvard\u00f0ur,
+
+\u00ab O\u00f9 qu'il r\u00e9side, \u00e0 N\u00eemes ou m\u00eame Capharna\u00fcm, tout Fran\u00e7ais inscrit
+au r\u00f4le payera son d\u00fb d\u00e8s avant No\u00ebl, qu'il soit na\u00eff ou r\u00e2leur. \u00bb
+'''
+
+class Test:
+
+    def test_1(self):
+        # Block of lines to JAVA.
+        common.request('l1/qp..java')
+        common.validate(input, output)
+
+    def test_2(self):
+        # Block of lines to JAVA and back.
+        common.request('l1/qp..java')
+        common.validate_back(input)