From ff21a8e5c86457e205ff7c598b930efd711cb2dc Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Sat, 21 Jul 2001 18:52:11 +0000 Subject: [PATCH] JDBC encoding additions. Here's a patch against the current CVS. The changes from the previous patch are mostly related to the changed interface for PG_Stream. Anders Bengtsson --- src/interfaces/jdbc/Implementation | 1 + .../jdbc/org/postgresql/Connection.java | 160 ++++------------- .../jdbc/org/postgresql/PG_Stream.java | 17 +- .../jdbc/org/postgresql/core/Encoding.java | 167 ++++++++++++++++++ .../jdbc/org/postgresql/jdbc1/ResultSet.java | 20 +-- .../jdbc/org/postgresql/jdbc2/ResultSet.java | 26 +-- .../org/postgresql/test/EncodingTest.java | 57 ++++++ .../jdbc/org/postgresql/test/JDBC2Tests.java | 1 + 8 files changed, 272 insertions(+), 177 deletions(-) create mode 100644 src/interfaces/jdbc/org/postgresql/core/Encoding.java create mode 100644 src/interfaces/jdbc/org/postgresql/test/EncodingTest.java diff --git a/src/interfaces/jdbc/Implementation b/src/interfaces/jdbc/Implementation index 28ab6c1719..ecfc626d9d 100644 --- a/src/interfaces/jdbc/Implementation +++ b/src/interfaces/jdbc/Implementation @@ -151,6 +151,7 @@ BytePoolDim2 Handles a pool of byte[][] arrays MemoryPool Interface for managing MemoryPools. Not used (yet). ObjectPool Interface for an Object Pool SimpleObjectPool Class that implements ObjectPool and used by BytePoolDim# +Encoding Character encoding logic, mainly for Connection and PG_Stream. 
Package org.postgresql.fastpath --------------------------- diff --git a/src/interfaces/jdbc/org/postgresql/Connection.java b/src/interfaces/jdbc/org/postgresql/Connection.java index ba0076bcfc..d62127e7c3 100644 --- a/src/interfaces/jdbc/org/postgresql/Connection.java +++ b/src/interfaces/jdbc/org/postgresql/Connection.java @@ -8,9 +8,10 @@ import org.postgresql.Field; import org.postgresql.fastpath.*; import org.postgresql.largeobject.*; import org.postgresql.util.*; +import org.postgresql.core.Encoding; /** - * $Id: Connection.java,v 1.18 2001/07/15 04:21:26 momjian Exp $ + * $Id: Connection.java,v 1.19 2001/07/21 18:52:10 momjian Exp $ * * This abstract class is used by org.postgresql.Driver to open either the JDBC1 or * JDBC2 versions of the Connection class. @@ -33,11 +34,8 @@ public abstract class Connection /** * The encoding to use for this connection. - * If null, the encoding has not been specified by the - * user, and the default encoding for the platform should be - * used. 
*/ - private String encoding; + private Encoding encoding = Encoding.defaultEncoding(); public boolean CONNECTION_OK = true; public boolean CONNECTION_BAD = false; @@ -162,7 +160,7 @@ public abstract class Connection // The most common one to be thrown here is: // "User authentication failed" // - throw new SQLException(pg_stream.ReceiveString(getEncoding())); + throw new SQLException(pg_stream.ReceiveString(encoding)); case 'R': // Get the type of request @@ -232,7 +230,7 @@ public abstract class Connection break; case 'E': case 'N': - throw new SQLException(pg_stream.ReceiveString(getEncoding())); + throw new SQLException(pg_stream.ReceiveString(encoding)); default: throw new PSQLException("postgresql.con.setup"); } @@ -244,111 +242,34 @@ public abstract class Connection break; case 'E': case 'N': - throw new SQLException(pg_stream.ReceiveString(getEncoding())); + throw new SQLException(pg_stream.ReceiveString(encoding)); default: throw new PSQLException("postgresql.con.setup"); } - // Originally we issued a SHOW DATESTYLE statement to find the databases default - // datestyle. However, this caused some problems with timestamps, so in 6.5, we - // went the way of ODBC, and set the connection to ISO. - // - // This may cause some clients to break when they assume anything other than ISO, - // but then - they should be using the proper methods ;-) - // - // We also ask the DB for certain properties (i.e. 
DatabaseEncoding at this time) - // firstWarning = null; - java.sql.ResultSet initrset = ExecSQL("set datestyle to 'ISO'; " + - "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else getdatabaseencoding() end"); - - String dbEncoding = null; - //retrieve DB properties - if(initrset.next()) { - - //handle DatabaseEncoding - dbEncoding = initrset.getString(1); - //convert from the PostgreSQL name to the Java name - if (dbEncoding.equals("SQL_ASCII")) { - dbEncoding = "ASCII"; - } else if (dbEncoding.equals("UNICODE")) { - dbEncoding = "UTF8"; - } else if (dbEncoding.equals("LATIN1")) { - dbEncoding = "ISO8859_1"; - } else if (dbEncoding.equals("LATIN2")) { - dbEncoding = "ISO8859_2"; - } else if (dbEncoding.equals("LATIN3")) { - dbEncoding = "ISO8859_3"; - } else if (dbEncoding.equals("LATIN4")) { - dbEncoding = "ISO8859_4"; - } else if (dbEncoding.equals("LATIN5")) { - dbEncoding = "ISO8859_5"; - } else if (dbEncoding.equals("LATIN6")) { - dbEncoding = "ISO8859_6"; - } else if (dbEncoding.equals("LATIN7")) { - dbEncoding = "ISO8859_7"; - } else if (dbEncoding.equals("LATIN8")) { - dbEncoding = "ISO8859_8"; - } else if (dbEncoding.equals("LATIN9")) { - dbEncoding = "ISO8859_9"; - } else if (dbEncoding.equals("EUC_JP")) { - dbEncoding = "EUC_JP"; - } else if (dbEncoding.equals("EUC_CN")) { - dbEncoding = "EUC_CN"; - } else if (dbEncoding.equals("EUC_KR")) { - dbEncoding = "EUC_KR"; - } else if (dbEncoding.equals("EUC_TW")) { - dbEncoding = "EUC_TW"; - } else if (dbEncoding.equals("KOI8")) { - // try first if KOI8_U is present, it's a superset of KOI8_R - try { - dbEncoding = "KOI8_U"; - "test".getBytes(dbEncoding); - } - catch(UnsupportedEncodingException uee) { - // well, KOI8_U is still not in standard JDK, falling back to KOI8_R :( - dbEncoding = "KOI8_R"; - } + String dbEncoding; - } else if (dbEncoding.equals("WIN")) { - dbEncoding = "Cp1252"; - } else if (dbEncoding.equals("UNKNOWN")) { - //This isn't a multibyte database so we don't have an 
encoding to use - //We leave dbEncoding null which will cause the default encoding for the - //JVM to be used - dbEncoding = null; - } else { - dbEncoding = null; - } - } + // "pg_encoding_to_char(1)" will return 'EUC_JP' for a backend compiled with multibyte, + // otherwise it's hardcoded to 'SQL_ASCII'. + // If the backend doesn't know about multibyte we can't assume anything about the encoding + // used, so we denote this with 'UNKNOWN'. + + final String encodingQuery = + "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else getdatabaseencoding() end"; + // Set datestyle and fetch db encoding in a single call, to avoid making + // more than one round trip to the backend during connection startup. - //Set the encoding for this connection - //Since the encoding could be specified or obtained from the DB we use the - //following order: - // 1. passed as a property - // 2. value from DB if supported by current JVM - // 3. default for JVM (leave encoding null) - String passedEncoding = info.getProperty("charSet"); // could be null - - if (passedEncoding != null) { - encoding = passedEncoding; - } else { - if (dbEncoding != null) { - //test DB encoding - try { - "TEST".getBytes(dbEncoding); - //no error the encoding is supported by the current JVM - encoding = dbEncoding; - } catch (UnsupportedEncodingException uee) { - //dbEncoding is not supported by the current JVM - encoding = null; - } - } else { - encoding = null; - } + java.sql.ResultSet resultSet = + ExecSQL("set datestyle to 'ISO'; " + encodingQuery); + + if (! 
resultSet.next()) { + throw new PSQLException("postgresql.con.failed", "failed getting backend encoding"); } + dbEncoding = resultSet.getString(1); + encoding = Encoding.getEncoding(dbEncoding, info.getProperty("charSet")); // Initialise object handling initObjectTypes(); @@ -448,22 +369,7 @@ public abstract class Connection int insert_oid = 0; SQLException final_error = null; - // Commented out as the backend can now handle queries - // larger than 8K. Peter June 6 2000 - //if (sql.length() > 8192) - //throw new PSQLException("postgresql.con.toolong",sql); - - if (getEncoding() == null) - buf = sql.getBytes(); - else { - try { - buf = sql.getBytes(getEncoding()); - } catch (UnsupportedEncodingException unse) { - throw new PSQLException("postgresql.con.encoding", - unse); - } - } - + buf = encoding.encode(sql); try { pg_stream.SendChar('Q'); @@ -484,7 +390,7 @@ public abstract class Connection { case 'A': // Asynchronous Notify pid = pg_stream.ReceiveInteger(4); - msg = pg_stream.ReceiveString(getEncoding()); + msg = pg_stream.ReceiveString(encoding); break; case 'B': // Binary Data Transfer if (fields == null) @@ -495,7 +401,7 @@ public abstract class Connection tuples.addElement(tup); break; case 'C': // Command Status - recv_status = pg_stream.ReceiveString(getEncoding()); + recv_status = pg_stream.ReceiveString(encoding); // Now handle the update count correctly. 
if(recv_status.startsWith("INSERT") || recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") || recv_status.startsWith("MOVE")) { @@ -537,7 +443,7 @@ public abstract class Connection tuples.addElement(tup); break; case 'E': // Error Message - msg = pg_stream.ReceiveString(getEncoding()); + msg = pg_stream.ReceiveString(encoding); final_error = new SQLException(msg); hfr = true; break; @@ -552,10 +458,10 @@ public abstract class Connection hfr = true; break; case 'N': // Error Notification - addWarning(pg_stream.ReceiveString(getEncoding())); + addWarning(pg_stream.ReceiveString(encoding)); break; case 'P': // Portal Name - String pname = pg_stream.ReceiveString(getEncoding()); + String pname = pg_stream.ReceiveString(encoding); break; case 'T': // MetaData Field Description if (fields != null) @@ -588,7 +494,7 @@ public abstract class Connection for (i = 0 ; i < nf ; ++i) { - String typname = pg_stream.ReceiveString(getEncoding()); + String typname = pg_stream.ReceiveString(encoding); int typid = pg_stream.ReceiveIntegerR(4); int typlen = pg_stream.ReceiveIntegerR(2); int typmod = pg_stream.ReceiveIntegerR(4); @@ -653,11 +559,9 @@ public abstract class Connection } /** - * Get the character encoding to use for this connection. - * @return the encoding to use, or null for the - * default encoding. + * Get the character encoding to use for this connection. 
*/ - public String getEncoding() throws SQLException { + public Encoding getEncoding() throws SQLException { return encoding; } diff --git a/src/interfaces/jdbc/org/postgresql/PG_Stream.java b/src/interfaces/jdbc/org/postgresql/PG_Stream.java index 30eeb6cece..999ef174be 100644 --- a/src/interfaces/jdbc/org/postgresql/PG_Stream.java +++ b/src/interfaces/jdbc/org/postgresql/PG_Stream.java @@ -10,7 +10,7 @@ import org.postgresql.core.*; import org.postgresql.util.*; /** - * @version 1.0 15-APR-1997 + * $Id: PG_Stream.java,v 1.10 2001/07/21 18:52:10 momjian Exp $ * * This class is used by Connection & PGlobj for communicating with the * backend. @@ -208,7 +208,7 @@ public class PG_Stream * @return string from back end * @exception SQLException if an I/O error occurs, or end of file */ - public String ReceiveString(String encoding) + public String ReceiveString(Encoding encoding) throws SQLException { int s = 0; @@ -239,18 +239,7 @@ public class PG_Stream } catch (IOException e) { throw new PSQLException("postgresql.stream.ioerror",e); } - - String v = null; - if (encoding == null) - v = new String(rst, 0, s); - else { - try { - v = new String(rst, 0, s, encoding); - } catch (UnsupportedEncodingException unse) { - throw new PSQLException("postgresql.stream.encoding", unse); - } - } - return v; + return encoding.decode(rst, 0, s); } /** diff --git a/src/interfaces/jdbc/org/postgresql/core/Encoding.java b/src/interfaces/jdbc/org/postgresql/core/Encoding.java new file mode 100644 index 0000000000..91ca6a007a --- /dev/null +++ b/src/interfaces/jdbc/org/postgresql/core/Encoding.java @@ -0,0 +1,167 @@ +package org.postgresql.core; + +import java.io.*; +import java.util.*; +import java.sql.SQLException; +import org.postgresql.util.*; + +/** + * Converts to and from the character encoding used by the backend. 
+ * + * $Id: Encoding.java,v 1.1 2001/07/21 18:52:11 momjian Exp $ + */ + +public class Encoding { + + private static final Encoding DEFAULT_ENCODING = new Encoding(null); + + /** + * Preferred JVM encodings for backend encodings. + */ + private static final Hashtable encodings = new Hashtable(); + + static { + encodings.put("SQL_ASCII", new String[] { "ASCII", "us-ascii" }); + encodings.put("UNICODE", new String[] { "UTF-8", "UTF8" }); + encodings.put("LATIN1", new String[] { "ISO8859_1" }); + encodings.put("LATIN2", new String[] { "ISO8859_2" }); + encodings.put("LATIN3", new String[] { "ISO8859_3" }); + encodings.put("LATIN4", new String[] { "ISO8859_4" }); + encodings.put("LATIN5", new String[] { "ISO8859_5" }); + encodings.put("LATIN6", new String[] { "ISO8859_6" }); + encodings.put("LATIN7", new String[] { "ISO8859_7" }); + encodings.put("LATIN8", new String[] { "ISO8859_8" }); + encodings.put("LATIN9", new String[] { "ISO8859_9" }); + encodings.put("EUC_JP", new String[] { "EUC_JP" }); + encodings.put("EUC_CN", new String[] { "EUC_CN" }); + encodings.put("EUC_KR", new String[] { "EUC_KR" }); + encodings.put("EUC_TW", new String[] { "EUC_TW" }); + encodings.put("WIN", new String[] { "Cp1252" }); + // We prefer KOI8-U, since it is a superset of KOI8-R. + encodings.put("KOI8", new String[] { "KOI8_U", "KOI8_R" }); + // If the database isn't encoding-aware then we can't have + // any preferred encodings. + encodings.put("UNKNOWN", new String[0]); + } + + private final String encoding; + + private Encoding(String encoding) { + this.encoding = encoding; + } + + /** + * Get an Encoding from the given database encoding and + * the encoding passed in by the user. 
+ */ + public static Encoding getEncoding(String databaseEncoding, + String passedEncoding) + { + if (passedEncoding != null) { + if (isAvailable(passedEncoding)) { + return new Encoding(passedEncoding); + } else { + return defaultEncoding(); + } + } else { + return encodingForDatabaseEncoding(databaseEncoding); + } + } + + /** + * Get an Encoding matching the given database encoding. + */ + private static Encoding encodingForDatabaseEncoding(String databaseEncoding) { + // If the backend encoding is known and there is a suitable + // encoding in the JVM we use that. Otherwise we fall back + // to the default encoding of the JVM. + + if (encodings.containsKey(databaseEncoding)) { + String[] candidates = (String[]) encodings.get(databaseEncoding); + for (int i = 0; i < candidates.length; i++) { + if (isAvailable(candidates[i])) { + return new Encoding(candidates[i]); + } + } + } + return defaultEncoding(); + } + + /** + * Name of the (JVM) encoding used. + */ + public String name() { + return encoding; + } + + /** + * Encode a string to an array of bytes. + */ + public byte[] encode(String s) throws SQLException { + try { + if (encoding == null) { + return s.getBytes(); + } else { + return s.getBytes(encoding); + } + } catch (UnsupportedEncodingException e) { + throw new PSQLException("postgresql.stream.encoding", e); + } + } + + /** + * Decode an array of bytes into a string. + */ + public String decode(byte[] encodedString, int offset, int length) throws SQLException { + try { + if (encoding == null) { + return new String(encodedString, offset, length); + } else { + return new String(encodedString, offset, length, encoding); + } + } catch (UnsupportedEncodingException e) { + throw new PSQLException("postgresql.stream.encoding", e); + } + } + + /** + * Decode an array of bytes into a string. 
+ */ + public String decode(byte[] encodedString) throws SQLException { + return decode(encodedString, 0, encodedString.length); + } + + /** + * Get a Reader that decodes the given InputStream. + */ + public Reader getDecodingReader(InputStream in) throws SQLException { + try { + if (encoding == null) { + return new InputStreamReader(in); + } else { + return new InputStreamReader(in, encoding); + } + } catch (UnsupportedEncodingException e) { + throw new PSQLException("postgresql.res.encoding", e); + } + } + + /** + * Get an Encoding using the default encoding for the JVM. + */ + public static Encoding defaultEncoding() { + return DEFAULT_ENCODING; + } + + /** + * Test if an encoding is available in the JVM. + */ + private static boolean isAvailable(String encodingName) { + try { + "DUMMY".getBytes(encodingName); + return true; + } catch (UnsupportedEncodingException e) { + return false; + } + } +} diff --git a/src/interfaces/jdbc/org/postgresql/jdbc1/ResultSet.java b/src/interfaces/jdbc/org/postgresql/jdbc1/ResultSet.java index 47939a4810..0a28f7eb3e 100644 --- a/src/interfaces/jdbc/org/postgresql/jdbc1/ResultSet.java +++ b/src/interfaces/jdbc/org/postgresql/jdbc1/ResultSet.java @@ -14,6 +14,7 @@ import java.sql.*; import org.postgresql.Field; import org.postgresql.largeobject.*; import org.postgresql.util.*; +import org.postgresql.core.Encoding; /** * A ResultSet provides access to a table of data generated by executing a @@ -154,26 +155,15 @@ public class ResultSet extends org.postgresql.ResultSet implements java.sql.Resu */ public String getString(int columnIndex) throws SQLException { - //byte[] bytes = getBytes(columnIndex); - // - //if (bytes == null) - //return null; - //return new String(bytes); if (columnIndex < 1 || columnIndex > fields.length) throw new PSQLException("postgresql.res.colrange"); + wasNullFlag = (this_row[columnIndex - 1] == null); if(wasNullFlag) return null; - String encoding = connection.getEncoding(); - if (encoding == null) - return 
new String(this_row[columnIndex - 1]); - else { - try { - return new String(this_row[columnIndex - 1], encoding); - } catch (UnsupportedEncodingException unse) { - throw new PSQLException("postgresql.res.encoding", unse); - } - } + + Encoding encoding = connection.getEncoding(); + return encoding.decode(this_row[columnIndex - 1]); } /** diff --git a/src/interfaces/jdbc/org/postgresql/jdbc2/ResultSet.java b/src/interfaces/jdbc/org/postgresql/jdbc2/ResultSet.java index 5bf11e3c3e..868f404c77 100644 --- a/src/interfaces/jdbc/org/postgresql/jdbc2/ResultSet.java +++ b/src/interfaces/jdbc/org/postgresql/jdbc2/ResultSet.java @@ -5,7 +5,6 @@ package org.postgresql.jdbc2; // changes are also made (if relevent) to the related JDBC 1 class in the // org.postgresql.jdbc1 package. - import java.lang.*; import java.io.*; import java.math.*; @@ -15,6 +14,7 @@ import java.sql.*; import org.postgresql.Field; import org.postgresql.largeobject.*; import org.postgresql.util.*; +import org.postgresql.core.Encoding; /** * A ResultSet provides access to a table of data generated by executing a @@ -172,16 +172,8 @@ public class ResultSet extends org.postgresql.ResultSet implements java.sql.Resu if(wasNullFlag) return null; - String encoding = connection.getEncoding(); - if (encoding == null) - return new String(this_row[columnIndex - 1]); - else { - try { - return new String(this_row[columnIndex - 1], encoding); - } catch (UnsupportedEncodingException unse) { - throw new PSQLException("postgresql.res.encoding", unse); - } - } + Encoding encoding = connection.getEncoding(); + return encoding.decode(this_row[columnIndex - 1]); } /** @@ -1006,15 +998,9 @@ public class ResultSet extends org.postgresql.ResultSet implements java.sql.Resu public java.io.Reader getCharacterStream(int i) throws SQLException { - // New in 7.1 - try { - String encoding = connection.getEncoding(); - if(encoding==null) - return new InputStreamReader(getBinaryStream(i)); - return new 
InputStreamReader(getBinaryStream(i),encoding); - } catch (UnsupportedEncodingException unse) { - throw new PSQLException("postgresql.res.encoding", unse); - } + Encoding encoding = connection.getEncoding(); + InputStream input = getBinaryStream(i); + return encoding.getDecodingReader(input); } /** diff --git a/src/interfaces/jdbc/org/postgresql/test/EncodingTest.java b/src/interfaces/jdbc/org/postgresql/test/EncodingTest.java new file mode 100644 index 0000000000..660abe3a04 --- /dev/null +++ b/src/interfaces/jdbc/org/postgresql/test/EncodingTest.java @@ -0,0 +1,57 @@ + +package org.postgresql.test.jdbc2; + +import junit.framework.*; +import org.postgresql.core.Encoding; +import java.io.*; + +/** + * Tests for the Encoding class. + * + * $Id: EncodingTest.java,v 1.1 2001/07/21 18:52:11 momjian Exp $ + */ + + +public class EncodingTest extends TestCase { + + public EncodingTest(String name) { + super(name); + } + + public void testCreation() throws Exception { + Encoding encoding; + encoding = Encoding.getEncoding("UNICODE", null); + assertEquals("UTF", encoding.name().substring(0, 3).toUpperCase()); + encoding = Encoding.getEncoding("SQL_ASCII", null); + assert(encoding.name().toUpperCase().indexOf("ASCII") != -1); + assertEquals("When encoding is unknown the default encoding should be used", + Encoding.defaultEncoding(), + Encoding.getEncoding("UNKNOWN", null)); + encoding = Encoding.getEncoding("SQL_ASCII", "utf-8"); + assert("Encoding passed in by the user should be preferred", + encoding.name().toUpperCase().indexOf("UTF") != -1); + } + + public void testTransformations() throws Exception { + Encoding encoding = Encoding.getEncoding("UNICODE", null); + assertEquals("ab", encoding.decode(new byte[] { 97, 98 })); + + assertEquals(2, encoding.encode("ab").length); + assertEquals(97, encoding.encode("a")[0]); + assertEquals(98, encoding.encode("b")[0]); + + encoding = Encoding.defaultEncoding(); + assertEquals("a".getBytes()[0], encoding.encode("a")[0]); + 
assertEquals(new String(new byte[] { 97 }), + encoding.decode(new byte[] { 97 })); + } + + public void testReader() throws Exception { + Encoding encoding = Encoding.getEncoding("SQL_ASCII", null); + InputStream stream = new ByteArrayInputStream(new byte[] { 97, 98 }); + Reader reader = encoding.getDecodingReader(stream); + assertEquals(97, reader.read()); + assertEquals(98, reader.read()); + assertEquals(-1, reader.read()); + } +} diff --git a/src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java b/src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java index 37624f528d..96265dbe6d 100644 --- a/src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java +++ b/src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java @@ -195,6 +195,7 @@ public class JDBC2Tests extends TestSuite { suite.addTestSuite(DriverTest.class); suite.addTestSuite(ConnectionTest.class); suite.addTestSuite(DatabaseMetaDataTest.class); + suite.addTestSuite(EncodingTest.class); // Connectivity/Protocols -- 2.40.0