From a2a8c7a662ec96537b6d1faba0770c516b921911 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 4 Aug 2009 16:08:37 +0000 Subject: [PATCH] Support hex-string input and output for type BYTEA. Both hex format and the traditional "escape" format are automatically handled on input. The output format is selected by the new GUC variable bytea_output. As committed, bytea_output defaults to HEX, which is an *incompatible change*. We will keep it this way for awhile for testing purposes, but should consider whether to switch to the more backwards-compatible default of ESCAPE before 8.5 is released. Peter Eisentraut --- doc/src/sgml/config.sgml | 19 +++- doc/src/sgml/datatype.sgml | 92 +++++++++++++++--- src/backend/catalog/pg_largeobject.c | 4 +- src/backend/commands/trigger.c | 3 +- src/backend/optimizer/path/indxpath.c | 3 +- src/backend/utils/adt/encode.c | 10 +- src/backend/utils/adt/selfuncs.c | 3 +- src/backend/utils/adt/varlena.c | 74 +++++++++++---- src/backend/utils/misc/guc.c | 18 +++- src/backend/utils/misc/postgresql.conf.sample | 1 + src/bin/pg_dump/pg_dump.c | 60 ++++-------- src/include/utils/builtins.h | 30 ++---- src/include/utils/bytea.h | 50 ++++++++++ src/interfaces/libpq/fe-exec.c | 60 +++++++++++- src/test/regress/expected/conversion.out | 2 + src/test/regress/expected/strings.out | 93 +++++++++++++++++++ src/test/regress/input/largeobject.source | 3 + src/test/regress/output/largeobject.source | 2 + src/test/regress/output/largeobject_1.source | 2 + src/test/regress/sql/conversion.sql | 3 + src/test/regress/sql/strings.sql | 21 +++++ 21 files changed, 442 insertions(+), 111 deletions(-) create mode 100644 src/include/utils/bytea.h diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index a86ba6089a..2b34921e70 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -4060,6 +4060,23 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + bytea_output (enum) + + 
bytea_output configuration parameter + + + + Sets the output format for values of type bytea. + Valid values are hex (the default) + and escape (the traditional PostgreSQL + format). See for more + information. The bytea type always + accepts both formats on input, regardless of this setting. + + + + xmlbinary (enum) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 774e4dbfb0..abe747a696 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,4 +1,4 @@ - + Data Types @@ -1177,7 +1177,7 @@ SELECT b, char_length(b) FROM test2; A binary string is a sequence of octets (or bytes). Binary strings are distinguished from character strings in two - ways: First, binary strings specifically allow storing + ways. First, binary strings specifically allow storing octets of value zero and other non-printable octets (usually, octets outside the range 32 to 126). Character strings disallow zero octets, and also disallow any @@ -1191,13 +1191,82 @@ SELECT b, char_length(b) FROM test2; - When entering bytea values, octets of certain - values must be escaped (but all octet - values can be escaped) when used as part - of a string literal in an SQL statement. In + The bytea type supports two external formats for + input and output: PostgreSQL's historical + escape format, and hex format. Both + of these are always accepted on input. The output format depends + on the configuration parameter ; + the default is hex. (Note that the hex format was introduced in + PostgreSQL 8.5; earlier versions and some + tools don't understand it.) + + + + The SQL standard defines a different binary + string type, called BLOB or BINARY LARGE + OBJECT. The input format is different from + bytea, but the provided functions and operators are + mostly the same. + + + + <type>bytea</> hex format + + + The hex format encodes binary data as 2 hexadecimal digits + per byte, most significant nibble first. 
The entire string is + preceded by the sequence \x (to distinguish it + from the escape format). In some contexts, the initial backslash may + need to be escaped by doubling it, in the same cases in which backslashes + have to be doubled in escape format; details appear below. + The hexadecimal digits can + be either upper or lower case, and whitespace is permitted between + digit pairs (but not within a digit pair nor in the starting + \x sequence). + The hex format is compatible with a wide + range of external applications and protocols, and it tends to be + faster to convert than the escape format, so its use is preferred. + + + + Example: + +SELECT E'\\xDEADBEEF'; + + + + + + <type>bytea</> escape format + + + The escape format is the traditional + PostgreSQL format for the bytea + type. It + takes the approach of representing a binary string as a sequence + of ASCII characters, while converting those bytes that cannot be + represented as an ASCII character into special escape sequences. + If, from the point of view of the application, representing bytes + as characters makes sense, then this representation can be + convenient. But in practice it is usually confusing because it + fuzzes up the distinction between binary strings and character + strings, and also the particular escape mechanism that was chosen is + somewhat unwieldy. So this format should probably be avoided + for most new applications. + + + + When entering bytea values in escape format, + octets of certain + values must be escaped, while all octet + values can be escaped. In general, to escape an octet, convert it into its three-digit octal value and precede it - by two backslashes. + by a backslash (or two backslashes, if writing the value as a + literal using escape string syntax). + Backslash itself (octet value 92) can alternatively be represented by + double backslashes. + shows the characters that must be escaped, and gives the alternative escape sequences where applicable. 
@@ -1343,14 +1412,7 @@ SELECT b, char_length(b) FROM test2; have to escape line feeds and carriage returns if your interface automatically translates these. - - - The SQL standard defines a different binary - string type, called BLOB or BINARY LARGE - OBJECT. The input format is different from - bytea, but the provided functions and operators are - mostly the same. - + diff --git a/src/backend/catalog/pg_largeobject.c b/src/backend/catalog/pg_largeobject.c index 925d21387b..313ccdd3f0 100644 --- a/src/backend/catalog/pg_largeobject.c +++ b/src/backend/catalog/pg_largeobject.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/pg_largeobject.c,v 1.32 2009/01/01 17:23:37 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/pg_largeobject.c,v 1.33 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,7 +18,7 @@ #include "access/heapam.h" #include "catalog/indexing.h" #include "catalog/pg_largeobject.h" -#include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/rel.h" #include "utils/tqual.h" diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 7bc8212789..c1f55cfcf9 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.251 2009/07/30 02:45:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.252 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 
b3f96eb773..3930acf05a 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.240 2009/06/11 14:48:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.241 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,6 +31,7 @@ #include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" #include "utils/selfuncs.h" diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 480b85cefc..e581e3bc42 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.23 2009/01/01 17:23:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.24 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,7 +109,7 @@ binary_decode(PG_FUNCTION_ARGS) * HEX */ -static const char *hextbl = "0123456789abcdef"; +static const char hextbl[] = "0123456789abcdef"; static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -122,7 +122,7 @@ static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; -static unsigned +unsigned hex_encode(const char *src, unsigned len, char *dst) { const char *end = src + len; @@ -136,7 +136,7 @@ hex_encode(const char *src, unsigned len, char *dst) return len * 2; } -static char +static inline char get_hex(char c) { int res = -1; @@ -152,7 +152,7 @@ get_hex(char c) return (char) res; } -static unsigned +unsigned hex_decode(const char *src, unsigned len, char *dst) { const char *s, diff --git a/src/backend/utils/adt/selfuncs.c 
b/src/backend/utils/adt/selfuncs.c index 3d60885a79..e85ab06819 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.261 2009/06/11 14:49:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.262 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,6 +109,7 @@ #include "parser/parse_coerce.h" #include "parser/parsetree.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/date.h" #include "utils/datum.h" #include "utils/fmgroids.h" diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index b9b54e6db6..c524454432 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.171 2009/06/11 14:49:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.172 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,10 +24,14 @@ #include "parser/scansup.h" #include "regex/regex.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +/* GUC variable */ +int bytea_output = BYTEA_OUTPUT_HEX; + typedef struct varlena unknown; typedef struct @@ -186,10 +190,24 @@ byteain(PG_FUNCTION_ARGS) char *inputText = PG_GETARG_CSTRING(0); char *tp; char *rp; - int byte; + int bc; bytea *result; - for (byte = 0, tp = inputText; *tp != '\0'; byte ++) + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2)/2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode(inputText + 2, len - 2, VARDATA(result)); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + 
PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) { if (tp[0] != '\\') tp++; @@ -204,7 +222,7 @@ byteain(PG_FUNCTION_ARGS) else { /* - * one backslash, not followed by 0 or ### valid octal + * one backslash, not followed by another or ### valid octal */ ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), @@ -212,10 +230,10 @@ byteain(PG_FUNCTION_ARGS) } } - byte +=VARHDRSZ; + bc += VARHDRSZ; - result = (bytea *) palloc(byte); - SET_VARSIZE(result, byte); + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); tp = inputText; rp = VARDATA(result); @@ -228,11 +246,11 @@ byteain(PG_FUNCTION_ARGS) (tp[2] >= '0' && tp[2] <= '7') && (tp[3] >= '0' && tp[3] <= '7')) { - byte = VAL(tp[1]); - byte <<=3; - byte +=VAL(tp[2]); - byte <<=3; - *rp++ = byte +VAL(tp[3]); + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); tp += 4; } @@ -259,21 +277,30 @@ byteain(PG_FUNCTION_ARGS) /* * byteaout - converts to printable representation of byte array * - * Non-printable characters are inserted as '\nnn' (octal) and '\' as - * '\\'. - * - * NULL vlena should be an error--returning string with NULL for now. + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. 
*/ Datum byteaout(PG_FUNCTION_ARGS) { bytea *vlena = PG_GETARG_BYTEA_PP(0); char *result; - char *vp; char *rp; - int val; /* holds unprintable chars */ - int i; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; int len; + int i; len = 1; /* empty string has 1 char */ vp = VARDATA_ANY(vlena); @@ -297,6 +324,8 @@ byteaout(PG_FUNCTION_ARGS) } else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) { + int val; /* holds unprintable chars */ + val = *vp; rp[0] = '\\'; rp[3] = DIG(val & 07); @@ -309,6 +338,13 @@ byteaout(PG_FUNCTION_ARGS) else *rp++ = *vp; } + } + else + { + elog(ERROR, "unrecognized bytea_output setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } *rp = '\0'; PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 76d3ec9da6..264b45451a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.509 2009/07/22 17:00:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.510 2009/08/04 16:08:36 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -61,6 +61,7 @@ #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/guc_tables.h" #include "utils/memutils.h" #include "utils/pg_locale.h" @@ -180,6 +181,12 @@ static char *config_enum_get_options(struct config_enum * record, * NOTE! Option values may not contain double quotes! 
*/ +static const struct config_enum_entry bytea_output_options[] = { + {"escape", BYTEA_OUTPUT_ESCAPE, false}, + {"hex", BYTEA_OUTPUT_HEX, false}, + {NULL, 0, false} +}; + /* * We have different sets for client and server message level options because * they sort slightly different (see "log" level) @@ -2540,6 +2547,15 @@ static struct config_enum ConfigureNamesEnum[] = BACKSLASH_QUOTE_SAFE_ENCODING, backslash_quote_options, NULL, NULL }, + { + {"bytea_output", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the output format for bytea."), + NULL + }, + &bytea_output, + BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL + }, + { {"client_min_messages", PGC_USERSET, LOGGING_WHEN, gettext_noop("Sets the message levels that are sent to the client."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index e50d7a44f7..41488e264f 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -424,6 +424,7 @@ #statement_timeout = 0 # in milliseconds, 0 is disabled #vacuum_freeze_min_age = 50000000 #vacuum_freeze_table_age = 150000000 +#bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 360eaf1caf..b46c068d4e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -12,7 +12,7 @@ * by PostgreSQL * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump.c,v 1.544 2009/08/02 22:14:52 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump.c,v 1.545 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -11008,6 +11008,8 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo) TableInfo *tbinfo = tginfo->tgtable; PQExpBuffer query; PQExpBuffer delqry; + char *tgargs; + size_t lentgargs; const char *p; int findx; @@ -11109,53 +11111,29 @@ dumpTrigger(Archive *fout, TriggerInfo 
*tginfo) appendPQExpBuffer(query, "EXECUTE PROCEDURE %s(", fmtId(tginfo->tgfname)); - p = tginfo->tgargs; + tgargs = (char *) PQunescapeBytea(tginfo->tgargs, &lentgargs); + p = tgargs; for (findx = 0; findx < tginfo->tgnargs; findx++) { - const char *s = p; + /* find the embedded null that terminates this trigger argument */ + size_t tlen = strlen(p); - /* Set 'p' to end of arg string. marked by '\000' */ - for (;;) + if (p + tlen >= tgargs + lentgargs) { - p = strchr(p, '\\'); - if (p == NULL) - { - write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", - tginfo->tgargs, - tginfo->dobj.name, - tbinfo->dobj.name); - exit_nicely(); - } - p++; - if (*p == '\\') /* is it '\\'? */ - { - p++; - continue; - } - if (p[0] == '0' && p[1] == '0' && p[2] == '0') /* is it '\000'? */ - break; + /* hm, not found before end of bytea value... */ + write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", + tginfo->tgargs, + tginfo->dobj.name, + tbinfo->dobj.name); + exit_nicely(); } - p--; - - appendPQExpBufferChar(query, '\''); - while (s < p) - { - if (*s == '\'') - appendPQExpBufferChar(query, '\''); - /* - * bytea unconditionally doubles backslashes, so we suppress the - * doubling for standard_conforming_strings. - */ - if (fout->std_strings && *s == '\\' && s[1] == '\\') - s++; - appendPQExpBufferChar(query, *s++); - } - appendPQExpBufferChar(query, '\''); - appendPQExpBuffer(query, - (findx < tginfo->tgnargs - 1) ? 
", " : ""); - p = p + 4; + if (findx > 0) + appendPQExpBuffer(query, ", "); + appendStringLiteralAH(query, p, fout); + p += tlen + 1; } + free(tgargs); appendPQExpBuffer(query, ");\n"); if (tginfo->tgenabled != 't' && tginfo->tgenabled != 'O') diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 4b92cbcb60..b664799fc4 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.337 2009/08/03 21:11:39 joe Exp $ + * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.338 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -138,6 +138,12 @@ extern Datum char_text(PG_FUNCTION_ARGS); extern Datum domain_in(PG_FUNCTION_ARGS); extern Datum domain_recv(PG_FUNCTION_ARGS); +/* encode.c */ +extern Datum binary_encode(PG_FUNCTION_ARGS); +extern Datum binary_decode(PG_FUNCTION_ARGS); +extern unsigned hex_encode(const char *src, unsigned len, char *dst); +extern unsigned hex_decode(const char *src, unsigned len, char *dst); + /* enum.c */ extern Datum enum_in(PG_FUNCTION_ARGS); extern Datum enum_out(PG_FUNCTION_ARGS); @@ -711,28 +717,6 @@ extern Datum unknownout(PG_FUNCTION_ARGS); extern Datum unknownrecv(PG_FUNCTION_ARGS); extern Datum unknownsend(PG_FUNCTION_ARGS); -extern Datum byteain(PG_FUNCTION_ARGS); -extern Datum byteaout(PG_FUNCTION_ARGS); -extern Datum bytearecv(PG_FUNCTION_ARGS); -extern Datum byteasend(PG_FUNCTION_ARGS); -extern Datum byteaoctetlen(PG_FUNCTION_ARGS); -extern Datum byteaGetByte(PG_FUNCTION_ARGS); -extern Datum byteaGetBit(PG_FUNCTION_ARGS); -extern Datum byteaSetByte(PG_FUNCTION_ARGS); -extern Datum byteaSetBit(PG_FUNCTION_ARGS); -extern Datum binary_encode(PG_FUNCTION_ARGS); -extern Datum binary_decode(PG_FUNCTION_ARGS); -extern Datum 
byteaeq(PG_FUNCTION_ARGS); -extern Datum byteane(PG_FUNCTION_ARGS); -extern Datum bytealt(PG_FUNCTION_ARGS); -extern Datum byteale(PG_FUNCTION_ARGS); -extern Datum byteagt(PG_FUNCTION_ARGS); -extern Datum byteage(PG_FUNCTION_ARGS); -extern Datum byteacmp(PG_FUNCTION_ARGS); -extern Datum byteacat(PG_FUNCTION_ARGS); -extern Datum byteapos(PG_FUNCTION_ARGS); -extern Datum bytea_substr(PG_FUNCTION_ARGS); -extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); extern Datum pg_column_size(PG_FUNCTION_ARGS); /* version.c */ diff --git a/src/include/utils/bytea.h b/src/include/utils/bytea.h new file mode 100644 index 0000000000..8750d6d0e9 --- /dev/null +++ b/src/include/utils/bytea.h @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------- + * + * bytea.h + * Declarations for BYTEA data type support. + * + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/utils/bytea.h,v 1.1 2009/08/04 16:08:36 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef BYTEA_H +#define BYTEA_H + +#include "fmgr.h" + + +typedef enum +{ + BYTEA_OUTPUT_ESCAPE, + BYTEA_OUTPUT_HEX +} ByteaOutputType; + +extern int bytea_output; /* ByteaOutputType, but int for GUC enum */ + +/* functions are in utils/adt/varlena.c */ +extern Datum byteain(PG_FUNCTION_ARGS); +extern Datum byteaout(PG_FUNCTION_ARGS); +extern Datum bytearecv(PG_FUNCTION_ARGS); +extern Datum byteasend(PG_FUNCTION_ARGS); +extern Datum byteaoctetlen(PG_FUNCTION_ARGS); +extern Datum byteaGetByte(PG_FUNCTION_ARGS); +extern Datum byteaGetBit(PG_FUNCTION_ARGS); +extern Datum byteaSetByte(PG_FUNCTION_ARGS); +extern Datum byteaSetBit(PG_FUNCTION_ARGS); +extern Datum byteaeq(PG_FUNCTION_ARGS); +extern Datum byteane(PG_FUNCTION_ARGS); +extern Datum bytealt(PG_FUNCTION_ARGS); +extern Datum byteale(PG_FUNCTION_ARGS); +extern 
Datum byteagt(PG_FUNCTION_ARGS); +extern Datum byteage(PG_FUNCTION_ARGS); +extern Datum byteacmp(PG_FUNCTION_ARGS); +extern Datum byteacat(PG_FUNCTION_ARGS); +extern Datum byteapos(PG_FUNCTION_ARGS); +extern Datum bytea_substr(PG_FUNCTION_ARGS); +extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); + +#endif /* BYTEA_H */ diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index c00f5eae6a..f1318a4a94 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.203 2009/06/11 14:49:13 momjian Exp $ + * $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.204 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3167,6 +3167,29 @@ PQescapeBytea(const unsigned char *from, size_t from_length, size_t *to_length) } +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static inline char +get_hex(char c) +{ + int res = -1; + + if (c > 0 && c < 127) + res = hexlookup[(unsigned char) c]; + + return (char) res; +} + + #define ISFIRSTOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '3') #define ISOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '7') #define OCTVAL(CH) ((CH) - '0') @@ -3198,6 +3221,40 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) strtextlen = strlen((const char *) strtext); + if (strtext[0] == '\\' && strtext[1] == 'x') + { + const unsigned char *s; + unsigned char 
*p; + + buflen = (strtextlen - 2)/2; + /* Avoid unportable malloc(0) */ + buffer = (unsigned char *) malloc(buflen > 0 ? buflen : 1); + if (buffer == NULL) + return NULL; + + s = strtext + 2; + p = buffer; + while (*s) + { + char v1, + v2; + + /* + * Bad input is silently ignored. Note that this includes + * whitespace between hex pairs, which is allowed by byteain. + */ + v1 = get_hex(*s++); + if (!*s || v1 == (char) -1) + continue; + v2 = get_hex(*s++); + if (v2 != (char) -1) + *p++ = (v1 << 4) | v2; + } + + buflen = p - buffer; + } + else + { /* * Length of input is max length of output, but add one to avoid * unportable malloc(0) if input is zero-length. @@ -3244,6 +3301,7 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) } } buflen = j; /* buflen is the length of the dequoted data */ + } /* Shrink the buffer to be no larger than necessary */ /* +1 avoids unportable behavior when buflen==0 */ diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index e80e1a4534..82eca262f0 100644 --- a/src/test/regress/expected/conversion.out +++ b/src/test/regress/expected/conversion.out @@ -1,3 +1,5 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- -- create user defined conversion -- diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 1241a2ace6..392f48ef8c 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -97,6 +97,99 @@ LINE 1: SELECT U&'wrong: +0061' UESCAPE '+'; ^ DETAIL: String constants with Unicode escapes cannot be used when standard_conforming_strings is off. 
RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\xDeAdBeE'::bytea; +ERROR: invalid hexadecimal data: odd number of digits +LINE 1: SELECT E'\\xDeAdBeE'::bytea; + ^ +SELECT E'\\xDeAdBeEx'::bytea; +ERROR: invalid hexadecimal digit: "x" +LINE 1: SELECT E'\\xDeAdBeEx'::bytea; + ^ +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------ + \xde00beef +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +-------------------- + \x4465416442654566 +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +-------------------- + \x4465006442654566 +(1 row) + +SELECT E'De\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\678dBeEf'::bytea; +ERROR: invalid input syntax for type bytea +LINE 1: SELECT E'De\\678dBeEf'::bytea; + ^ +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------------ + \336\000\276\357 +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +---------- + DeAdBeEf +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +------------- + De\000dBeEf +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +---------- + DeSdBeEf +(1 row) + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types diff --git a/src/test/regress/input/largeobject.source b/src/test/regress/input/largeobject.source index 46ba9261ac..807cfd7cc4 100644 --- a/src/test/regress/input/largeobject.source +++ b/src/test/regress/input/largeobject.source @@ -2,6 +2,9 @@ -- Test large object support -- +-- ensure consistent test output regardless 
of the default bytea format +SET bytea_output TO escape; + -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject.source b/src/test/regress/output/largeobject.source index 9d69f6c913..d7468bb513 100644 --- a/src/test/regress/output/largeobject.source +++ b/src/test/regress/output/largeobject.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject_1.source b/src/test/regress/output/largeobject_1.source index 1fbc29c251..84e916fea4 100644 --- a/src/test/regress/output/largeobject_1.source +++ b/src/test/regress/output/largeobject_1.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 99a9178315..be194eec1f 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -1,3 +1,6 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; + -- -- create user defined conversion -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 681a0e1e62..63df9402ed 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -43,6 +43,27 @@ SELECT U&'wrong: +0061' UESCAPE '+'; RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDeAdBeE'::bytea; +SELECT E'\\xDeAdBeEx'::bytea; +SELECT 
E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\123dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; +SELECT E'De\\678dBeEf'::bytea; + +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types -- 2.40.0