1 /*-------------------------------------------------------------------------
4 * Various data encoding/decoding things.
6 * Copyright (c) 2001-2012, PostgreSQL Global Development Group
10 * src/backend/utils/adt/encode.c
12 *-------------------------------------------------------------------------
18 #include "utils/builtins.h"
/*
 * Per-encoding callbacks.  These are accessed through a
 * "const struct pg_encoding *" elsewhere in this file; the enclosing struct
 * header is not visible in this excerpt.
 *
 * encode_len / decode_len: given the input buffer and its length, return the
 * size used to palloc the output buffer, so the value must never be smaller
 * than what encode / decode will actually write.
 * encode / decode: convert dlen bytes at data into res and return the number
 * of bytes written.
 */
23 unsigned (*encode_len) (const char *data, unsigned dlen);
24 unsigned (*decode_len) (const char *data, unsigned dlen);
25 unsigned (*encode) (const char *data, unsigned dlen, char *res);
26 unsigned (*decode) (const char *data, unsigned dlen, char *res);
/* Forward declaration: looks up an encoding's callback set by name. */
29 static const struct pg_encoding *pg_find_encoding(const char *name);
/*
 * binary_encode
 *
 * Function-manager entry point: encodes the bytea in argument 0 with the
 * encoding named by argument 1 and returns the result as text.
 *
 * Reports ERRCODE_INVALID_PARAMETER_VALUE ("unrecognized encoding") for the
 * encoding name.  The return type, braces, some declarations and the tests
 * guarding the two error reports are not visible in this excerpt.
 */
36 binary_encode(PG_FUNCTION_ARGS)
38 bytea *data = PG_GETARG_BYTEA_P(0);
39 Datum name = PG_GETARG_DATUM(1);
45 const struct pg_encoding *enc;
/* payload length, excluding the varlena header */
47 datalen = VARSIZE(data) - VARHDRSZ;
49 namebuf = TextDatumGetCString(name);
51 enc = pg_find_encoding(namebuf);
54 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
55 errmsg("unrecognized encoding: \"%s\"", namebuf)));
/* size the result buffer from the encoding's own length estimate */
57 resultlen = enc->encode_len(VARDATA(data), datalen);
58 result = palloc(VARHDRSZ + resultlen);
/* res is the byte count reported by the encoder */
60 res = enc->encode(VARDATA(data), datalen, VARDATA(result));
62 /* Make this FATAL 'cause we've trodden on memory ... */
64 elog(FATAL, "overflow - encode estimate too small");
/* set the varlena length from the actual output size, not the estimate */
66 SET_VARSIZE(result, VARHDRSZ + res);
68 PG_RETURN_TEXT_P(result);
/*
 * binary_decode
 *
 * Function-manager entry point: decodes the text in argument 0 with the
 * encoding named by argument 1 and returns the result as bytea.
 *
 * Reports ERRCODE_INVALID_PARAMETER_VALUE ("unrecognized encoding") for the
 * encoding name.  The return type, braces, some declarations and the tests
 * guarding the two error reports are not visible in this excerpt.
 */
72 binary_decode(PG_FUNCTION_ARGS)
74 text *data = PG_GETARG_TEXT_P(0);
75 Datum name = PG_GETARG_DATUM(1);
81 const struct pg_encoding *enc;
/* payload length, excluding the varlena header */
83 datalen = VARSIZE(data) - VARHDRSZ;
85 namebuf = TextDatumGetCString(name);
87 enc = pg_find_encoding(namebuf);
90 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
91 errmsg("unrecognized encoding: \"%s\"", namebuf)));
/* size the result buffer from the encoding's own length estimate */
93 resultlen = enc->decode_len(VARDATA(data), datalen);
94 result = palloc(VARHDRSZ + resultlen);
/* res is the byte count reported by the decoder */
96 res = enc->decode(VARDATA(data), datalen, VARDATA(result));
98 /* Make this FATAL 'cause we've trodden on memory ... */
100 elog(FATAL, "overflow - decode estimate too small");
/* set the varlena length from the actual output size, not the estimate */
102 SET_VARSIZE(result, VARHDRSZ + res);
104 PG_RETURN_BYTEA_P(result);
/* Hex digit characters indexed by nibble value (lowercase). */
112 static const char hextbl[] = "0123456789abcdef";
/*
 * Maps a character code in 0..127 to its hex digit value, or -1 when the
 * character is not a hex digit.  '0'-'9' give 0-9; both 'A'-'F' and 'a'-'f'
 * give 10-15.  One row per 16 codes.  The closing brace of the initializer
 * is not visible in this excerpt.
 */
114 static const int8 hexlookup[128] = {
115 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
116 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
117 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
118 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
119 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/*
 * hex_encode
 *
 * Writes two lowercase hex digits to dst for a source byte: the high nibble
 * first, then the low nibble.  The loop header (presumably iterating src up
 * to end) and the return statement are not visible in this excerpt.
 */
126 hex_encode(const char *src, unsigned len, char *dst)
128 const char *end = src + len;
/* the & 0xF mask makes the result independent of char signedness */
132 *dst++ = hextbl[(*src >> 4) & 0xF];
133 *dst++ = hextbl[*src & 0xF];
/*
 * Fragment of a hex-digit lookup helper; its signature line is not visible
 * in this excerpt (presumably get_hex(), which hex_decode calls).
 *
 * Only character codes 1..126 are looked up in hexlookup.  The test that
 * leads to the "invalid hexadecimal digit" error is not visible here.
 */
144 if (c > 0 && c < 127)
145 res = hexlookup[(unsigned char) c];
149 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
150 errmsg("invalid hexadecimal digit: \"%c\"", c)));
/*
 * hex_decode
 *
 * Decodes hex digits from src into bytes at dst.  An odd number of digits is
 * reported as ERRCODE_INVALID_PARAMETER_VALUE.  Loop structure, declarations
 * and the return statement are not visible in this excerpt.
 */
156 hex_decode(const char *src, unsigned len, char *dst)
/*
 * Whitespace (space, newline, tab, carriage return) is tested for here; the
 * branch body is not visible -- presumably the character is skipped.
 */
169 if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
/* first digit of a pair becomes the high nibble */
174 v1 = get_hex(*s++) << 4;
177 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
178 errmsg("invalid hexadecimal data: odd number of digits")));
/*
 * Output-size estimate paired with hex_encode in the encoding table.  The
 * body is not visible in this excerpt.
 */
188 hex_enc_len(const char *src, unsigned srclen)
/*
 * Output-size estimate paired with hex_decode in the encoding table.  The
 * body is not visible in this excerpt.
 */
194 hex_dec_len(const char *src, unsigned srclen)
/* Base64 alphabet indexed by 6-bit value: A-Z, a-z, 0-9, '+', '/'. */
203 static const char _base64[] =
204 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Inverse of _base64: maps a character code in 0..127 to its 6-bit value, or
 * -1 when the character is not in the alphabet.  '=' maps to -1 here.  One
 * row per 16 codes.  The closing brace of the initializer is not visible in
 * this excerpt.
 */
206 static const int8 b64lookup[128] = {
207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
208 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
209 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
210 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
211 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
212 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
213 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
214 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
/*
 * b64_encode
 *
 * Base64-encodes bytes from src into dst using the _base64 alphabet.  Loop
 * structure, declarations and the return statement are not visible in this
 * excerpt.
 */
218 b64_encode(const char *src, unsigned len, char *dst)
/* place the next input byte in the accumulator at bit offset pos * 8 */
232 buf |= (unsigned char) *s << (pos << 3);
/* emit bits 0-23 of the accumulator as four 6-bit symbols, highest first */
239 *p++ = _base64[(buf >> 18) & 0x3f];
240 *p++ = _base64[(buf >> 12) & 0x3f];
241 *p++ = _base64[(buf >> 6) & 0x3f];
242 *p++ = _base64[buf & 0x3f];
/*
 * Second emission site (presumably the tail for a final partial group; the
 * guarding test is not visible): two data symbols, then a third data symbol
 * only when pos == 0, otherwise '=' padding.
 */
255 *p++ = _base64[(buf >> 18) & 0x3f];
256 *p++ = _base64[(buf >> 12) & 0x3f];
257 *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
/*
 * b64_decode
 *
 * Decodes base64 text from src into bytes at dst.  The error reports visible
 * here all use ERRCODE_INVALID_PARAMETER_VALUE: unexpected "=", invalid
 * symbol, and invalid end sequence.  Loop structure, most declarations, the
 * tests guarding the error reports and the return statement are not visible
 * in this excerpt.
 */
265 b64_decode(const char *src, unsigned len, char *dst)
267 const char *srcend = src + len,
/*
 * Whitespace is tested for here; the branch body is not visible --
 * presumably the character is skipped.
 */
280 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
294 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
295 errmsg("unexpected \"=\"")));
/* only character codes 1..126 are looked up in b64lookup */
302 if (c > 0 && c < 127)
303 b = b64lookup[(unsigned char) c];
306 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
307 errmsg("invalid symbol")));
309 /* add it to buffer */
310 buf = (buf << 6) + b;
/*
 * Output bytes are taken from the accumulator high byte first.  The second
 * byte is written unless end == 1; the body of the end > 2 test is not
 * visible.  NOTE(review): end's declaration is not visible -- presumably it
 * records '=' padding; confirm against the full file.
 */
314 *p++ = (buf >> 16) & 255;
315 if (end == 0 || end > 1)
316 *p++ = (buf >> 8) & 255;
317 if (end == 0 || end > 2)
326 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
327 errmsg("invalid end sequence")));
/*
 * b64_enc_len
 *
 * Output-size estimate paired with b64_encode in the encoding table.  Only
 * srclen is used by the visible code.
 *
 * The first term allows four output characters for every three input bytes,
 * rounded up.  The second adds one byte per 76 * 3 / 4 = 57 input bytes,
 * i.e. one per 76-character output line.
 *
 * NOTE(review): the arithmetic is done in unsigned, so (srclen + 2) * 4
 * wraps around for srclen close to UINT_MAX / 4 and the estimate would then
 * be too small.
 */
334 b64_enc_len(const char *src, unsigned srclen)
336 /* 3 bytes will be converted to 4, linefeed after 76 chars */
337 return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4);
/*
 * b64_dec_len
 *
 * Output-size estimate paired with b64_decode in the encoding table: three
 * quarters of the input length, rounded down.  Only srclen is used by the
 * visible code.
 *
 * NOTE(review): srclen * 3 is computed in unsigned and wraps around for
 * srclen above UINT_MAX / 3.
 */
341 b64_dec_len(const char *src, unsigned srclen)
343 return (srclen * 3) >> 2;
348 * Minimally escape bytea to text.
349 * De-escape text to bytea.
351 * We must escape zero bytes and high-bit-set bytes to avoid generating
352 * text that might be invalid in the current encoding, or that might
353 * change to something else if passed through an encoding conversion
354 * (leading to failing to de-escape to the original bytea value).
355 * Also of course backslash itself has to be escaped.
357 * De-escaping processes \\ (a literal backslash) and any \### octal escape (three digits, first digit 0-3).
/* VAL: numeric value of an ASCII digit character. */
360 #define VAL(CH) ((CH) - '0')
/* DIG: ASCII digit character for a small numeric value. */
361 #define DIG(VAL) ((VAL) + '0')
/*
 * esc_encode
 *
 * Escapes bytes from src into dst.  Only part of the body is visible in this
 * excerpt: the loop header, the other branches and the return statement are
 * missing.
 */
364 esc_encode(const char *src, unsigned srclen, char *dst)
366 const char *end = src + srclen;
372 unsigned char c = (unsigned char) *src;
/* zero bytes and high-bit-set bytes are singled out (presumably for octal escaping) */
374 if (c == '\0' || IS_HIGHBIT_SET(c))
/* octal digit taken from bits 3-5 of the byte */
378 rp[2] = DIG((c >> 3) & 7);
/*
 * esc_decode
 *
 * De-escapes text from src into bytes at dst.  Input that matches none of
 * the accepted forms is reported as ERRCODE_INVALID_TEXT_REPRESENTATION.
 * The loop header, the first branch of the if-chain and the return statement
 * are not visible in this excerpt.
 */
403 esc_decode(const char *src, unsigned srclen, char *dst)
405 const char *end = src + srclen;
/*
 * Three octal digits at src[1..3]; the first is limited to 0-3, so the
 * value cannot exceed 0377.
 */
413 else if (src + 3 < end &&
414 (src[1] >= '0' && src[1] <= '3') &&
415 (src[2] >= '0' && src[2] <= '7') &&
416 (src[3] >= '0' && src[3] <= '7'))
/* add the last octal digit and store the decoded byte */
424 *rp++ = val + VAL(src[3]);
/* the rest of this condition is not visible in this excerpt */
427 else if (src + 1 < end &&
436 * One backslash, not followed by ### valid octal. Should never
437 * get here, since esc_dec_len does same check.
440 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
441 errmsg("invalid input syntax for type bytea")));
/*
 * esc_enc_len
 *
 * Output-size estimate paired with esc_encode in the encoding table.  It
 * tests each byte for the same classes esc_encode singles out (zero byte or
 * high bit set) plus backslash.  The amounts added per class and the return
 * statement are not visible in this excerpt.
 */
451 esc_enc_len(const char *src, unsigned srclen)
453 const char *end = src + srclen;
458 if (*src == '\0' || IS_HIGHBIT_SET(*src))
460 else if (*src == '\\')
/*
 * esc_dec_len
 *
 * Output-size estimate paired with esc_decode in the encoding table.  It
 * applies the same octal-digit test as esc_decode and reports other
 * backslash sequences as ERRCODE_INVALID_TEXT_REPRESENTATION.  The loop
 * header, the first branch of the if-chain, the length bookkeeping and the
 * return statement are not visible in this excerpt.
 */
472 esc_dec_len(const char *src, unsigned srclen)
474 const char *end = src + srclen;
/* three octal digits at src[1..3], first digit limited to 0-3 */
481 else if (src + 3 < end &&
482 (src[1] >= '0' && src[1] <= '3') &&
483 (src[2] >= '0' && src[2] <= '7') &&
484 (src[3] >= '0' && src[3] <= '7'))
487 * backslash + valid octal
/* the rest of this condition is not visible in this excerpt */
491 else if (src + 1 < end &&
495 * two backslashes = backslash
502 * one backslash, not followed by ### valid octal
505 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
506 errmsg("invalid input syntax for type bytea")));
/*
 * Fragment of the encoding table that pg_find_encoding scans as enclist.
 * The table's declaration, the entry names and the braces are not visible
 * in this excerpt.
 *
 * Each entry carries a struct pg_encoding whose initializers are listed in
 * member order: encode_len, decode_len, encode, decode.
 */
521 struct pg_encoding enc;
528 hex_enc_len, hex_dec_len, hex_encode, hex_decode
534 b64_enc_len, b64_dec_len, b64_encode, b64_decode
540 esc_enc_len, esc_dec_len, esc_encode, esc_decode
/* all-NULL callbacks: presumably the terminating entry (the scan stops at a NULL name) */
546 NULL, NULL, NULL, NULL
/*
 * pg_find_encoding
 *
 * Scans enclist in order, stopping at the first entry whose name is NULL,
 * and returns the address of the callback set of the first entry whose name
 * matches the argument under pg_strcasecmp.
 *
 * The return for "no match" is not visible in this excerpt.
 */
551 static const struct pg_encoding *
552 pg_find_encoding(const char *name)
556 for (i = 0; enclist[i].name; i++)
557 if (pg_strcasecmp(enclist[i].name, name) == 0)
558 return &enclist[i].enc;