granicus.if.org Git - postgresql/blob - src/backend/utils/adt/encode.c
Run pgindent on 9.2 source tree in preparation for first 9.3
[postgresql] / src / backend / utils / adt / encode.c
1 /*-------------------------------------------------------------------------
2  *
3  * encode.c
4  *        Various data encoding/decoding things.
5  *
6  * Copyright (c) 2001-2012, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/encode.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include <ctype.h>
17
18 #include "utils/builtins.h"
19
20
/*
 * Dispatch table describing one binary <-> text encoding.
 *
 * encode_len / decode_len return a size estimate for the output of the
 * matching conversion; binary_encode() and binary_decode() allocate that
 * many bytes and treat a larger actual result as a fatal overflow.
 * encode / decode write the converted bytes to "dst" and return the number
 * of bytes written.
 */
struct pg_encoding
{
	unsigned	(*encode_len) (const char *src, unsigned srclen);
	unsigned	(*decode_len) (const char *src, unsigned srclen);
	unsigned	(*encode) (const char *src, unsigned srclen, char *dst);
	unsigned	(*decode) (const char *src, unsigned srclen, char *dst);
};

/* Look up an encoding by name; returns NULL when the name is unknown. */
static const struct pg_encoding *pg_find_encoding(const char *name);
30
31 /*
32  * SQL functions.
33  */
34
35 Datum
36 binary_encode(PG_FUNCTION_ARGS)
37 {
38         bytea      *data = PG_GETARG_BYTEA_P(0);
39         Datum           name = PG_GETARG_DATUM(1);
40         text       *result;
41         char       *namebuf;
42         int                     datalen,
43                                 resultlen,
44                                 res;
45         const struct pg_encoding *enc;
46
47         datalen = VARSIZE(data) - VARHDRSZ;
48
49         namebuf = TextDatumGetCString(name);
50
51         enc = pg_find_encoding(namebuf);
52         if (enc == NULL)
53                 ereport(ERROR,
54                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
55                                  errmsg("unrecognized encoding: \"%s\"", namebuf)));
56
57         resultlen = enc->encode_len(VARDATA(data), datalen);
58         result = palloc(VARHDRSZ + resultlen);
59
60         res = enc->encode(VARDATA(data), datalen, VARDATA(result));
61
62         /* Make this FATAL 'cause we've trodden on memory ... */
63         if (res > resultlen)
64                 elog(FATAL, "overflow - encode estimate too small");
65
66         SET_VARSIZE(result, VARHDRSZ + res);
67
68         PG_RETURN_TEXT_P(result);
69 }
70
71 Datum
72 binary_decode(PG_FUNCTION_ARGS)
73 {
74         text       *data = PG_GETARG_TEXT_P(0);
75         Datum           name = PG_GETARG_DATUM(1);
76         bytea      *result;
77         char       *namebuf;
78         int                     datalen,
79                                 resultlen,
80                                 res;
81         const struct pg_encoding *enc;
82
83         datalen = VARSIZE(data) - VARHDRSZ;
84
85         namebuf = TextDatumGetCString(name);
86
87         enc = pg_find_encoding(namebuf);
88         if (enc == NULL)
89                 ereport(ERROR,
90                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
91                                  errmsg("unrecognized encoding: \"%s\"", namebuf)));
92
93         resultlen = enc->decode_len(VARDATA(data), datalen);
94         result = palloc(VARHDRSZ + resultlen);
95
96         res = enc->decode(VARDATA(data), datalen, VARDATA(result));
97
98         /* Make this FATAL 'cause we've trodden on memory ... */
99         if (res > resultlen)
100                 elog(FATAL, "overflow - decode estimate too small");
101
102         SET_VARSIZE(result, VARHDRSZ + res);
103
104         PG_RETURN_BYTEA_P(result);
105 }
106
107
108 /*
109  * HEX
110  */
111
112 static const char hextbl[] = "0123456789abcdef";
113
114 static const int8 hexlookup[128] = {
115         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
116         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
117         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
118         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
119         -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121         -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123 };
124
125 unsigned
126 hex_encode(const char *src, unsigned len, char *dst)
127 {
128         const char *end = src + len;
129
130         while (src < end)
131         {
132                 *dst++ = hextbl[(*src >> 4) & 0xF];
133                 *dst++ = hextbl[*src & 0xF];
134                 src++;
135         }
136         return len * 2;
137 }
138
139 static inline char
140 get_hex(char c)
141 {
142         int                     res = -1;
143
144         if (c > 0 && c < 127)
145                 res = hexlookup[(unsigned char) c];
146
147         if (res < 0)
148                 ereport(ERROR,
149                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
150                                  errmsg("invalid hexadecimal digit: \"%c\"", c)));
151
152         return (char) res;
153 }
154
155 unsigned
156 hex_decode(const char *src, unsigned len, char *dst)
157 {
158         const char *s,
159                            *srcend;
160         char            v1,
161                                 v2,
162                            *p;
163
164         srcend = src + len;
165         s = src;
166         p = dst;
167         while (s < srcend)
168         {
169                 if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
170                 {
171                         s++;
172                         continue;
173                 }
174                 v1 = get_hex(*s++) << 4;
175                 if (s >= srcend)
176                         ereport(ERROR,
177                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
178                                   errmsg("invalid hexadecimal data: odd number of digits")));
179
180                 v2 = get_hex(*s++);
181                 *p++ = v1 | v2;
182         }
183
184         return p - dst;
185 }
186
/*
 * hex_enc_len
 *		Output size for hex-encoding srclen bytes: two digits per byte.
 *		The data itself is not examined.
 */
static unsigned
hex_enc_len(const char *src, unsigned srclen)
{
	return srclen * 2;
}
192
/*
 * hex_dec_len
 *		Output size estimate for hex-decoding srclen characters: one byte
 *		per two characters, rounded down.  The data itself is not examined.
 */
static unsigned
hex_dec_len(const char *src, unsigned srclen)
{
	return srclen / 2;
}
198
199 /*
200  * BASE64
201  */
202
203 static const char _base64[] =
204 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
205
206 static const int8 b64lookup[128] = {
207         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
208         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
209         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
210         52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
211         -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
212         15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
213         -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
214         41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
215 };
216
217 static unsigned
218 b64_encode(const char *src, unsigned len, char *dst)
219 {
220         char       *p,
221                            *lend = dst + 76;
222         const char *s,
223                            *end = src + len;
224         int                     pos = 2;
225         uint32          buf = 0;
226
227         s = src;
228         p = dst;
229
230         while (s < end)
231         {
232                 buf |= (unsigned char) *s << (pos << 3);
233                 pos--;
234                 s++;
235
236                 /* write it out */
237                 if (pos < 0)
238                 {
239                         *p++ = _base64[(buf >> 18) & 0x3f];
240                         *p++ = _base64[(buf >> 12) & 0x3f];
241                         *p++ = _base64[(buf >> 6) & 0x3f];
242                         *p++ = _base64[buf & 0x3f];
243
244                         pos = 2;
245                         buf = 0;
246                 }
247                 if (p >= lend)
248                 {
249                         *p++ = '\n';
250                         lend = p + 76;
251                 }
252         }
253         if (pos != 2)
254         {
255                 *p++ = _base64[(buf >> 18) & 0x3f];
256                 *p++ = _base64[(buf >> 12) & 0x3f];
257                 *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
258                 *p++ = '=';
259         }
260
261         return p - dst;
262 }
263
264 static unsigned
265 b64_decode(const char *src, unsigned len, char *dst)
266 {
267         const char *srcend = src + len,
268                            *s = src;
269         char       *p = dst;
270         char            c;
271         int                     b = 0;
272         uint32          buf = 0;
273         int                     pos = 0,
274                                 end = 0;
275
276         while (s < srcend)
277         {
278                 c = *s++;
279
280                 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
281                         continue;
282
283                 if (c == '=')
284                 {
285                         /* end sequence */
286                         if (!end)
287                         {
288                                 if (pos == 2)
289                                         end = 1;
290                                 else if (pos == 3)
291                                         end = 2;
292                                 else
293                                         ereport(ERROR,
294                                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
295                                                          errmsg("unexpected \"=\"")));
296                         }
297                         b = 0;
298                 }
299                 else
300                 {
301                         b = -1;
302                         if (c > 0 && c < 127)
303                                 b = b64lookup[(unsigned char) c];
304                         if (b < 0)
305                                 ereport(ERROR,
306                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
307                                                  errmsg("invalid symbol")));
308                 }
309                 /* add it to buffer */
310                 buf = (buf << 6) + b;
311                 pos++;
312                 if (pos == 4)
313                 {
314                         *p++ = (buf >> 16) & 255;
315                         if (end == 0 || end > 1)
316                                 *p++ = (buf >> 8) & 255;
317                         if (end == 0 || end > 2)
318                                 *p++ = buf & 255;
319                         buf = 0;
320                         pos = 0;
321                 }
322         }
323
324         if (pos != 0)
325                 ereport(ERROR,
326                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
327                                  errmsg("invalid end sequence")));
328
329         return p - dst;
330 }
331
332
/*
 * b64_enc_len
 *		Output size estimate for base64-encoding srclen bytes.
 *		The data itself is not examined.
 */
static unsigned
b64_enc_len(const char *src, unsigned srclen)
{
	/* every 3 input bytes become 4 output characters */
	unsigned	nchars = (srclen + 2) * 4 / 3;

	/* one linefeed per 76 output characters, i.e. per 57 input bytes */
	unsigned	nlinefeeds = srclen / (76 * 3 / 4);

	return nchars + nlinefeeds;
}
339
/*
 * b64_dec_len
 *		Output size estimate for base64-decoding srclen characters:
 *		three bytes per four characters, rounded down.  The data itself
 *		is not examined.
 */
static unsigned
b64_dec_len(const char *src, unsigned srclen)
{
	return (srclen * 3) / 4;
}
345
346 /*
347  * Escape
348  * Minimally escape bytea to text.
349  * De-escape text to bytea.
350  *
351  * We must escape zero bytes and high-bit-set bytes to avoid generating
352  * text that might be invalid in the current encoding, or that might
353  * change to something else if passed through an encoding conversion
354  * (leading to failing to de-escape to the original bytea value).
355  * Also of course backslash itself has to be escaped.
356  *
357  * De-escaping processes \\ and any \### octal
358  */
359
/* VAL: digit character -> numeric value.  DIG: numeric value -> digit character. */
#define VAL(ch)			((ch) - '0')
#define DIG(num)		((num) + '0')
362
/*
 * esc_encode
 *		Escape srclen bytes from src into dst.
 *
 * A backslash is written as two backslashes; a zero byte or a byte with
 * the high bit set is written as a backslash followed by three octal
 * digits; every other byte is copied unchanged.
 *
 * Returns the number of bytes written to dst.
 */
static unsigned
esc_encode(const char *src, unsigned srclen, char *dst)
{
	const char *in;
	const char *const in_end = src + srclen;
	char	   *out = dst;
	int			written = 0;

	for (in = src; in < in_end; in++)
	{
		unsigned char c = (unsigned char) *in;

		if (c == '\\')
		{
			*out++ = '\\';
			*out++ = '\\';
			written += 2;
		}
		else if (c == '\0' || IS_HIGHBIT_SET(c))
		{
			/* backslash plus the byte value as three octal digits */
			*out++ = '\\';
			*out++ = DIG(c >> 6);
			*out++ = DIG((c >> 3) & 7);
			*out++ = DIG(c & 7);
			written += 4;
		}
		else
		{
			*out++ = c;
			written += 1;
		}
	}

	return written;
}
401
402 static unsigned
403 esc_decode(const char *src, unsigned srclen, char *dst)
404 {
405         const char *end = src + srclen;
406         char       *rp = dst;
407         int                     len = 0;
408
409         while (src < end)
410         {
411                 if (src[0] != '\\')
412                         *rp++ = *src++;
413                 else if (src + 3 < end &&
414                                  (src[1] >= '0' && src[1] <= '3') &&
415                                  (src[2] >= '0' && src[2] <= '7') &&
416                                  (src[3] >= '0' && src[3] <= '7'))
417                 {
418                         int                     val;
419
420                         val = VAL(src[1]);
421                         val <<= 3;
422                         val += VAL(src[2]);
423                         val <<= 3;
424                         *rp++ = val + VAL(src[3]);
425                         src += 4;
426                 }
427                 else if (src + 1 < end &&
428                                  (src[1] == '\\'))
429                 {
430                         *rp++ = '\\';
431                         src += 2;
432                 }
433                 else
434                 {
435                         /*
436                          * One backslash, not followed by ### valid octal. Should never
437                          * get here, since esc_dec_len does same check.
438                          */
439                         ereport(ERROR,
440                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
441                                          errmsg("invalid input syntax for type bytea")));
442                 }
443
444                 len++;
445         }
446
447         return len;
448 }
449
/*
 * esc_enc_len
 *		Number of bytes needed to escape srclen bytes from src: 2 for a
 *		backslash, 4 for a zero or high-bit-set byte, 1 for anything else.
 */
static unsigned
esc_enc_len(const char *src, unsigned srclen)
{
	const char *in;
	const char *const in_end = src + srclen;
	int			total = 0;

	for (in = src; in < in_end; in++)
	{
		if (*in == '\\')
			total += 2;
		else if (*in == '\0' || IS_HIGHBIT_SET(*in))
			total += 4;
		else
			total += 1;
	}

	return total;
}
470
471 static unsigned
472 esc_dec_len(const char *src, unsigned srclen)
473 {
474         const char *end = src + srclen;
475         int                     len = 0;
476
477         while (src < end)
478         {
479                 if (src[0] != '\\')
480                         src++;
481                 else if (src + 3 < end &&
482                                  (src[1] >= '0' && src[1] <= '3') &&
483                                  (src[2] >= '0' && src[2] <= '7') &&
484                                  (src[3] >= '0' && src[3] <= '7'))
485                 {
486                         /*
487                          * backslash + valid octal
488                          */
489                         src += 4;
490                 }
491                 else if (src + 1 < end &&
492                                  (src[1] == '\\'))
493                 {
494                         /*
495                          * two backslashes = backslash
496                          */
497                         src += 2;
498                 }
499                 else
500                 {
501                         /*
502                          * one backslash, not followed by ### valid octal
503                          */
504                         ereport(ERROR,
505                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
506                                          errmsg("invalid input syntax for type bytea")));
507                 }
508
509                 len++;
510         }
511         return len;
512 }
513
514 /*
515  * Common
516  */
517
518 static const struct
519 {
520         const char *name;
521         struct pg_encoding enc;
522 }       enclist[] =
523
524 {
525         {
526                 "hex",
527                 {
528                         hex_enc_len, hex_dec_len, hex_encode, hex_decode
529                 }
530         },
531         {
532                 "base64",
533                 {
534                         b64_enc_len, b64_dec_len, b64_encode, b64_decode
535                 }
536         },
537         {
538                 "escape",
539                 {
540                         esc_enc_len, esc_dec_len, esc_encode, esc_decode
541                 }
542         },
543         {
544                 NULL,
545                 {
546                         NULL, NULL, NULL, NULL
547                 }
548         }
549 };
550
551 static const struct pg_encoding *
552 pg_find_encoding(const char *name)
553 {
554         int                     i;
555
556         for (i = 0; enclist[i].name; i++)
557                 if (pg_strcasecmp(enclist[i].name, name) == 0)
558                         return &enclist[i].enc;
559
560         return NULL;
561 }