]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/json.c
9f3f5d4feb424a10a9b6e38c22b3d3d491320ced
[postgresql] / src / backend / utils / adt / json.c
1 /*-------------------------------------------------------------------------
2  *
3  * json.c
4  *              JSON data type support.
5  *
6  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/json.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "access/htup_details.h"
17 #include "access/transam.h"
18 #include "catalog/pg_cast.h"
19 #include "catalog/pg_type.h"
20 #include "executor/spi.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/pqformat.h"
23 #include "mb/pg_wchar.h"
24 #include "parser/parse_coerce.h"
25 #include "utils/array.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/json.h"
29 #include "utils/jsonapi.h"
30 #include "utils/typcache.h"
31 #include "utils/syscache.h"
32
/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.  Each value names the grammar position we were
 * at when an unexpected token arrived.
 */
typedef enum					/* contexts of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_STRING,			/* expecting a string (for a field name) */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA,	/* saw object ',', expecting next label */
	JSON_PARSE_END				/* saw the end of a document, expect nothing */
} JsonParseContext;
50
/* Forward declarations for the lexer, parser, and output routines below. */
static inline void json_lex(JsonLexContext *lex);
static inline void json_lex_string(JsonLexContext *lex);
static inline void json_lex_number(JsonLexContext *lex, char *s);
static inline void parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
static void parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
static void parse_object(JsonLexContext *lex, JsonSemAction *sem);
static void parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
static void parse_array(JsonLexContext *lex, JsonSemAction *sem);
static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
static void report_invalid_token(JsonLexContext *lex);
static int	report_json_context(JsonLexContext *lex);
static char *extract_mb_char(char *s);
static void composite_to_json(Datum composite, StringInfo result,
				  bool use_line_feeds);
static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
				  Datum *vals, bool *nulls, int *valcount,
				  TYPCATEGORY tcategory, Oid typoutputfunc,
				  bool use_line_feeds);
static void array_to_json_internal(Datum array, StringInfo result,
					   bool use_line_feeds);
/*
 * The null action object used for pure validation: with every semantic
 * callback NULL, pg_parse_json() merely checks syntax and raises an error
 * on bad input, producing no output.
 */
static JsonSemAction nullSemAction =
{
	NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL
};
78
79 /* Recursive Descent parser support routines */
80
81 /*
82  * lex_peek
83  *
84  * what is the current look_ahead token?
85 */
86 static inline JsonTokenType
87 lex_peek(JsonLexContext *lex)
88 {
89         return lex->token_type;
90 }
91
92 /*
93  * lex_accept
94  *
95  * accept the look_ahead token and move the lexer to the next token if the
96  * look_ahead token matches the token parameter. In that case, and if required,
97  * also hand back the de-escaped lexeme.
98  *
99  * returns true if the token matched, false otherwise.
100  */
101 static inline bool
102 lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
103 {
104         if (lex->token_type == token)
105         {
106                 if (lexeme != NULL)
107                 {
108                         if (lex->token_type == JSON_TOKEN_STRING)
109                         {
110                                 if (lex->strval != NULL)
111                                         *lexeme = pstrdup(lex->strval->data);
112                         }
113                         else
114                         {
115                                 int                     len = (lex->token_terminator - lex->token_start);
116                                 char       *tokstr = palloc(len + 1);
117
118                                 memcpy(tokstr, lex->token_start, len);
119                                 tokstr[len] = '\0';
120                                 *lexeme = tokstr;
121                         }
122                 }
123                 json_lex(lex);
124                 return true;
125         }
126         return false;
127 }
128
129 /*
130  * lex_accept
131  *
132  * move the lexer to the next token if the current look_ahead token matches
133  * the parameter token. Otherwise, report an error.
134  */
135 static inline void
136 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
137 {
138         if (!lex_accept(lex, token, NULL))
139                 report_parse_error(ctx, lex);;
140 }
141
/*
 * All the defined type categories are upper case, so use lower case here
 * so we avoid any possible clash.
 */
/* fake type category for JSON so we can distinguish it in datum_to_json */
#define TYPCATEGORY_JSON 'j'
/* fake category for types that have a cast to json */
#define TYPCATEGORY_JSON_CAST 'c'
/* letters appearing in numeric output that aren't valid in a JSON number */
#define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
/* chars to consider as part of an alphanumeric token */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
159
/*
 * Input.
 *
 * json_in: cstring -> json.  The input is parsed once for validation only
 * (all semantic actions NULL); the datum stored is simply the original text.
 */
Datum
json_in(PG_FUNCTION_ARGS)
{
	char	   *json = PG_GETARG_CSTRING(0);
	text	   *result = cstring_to_text(json);
	JsonLexContext *lex;

	/* validate it; pg_parse_json ereports on bad syntax */
	lex = makeJsonLexContext(result, false);
	pg_parse_json(lex, &nullSemAction);

	/* Internal representation is the same as text, for now */
	PG_RETURN_TEXT_P(result);
}
177
/*
 * Output.
 *
 * json_out: json -> cstring.  Since the internal form is text, output is
 * a straight conversion.
 */
Datum
json_out(PG_FUNCTION_ARGS)
{
	/* we needn't detoast because text_to_cstring will handle that */
	Datum		txt = PG_GETARG_DATUM(0);

	PG_RETURN_CSTRING(TextDatumGetCString(txt));
}
189
/*
 * Binary send.
 *
 * json_send: transmit the json value as its text, using the standard
 * typsend wrapping.
 */
Datum
json_send(PG_FUNCTION_ARGS)
{
	text	   *t = PG_GETARG_TEXT_PP(0);
	StringInfoData buf;

	pq_begintypsend(&buf);
	pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
203
/*
 * Binary receive.
 *
 * json_recv: read the remainder of the message as text, build a text datum
 * from it, and validate it as JSON before returning.
 */
Datum
json_recv(PG_FUNCTION_ARGS)
{
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
	text	   *result;
	char	   *str;
	int			nbytes;
	JsonLexContext *lex;

	/* consume everything left in the message buffer */
	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);

	result = palloc(nbytes + VARHDRSZ);
	SET_VARSIZE(result, nbytes + VARHDRSZ);
	memcpy(VARDATA(result), str, nbytes);

	/* Validate it; pg_parse_json ereports on bad syntax. */
	lex = makeJsonLexContext(result, false);
	pg_parse_json(lex, &nullSemAction);

	PG_RETURN_TEXT_P(result);
}
228
/*
 * makeJsonLexContext
 *
 * lex constructor, with or without StringInfo object
 * for de-escaped lexemes.
 *
 * Without is better as it makes the processing faster, so only make one
 * if really required (i.e. if the semantic routines need the de-escaped
 * string values).
 */
JsonLexContext *
makeJsonLexContext(text *json, bool need_escapes)
{
	JsonLexContext *lex = palloc0(sizeof(JsonLexContext));

	/* lexing starts at the beginning of the (non-detoasted) text payload */
	lex->input = lex->token_terminator = lex->line_start = VARDATA(json);
	lex->line_number = 1;
	lex->input_length = VARSIZE(json) - VARHDRSZ;
	if (need_escapes)
		lex->strval = makeStringInfo();
	return lex;
}
250
251 /*
252  * pg_parse_json
253  *
254  * Publicly visible entry point for the JSON parser.
255  *
256  * lex is a lexing context, set up for the json to be processed by calling
257  * makeJsonLexContext(). sem is a strucure of function pointers to semantic
258  * action routines to be called at appropriate spots during parsing, and a
259  * pointer to a state object to be passed to those routines.
260  */
261 void
262 pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
263 {
264         JsonTokenType tok;
265
266         /* get the initial token */
267         json_lex(lex);
268
269         tok = lex_peek(lex);
270
271         /* parse by recursive descent */
272         switch (tok)
273         {
274                 case JSON_TOKEN_OBJECT_START:
275                         parse_object(lex, sem);
276                         break;
277                 case JSON_TOKEN_ARRAY_START:
278                         parse_array(lex, sem);
279                         break;
280                 default:
281                         parse_scalar(lex, sem);         /* json can be a bare scalar */
282         }
283
284         lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
285
286 }
287
288 /*
289  *      Recursive Descent parse routines. There is one for each structural
290  *      element in a json document:
291  *        - scalar (string, number, true, false, null)
292  *        - array  ( [ ] )
293  *        - array element
294  *        - object ( { } )
295  *        - object field
296  */
297 static inline void
298 parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
299 {
300         char       *val = NULL;
301         json_scalar_action sfunc = sem->scalar;
302         char      **valaddr;
303         JsonTokenType tok = lex_peek(lex);
304
305         valaddr = sfunc == NULL ? NULL : &val;
306
307         /* a scalar must be a string, a number, true, false, or null */
308         switch (tok)
309         {
310                 case JSON_TOKEN_TRUE:
311                         lex_accept(lex, JSON_TOKEN_TRUE, valaddr);
312                         break;
313                 case JSON_TOKEN_FALSE:
314                         lex_accept(lex, JSON_TOKEN_FALSE, valaddr);
315                         break;
316                 case JSON_TOKEN_NULL:
317                         lex_accept(lex, JSON_TOKEN_NULL, valaddr);
318                         break;
319                 case JSON_TOKEN_NUMBER:
320                         lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);
321                         break;
322                 case JSON_TOKEN_STRING:
323                         lex_accept(lex, JSON_TOKEN_STRING, valaddr);
324                         break;
325                 default:
326                         report_parse_error(JSON_PARSE_VALUE, lex);
327         }
328
329         if (sfunc != NULL)
330                 (*sfunc) (sem->semstate, val, tok);
331 }
332
333 static void
334 parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
335 {
336         /*
337          * an object field is "fieldname" : value where value can be a scalar,
338          * object or array
339          */
340
341         char       *fname = NULL;       /* keep compiler quiet */
342         json_ofield_action ostart = sem->object_field_start;
343         json_ofield_action oend = sem->object_field_end;
344         bool            isnull;
345         char      **fnameaddr = NULL;
346         JsonTokenType tok;
347
348         if (ostart != NULL || oend != NULL)
349                 fnameaddr = &fname;
350
351         if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
352                 report_parse_error(JSON_PARSE_STRING, lex);
353
354         lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
355
356         tok = lex_peek(lex);
357         isnull = tok == JSON_TOKEN_NULL;
358
359         if (ostart != NULL)
360                 (*ostart) (sem->semstate, fname, isnull);
361
362         switch (tok)
363         {
364                 case JSON_TOKEN_OBJECT_START:
365                         parse_object(lex, sem);
366                         break;
367                 case JSON_TOKEN_ARRAY_START:
368                         parse_array(lex, sem);
369                         break;
370                 default:
371                         parse_scalar(lex, sem);
372         }
373
374         if (oend != NULL)
375                 (*oend) (sem->semstate, fname, isnull);
376
377         if (fname != NULL)
378                 pfree(fname);
379 }
380
static void
parse_object(JsonLexContext *lex, JsonSemAction *sem)
{
	/*
	 * an object is a possibly empty sequence of object fields, separated by
	 * commas and surrounded by curly braces.
	 */
	json_struct_action ostart = sem->object_start;
	json_struct_action oend = sem->object_end;
	JsonTokenType tok;

	if (ostart != NULL)
		(*ostart) (sem->semstate);

	/*
	 * Data inside an object is at a higher nesting level than the object
	 * itself. Note that we increment this after we call the semantic routine
	 * for the object start and restore it before we call the routine for the
	 * object end.
	 */
	lex->lex_level++;

	/* we know this will succeed, just clearing the token */
	lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START);

	tok = lex_peek(lex);
	switch (tok)
	{
		case JSON_TOKEN_STRING:
			/* first field, then zero or more comma-separated fields */
			parse_object_field(lex, sem);
			while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
				parse_object_field(lex, sem);
			break;
		case JSON_TOKEN_OBJECT_END:
			/* empty object: {} */
			break;
		default:
			/* case of an invalid initial token inside the object */
			report_parse_error(JSON_PARSE_OBJECT_START, lex);
	}

	lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);

	lex->lex_level--;

	if (oend != NULL)
		(*oend) (sem->semstate);
}
428
429 static void
430 parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
431 {
432         json_aelem_action astart = sem->array_element_start;
433         json_aelem_action aend = sem->array_element_end;
434         JsonTokenType tok = lex_peek(lex);
435
436         bool            isnull;
437
438         isnull = tok == JSON_TOKEN_NULL;
439
440         if (astart != NULL)
441                 (*astart) (sem->semstate, isnull);
442
443         /* an array element is any object, array or scalar */
444         switch (tok)
445         {
446                 case JSON_TOKEN_OBJECT_START:
447                         parse_object(lex, sem);
448                         break;
449                 case JSON_TOKEN_ARRAY_START:
450                         parse_array(lex, sem);
451                         break;
452                 default:
453                         parse_scalar(lex, sem);
454         }
455
456         if (aend != NULL)
457                 (*aend) (sem->semstate, isnull);
458 }
459
static void
parse_array(JsonLexContext *lex, JsonSemAction *sem)
{
	/*
	 * an array is a possibly empty sequence of array elements, separated by
	 * commas and surrounded by square brackets.
	 */
	json_struct_action astart = sem->array_start;
	json_struct_action aend = sem->array_end;

	if (astart != NULL)
		(*astart) (sem->semstate);

	/*
	 * Data inside an array is at a higher nesting level than the array
	 * itself. Note that we increment this after we call the semantic routine
	 * for the array start and restore it before we call the routine for the
	 * array end.
	 */
	lex->lex_level++;

	lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
	if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
	{
		/* first element, then zero or more comma-separated elements */
		parse_array_element(lex, sem);

		while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
			parse_array_element(lex, sem);
	}

	lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);

	lex->lex_level--;

	if (aend != NULL)
		(*aend) (sem->semstate);
}
498
/*
 * Lex one token from the input stream.
 *
 * On return, lex->token_type identifies the token, token_start and
 * token_terminator delimit its text, and prev_token_terminator records
 * where the previous token ended (used for error context reporting).
 */
static inline void
json_lex(JsonLexContext *lex)
{
	char	   *s;
	int			len;

	/* Skip leading whitespace. */
	s = lex->token_terminator;
	len = s - lex->input;
	while (len < lex->input_length &&
		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
	{
		/* track line numbers for error messages */
		if (*s == '\n')
			++lex->line_number;
		++s;
		++len;
	}
	lex->token_start = s;

	/* Determine token type. */
	if (len >= lex->input_length)
	{
		/* end of input: emit the special END token */
		lex->token_start = NULL;
		lex->prev_token_terminator = lex->token_terminator;
		lex->token_terminator = s;
		lex->token_type = JSON_TOKEN_END;
	}
	else
		switch (*s)
		{
				/* Single-character token, some kind of punctuation mark. */
			case '{':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_OBJECT_START;
				break;
			case '}':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_OBJECT_END;
				break;
			case '[':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_ARRAY_START;
				break;
			case ']':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_ARRAY_END;
				break;
			case ',':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_COMMA;
				break;
			case ':':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_COLON;
				break;
			case '"':
				/* string; json_lex_string sets the terminator itself */
				json_lex_string(lex);
				lex->token_type = JSON_TOKEN_STRING;
				break;
			case '-':
				/* Negative number. */
				json_lex_number(lex, s + 1);
				lex->token_type = JSON_TOKEN_NUMBER;
				break;
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				/* Positive number. */
				json_lex_number(lex, s);
				lex->token_type = JSON_TOKEN_NUMBER;
				break;
			default:
				{
					char	   *p;

					/*
					 * We're not dealing with a string, number, legal
					 * punctuation mark, or end of string.  The only legal
					 * tokens we might find here are true, false, and null,
					 * but for error reporting purposes we scan until we see a
					 * non-alphanumeric character.  That way, we can report
					 * the whole word as an unexpected token, rather than just
					 * some unintuitive prefix thereof.
					 */
					for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
						 /* skip */ ;

					/*
					 * We got some sort of unexpected punctuation or an
					 * otherwise unexpected character, so just complain about
					 * that one character.  (report_invalid_token is
					 * expected to error out and not return.)
					 */
					if (p == s)
					{
						lex->prev_token_terminator = lex->token_terminator;
						lex->token_terminator = s + 1;
						report_invalid_token(lex);
					}

					/*
					 * We've got a real alphanumeric token here.  If it
					 * happens to be true, false, or null, all is well.  If
					 * not, error out.
					 */
					lex->prev_token_terminator = lex->token_terminator;
					lex->token_terminator = p;
					if (p - s == 4)
					{
						if (memcmp(s, "true", 4) == 0)
							lex->token_type = JSON_TOKEN_TRUE;
						else if (memcmp(s, "null", 4) == 0)
							lex->token_type = JSON_TOKEN_NULL;
						else
							report_invalid_token(lex);
					}
					else if (p - s == 5 && memcmp(s, "false", 5) == 0)
						lex->token_type = JSON_TOKEN_FALSE;
					else
						report_invalid_token(lex);

				}
		}						/* end of switch */
}
639
640 /*
641  * The next token in the input stream is known to be a string; lex it.
642  */
643 static inline void
644 json_lex_string(JsonLexContext *lex)
645 {
646         char       *s;
647         int                     len;
648         int                     hi_surrogate = -1;
649
650         if (lex->strval != NULL)
651                 resetStringInfo(lex->strval);
652
653         Assert(lex->input_length > 0);
654         s = lex->token_start;
655         len = lex->token_start - lex->input;
656         for (;;)
657         {
658                 s++;
659                 len++;
660                 /* Premature end of the string. */
661                 if (len >= lex->input_length)
662                 {
663                         lex->token_terminator = s;
664                         report_invalid_token(lex);
665                 }
666                 else if (*s == '"')
667                         break;
668                 else if ((unsigned char) *s < 32)
669                 {
670                         /* Per RFC4627, these characters MUST be escaped. */
671                         /* Since *s isn't printable, exclude it from the context string */
672                         lex->token_terminator = s;
673                         ereport(ERROR,
674                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
675                                          errmsg("invalid input syntax for type json"),
676                                          errdetail("Character with value 0x%02x must be escaped.",
677                                                            (unsigned char) *s),
678                                          report_json_context(lex)));
679                 }
680                 else if (*s == '\\')
681                 {
682                         /* OK, we have an escape character. */
683                         s++;
684                         len++;
685                         if (len >= lex->input_length)
686                         {
687                                 lex->token_terminator = s;
688                                 report_invalid_token(lex);
689                         }
690                         else if (*s == 'u')
691                         {
692                                 int                     i;
693                                 int                     ch = 0;
694
695                                 for (i = 1; i <= 4; i++)
696                                 {
697                                         s++;
698                                         len++;
699                                         if (len >= lex->input_length)
700                                         {
701                                                 lex->token_terminator = s;
702                                                 report_invalid_token(lex);
703                                         }
704                                         else if (*s >= '0' && *s <= '9')
705                                                 ch = (ch * 16) + (*s - '0');
706                                         else if (*s >= 'a' && *s <= 'f')
707                                                 ch = (ch * 16) + (*s - 'a') + 10;
708                                         else if (*s >= 'A' && *s <= 'F')
709                                                 ch = (ch * 16) + (*s - 'A') + 10;
710                                         else
711                                         {
712                                                 lex->token_terminator = s + pg_mblen(s);
713                                                 ereport(ERROR,
714                                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
715                                                                  errmsg("invalid input syntax for type json"),
716                                                                  errdetail("\"\\u\" must be followed by four hexadecimal digits."),
717                                                                  report_json_context(lex)));
718                                         }
719                                 }
720                                 if (lex->strval != NULL)
721                                 {
722                                         char            utf8str[5];
723                                         int                     utf8len;
724
725                                         if (ch >= 0xd800 && ch <= 0xdbff)
726                                         {
727                                                 if (hi_surrogate != -1)
728                                                         ereport(ERROR,
729                                                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
730                                                                 errmsg("invalid input syntax for type json"),
731                                                                 errdetail("Unicode high surrogate must not follow a high surrogate."),
732                                                                 report_json_context(lex)));
733                                                 hi_surrogate = (ch & 0x3ff) << 10;
734                                                 continue;
735                                         }
736                                         else if (ch >= 0xdc00 && ch <= 0xdfff)
737                                         {
738                                                 if (hi_surrogate == -1)
739                                                         ereport(ERROR,
740                                                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
741                                                                 errmsg("invalid input syntax for type json"),
742                                                                 errdetail("Unicode low surrogate must follow a high surrogate."),
743                                                                 report_json_context(lex)));
744                                                 ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
745                                                 hi_surrogate = -1;
746                                         }
747
748                                         if (hi_surrogate != -1)
749                                                 ereport(ERROR,
750                                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
751                                                                  errmsg("invalid input syntax for type json"),
752                                                                  errdetail("Unicode low surrogate must follow a high surrogate."),
753                                                                  report_json_context(lex)));
754
755                                         /*
756                                          * For UTF8, replace the escape sequence by the actual utf8
757                                          * character in lex->strval. Do this also for other encodings
758                                          * if the escape designates an ASCII character, otherwise
759                                          * raise an error. We don't ever unescape a \u0000, since that
760                                          * would result in an impermissible nul byte.
761                                          */
762
763                                         if (ch == 0)
764                                         {
765                                                 appendStringInfoString(lex->strval, "\\u0000");
766                                         }
767                                         else if (GetDatabaseEncoding() == PG_UTF8)
768                                         {
769                                                 unicode_to_utf8(ch, (unsigned char *) utf8str);
770                                                 utf8len = pg_utf_mblen((unsigned char *) utf8str);
771                                                 appendBinaryStringInfo(lex->strval, utf8str, utf8len);
772                                         }
773                                         else if (ch <= 0x007f)
774                                         {
775                                                 /*
776                                                  * This is the only way to designate things like a form feed
777                                                  * character in JSON, so it's useful in all encodings.
778                                                  */
779                                                 appendStringInfoChar(lex->strval, (char) ch);
780                                         }
781                                         else
782                                         {
783                                                 ereport(ERROR,
784                                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
785                                                                  errmsg("invalid input syntax for type json"),
786                                                                  errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
787                                                                  report_json_context(lex)));
788                                         }
789
790                                 }
791                         }
792                         else if (lex->strval != NULL)
793                         {
794                                 if (hi_surrogate != -1)
795                                         ereport(ERROR,
796                                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
797                                                          errmsg("invalid input syntax for type json"),
798                                                          errdetail("Unicode low surrogate must follow a high surrogate."),
799                                                          report_json_context(lex)));
800
801                                 switch (*s)
802                                 {
803                                         case '"':
804                                         case '\\':
805                                         case '/':
806                                                 appendStringInfoChar(lex->strval, *s);
807                                                 break;
808                                         case 'b':
809                                                 appendStringInfoChar(lex->strval, '\b');
810                                                 break;
811                                         case 'f':
812                                                 appendStringInfoChar(lex->strval, '\f');
813                                                 break;
814                                         case 'n':
815                                                 appendStringInfoChar(lex->strval, '\n');
816                                                 break;
817                                         case 'r':
818                                                 appendStringInfoChar(lex->strval, '\r');
819                                                 break;
820                                         case 't':
821                                                 appendStringInfoChar(lex->strval, '\t');
822                                                 break;
823                                         default:
824                                                 /* Not a valid string escape, so error out. */
825                                                 lex->token_terminator = s + pg_mblen(s);
826                                                 ereport(ERROR,
827                                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
828                                                                  errmsg("invalid input syntax for type json"),
829                                                         errdetail("Escape sequence \"\\%s\" is invalid.",
830                                                                           extract_mb_char(s)),
831                                                                  report_json_context(lex)));
832                                 }
833                         }
834                         else if (strchr("\"\\/bfnrt", *s) == NULL)
835                         {
836                                 /*
837                                  * Simpler processing if we're not bothered about de-escaping
838                                  *
839                                  * It's very tempting to remove the strchr() call here and
840                                  * replace it with a switch statement, but testing so far has
841                                  * shown it's not a performance win.
842                                  */
843                                 lex->token_terminator = s + pg_mblen(s);
844                                 ereport(ERROR,
845                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
846                                                  errmsg("invalid input syntax for type json"),
847                                                  errdetail("Escape sequence \"\\%s\" is invalid.",
848                                                                    extract_mb_char(s)),
849                                                  report_json_context(lex)));
850                         }
851
852                 }
853                 else if (lex->strval != NULL)
854                 {
855                         if (hi_surrogate != -1)
856                                 ereport(ERROR,
857                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
858                                                  errmsg("invalid input syntax for type json"),
859                                                  errdetail("Unicode low surrogate must follow a high surrogate."),
860                                                  report_json_context(lex)));
861
862                         appendStringInfoChar(lex->strval, *s);
863                 }
864
865         }
866
867         if (hi_surrogate != -1)
868                 ereport(ERROR,
869                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
870                                  errmsg("invalid input syntax for type json"),
871                 errdetail("Unicode low surrogate must follow a high surrogate."),
872                                  report_json_context(lex)));
873
874         /* Hooray, we found the end of the string! */
875         lex->prev_token_terminator = lex->token_terminator;
876         lex->token_terminator = s + 1;
877 }
878
879 /*-------------------------------------------------------------------------
880  * The next token in the input stream is known to be a number; lex it.
881  *
882  * In JSON, a number consists of four parts:
883  *
884  * (1) An optional minus sign ('-').
885  *
886  * (2) Either a single '0', or a string of one or more digits that does not
887  *         begin with a '0'.
888  *
889  * (3) An optional decimal part, consisting of a period ('.') followed by
890  *         one or more digits.  (Note: While this part can be omitted
891  *         completely, it's not OK to have only the decimal point without
892  *         any digits afterwards.)
893  *
894  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
895  *         followed by '+' or '-', followed by one or more digits.      (Note:
896  *         As with the decimal part, if 'e' or 'E' is present, it must be
897  *         followed by at least one digit.)
898  *
899  * The 's' argument to this function points to the ostensible beginning
900  * of part 2 - i.e. the character after any optional minus sign, and the
901  * first character of the string if there is none.
902  *
903  *-------------------------------------------------------------------------
904  */
905 static inline void
906 json_lex_number(JsonLexContext *lex, char *s)
907 {
908         bool            error = false;
909         char       *p;
910         int                     len;
911
912         len = s - lex->input;
913         /* Part (1): leading sign indicator. */
914         /* Caller already did this for us; so do nothing. */
915
916         /* Part (2): parse main digit string. */
917         if (*s == '0')
918         {
919                 s++;
920                 len++;
921         }
922         else if (*s >= '1' && *s <= '9')
923         {
924                 do
925                 {
926                         s++;
927                         len++;
928                 } while (len < lex->input_length && *s >= '0' && *s <= '9');
929         }
930         else
931                 error = true;
932
933         /* Part (3): parse optional decimal portion. */
934         if (len < lex->input_length && *s == '.')
935         {
936                 s++;
937                 len++;
938                 if (len == lex->input_length || *s < '0' || *s > '9')
939                         error = true;
940                 else
941                 {
942                         do
943                         {
944                                 s++;
945                                 len++;
946                         } while (len < lex->input_length && *s >= '0' && *s <= '9');
947                 }
948         }
949
950         /* Part (4): parse optional exponent. */
951         if (len < lex->input_length && (*s == 'e' || *s == 'E'))
952         {
953                 s++;
954                 len++;
955                 if (len < lex->input_length && (*s == '+' || *s == '-'))
956                 {
957                         s++;
958                         len++;
959                 }
960                 if (len == lex->input_length || *s < '0' || *s > '9')
961                         error = true;
962                 else
963                 {
964                         do
965                         {
966                                 s++;
967                                 len++;
968                         } while (len < lex->input_length && *s >= '0' && *s <= '9');
969                 }
970         }
971
972         /*
973          * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
974          * here should be considered part of the token for error-reporting
975          * purposes.
976          */
977         for (p = s; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*p); p++, len++)
978                 error = true;
979         lex->prev_token_terminator = lex->token_terminator;
980         lex->token_terminator = p;
981         if (error)
982                 report_invalid_token(lex);
983 }
984
985 /*
986  * Report a parse error.
987  *
988  * lex->token_start and lex->token_terminator must identify the current token.
989  */
990 static void
991 report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
992 {
993         char       *token;
994         int                     toklen;
995
996         /* Handle case where the input ended prematurely. */
997         if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
998                 ereport(ERROR,
999                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1000                                  errmsg("invalid input syntax for type json"),
1001                                  errdetail("The input string ended unexpectedly."),
1002                                  report_json_context(lex)));
1003
1004         /* Separate out the current token. */
1005         toklen = lex->token_terminator - lex->token_start;
1006         token = palloc(toklen + 1);
1007         memcpy(token, lex->token_start, toklen);
1008         token[toklen] = '\0';
1009
1010         /* Complain, with the appropriate detail message. */
1011         if (ctx == JSON_PARSE_END)
1012                 ereport(ERROR,
1013                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1014                                  errmsg("invalid input syntax for type json"),
1015                                  errdetail("Expected end of input, but found \"%s\".",
1016                                                    token),
1017                                  report_json_context(lex)));
1018         else
1019         {
1020                 switch (ctx)
1021                 {
1022                         case JSON_PARSE_VALUE:
1023                                 ereport(ERROR,
1024                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1025                                                  errmsg("invalid input syntax for type json"),
1026                                                  errdetail("Expected JSON value, but found \"%s\".",
1027                                                                    token),
1028                                                  report_json_context(lex)));
1029                                 break;
1030                         case JSON_PARSE_STRING:
1031                                 ereport(ERROR,
1032                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1033                                                  errmsg("invalid input syntax for type json"),
1034                                                  errdetail("Expected string, but found \"%s\".",
1035                                                                    token),
1036                                                  report_json_context(lex)));
1037                                 break;
1038                         case JSON_PARSE_ARRAY_START:
1039                                 ereport(ERROR,
1040                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1041                                                  errmsg("invalid input syntax for type json"),
1042                                                  errdetail("Expected array element or \"]\", but found \"%s\".",
1043                                                                    token),
1044                                                  report_json_context(lex)));
1045                                 break;
1046                         case JSON_PARSE_ARRAY_NEXT:
1047                                 ereport(ERROR,
1048                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1049                                                  errmsg("invalid input syntax for type json"),
1050                                           errdetail("Expected \",\" or \"]\", but found \"%s\".",
1051                                                                 token),
1052                                                  report_json_context(lex)));
1053                                 break;
1054                         case JSON_PARSE_OBJECT_START:
1055                                 ereport(ERROR,
1056                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1057                                                  errmsg("invalid input syntax for type json"),
1058                                          errdetail("Expected string or \"}\", but found \"%s\".",
1059                                                            token),
1060                                                  report_json_context(lex)));
1061                                 break;
1062                         case JSON_PARSE_OBJECT_LABEL:
1063                                 ereport(ERROR,
1064                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1065                                                  errmsg("invalid input syntax for type json"),
1066                                                  errdetail("Expected \":\", but found \"%s\".",
1067                                                                    token),
1068                                                  report_json_context(lex)));
1069                                 break;
1070                         case JSON_PARSE_OBJECT_NEXT:
1071                                 ereport(ERROR,
1072                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1073                                                  errmsg("invalid input syntax for type json"),
1074                                           errdetail("Expected \",\" or \"}\", but found \"%s\".",
1075                                                                 token),
1076                                                  report_json_context(lex)));
1077                                 break;
1078                         case JSON_PARSE_OBJECT_COMMA:
1079                                 ereport(ERROR,
1080                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1081                                                  errmsg("invalid input syntax for type json"),
1082                                                  errdetail("Expected string, but found \"%s\".",
1083                                                                    token),
1084                                                  report_json_context(lex)));
1085                                 break;
1086                         default:
1087                                 elog(ERROR, "unexpected json parse state: %d", ctx);
1088                 }
1089         }
1090 }
1091
1092 /*
1093  * Report an invalid input token.
1094  *
1095  * lex->token_start and lex->token_terminator must identify the token.
1096  */
1097 static void
1098 report_invalid_token(JsonLexContext *lex)
1099 {
1100         char       *token;
1101         int                     toklen;
1102
1103         /* Separate out the offending token. */
1104         toklen = lex->token_terminator - lex->token_start;
1105         token = palloc(toklen + 1);
1106         memcpy(token, lex->token_start, toklen);
1107         token[toklen] = '\0';
1108
1109         ereport(ERROR,
1110                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1111                          errmsg("invalid input syntax for type json"),
1112                          errdetail("Token \"%s\" is invalid.", token),
1113                          report_json_context(lex)));
1114 }
1115
1116 /*
1117  * Report a CONTEXT line for bogus JSON input.
1118  *
1119  * lex->token_terminator must be set to identify the spot where we detected
1120  * the error.  Note that lex->token_start might be NULL, in case we recognized
1121  * error at EOF.
1122  *
1123  * The return value isn't meaningful, but we make it non-void so that this
1124  * can be invoked inside ereport().
1125  */
/*
 * Report a CONTEXT line for bogus JSON input.
 *
 * lex->token_terminator must be set to identify the spot where we detected
 * the error.  Note that lex->token_start might be NULL, in case we recognized
 * error at EOF.
 *
 * The return value isn't meaningful, but we make it non-void so that this
 * can be invoked inside ereport().
 */
static int
report_json_context(JsonLexContext *lex)
{
	const char *context_start;
	const char *context_end;
	const char *line_start;
	int			line_number;
	char	   *ctxt;
	int			ctxtlen;
	const char *prefix;
	const char *suffix;

	/*
	 * Choose boundaries for the part of the input we will display.  We want
	 * the excerpt to end at the error location (token_terminator) and to be
	 * at most ~50 characters long, never crossing a line boundary.
	 */
	context_start = lex->input;
	context_end = lex->token_terminator;
	line_start = context_start;
	line_number = 1;
	for (;;)
	{
		/* Always advance over newlines, tracking the current line number */
		if (context_start < context_end && *context_start == '\n')
		{
			context_start++;
			line_start = context_start;
			line_number++;
			continue;
		}
		/* Otherwise, done as soon as we are close enough to context_end */
		if (context_end - context_start < 50)
			break;
		/*
		 * Advance to next multibyte character, so the excerpt never begins in
		 * the middle of a multibyte sequence.
		 */
		if (IS_HIGHBIT_SET(*context_start))
			context_start += pg_mblen(context_start);
		else
			context_start++;
	}

	/*
	 * We add "..." to indicate that the excerpt doesn't start at the
	 * beginning of the line ... but if we're within 3 characters of the
	 * beginning of the line, we might as well just show the whole line.
	 */
	if (context_start - line_start <= 3)
		context_start = line_start;

	/* Get a null-terminated copy of the data to present */
	ctxtlen = context_end - context_start;
	ctxt = palloc(ctxtlen + 1);
	memcpy(ctxt, context_start, ctxtlen);
	ctxt[ctxtlen] = '\0';

	/*
	 * Show the context, prefixing "..." if not starting at start of line, and
	 * suffixing "..." if not ending at end of line.  The suffix is suppressed
	 * when the error is at end of input, since nothing follows the excerpt.
	 */
	prefix = (context_start > line_start) ? "..." : "";
	suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";

	return errcontext("JSON data, line %d: %s%s%s",
					  line_number, prefix, ctxt, suffix);
}
1187
1188 /*
1189  * Extract a single, possibly multi-byte char from the input string.
1190  */
/*
 * Extract a single, possibly multi-byte char from the input string.
 *
 * Returns a freshly palloc'd, null-terminated copy of the character at 's'.
 */
static char *
extract_mb_char(char *s)
{
	int			mblen = pg_mblen(s);
	char	   *copy = palloc(mblen + 1);

	memcpy(copy, s, mblen);
	copy[mblen] = '\0';

	return copy;
}
1204
1205 /*
1206  * Turn a scalar Datum into JSON, appending the string to "result".
1207  *
1208  * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
1209  * as appropriate.
1210  */
1211 static void
1212 datum_to_json(Datum val, bool is_null, StringInfo result,
1213                           TYPCATEGORY tcategory, Oid typoutputfunc)
1214 {
1215         char       *outputstr;
1216         text       *jsontext;
1217
1218         if (is_null)
1219         {
1220                 appendStringInfoString(result, "null");
1221                 return;
1222         }
1223
1224         switch (tcategory)
1225         {
1226                 case TYPCATEGORY_ARRAY:
1227                         array_to_json_internal(val, result, false);
1228                         break;
1229                 case TYPCATEGORY_COMPOSITE:
1230                         composite_to_json(val, result, false);
1231                         break;
1232                 case TYPCATEGORY_BOOLEAN:
1233                         if (DatumGetBool(val))
1234                                 appendStringInfoString(result, "true");
1235                         else
1236                                 appendStringInfoString(result, "false");
1237                         break;
1238                 case TYPCATEGORY_NUMERIC:
1239                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
1240
1241                         /*
1242                          * Don't call escape_json here if it's a valid JSON number.
1243                          * Numeric output should usually be a valid JSON number and JSON
1244                          * numbers shouldn't be quoted. Quote cases like "Nan" and
1245                          * "Infinity", however.
1246                          */
1247                         if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
1248                                 appendStringInfoString(result, outputstr);
1249                         else
1250                                 escape_json(result, outputstr);
1251                         pfree(outputstr);
1252                         break;
1253                 case TYPCATEGORY_JSON:
1254                         /* JSON will already be escaped */
1255                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
1256                         appendStringInfoString(result, outputstr);
1257                         pfree(outputstr);
1258                         break;
1259                 case TYPCATEGORY_JSON_CAST:
1260                         jsontext = DatumGetTextP(OidFunctionCall1(typoutputfunc, val));
1261                         outputstr = text_to_cstring(jsontext);
1262                         appendStringInfoString(result, outputstr);
1263                         pfree(outputstr);
1264                         pfree(jsontext);
1265                         break;
1266                 default:
1267                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
1268                         escape_json(result, outputstr);
1269                         pfree(outputstr);
1270                         break;
1271         }
1272 }
1273
1274 /*
1275  * Process a single dimension of an array.
1276  * If it's the innermost dimension, output the values, otherwise call
1277  * ourselves recursively to process the next dimension.
1278  */
1279 static void
1280 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
1281                                   bool *nulls, int *valcount, TYPCATEGORY tcategory,
1282                                   Oid typoutputfunc, bool use_line_feeds)
1283 {
1284         int                     i;
1285         const char *sep;
1286
1287         Assert(dim < ndims);
1288
1289         sep = use_line_feeds ? ",\n " : ",";
1290
1291         appendStringInfoChar(result, '[');
1292
1293         for (i = 1; i <= dims[dim]; i++)
1294         {
1295                 if (i > 1)
1296                         appendStringInfoString(result, sep);
1297
1298                 if (dim + 1 == ndims)
1299                 {
1300                         datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
1301                                                   typoutputfunc);
1302                         (*valcount)++;
1303                 }
1304                 else
1305                 {
1306                         /*
1307                          * Do we want line feeds on inner dimensions of arrays? For now
1308                          * we'll say no.
1309                          */
1310                         array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
1311                                                           valcount, tcategory, typoutputfunc, false);
1312                 }
1313         }
1314
1315         appendStringInfoChar(result, ']');
1316 }
1317
1318 /*
1319  * Turn an array into JSON.
1320  */
1321 static void
1322 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
1323 {
1324         ArrayType  *v = DatumGetArrayTypeP(array);
1325         Oid                     element_type = ARR_ELEMTYPE(v);
1326         int                *dim;
1327         int                     ndim;
1328         int                     nitems;
1329         int                     count = 0;
1330         Datum      *elements;
1331         bool       *nulls;
1332         int16           typlen;
1333         bool            typbyval;
1334         char            typalign,
1335                                 typdelim;
1336         Oid                     typioparam;
1337         Oid                     typoutputfunc;
1338         TYPCATEGORY tcategory;
1339         Oid                     castfunc = InvalidOid;
1340
1341         ndim = ARR_NDIM(v);
1342         dim = ARR_DIMS(v);
1343         nitems = ArrayGetNItems(ndim, dim);
1344
1345         if (nitems <= 0)
1346         {
1347                 appendStringInfoString(result, "[]");
1348                 return;
1349         }
1350
1351         get_type_io_data(element_type, IOFunc_output,
1352                                          &typlen, &typbyval, &typalign,
1353                                          &typdelim, &typioparam, &typoutputfunc);
1354
1355         if (element_type > FirstNormalObjectId)
1356         {
1357                 HeapTuple       tuple;
1358                 Form_pg_cast castForm;
1359
1360                 tuple = SearchSysCache2(CASTSOURCETARGET,
1361                                                                 ObjectIdGetDatum(element_type),
1362                                                                 ObjectIdGetDatum(JSONOID));
1363                 if (HeapTupleIsValid(tuple))
1364                 {
1365                         castForm = (Form_pg_cast) GETSTRUCT(tuple);
1366
1367                         if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1368                                 castfunc = typoutputfunc = castForm->castfunc;
1369
1370                         ReleaseSysCache(tuple);
1371                 }
1372         }
1373
1374         deconstruct_array(v, element_type, typlen, typbyval,
1375                                           typalign, &elements, &nulls,
1376                                           &nitems);
1377
1378         if (castfunc != InvalidOid)
1379                 tcategory = TYPCATEGORY_JSON_CAST;
1380         else if (element_type == RECORDOID)
1381                 tcategory = TYPCATEGORY_COMPOSITE;
1382         else if (element_type == JSONOID)
1383                 tcategory = TYPCATEGORY_JSON;
1384         else
1385                 tcategory = TypeCategory(element_type);
1386
1387         array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
1388                                           typoutputfunc, use_line_feeds);
1389
1390         pfree(elements);
1391         pfree(nulls);
1392 }
1393
/*
 * Turn a composite / record into JSON.
 *
 * The result is an object whose keys are the (non-dropped) attribute names
 * and whose values are each attribute rendered via datum_to_json.  With
 * use_line_feeds, fields are separated by ",\n " instead of ",".
 */
static void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
	HeapTupleHeader td;
	Oid			tupType;
	int32		tupTypmod;
	TupleDesc	tupdesc;
	HeapTupleData tmptup,
			   *tuple;
	int			i;
	bool		needsep = false;	/* emit separator before all but first field */
	const char *sep;

	sep = use_line_feeds ? ",\n " : ",";

	td = DatumGetHeapTupleHeader(composite);

	/* Extract rowtype info and find a tupdesc */
	tupType = HeapTupleHeaderGetTypeId(td);
	tupTypmod = HeapTupleHeaderGetTypMod(td);
	tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);

	/* Build a temporary HeapTuple control structure */
	tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
	tmptup.t_data = td;
	tuple = &tmptup;

	appendStringInfoChar(result, '{');

	for (i = 0; i < tupdesc->natts; i++)
	{
		Datum		val,
					origval;
		bool		isnull;
		char	   *attname;
		TYPCATEGORY tcategory;
		Oid			typoutput;
		bool		typisvarlena;
		Oid			castfunc = InvalidOid;

		/* Dropped columns are omitted from the output entirely */
		if (tupdesc->attrs[i]->attisdropped)
			continue;

		if (needsep)
			appendStringInfoString(result, sep);
		needsep = true;

		/* Emit the field name as a JSON string key, then ':' */
		attname = NameStr(tupdesc->attrs[i]->attname);
		escape_json(result, attname);
		appendStringInfoChar(result, ':');

		origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);

		getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
						  &typoutput, &typisvarlena);

		/*
		 * For user-defined types (OIDs above FirstNormalObjectId), check for
		 * a registered cast to json; if there is a function-based cast, use
		 * the cast function in place of the type's regular output function.
		 */
		if (tupdesc->attrs[i]->atttypid > FirstNormalObjectId)
		{
			HeapTuple	cast_tuple;
			Form_pg_cast castForm;

			cast_tuple = SearchSysCache2(CASTSOURCETARGET,
							   ObjectIdGetDatum(tupdesc->attrs[i]->atttypid),
										 ObjectIdGetDatum(JSONOID));
			if (HeapTupleIsValid(cast_tuple))
			{
				castForm = (Form_pg_cast) GETSTRUCT(cast_tuple);

				if (castForm->castmethod == COERCION_METHOD_FUNCTION)
					castfunc = typoutput = castForm->castfunc;

				ReleaseSysCache(cast_tuple);
			}
		}

		/* Classify the attribute type so datum_to_json renders it correctly */
		if (castfunc != InvalidOid)
			tcategory = TYPCATEGORY_JSON_CAST;
		else if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
			tcategory = TYPCATEGORY_ARRAY;
		else if (tupdesc->attrs[i]->atttypid == RECORDOID)
			tcategory = TYPCATEGORY_COMPOSITE;
		else if (tupdesc->attrs[i]->atttypid == JSONOID)
			tcategory = TYPCATEGORY_JSON;
		else
			tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);

		/*
		 * If we have a toasted datum, forcibly detoast it here to avoid
		 * memory leakage inside the type's output routine.
		 */
		if (typisvarlena && !isnull)
			val = PointerGetDatum(PG_DETOAST_DATUM(origval));
		else
			val = origval;

		datum_to_json(val, isnull, result, tcategory, typoutput);

		/* Clean up detoasted copy, if any */
		if (val != origval)
			pfree(DatumGetPointer(val));
	}

	appendStringInfoChar(result, '}');
	ReleaseTupleDesc(tupdesc);
}
1502
1503 /*
1504  * SQL function array_to_json(row)
1505  */
1506 extern Datum
1507 array_to_json(PG_FUNCTION_ARGS)
1508 {
1509         Datum           array = PG_GETARG_DATUM(0);
1510         StringInfo      result;
1511
1512         result = makeStringInfo();
1513
1514         array_to_json_internal(array, result, false);
1515
1516         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1517 }
1518
1519 /*
1520  * SQL function array_to_json(row, prettybool)
1521  */
1522 extern Datum
1523 array_to_json_pretty(PG_FUNCTION_ARGS)
1524 {
1525         Datum           array = PG_GETARG_DATUM(0);
1526         bool            use_line_feeds = PG_GETARG_BOOL(1);
1527         StringInfo      result;
1528
1529         result = makeStringInfo();
1530
1531         array_to_json_internal(array, result, use_line_feeds);
1532
1533         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1534 }
1535
1536 /*
1537  * SQL function row_to_json(row)
1538  */
1539 extern Datum
1540 row_to_json(PG_FUNCTION_ARGS)
1541 {
1542         Datum           array = PG_GETARG_DATUM(0);
1543         StringInfo      result;
1544
1545         result = makeStringInfo();
1546
1547         composite_to_json(array, result, false);
1548
1549         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1550 }
1551
1552 /*
1553  * SQL function row_to_json(row, prettybool)
1554  */
1555 extern Datum
1556 row_to_json_pretty(PG_FUNCTION_ARGS)
1557 {
1558         Datum           array = PG_GETARG_DATUM(0);
1559         bool            use_line_feeds = PG_GETARG_BOOL(1);
1560         StringInfo      result;
1561
1562         result = makeStringInfo();
1563
1564         composite_to_json(array, result, use_line_feeds);
1565
1566         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1567 }
1568
1569 /*
1570  * SQL function to_json(anyvalue)
1571  */
1572 Datum
1573 to_json(PG_FUNCTION_ARGS)
1574 {
1575         Oid                     val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
1576         StringInfo      result;
1577         Datum           orig_val,
1578                                 val;
1579         TYPCATEGORY tcategory;
1580         Oid                     typoutput;
1581         bool            typisvarlena;
1582         Oid                     castfunc = InvalidOid;
1583
1584         if (val_type == InvalidOid)
1585                 ereport(ERROR,
1586                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1587                                  errmsg("could not determine input data type")));
1588
1589
1590         result = makeStringInfo();
1591
1592         orig_val = PG_ARGISNULL(0) ? (Datum) 0 : PG_GETARG_DATUM(0);
1593
1594         getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
1595
1596         if (val_type > FirstNormalObjectId)
1597         {
1598                 HeapTuple       tuple;
1599                 Form_pg_cast castForm;
1600
1601                 tuple = SearchSysCache2(CASTSOURCETARGET,
1602                                                                 ObjectIdGetDatum(val_type),
1603                                                                 ObjectIdGetDatum(JSONOID));
1604                 if (HeapTupleIsValid(tuple))
1605                 {
1606                         castForm = (Form_pg_cast) GETSTRUCT(tuple);
1607
1608                         if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1609                                 castfunc = typoutput = castForm->castfunc;
1610
1611                         ReleaseSysCache(tuple);
1612                 }
1613         }
1614
1615         if (castfunc != InvalidOid)
1616                 tcategory = TYPCATEGORY_JSON_CAST;
1617         else if (val_type == RECORDARRAYOID)
1618                 tcategory = TYPCATEGORY_ARRAY;
1619         else if (val_type == RECORDOID)
1620                 tcategory = TYPCATEGORY_COMPOSITE;
1621         else if (val_type == JSONOID)
1622                 tcategory = TYPCATEGORY_JSON;
1623         else
1624                 tcategory = TypeCategory(val_type);
1625
1626         /*
1627          * If we have a toasted datum, forcibly detoast it here to avoid memory
1628          * leakage inside the type's output routine.
1629          */
1630         if (typisvarlena && orig_val != (Datum) 0)
1631                 val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
1632         else
1633                 val = orig_val;
1634
1635         datum_to_json(val, false, result, tcategory, typoutput);
1636
1637         /* Clean up detoasted copy, if any */
1638         if (val != orig_val)
1639                 pfree(DatumGetPointer(val));
1640
1641         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1642 }
1643
/*
 * json_agg transition function
 *
 * Accumulates the aggregated values into a StringInfo holding a growing
 * JSON array text; json_agg_finalfn closes the array.
 */
Datum
json_agg_transfn(PG_FUNCTION_ARGS)
{
	Oid			val_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
	MemoryContext aggcontext,
				oldcontext;
	StringInfo	state;
	Datum		orig_val,
				val;
	TYPCATEGORY tcategory;
	Oid			typoutput;
	bool		typisvarlena;
	Oid			castfunc = InvalidOid;

	if (val_type == InvalidOid)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("could not determine input data type")));

	if (!AggCheckCallContext(fcinfo, &aggcontext))
	{
		/* cannot be called directly because of internal-type argument */
		elog(ERROR, "json_agg_transfn called in non-aggregate context");
	}

	if (PG_ARGISNULL(0))
	{
		/*
		 * Make this StringInfo in a context where it will persist for the
		 * duration of the aggregate call.  It's only needed for this initial
		 * piece, as the StringInfo routines make sure they use the right
		 * context to enlarge the object if necessary.
		 */
		oldcontext = MemoryContextSwitchTo(aggcontext);
		state = makeStringInfo();
		MemoryContextSwitchTo(oldcontext);

		appendStringInfoChar(state, '[');
	}
	else
	{
		state = (StringInfo) PG_GETARG_POINTER(0);
		appendStringInfoString(state, ", ");
	}

	/* fast path for NULLs: emit "null" without any type lookups */
	if (PG_ARGISNULL(1))
	{
		orig_val = (Datum) 0;
		datum_to_json(orig_val, true, state, 0, InvalidOid);
		PG_RETURN_POINTER(state);
	}


	orig_val = PG_GETARG_DATUM(1);

	getTypeOutputInfo(val_type, &typoutput, &typisvarlena);

	/*
	 * For user-defined types (OIDs above FirstNormalObjectId), check for a
	 * registered cast to json; if there is a function-based cast, use the
	 * cast function in place of the type's regular output function.
	 */
	if (val_type > FirstNormalObjectId)
	{
		HeapTuple	tuple;
		Form_pg_cast castForm;

		tuple = SearchSysCache2(CASTSOURCETARGET,
								ObjectIdGetDatum(val_type),
								ObjectIdGetDatum(JSONOID));
		if (HeapTupleIsValid(tuple))
		{
			castForm = (Form_pg_cast) GETSTRUCT(tuple);

			if (castForm->castmethod == COERCION_METHOD_FUNCTION)
				castfunc = typoutput = castForm->castfunc;

			ReleaseSysCache(tuple);
		}
	}

	/* Classify the element type so datum_to_json renders it correctly */
	if (castfunc != InvalidOid)
		tcategory = TYPCATEGORY_JSON_CAST;
	else if (val_type == RECORDARRAYOID)
		tcategory = TYPCATEGORY_ARRAY;
	else if (val_type == RECORDOID)
		tcategory = TYPCATEGORY_COMPOSITE;
	else if (val_type == JSONOID)
		tcategory = TYPCATEGORY_JSON;
	else
		tcategory = TypeCategory(val_type);

	/*
	 * If we have a toasted datum, forcibly detoast it here to avoid memory
	 * leakage inside the type's output routine.
	 */
	if (typisvarlena)
		val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
	else
		val = orig_val;

	/*
	 * Add a newline before each array/composite element after the first,
	 * since those render as multi-line values.
	 */
	if (!PG_ARGISNULL(0) &&
	  (tcategory == TYPCATEGORY_ARRAY || tcategory == TYPCATEGORY_COMPOSITE))
	{
		appendStringInfoString(state, "\n ");
	}

	datum_to_json(val, false, state, tcategory, typoutput);

	/* Clean up detoasted copy, if any */
	if (val != orig_val)
		pfree(DatumGetPointer(val));

	/*
	 * The transition type for json_agg() is declared to be "internal", which
	 * is a pass-by-value type the same size as a pointer.  So we can safely
	 * pass the StringInfo pointer through nodeAgg.c's machinations.
	 */
	PG_RETURN_POINTER(state);
}
1763
1764 /*
1765  * json_agg final function
1766  */
1767 Datum
1768 json_agg_finalfn(PG_FUNCTION_ARGS)
1769 {
1770         StringInfo      state;
1771
1772         /* cannot be called directly because of internal-type argument */
1773         Assert(AggCheckCallContext(fcinfo, NULL));
1774
1775         state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
1776
1777         if (state == NULL)
1778                 PG_RETURN_NULL();
1779
1780         appendStringInfoChar(state, ']');
1781
1782         PG_RETURN_TEXT_P(cstring_to_text(state->data));
1783 }
1784
1785 /*
1786  * Produce a JSON string literal, properly escaping characters in the text.
1787  */
1788 void
1789 escape_json(StringInfo buf, const char *str)
1790 {
1791         const char *p;
1792
1793         appendStringInfoCharMacro(buf, '\"');
1794         for (p = str; *p; p++)
1795         {
1796                 switch (*p)
1797                 {
1798                         case '\b':
1799                                 appendStringInfoString(buf, "\\b");
1800                                 break;
1801                         case '\f':
1802                                 appendStringInfoString(buf, "\\f");
1803                                 break;
1804                         case '\n':
1805                                 appendStringInfoString(buf, "\\n");
1806                                 break;
1807                         case '\r':
1808                                 appendStringInfoString(buf, "\\r");
1809                                 break;
1810                         case '\t':
1811                                 appendStringInfoString(buf, "\\t");
1812                                 break;
1813                         case '"':
1814                                 appendStringInfoString(buf, "\\\"");
1815                                 break;
1816                         case '\\':
1817                                 appendStringInfoString(buf, "\\\\");
1818                                 break;
1819                         default:
1820                                 if ((unsigned char) *p < ' ')
1821                                         appendStringInfo(buf, "\\u%04x", (int) *p);
1822                                 else
1823                                         appendStringInfoCharMacro(buf, *p);
1824                                 break;
1825                 }
1826         }
1827         appendStringInfoCharMacro(buf, '\"');
1828 }