]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/json.c
Split tuple struct defs from htup.h to htup_details.h
[postgresql] / src / backend / utils / adt / json.c
1 /*-------------------------------------------------------------------------
2  *
3  * json.c
4  *              JSON data type support.
5  *
6  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/json.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "access/htup_details.h"
17 #include "catalog/pg_type.h"
18 #include "executor/spi.h"
19 #include "lib/stringinfo.h"
20 #include "libpq/pqformat.h"
21 #include "mb/pg_wchar.h"
22 #include "parser/parse_coerce.h"
23 #include "utils/array.h"
24 #include "utils/builtins.h"
25 #include "utils/lsyscache.h"
26 #include "utils/json.h"
27 #include "utils/typcache.h"
28
/*
 * Kinds of JSON values that a lexed token can represent.
 */
typedef enum
{
	/* non-value tokens are reported as this */
	JSON_VALUE_INVALID = 0,
	JSON_VALUE_STRING,
	JSON_VALUE_NUMBER,
	JSON_VALUE_OBJECT,
	JSON_VALUE_ARRAY,
	JSON_VALUE_TRUE,
	JSON_VALUE_FALSE,
	JSON_VALUE_NULL
} JsonValueType;
40
41 typedef struct                                  /* state of JSON lexer */
42 {
43         char       *input;                      /* whole string being parsed */
44         char       *token_start;        /* start of current token within input */
45         char       *token_terminator; /* end of previous or current token */
46         JsonValueType token_type;       /* type of current token, once it's known */
47 } JsonLexContext;
48
/*
 * States of the JSON parser.  Each state names what was just seen and what
 * is expected next.
 */
typedef enum
{
	/* expecting a value */
	JSON_PARSE_VALUE = 0,
	/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_START,
	/* saw array element, expecting ',' or ']' */
	JSON_PARSE_ARRAY_NEXT,
	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_START,
	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_LABEL,
	/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_NEXT,
	/* saw object ',', expecting next label */
	JSON_PARSE_OBJECT_COMMA
} JsonParseState;
59
60 typedef struct JsonParseStack   /* the parser state has to be stackable */
61 {
62         JsonParseState state;
63         /* currently only need the state enum, but maybe someday more stuff */
64 } JsonParseStack;
65
/*
 * Operations the parser may request on its state stack after handling a
 * token.
 */
typedef enum
{
	/* no-op */
	JSON_STACKOP_NONE = 0,
	/* push new JSON_PARSE_VALUE stack item */
	JSON_STACKOP_PUSH,
	/* push, then rescan current token */
	JSON_STACKOP_PUSH_WITH_PUSHBACK,
	/* pop, or expect end of input if no stack */
	JSON_STACKOP_POP
} JsonStackOp;
73
74 static void json_validate_cstring(char *input);
75 static void json_lex(JsonLexContext *lex);
76 static void json_lex_string(JsonLexContext *lex);
77 static void json_lex_number(JsonLexContext *lex, char *s);
78 static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);
79 static void report_invalid_token(JsonLexContext *lex);
80 static int report_json_context(JsonLexContext *lex);
81 static char *extract_mb_char(char *s);
82 static void composite_to_json(Datum composite, StringInfo result,
83                                                           bool use_line_feeds);
84 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
85                                   Datum *vals, bool *nulls, int *valcount,
86                                   TYPCATEGORY tcategory, Oid typoutputfunc,
87                                   bool use_line_feeds);
88 static void array_to_json_internal(Datum array, StringInfo result,
89                                                                    bool use_line_feeds);
90
/*
 * Fake type category for JSON, so that it can be distinguished in
 * datum_to_json.
 */
#define TYPCATEGORY_JSON 'j'

/* Letters appearing in numeric output that aren't valid in a JSON number. */
#define NON_NUMERIC_LETTER "NnAaIiFfTtYy"

/*
 * Characters to consider as part of an alphanumeric token: ASCII letters
 * and digits, underscore, and any byte with the high bit set.  The argument
 * is evaluated more than once, so it must be free of side effects.
 */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
102
103
104 /*
105  * Input.
106  */
107 Datum
108 json_in(PG_FUNCTION_ARGS)
109 {
110         char       *text = PG_GETARG_CSTRING(0);
111
112         json_validate_cstring(text);
113
114         /* Internal representation is the same as text, for now */
115         PG_RETURN_TEXT_P(cstring_to_text(text));
116 }
117
118 /*
119  * Output.
120  */
121 Datum
122 json_out(PG_FUNCTION_ARGS)
123 {
124         /* we needn't detoast because text_to_cstring will handle that */
125         Datum           txt = PG_GETARG_DATUM(0);
126
127         PG_RETURN_CSTRING(TextDatumGetCString(txt));
128 }
129
130 /*
131  * Binary send.
132  */
133 Datum
134 json_send(PG_FUNCTION_ARGS)
135 {
136         text       *t = PG_GETARG_TEXT_PP(0);
137         StringInfoData buf;
138
139         pq_begintypsend(&buf);
140         pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
141         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
142 }
143
144 /*
145  * Binary receive.
146  */
147 Datum
148 json_recv(PG_FUNCTION_ARGS)
149 {
150         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
151         text       *result;
152         char       *str;
153         int                     nbytes;
154
155         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
156
157         /*
158          * We need a null-terminated string to pass to json_validate_cstring().
159          * Rather than make a separate copy, make the temporary result one byte
160          * bigger than it needs to be.
161          */
162         result = palloc(nbytes + 1 + VARHDRSZ);
163         SET_VARSIZE(result, nbytes + VARHDRSZ);
164         memcpy(VARDATA(result), str, nbytes);
165         str = VARDATA(result);
166         str[nbytes] = '\0';
167
168         /* Validate it. */
169         json_validate_cstring(str);
170
171         PG_RETURN_TEXT_P(result);
172 }
173
174 /*
175  * Check whether supplied input is valid JSON.
176  */
177 static void
178 json_validate_cstring(char *input)
179 {
180         JsonLexContext lex;
181         JsonParseStack *stack,
182                            *stacktop;
183         int                     stacksize;
184
185         /* Set up lexing context. */
186         lex.input = input;
187         lex.token_terminator = lex.input;
188
189         /* Set up parse stack. */
190         stacksize = 32;
191         stacktop = (JsonParseStack *) palloc(sizeof(JsonParseStack) * stacksize);
192         stack = stacktop;
193         stack->state = JSON_PARSE_VALUE;
194
195         /* Main parsing loop. */
196         for (;;)
197         {
198                 JsonStackOp op;
199
200                 /* Fetch next token. */
201                 json_lex(&lex);
202
203                 /* Check for unexpected end of input. */
204                 if (lex.token_start == NULL)
205                         report_parse_error(stack, &lex);
206
207 redo:
208                 /* Figure out what to do with this token. */
209                 op = JSON_STACKOP_NONE;
210                 switch (stack->state)
211                 {
212                         case JSON_PARSE_VALUE:
213                                 if (lex.token_type != JSON_VALUE_INVALID)
214                                         op = JSON_STACKOP_POP;
215                                 else if (lex.token_start[0] == '[')
216                                         stack->state = JSON_PARSE_ARRAY_START;
217                                 else if (lex.token_start[0] == '{')
218                                         stack->state = JSON_PARSE_OBJECT_START;
219                                 else
220                                         report_parse_error(stack, &lex);
221                                 break;
222                         case JSON_PARSE_ARRAY_START:
223                                 if (lex.token_type != JSON_VALUE_INVALID)
224                                         stack->state = JSON_PARSE_ARRAY_NEXT;
225                                 else if (lex.token_start[0] == ']')
226                                         op = JSON_STACKOP_POP;
227                                 else if (lex.token_start[0] == '[' ||
228                                                  lex.token_start[0] == '{')
229                                 {
230                                         stack->state = JSON_PARSE_ARRAY_NEXT;
231                                         op = JSON_STACKOP_PUSH_WITH_PUSHBACK;
232                                 }
233                                 else
234                                         report_parse_error(stack, &lex);
235                                 break;
236                         case JSON_PARSE_ARRAY_NEXT:
237                                 if (lex.token_type != JSON_VALUE_INVALID)
238                                         report_parse_error(stack, &lex);
239                                 else if (lex.token_start[0] == ']')
240                                         op = JSON_STACKOP_POP;
241                                 else if (lex.token_start[0] == ',')
242                                         op = JSON_STACKOP_PUSH;
243                                 else
244                                         report_parse_error(stack, &lex);
245                                 break;
246                         case JSON_PARSE_OBJECT_START:
247                                 if (lex.token_type == JSON_VALUE_STRING)
248                                         stack->state = JSON_PARSE_OBJECT_LABEL;
249                                 else if (lex.token_type == JSON_VALUE_INVALID &&
250                                                  lex.token_start[0] == '}')
251                                         op = JSON_STACKOP_POP;
252                                 else
253                                         report_parse_error(stack, &lex);
254                                 break;
255                         case JSON_PARSE_OBJECT_LABEL:
256                                 if (lex.token_type == JSON_VALUE_INVALID &&
257                                         lex.token_start[0] == ':')
258                                 {
259                                         stack->state = JSON_PARSE_OBJECT_NEXT;
260                                         op = JSON_STACKOP_PUSH;
261                                 }
262                                 else
263                                         report_parse_error(stack, &lex);
264                                 break;
265                         case JSON_PARSE_OBJECT_NEXT:
266                                 if (lex.token_type != JSON_VALUE_INVALID)
267                                         report_parse_error(stack, &lex);
268                                 else if (lex.token_start[0] == '}')
269                                         op = JSON_STACKOP_POP;
270                                 else if (lex.token_start[0] == ',')
271                                         stack->state = JSON_PARSE_OBJECT_COMMA;
272                                 else
273                                         report_parse_error(stack, &lex);
274                                 break;
275                         case JSON_PARSE_OBJECT_COMMA:
276                                 if (lex.token_type == JSON_VALUE_STRING)
277                                         stack->state = JSON_PARSE_OBJECT_LABEL;
278                                 else
279                                         report_parse_error(stack, &lex);
280                                 break;
281                         default:
282                                 elog(ERROR, "unexpected json parse state: %d",
283                                          (int) stack->state);
284                 }
285
286                 /* Push or pop the state stack, if needed. */
287                 switch (op)
288                 {
289                         case JSON_STACKOP_PUSH:
290                         case JSON_STACKOP_PUSH_WITH_PUSHBACK:
291                                 stack++;
292                                 if (stack >= &stacktop[stacksize])
293                                 {
294                                         /* Need to enlarge the stack. */
295                                         int                     stackoffset = stack - stacktop;
296
297                                         stacksize += 32;
298                                         stacktop = (JsonParseStack *)
299                                                 repalloc(stacktop,
300                                                                  sizeof(JsonParseStack) * stacksize);
301                                         stack = stacktop + stackoffset;
302                                 }
303                                 stack->state = JSON_PARSE_VALUE;
304                                 if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK)
305                                         goto redo;
306                                 break;
307                         case JSON_STACKOP_POP:
308                                 if (stack == stacktop)
309                                 {
310                                         /* Expect end of input. */
311                                         json_lex(&lex);
312                                         if (lex.token_start != NULL)
313                                                 report_parse_error(NULL, &lex);
314                                         return;
315                                 }
316                                 stack--;
317                                 break;
318                         case JSON_STACKOP_NONE:
319                                 /* nothing to do */
320                                 break;
321                 }
322         }
323 }
324
325 /*
326  * Lex one token from the input stream.
327  */
328 static void
329 json_lex(JsonLexContext *lex)
330 {
331         char       *s;
332
333         /* Skip leading whitespace. */
334         s = lex->token_terminator;
335         while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
336                 s++;
337         lex->token_start = s;
338
339         /* Determine token type. */
340         if (strchr("{}[],:", s[0]) != NULL)
341         {
342                 /* strchr() is willing to match a zero byte, so test for that. */
343                 if (s[0] == '\0')
344                 {
345                         /* End of string. */
346                         lex->token_start = NULL;
347                         lex->token_terminator = s;
348                 }
349                 else
350                 {
351                         /* Single-character token, some kind of punctuation mark. */
352                         lex->token_terminator = s + 1;
353                 }
354                 lex->token_type = JSON_VALUE_INVALID;
355         }
356         else if (*s == '"')
357         {
358                 /* String. */
359                 json_lex_string(lex);
360                 lex->token_type = JSON_VALUE_STRING;
361         }
362         else if (*s == '-')
363         {
364                 /* Negative number. */
365                 json_lex_number(lex, s + 1);
366                 lex->token_type = JSON_VALUE_NUMBER;
367         }
368         else if (*s >= '0' && *s <= '9')
369         {
370                 /* Positive number. */
371                 json_lex_number(lex, s);
372                 lex->token_type = JSON_VALUE_NUMBER;
373         }
374         else
375         {
376                 char       *p;
377
378                 /*
379                  * We're not dealing with a string, number, legal punctuation mark, or
380                  * end of string.  The only legal tokens we might find here are true,
381                  * false, and null, but for error reporting purposes we scan until we
382                  * see a non-alphanumeric character.  That way, we can report the
383                  * whole word as an unexpected token, rather than just some
384                  * unintuitive prefix thereof.
385                  */
386                 for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
387                         /* skip */ ;
388
389                 if (p == s)
390                 {
391                         /*
392                          * We got some sort of unexpected punctuation or an otherwise
393                          * unexpected character, so just complain about that one
394                          * character.  (It can't be multibyte because the above loop
395                          * will advance over any multibyte characters.)
396                          */
397                         lex->token_terminator = s + 1;
398                         report_invalid_token(lex);
399                 }
400
401                 /*
402                  * We've got a real alphanumeric token here.  If it happens to be
403                  * true, false, or null, all is well.  If not, error out.
404                  */
405                 lex->token_terminator = p;
406                 if (p - s == 4)
407                 {
408                         if (memcmp(s, "true", 4) == 0)
409                                 lex->token_type = JSON_VALUE_TRUE;
410                         else if (memcmp(s, "null", 4) == 0)
411                                 lex->token_type = JSON_VALUE_NULL;
412                         else
413                                 report_invalid_token(lex);
414                 }
415                 else if (p - s == 5 && memcmp(s, "false", 5) == 0)
416                         lex->token_type = JSON_VALUE_FALSE;
417                 else
418                         report_invalid_token(lex);
419         }
420 }
421
422 /*
423  * The next token in the input stream is known to be a string; lex it.
424  */
425 static void
426 json_lex_string(JsonLexContext *lex)
427 {
428         char       *s;
429
430         for (s = lex->token_start + 1; *s != '"'; s++)
431         {
432                 /* Per RFC4627, these characters MUST be escaped. */
433                 if ((unsigned char) *s < 32)
434                 {
435                         /* A NUL byte marks the (premature) end of the string. */
436                         if (*s == '\0')
437                         {
438                                 lex->token_terminator = s;
439                                 report_invalid_token(lex);
440                         }
441                         /* Since *s isn't printable, exclude it from the context string */
442                         lex->token_terminator = s;
443                         ereport(ERROR,
444                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
445                                          errmsg("invalid input syntax for type json"),
446                                          errdetail("Character with value 0x%02x must be escaped.",
447                                                            (unsigned char) *s),
448                                          report_json_context(lex)));
449                 }
450                 else if (*s == '\\')
451                 {
452                         /* OK, we have an escape character. */
453                         s++;
454                         if (*s == '\0')
455                         {
456                                 lex->token_terminator = s;
457                                 report_invalid_token(lex);
458                         }
459                         else if (*s == 'u')
460                         {
461                                 int                     i;
462                                 int                     ch = 0;
463
464                                 for (i = 1; i <= 4; i++)
465                                 {
466                                         s++;
467                                         if (*s == '\0')
468                                         {
469                                                 lex->token_terminator = s;
470                                                 report_invalid_token(lex);
471                                         }
472                                         else if (*s >= '0' && *s <= '9')
473                                                 ch = (ch * 16) + (*s - '0');
474                                         else if (*s >= 'a' && *s <= 'f')
475                                                 ch = (ch * 16) + (*s - 'a') + 10;
476                                         else if (*s >= 'A' && *s <= 'F')
477                                                 ch = (ch * 16) + (*s - 'A') + 10;
478                                         else
479                                         {
480                                                 lex->token_terminator = s + pg_mblen(s);
481                                                 ereport(ERROR,
482                                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
483                                                                  errmsg("invalid input syntax for type json"),
484                                                                  errdetail("\"\\u\" must be followed by four hexadecimal digits."),
485                                                                  report_json_context(lex)));
486                                         }
487                                 }
488                         }
489                         else if (strchr("\"\\/bfnrt", *s) == NULL)
490                         {
491                                 /* Not a valid string escape, so error out. */
492                                 lex->token_terminator = s + pg_mblen(s);
493                                 ereport(ERROR,
494                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
495                                                  errmsg("invalid input syntax for type json"),
496                                                  errdetail("Escape sequence \"\\%s\" is invalid.",
497                                                                    extract_mb_char(s)),
498                                                  report_json_context(lex)));
499                         }
500                 }
501         }
502
503         /* Hooray, we found the end of the string! */
504         lex->token_terminator = s + 1;
505 }
506
507 /*-------------------------------------------------------------------------
508  * The next token in the input stream is known to be a number; lex it.
509  *
510  * In JSON, a number consists of four parts:
511  *
512  * (1) An optional minus sign ('-').
513  *
514  * (2) Either a single '0', or a string of one or more digits that does not
515  *         begin with a '0'.
516  *
517  * (3) An optional decimal part, consisting of a period ('.') followed by
518  *         one or more digits.  (Note: While this part can be omitted
519  *         completely, it's not OK to have only the decimal point without
520  *         any digits afterwards.)
521  *
522  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
523  *         followed by '+' or '-', followed by one or more digits.      (Note:
524  *         As with the decimal part, if 'e' or 'E' is present, it must be
525  *         followed by at least one digit.)
526  *
527  * The 's' argument to this function points to the ostensible beginning
528  * of part 2 - i.e. the character after any optional minus sign, and the
529  * first character of the string if there is none.
530  *
531  *-------------------------------------------------------------------------
532  */
533 static void
534 json_lex_number(JsonLexContext *lex, char *s)
535 {
536         bool            error = false;
537         char       *p;
538
539         /* Part (1): leading sign indicator. */
540         /* Caller already did this for us; so do nothing. */
541
542         /* Part (2): parse main digit string. */
543         if (*s == '0')
544                 s++;
545         else if (*s >= '1' && *s <= '9')
546         {
547                 do
548                 {
549                         s++;
550                 } while (*s >= '0' && *s <= '9');
551         }
552         else
553                 error = true;
554
555         /* Part (3): parse optional decimal portion. */
556         if (*s == '.')
557         {
558                 s++;
559                 if (*s < '0' || *s > '9')
560                         error = true;
561                 else
562                 {
563                         do
564                         {
565                                 s++;
566                         } while (*s >= '0' && *s <= '9');
567                 }
568         }
569
570         /* Part (4): parse optional exponent. */
571         if (*s == 'e' || *s == 'E')
572         {
573                 s++;
574                 if (*s == '+' || *s == '-')
575                         s++;
576                 if (*s < '0' || *s > '9')
577                         error = true;
578                 else
579                 {
580                         do
581                         {
582                                 s++;
583                         } while (*s >= '0' && *s <= '9');
584                 }
585         }
586
587         /*
588          * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
589          * here should be considered part of the token for error-reporting
590          * purposes.
591          */
592         for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
593                 error = true;
594         lex->token_terminator = p;
595         if (error)
596                 report_invalid_token(lex);
597 }
598
599 /*
600  * Report a parse error.
601  *
602  * lex->token_start and lex->token_terminator must identify the current token.
603  */
604 static void
605 report_parse_error(JsonParseStack *stack, JsonLexContext *lex)
606 {
607         char       *token;
608         int                     toklen;
609
610         /* Handle case where the input ended prematurely. */
611         if (lex->token_start == NULL)
612                 ereport(ERROR,
613                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
614                                  errmsg("invalid input syntax for type json"),
615                                  errdetail("The input string ended unexpectedly."),
616                                  report_json_context(lex)));
617
618         /* Separate out the current token. */
619         toklen = lex->token_terminator - lex->token_start;
620         token = palloc(toklen + 1);
621         memcpy(token, lex->token_start, toklen);
622         token[toklen] = '\0';
623
624         /* Complain, with the appropriate detail message. */
625         if (stack == NULL)
626                 ereport(ERROR,
627                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
628                                  errmsg("invalid input syntax for type json"),
629                                  errdetail("Expected end of input, but found \"%s\".",
630                                                    token),
631                                  report_json_context(lex)));
632         else
633         {
634                 switch (stack->state)
635                 {
636                         case JSON_PARSE_VALUE:
637                                 ereport(ERROR,
638                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
639                                                  errmsg("invalid input syntax for type json"),
640                                                  errdetail("Expected JSON value, but found \"%s\".",
641                                                                    token),
642                                                  report_json_context(lex)));
643                                 break;
644                         case JSON_PARSE_ARRAY_START:
645                                 ereport(ERROR,
646                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
647                                                  errmsg("invalid input syntax for type json"),
648                                                  errdetail("Expected array element or \"]\", but found \"%s\".",
649                                                                    token),
650                                                  report_json_context(lex)));
651                                 break;
652                         case JSON_PARSE_ARRAY_NEXT:
653                                 ereport(ERROR,
654                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
655                                                  errmsg("invalid input syntax for type json"),
656                                                  errdetail("Expected \",\" or \"]\", but found \"%s\".",
657                                                                    token),
658                                                  report_json_context(lex)));
659                                 break;
660                         case JSON_PARSE_OBJECT_START:
661                                 ereport(ERROR,
662                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
663                                                  errmsg("invalid input syntax for type json"),
664                                                  errdetail("Expected string or \"}\", but found \"%s\".",
665                                                                    token),
666                                                  report_json_context(lex)));
667                                 break;
668                         case JSON_PARSE_OBJECT_LABEL:
669                                 ereport(ERROR,
670                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
671                                                  errmsg("invalid input syntax for type json"),
672                                                  errdetail("Expected \":\", but found \"%s\".",
673                                                                    token),
674                                                  report_json_context(lex)));
675                                 break;
676                         case JSON_PARSE_OBJECT_NEXT:
677                                 ereport(ERROR,
678                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
679                                                  errmsg("invalid input syntax for type json"),
680                                                  errdetail("Expected \",\" or \"}\", but found \"%s\".",
681                                                                    token),
682                                                  report_json_context(lex)));
683                                 break;
684                         case JSON_PARSE_OBJECT_COMMA:
685                                 ereport(ERROR,
686                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
687                                                  errmsg("invalid input syntax for type json"),
688                                                  errdetail("Expected string, but found \"%s\".",
689                                                                    token),
690                                                  report_json_context(lex)));
691                                 break;
692                         default:
693                                 elog(ERROR, "unexpected json parse state: %d",
694                                          (int) stack->state);
695                 }
696         }
697 }
698
699 /*
700  * Report an invalid input token.
701  *
702  * lex->token_start and lex->token_terminator must identify the token.
703  */
704 static void
705 report_invalid_token(JsonLexContext *lex)
706 {
707         char       *token;
708         int                     toklen;
709
710         /* Separate out the offending token. */
711         toklen = lex->token_terminator - lex->token_start;
712         token = palloc(toklen + 1);
713         memcpy(token, lex->token_start, toklen);
714         token[toklen] = '\0';
715
716         ereport(ERROR,
717                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
718                          errmsg("invalid input syntax for type json"),
719                          errdetail("Token \"%s\" is invalid.", token),
720                          report_json_context(lex)));
721 }
722
723 /*
724  * Report a CONTEXT line for bogus JSON input.
725  *
726  * lex->token_terminator must be set to identify the spot where we detected
727  * the error.  Note that lex->token_start might be NULL, in case we recognized
728  * error at EOF.
729  *
730  * The return value isn't meaningful, but we make it non-void so that this
731  * can be invoked inside ereport().
732  */
733 static int
734 report_json_context(JsonLexContext *lex)
735 {
736         const char *context_start;
737         const char *context_end;
738         const char *line_start;
739         int                     line_number;
740         char       *ctxt;
741         int                     ctxtlen;
742         const char *prefix;
743         const char *suffix;
744
745         /* Choose boundaries for the part of the input we will display */
746         context_start = lex->input;
747         context_end = lex->token_terminator;
748         line_start = context_start;
749         line_number = 1;
750         for (;;)
751         {
752                 /* Always advance over newlines (context_end test is just paranoia) */
753                 if (*context_start == '\n' && context_start < context_end)
754                 {
755                         context_start++;
756                         line_start = context_start;
757                         line_number++;
758                         continue;
759                 }
760                 /* Otherwise, done as soon as we are close enough to context_end */
761                 if (context_end - context_start < 50)
762                         break;
763                 /* Advance to next multibyte character */
764                 if (IS_HIGHBIT_SET(*context_start))
765                         context_start += pg_mblen(context_start);
766                 else
767                         context_start++;
768         }
769
770         /*
771          * We add "..." to indicate that the excerpt doesn't start at the
772          * beginning of the line ... but if we're within 3 characters of the
773          * beginning of the line, we might as well just show the whole line.
774          */
775         if (context_start - line_start <= 3)
776                 context_start = line_start;
777
778         /* Get a null-terminated copy of the data to present */
779         ctxtlen = context_end - context_start;
780         ctxt = palloc(ctxtlen + 1);
781         memcpy(ctxt, context_start, ctxtlen);
782         ctxt[ctxtlen] = '\0';
783
784         /*
785          * Show the context, prefixing "..." if not starting at start of line, and
786          * suffixing "..." if not ending at end of line.
787          */
788         prefix = (context_start > line_start) ? "..." : "";
789         suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." : "";
790
791         return errcontext("JSON data, line %d: %s%s%s",
792                                           line_number, prefix, ctxt, suffix);
793 }
794
/*
 * Return a freshly palloc'd, NUL-terminated copy of the first character of
 * "s".  The character's byte length is whatever pg_mblen() reports, so it
 * may be more than one byte.
 */
static char *
extract_mb_char(char *s)
{
	int			nbytes = pg_mblen(s);
	char	   *copy = palloc(nbytes + 1);

	memcpy(copy, s, nbytes);
	copy[nbytes] = '\0';

	return copy;
}
811
812 /*
813  * Turn a scalar Datum into JSON, appending the string to "result".
814  *
815  * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
816  * as appropriate.
817  */
818 static void
819 datum_to_json(Datum val, bool is_null, StringInfo result,
820                           TYPCATEGORY tcategory, Oid typoutputfunc)
821 {
822         char       *outputstr;
823
824         if (is_null)
825         {
826                 appendStringInfoString(result, "null");
827                 return;
828         }
829
830         switch (tcategory)
831         {
832                 case TYPCATEGORY_ARRAY:
833                         array_to_json_internal(val, result, false);
834                         break;
835                 case TYPCATEGORY_COMPOSITE:
836                         composite_to_json(val, result, false);
837                         break;
838                 case TYPCATEGORY_BOOLEAN:
839                         if (DatumGetBool(val))
840                                 appendStringInfoString(result, "true");
841                         else
842                                 appendStringInfoString(result, "false");
843                         break;
844                 case TYPCATEGORY_NUMERIC:
845                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
846
847                         /*
848                          * Don't call escape_json here if it's a valid JSON number.
849                          * Numeric output should usually be a valid JSON number and JSON
850                          * numbers shouldn't be quoted. Quote cases like "Nan" and
851                          * "Infinity", however.
852                          */
853                         if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
854                                 appendStringInfoString(result, outputstr);
855                         else
856                                 escape_json(result, outputstr);
857                         pfree(outputstr);
858                         break;
859                 case TYPCATEGORY_JSON:
860                         /* JSON will already be escaped */
861                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
862                         appendStringInfoString(result, outputstr);
863                         pfree(outputstr);
864                         break;
865                 default:
866                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
867                         escape_json(result, outputstr);
868                         pfree(outputstr);
869                         break;
870         }
871 }
872
873 /*
874  * Process a single dimension of an array.
875  * If it's the innermost dimension, output the values, otherwise call
876  * ourselves recursively to process the next dimension.
877  */
878 static void
879 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
880                                   bool *nulls, int *valcount, TYPCATEGORY tcategory,
881                                   Oid typoutputfunc, bool use_line_feeds)
882 {
883         int                     i;
884         const char *sep;
885
886         Assert(dim < ndims);
887
888         sep = use_line_feeds ? ",\n " : ",";
889
890         appendStringInfoChar(result, '[');
891
892         for (i = 1; i <= dims[dim]; i++)
893         {
894                 if (i > 1)
895                         appendStringInfoString(result, sep);
896
897                 if (dim + 1 == ndims)
898                 {
899                         datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
900                                                   typoutputfunc);
901                         (*valcount)++;
902                 }
903                 else
904                 {
905                         /*
906                          * Do we want line feeds on inner dimensions of arrays? For now
907                          * we'll say no.
908                          */
909                         array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
910                                                           valcount, tcategory, typoutputfunc, false);
911                 }
912         }
913
914         appendStringInfoChar(result, ']');
915 }
916
917 /*
918  * Turn an array into JSON.
919  */
920 static void
921 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
922 {
923         ArrayType  *v = DatumGetArrayTypeP(array);
924         Oid                     element_type = ARR_ELEMTYPE(v);
925         int                *dim;
926         int                     ndim;
927         int                     nitems;
928         int                     count = 0;
929         Datum      *elements;
930         bool       *nulls;
931         int16           typlen;
932         bool            typbyval;
933         char            typalign,
934                                 typdelim;
935         Oid                     typioparam;
936         Oid                     typoutputfunc;
937         TYPCATEGORY tcategory;
938
939         ndim = ARR_NDIM(v);
940         dim = ARR_DIMS(v);
941         nitems = ArrayGetNItems(ndim, dim);
942
943         if (nitems <= 0)
944         {
945                 appendStringInfoString(result, "[]");
946                 return;
947         }
948
949         get_type_io_data(element_type, IOFunc_output,
950                                          &typlen, &typbyval, &typalign,
951                                          &typdelim, &typioparam, &typoutputfunc);
952
953         deconstruct_array(v, element_type, typlen, typbyval,
954                                           typalign, &elements, &nulls,
955                                           &nitems);
956
957         if (element_type == RECORDOID)
958                 tcategory = TYPCATEGORY_COMPOSITE;
959         else if (element_type == JSONOID)
960                 tcategory = TYPCATEGORY_JSON;
961         else
962                 tcategory = TypeCategory(element_type);
963
964         array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
965                                           typoutputfunc, use_line_feeds);
966
967         pfree(elements);
968         pfree(nulls);
969 }
970
971 /*
972  * Turn a composite / record into JSON.
973  */
974 static void
975 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
976 {
977         HeapTupleHeader td;
978         Oid                     tupType;
979         int32           tupTypmod;
980         TupleDesc       tupdesc;
981         HeapTupleData tmptup,
982                            *tuple;
983         int                     i;
984         bool            needsep = false;
985         const char *sep;
986
987         sep = use_line_feeds ? ",\n " : ",";
988
989         td = DatumGetHeapTupleHeader(composite);
990
991         /* Extract rowtype info and find a tupdesc */
992         tupType = HeapTupleHeaderGetTypeId(td);
993         tupTypmod = HeapTupleHeaderGetTypMod(td);
994         tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
995
996         /* Build a temporary HeapTuple control structure */
997         tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
998         tmptup.t_data = td;
999         tuple = &tmptup;
1000
1001         appendStringInfoChar(result, '{');
1002
1003         for (i = 0; i < tupdesc->natts; i++)
1004         {
1005                 Datum           val,
1006                                         origval;
1007                 bool            isnull;
1008                 char       *attname;
1009                 TYPCATEGORY tcategory;
1010                 Oid                     typoutput;
1011                 bool            typisvarlena;
1012
1013                 if (tupdesc->attrs[i]->attisdropped)
1014                         continue;
1015
1016                 if (needsep)
1017                         appendStringInfoString(result, sep);
1018                 needsep = true;
1019
1020                 attname = NameStr(tupdesc->attrs[i]->attname);
1021                 escape_json(result, attname);
1022                 appendStringInfoChar(result, ':');
1023
1024                 origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
1025
1026                 if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
1027                         tcategory = TYPCATEGORY_ARRAY;
1028                 else if (tupdesc->attrs[i]->atttypid == RECORDOID)
1029                         tcategory = TYPCATEGORY_COMPOSITE;
1030                 else if (tupdesc->attrs[i]->atttypid == JSONOID)
1031                         tcategory = TYPCATEGORY_JSON;
1032                 else
1033                         tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
1034
1035                 getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
1036                                                   &typoutput, &typisvarlena);
1037
1038                 /*
1039                  * If we have a toasted datum, forcibly detoast it here to avoid
1040                  * memory leakage inside the type's output routine.
1041                  */
1042                 if (typisvarlena && !isnull)
1043                         val = PointerGetDatum(PG_DETOAST_DATUM(origval));
1044                 else
1045                         val = origval;
1046
1047                 datum_to_json(val, isnull, result, tcategory, typoutput);
1048
1049                 /* Clean up detoasted copy, if any */
1050                 if (val != origval)
1051                         pfree(DatumGetPointer(val));
1052         }
1053
1054         appendStringInfoChar(result, '}');
1055         ReleaseTupleDesc(tupdesc);
1056 }
1057
1058 /*
1059  * SQL function array_to_json(row)
1060  */
1061 extern Datum
1062 array_to_json(PG_FUNCTION_ARGS)
1063 {
1064         Datum           array = PG_GETARG_DATUM(0);
1065         StringInfo      result;
1066
1067         result = makeStringInfo();
1068
1069         array_to_json_internal(array, result, false);
1070
1071         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1072 }
1073
1074 /*
1075  * SQL function array_to_json(row, prettybool)
1076  */
1077 extern Datum
1078 array_to_json_pretty(PG_FUNCTION_ARGS)
1079 {
1080         Datum           array = PG_GETARG_DATUM(0);
1081         bool            use_line_feeds = PG_GETARG_BOOL(1);
1082         StringInfo      result;
1083
1084         result = makeStringInfo();
1085
1086         array_to_json_internal(array, result, use_line_feeds);
1087
1088         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1089 }
1090
1091 /*
1092  * SQL function row_to_json(row)
1093  */
1094 extern Datum
1095 row_to_json(PG_FUNCTION_ARGS)
1096 {
1097         Datum           array = PG_GETARG_DATUM(0);
1098         StringInfo      result;
1099
1100         result = makeStringInfo();
1101
1102         composite_to_json(array, result, false);
1103
1104         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1105 }
1106
1107 /*
1108  * SQL function row_to_json(row, prettybool)
1109  */
1110 extern Datum
1111 row_to_json_pretty(PG_FUNCTION_ARGS)
1112 {
1113         Datum           array = PG_GETARG_DATUM(0);
1114         bool            use_line_feeds = PG_GETARG_BOOL(1);
1115         StringInfo      result;
1116
1117         result = makeStringInfo();
1118
1119         composite_to_json(array, result, use_line_feeds);
1120
1121         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1122 }
1123
1124 /*
1125  * Produce a JSON string literal, properly escaping characters in the text.
1126  */
1127 void
1128 escape_json(StringInfo buf, const char *str)
1129 {
1130         const char *p;
1131
1132         appendStringInfoCharMacro(buf, '\"');
1133         for (p = str; *p; p++)
1134         {
1135                 switch (*p)
1136                 {
1137                         case '\b':
1138                                 appendStringInfoString(buf, "\\b");
1139                                 break;
1140                         case '\f':
1141                                 appendStringInfoString(buf, "\\f");
1142                                 break;
1143                         case '\n':
1144                                 appendStringInfoString(buf, "\\n");
1145                                 break;
1146                         case '\r':
1147                                 appendStringInfoString(buf, "\\r");
1148                                 break;
1149                         case '\t':
1150                                 appendStringInfoString(buf, "\\t");
1151                                 break;
1152                         case '"':
1153                                 appendStringInfoString(buf, "\\\"");
1154                                 break;
1155                         case '\\':
1156                                 appendStringInfoString(buf, "\\\\");
1157                                 break;
1158                         default:
1159                                 if ((unsigned char) *p < ' ')
1160                                         appendStringInfo(buf, "\\u%04x", (int) *p);
1161                                 else
1162                                         appendStringInfoCharMacro(buf, *p);
1163                                 break;
1164                 }
1165         }
1166         appendStringInfoCharMacro(buf, '\"');
1167 }