]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/json.c
Remove inappropriate semicolons after function definitions.
[postgresql] / src / backend / utils / adt / json.c
1 /*-------------------------------------------------------------------------
2  *
3  * json.c
4  *              JSON data type support.
5  *
6  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/json.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "catalog/pg_type.h"
17 #include "executor/spi.h"
18 #include "lib/stringinfo.h"
19 #include "libpq/pqformat.h"
20 #include "mb/pg_wchar.h"
21 #include "parser/parse_coerce.h"
22 #include "utils/array.h"
23 #include "utils/builtins.h"
24 #include "utils/lsyscache.h"
25 #include "utils/json.h"
26 #include "utils/typcache.h"
27
/*
 * Classification of the token most recently produced by json_lex().
 *
 * json_lex() stores one of these in JsonLexContext.token_type.  Punctuation
 * tokens and end-of-input are reported as JSON_VALUE_INVALID; strings,
 * numbers and the keywords true/false/null get their own codes.
 * NOTE(review): JSON_VALUE_OBJECT and JSON_VALUE_ARRAY are not assigned by
 * the lexer code visible here -- confirm where (or whether) they are used.
 */
typedef enum					/* types of JSON values */
{
	JSON_VALUE_INVALID,			/* non-value tokens are reported as this */
	JSON_VALUE_STRING,			/* double-quoted string */
	JSON_VALUE_NUMBER,			/* number, with or without leading '-' */
	JSON_VALUE_OBJECT,
	JSON_VALUE_ARRAY,
	JSON_VALUE_TRUE,			/* the keyword "true" */
	JSON_VALUE_FALSE,			/* the keyword "false" */
	JSON_VALUE_NULL				/* the keyword "null" */
} JsonValueType;
39
/*
 * Lexer state, shared between json_lex() and its callers.
 *
 * json_lex() resumes scanning at token_terminator, so a caller only has to
 * point token_terminator at the start of the input before the first call.
 * At end of input json_lex() sets token_start to NULL.
 */
typedef struct					/* state of JSON lexer */
{
	char	   *input;			/* whole string being parsed */
	char	   *token_start;	/* start of current token within input */
	char	   *token_terminator; /* end of previous or current token */
	JsonValueType token_type;	/* type of current token, once it's known */
} JsonLexContext;
47
/*
 * States of the JSON validator's state machine.
 *
 * Each entry on the parse stack holds one of these; the state describes
 * what has just been seen and therefore which tokens are acceptable next.
 */
typedef enum					/* states of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA		/* saw object ',', expecting next label */
} JsonParseState;
58
/*
 * One entry of the validator's explicit state stack.  json_validate_cstring()
 * keeps an array of these and pushes one entry per nested value.
 */
typedef struct JsonParseStack	/* the parser state has to be stackable */
{
	JsonParseState state;
	/* currently only need the state enum, but maybe someday more stuff */
} JsonParseStack;
64
/*
 * Action to apply to the state stack after a token has been examined.
 * The validator first decides on one of these from the current state and
 * token, and then carries it out in a separate step.
 */
typedef enum					/* required operations on state stack */
{
	JSON_STACKOP_NONE,			/* no-op */
	JSON_STACKOP_PUSH,			/* push new JSON_PARSE_VALUE stack item */
	JSON_STACKOP_PUSH_WITH_PUSHBACK, /* push, then rescan current token */
	JSON_STACKOP_POP			/* pop, or expect end of input if no stack */
} JsonStackOp;
72
/* Validation: parser entry point and the lexer routines it drives. */
static void json_validate_cstring(char *input);
static void json_lex(JsonLexContext *lex);
static void json_lex_string(JsonLexContext *lex);
static void json_lex_number(JsonLexContext *lex, char *s);
/* Error reporting for malformed input; these raise ERROR via ereport(). */
static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);
static void report_invalid_token(JsonLexContext *lex);
/* Returns int only so that it can be called from inside ereport(). */
static int report_json_context(JsonLexContext *lex);
/* NOTE(review): defined outside the portion reviewed; used with "%s" in errdetail. */
static char *extract_mb_char(char *s);
/*
 * Conversion of SQL values to JSON text.
 * NOTE(review): the definitions are outside the portion reviewed, so their
 * exact contracts are not described here.
 */
static void composite_to_json(Datum composite, StringInfo result,
							  bool use_line_feeds);
static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
				  Datum *vals, bool *nulls, int *valcount,
				  TYPCATEGORY tcategory, Oid typoutputfunc,
				  bool use_line_feeds);
static void array_to_json_internal(Datum array, StringInfo result,
								   bool use_line_feeds);
89
/* fake type category for JSON so we can distinguish it in datum_to_json */
#define TYPCATEGORY_JSON 'j'
/*
 * letters appearing in numeric output that aren't valid in a JSON number
 * (presumably the letters of "NaN" and "Infinity", in either case)
 */
#define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
/*
 * chars to consider as part of an alphanumeric token
 *
 * Any byte with the high bit set is accepted too, so that the bytes of a
 * multibyte character are swallowed into the token as a unit.  Note that
 * the argument is evaluated several times: do not pass an expression with
 * side effects.
 */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
101
102
103 /*
104  * Input.
105  */
106 Datum
107 json_in(PG_FUNCTION_ARGS)
108 {
109         char       *text = PG_GETARG_CSTRING(0);
110
111         json_validate_cstring(text);
112
113         /* Internal representation is the same as text, for now */
114         PG_RETURN_TEXT_P(cstring_to_text(text));
115 }
116
117 /*
118  * Output.
119  */
120 Datum
121 json_out(PG_FUNCTION_ARGS)
122 {
123         /* we needn't detoast because text_to_cstring will handle that */
124         Datum           txt = PG_GETARG_DATUM(0);
125
126         PG_RETURN_CSTRING(TextDatumGetCString(txt));
127 }
128
129 /*
130  * Binary send.
131  */
132 Datum
133 json_send(PG_FUNCTION_ARGS)
134 {
135         text       *t = PG_GETARG_TEXT_PP(0);
136         StringInfoData buf;
137
138         pq_begintypsend(&buf);
139         pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
140         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
141 }
142
143 /*
144  * Binary receive.
145  */
146 Datum
147 json_recv(PG_FUNCTION_ARGS)
148 {
149         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
150         text       *result;
151         char       *str;
152         int                     nbytes;
153
154         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
155
156         /*
157          * We need a null-terminated string to pass to json_validate_cstring().
158          * Rather than make a separate copy, make the temporary result one byte
159          * bigger than it needs to be.
160          */
161         result = palloc(nbytes + 1 + VARHDRSZ);
162         SET_VARSIZE(result, nbytes + VARHDRSZ);
163         memcpy(VARDATA(result), str, nbytes);
164         str = VARDATA(result);
165         str[nbytes] = '\0';
166
167         /* Validate it. */
168         json_validate_cstring(str);
169
170         PG_RETURN_TEXT_P(result);
171 }
172
173 /*
174  * Check whether supplied input is valid JSON.
175  */
176 static void
177 json_validate_cstring(char *input)
178 {
179         JsonLexContext lex;
180         JsonParseStack *stack,
181                            *stacktop;
182         int                     stacksize;
183
184         /* Set up lexing context. */
185         lex.input = input;
186         lex.token_terminator = lex.input;
187
188         /* Set up parse stack. */
189         stacksize = 32;
190         stacktop = (JsonParseStack *) palloc(sizeof(JsonParseStack) * stacksize);
191         stack = stacktop;
192         stack->state = JSON_PARSE_VALUE;
193
194         /* Main parsing loop. */
195         for (;;)
196         {
197                 JsonStackOp op;
198
199                 /* Fetch next token. */
200                 json_lex(&lex);
201
202                 /* Check for unexpected end of input. */
203                 if (lex.token_start == NULL)
204                         report_parse_error(stack, &lex);
205
206 redo:
207                 /* Figure out what to do with this token. */
208                 op = JSON_STACKOP_NONE;
209                 switch (stack->state)
210                 {
211                         case JSON_PARSE_VALUE:
212                                 if (lex.token_type != JSON_VALUE_INVALID)
213                                         op = JSON_STACKOP_POP;
214                                 else if (lex.token_start[0] == '[')
215                                         stack->state = JSON_PARSE_ARRAY_START;
216                                 else if (lex.token_start[0] == '{')
217                                         stack->state = JSON_PARSE_OBJECT_START;
218                                 else
219                                         report_parse_error(stack, &lex);
220                                 break;
221                         case JSON_PARSE_ARRAY_START:
222                                 if (lex.token_type != JSON_VALUE_INVALID)
223                                         stack->state = JSON_PARSE_ARRAY_NEXT;
224                                 else if (lex.token_start[0] == ']')
225                                         op = JSON_STACKOP_POP;
226                                 else if (lex.token_start[0] == '[' ||
227                                                  lex.token_start[0] == '{')
228                                 {
229                                         stack->state = JSON_PARSE_ARRAY_NEXT;
230                                         op = JSON_STACKOP_PUSH_WITH_PUSHBACK;
231                                 }
232                                 else
233                                         report_parse_error(stack, &lex);
234                                 break;
235                         case JSON_PARSE_ARRAY_NEXT:
236                                 if (lex.token_type != JSON_VALUE_INVALID)
237                                         report_parse_error(stack, &lex);
238                                 else if (lex.token_start[0] == ']')
239                                         op = JSON_STACKOP_POP;
240                                 else if (lex.token_start[0] == ',')
241                                         op = JSON_STACKOP_PUSH;
242                                 else
243                                         report_parse_error(stack, &lex);
244                                 break;
245                         case JSON_PARSE_OBJECT_START:
246                                 if (lex.token_type == JSON_VALUE_STRING)
247                                         stack->state = JSON_PARSE_OBJECT_LABEL;
248                                 else if (lex.token_type == JSON_VALUE_INVALID &&
249                                                  lex.token_start[0] == '}')
250                                         op = JSON_STACKOP_POP;
251                                 else
252                                         report_parse_error(stack, &lex);
253                                 break;
254                         case JSON_PARSE_OBJECT_LABEL:
255                                 if (lex.token_type == JSON_VALUE_INVALID &&
256                                         lex.token_start[0] == ':')
257                                 {
258                                         stack->state = JSON_PARSE_OBJECT_NEXT;
259                                         op = JSON_STACKOP_PUSH;
260                                 }
261                                 else
262                                         report_parse_error(stack, &lex);
263                                 break;
264                         case JSON_PARSE_OBJECT_NEXT:
265                                 if (lex.token_type != JSON_VALUE_INVALID)
266                                         report_parse_error(stack, &lex);
267                                 else if (lex.token_start[0] == '}')
268                                         op = JSON_STACKOP_POP;
269                                 else if (lex.token_start[0] == ',')
270                                         stack->state = JSON_PARSE_OBJECT_COMMA;
271                                 else
272                                         report_parse_error(stack, &lex);
273                                 break;
274                         case JSON_PARSE_OBJECT_COMMA:
275                                 if (lex.token_type == JSON_VALUE_STRING)
276                                         stack->state = JSON_PARSE_OBJECT_LABEL;
277                                 else
278                                         report_parse_error(stack, &lex);
279                                 break;
280                         default:
281                                 elog(ERROR, "unexpected json parse state: %d",
282                                          (int) stack->state);
283                 }
284
285                 /* Push or pop the state stack, if needed. */
286                 switch (op)
287                 {
288                         case JSON_STACKOP_PUSH:
289                         case JSON_STACKOP_PUSH_WITH_PUSHBACK:
290                                 stack++;
291                                 if (stack >= &stacktop[stacksize])
292                                 {
293                                         /* Need to enlarge the stack. */
294                                         int                     stackoffset = stack - stacktop;
295
296                                         stacksize += 32;
297                                         stacktop = (JsonParseStack *)
298                                                 repalloc(stacktop,
299                                                                  sizeof(JsonParseStack) * stacksize);
300                                         stack = stacktop + stackoffset;
301                                 }
302                                 stack->state = JSON_PARSE_VALUE;
303                                 if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK)
304                                         goto redo;
305                                 break;
306                         case JSON_STACKOP_POP:
307                                 if (stack == stacktop)
308                                 {
309                                         /* Expect end of input. */
310                                         json_lex(&lex);
311                                         if (lex.token_start != NULL)
312                                                 report_parse_error(NULL, &lex);
313                                         return;
314                                 }
315                                 stack--;
316                                 break;
317                         case JSON_STACKOP_NONE:
318                                 /* nothing to do */
319                                 break;
320                 }
321         }
322 }
323
324 /*
325  * Lex one token from the input stream.
326  */
327 static void
328 json_lex(JsonLexContext *lex)
329 {
330         char       *s;
331
332         /* Skip leading whitespace. */
333         s = lex->token_terminator;
334         while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
335                 s++;
336         lex->token_start = s;
337
338         /* Determine token type. */
339         if (strchr("{}[],:", s[0]) != NULL)
340         {
341                 /* strchr() is willing to match a zero byte, so test for that. */
342                 if (s[0] == '\0')
343                 {
344                         /* End of string. */
345                         lex->token_start = NULL;
346                         lex->token_terminator = s;
347                 }
348                 else
349                 {
350                         /* Single-character token, some kind of punctuation mark. */
351                         lex->token_terminator = s + 1;
352                 }
353                 lex->token_type = JSON_VALUE_INVALID;
354         }
355         else if (*s == '"')
356         {
357                 /* String. */
358                 json_lex_string(lex);
359                 lex->token_type = JSON_VALUE_STRING;
360         }
361         else if (*s == '-')
362         {
363                 /* Negative number. */
364                 json_lex_number(lex, s + 1);
365                 lex->token_type = JSON_VALUE_NUMBER;
366         }
367         else if (*s >= '0' && *s <= '9')
368         {
369                 /* Positive number. */
370                 json_lex_number(lex, s);
371                 lex->token_type = JSON_VALUE_NUMBER;
372         }
373         else
374         {
375                 char       *p;
376
377                 /*
378                  * We're not dealing with a string, number, legal punctuation mark, or
379                  * end of string.  The only legal tokens we might find here are true,
380                  * false, and null, but for error reporting purposes we scan until we
381                  * see a non-alphanumeric character.  That way, we can report the
382                  * whole word as an unexpected token, rather than just some
383                  * unintuitive prefix thereof.
384                  */
385                 for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
386                         /* skip */ ;
387
388                 if (p == s)
389                 {
390                         /*
391                          * We got some sort of unexpected punctuation or an otherwise
392                          * unexpected character, so just complain about that one
393                          * character.  (It can't be multibyte because the above loop
394                          * will advance over any multibyte characters.)
395                          */
396                         lex->token_terminator = s + 1;
397                         report_invalid_token(lex);
398                 }
399
400                 /*
401                  * We've got a real alphanumeric token here.  If it happens to be
402                  * true, false, or null, all is well.  If not, error out.
403                  */
404                 lex->token_terminator = p;
405                 if (p - s == 4)
406                 {
407                         if (memcmp(s, "true", 4) == 0)
408                                 lex->token_type = JSON_VALUE_TRUE;
409                         else if (memcmp(s, "null", 4) == 0)
410                                 lex->token_type = JSON_VALUE_NULL;
411                         else
412                                 report_invalid_token(lex);
413                 }
414                 else if (p - s == 5 && memcmp(s, "false", 5) == 0)
415                         lex->token_type = JSON_VALUE_FALSE;
416                 else
417                         report_invalid_token(lex);
418         }
419 }
420
421 /*
422  * The next token in the input stream is known to be a string; lex it.
423  */
424 static void
425 json_lex_string(JsonLexContext *lex)
426 {
427         char       *s;
428
429         for (s = lex->token_start + 1; *s != '"'; s++)
430         {
431                 /* Per RFC4627, these characters MUST be escaped. */
432                 if ((unsigned char) *s < 32)
433                 {
434                         /* A NUL byte marks the (premature) end of the string. */
435                         if (*s == '\0')
436                         {
437                                 lex->token_terminator = s;
438                                 report_invalid_token(lex);
439                         }
440                         /* Since *s isn't printable, exclude it from the context string */
441                         lex->token_terminator = s;
442                         ereport(ERROR,
443                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
444                                          errmsg("invalid input syntax for type json"),
445                                          errdetail("Character with value 0x%02x must be escaped.",
446                                                            (unsigned char) *s),
447                                          report_json_context(lex)));
448                 }
449                 else if (*s == '\\')
450                 {
451                         /* OK, we have an escape character. */
452                         s++;
453                         if (*s == '\0')
454                         {
455                                 lex->token_terminator = s;
456                                 report_invalid_token(lex);
457                         }
458                         else if (*s == 'u')
459                         {
460                                 int                     i;
461                                 int                     ch = 0;
462
463                                 for (i = 1; i <= 4; i++)
464                                 {
465                                         s++;
466                                         if (*s == '\0')
467                                         {
468                                                 lex->token_terminator = s;
469                                                 report_invalid_token(lex);
470                                         }
471                                         else if (*s >= '0' && *s <= '9')
472                                                 ch = (ch * 16) + (*s - '0');
473                                         else if (*s >= 'a' && *s <= 'f')
474                                                 ch = (ch * 16) + (*s - 'a') + 10;
475                                         else if (*s >= 'A' && *s <= 'F')
476                                                 ch = (ch * 16) + (*s - 'A') + 10;
477                                         else
478                                         {
479                                                 lex->token_terminator = s + pg_mblen(s);
480                                                 ereport(ERROR,
481                                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
482                                                                  errmsg("invalid input syntax for type json"),
483                                                                  errdetail("\"\\u\" must be followed by four hexadecimal digits."),
484                                                                  report_json_context(lex)));
485                                         }
486                                 }
487                         }
488                         else if (strchr("\"\\/bfnrt", *s) == NULL)
489                         {
490                                 /* Not a valid string escape, so error out. */
491                                 lex->token_terminator = s + pg_mblen(s);
492                                 ereport(ERROR,
493                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
494                                                  errmsg("invalid input syntax for type json"),
495                                                  errdetail("Escape sequence \"\\%s\" is invalid.",
496                                                                    extract_mb_char(s)),
497                                                  report_json_context(lex)));
498                         }
499                 }
500         }
501
502         /* Hooray, we found the end of the string! */
503         lex->token_terminator = s + 1;
504 }
505
506 /*-------------------------------------------------------------------------
507  * The next token in the input stream is known to be a number; lex it.
508  *
509  * In JSON, a number consists of four parts:
510  *
511  * (1) An optional minus sign ('-').
512  *
513  * (2) Either a single '0', or a string of one or more digits that does not
514  *         begin with a '0'.
515  *
516  * (3) An optional decimal part, consisting of a period ('.') followed by
517  *         one or more digits.  (Note: While this part can be omitted
518  *         completely, it's not OK to have only the decimal point without
519  *         any digits afterwards.)
520  *
521  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
522  *         followed by '+' or '-', followed by one or more digits.      (Note:
523  *         As with the decimal part, if 'e' or 'E' is present, it must be
524  *         followed by at least one digit.)
525  *
526  * The 's' argument to this function points to the ostensible beginning
527  * of part 2 - i.e. the character after any optional minus sign, and the
528  * first character of the string if there is none.
529  *
530  *-------------------------------------------------------------------------
531  */
532 static void
533 json_lex_number(JsonLexContext *lex, char *s)
534 {
535         bool            error = false;
536         char       *p;
537
538         /* Part (1): leading sign indicator. */
539         /* Caller already did this for us; so do nothing. */
540
541         /* Part (2): parse main digit string. */
542         if (*s == '0')
543                 s++;
544         else if (*s >= '1' && *s <= '9')
545         {
546                 do
547                 {
548                         s++;
549                 } while (*s >= '0' && *s <= '9');
550         }
551         else
552                 error = true;
553
554         /* Part (3): parse optional decimal portion. */
555         if (*s == '.')
556         {
557                 s++;
558                 if (*s < '0' || *s > '9')
559                         error = true;
560                 else
561                 {
562                         do
563                         {
564                                 s++;
565                         } while (*s >= '0' && *s <= '9');
566                 }
567         }
568
569         /* Part (4): parse optional exponent. */
570         if (*s == 'e' || *s == 'E')
571         {
572                 s++;
573                 if (*s == '+' || *s == '-')
574                         s++;
575                 if (*s < '0' || *s > '9')
576                         error = true;
577                 else
578                 {
579                         do
580                         {
581                                 s++;
582                         } while (*s >= '0' && *s <= '9');
583                 }
584         }
585
586         /*
587          * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
588          * here should be considered part of the token for error-reporting
589          * purposes.
590          */
591         for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
592                 error = true;
593         lex->token_terminator = p;
594         if (error)
595                 report_invalid_token(lex);
596 }
597
598 /*
599  * Report a parse error.
600  *
601  * lex->token_start and lex->token_terminator must identify the current token.
602  */
603 static void
604 report_parse_error(JsonParseStack *stack, JsonLexContext *lex)
605 {
606         char       *token;
607         int                     toklen;
608
609         /* Handle case where the input ended prematurely. */
610         if (lex->token_start == NULL)
611                 ereport(ERROR,
612                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
613                                  errmsg("invalid input syntax for type json"),
614                                  errdetail("The input string ended unexpectedly."),
615                                  report_json_context(lex)));
616
617         /* Separate out the current token. */
618         toklen = lex->token_terminator - lex->token_start;
619         token = palloc(toklen + 1);
620         memcpy(token, lex->token_start, toklen);
621         token[toklen] = '\0';
622
623         /* Complain, with the appropriate detail message. */
624         if (stack == NULL)
625                 ereport(ERROR,
626                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
627                                  errmsg("invalid input syntax for type json"),
628                                  errdetail("Expected end of input, but found \"%s\".",
629                                                    token),
630                                  report_json_context(lex)));
631         else
632         {
633                 switch (stack->state)
634                 {
635                         case JSON_PARSE_VALUE:
636                                 ereport(ERROR,
637                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
638                                                  errmsg("invalid input syntax for type json"),
639                                                  errdetail("Expected JSON value, but found \"%s\".",
640                                                                    token),
641                                                  report_json_context(lex)));
642                                 break;
643                         case JSON_PARSE_ARRAY_START:
644                                 ereport(ERROR,
645                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
646                                                  errmsg("invalid input syntax for type json"),
647                                                  errdetail("Expected array element or \"]\", but found \"%s\".",
648                                                                    token),
649                                                  report_json_context(lex)));
650                                 break;
651                         case JSON_PARSE_ARRAY_NEXT:
652                                 ereport(ERROR,
653                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
654                                                  errmsg("invalid input syntax for type json"),
655                                                  errdetail("Expected \",\" or \"]\", but found \"%s\".",
656                                                                    token),
657                                                  report_json_context(lex)));
658                                 break;
659                         case JSON_PARSE_OBJECT_START:
660                                 ereport(ERROR,
661                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
662                                                  errmsg("invalid input syntax for type json"),
663                                                  errdetail("Expected string or \"}\", but found \"%s\".",
664                                                                    token),
665                                                  report_json_context(lex)));
666                                 break;
667                         case JSON_PARSE_OBJECT_LABEL:
668                                 ereport(ERROR,
669                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
670                                                  errmsg("invalid input syntax for type json"),
671                                                  errdetail("Expected \":\", but found \"%s\".",
672                                                                    token),
673                                                  report_json_context(lex)));
674                                 break;
675                         case JSON_PARSE_OBJECT_NEXT:
676                                 ereport(ERROR,
677                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
678                                                  errmsg("invalid input syntax for type json"),
679                                                  errdetail("Expected \",\" or \"}\", but found \"%s\".",
680                                                                    token),
681                                                  report_json_context(lex)));
682                                 break;
683                         case JSON_PARSE_OBJECT_COMMA:
684                                 ereport(ERROR,
685                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
686                                                  errmsg("invalid input syntax for type json"),
687                                                  errdetail("Expected string, but found \"%s\".",
688                                                                    token),
689                                                  report_json_context(lex)));
690                                 break;
691                         default:
692                                 elog(ERROR, "unexpected json parse state: %d",
693                                          (int) stack->state);
694                 }
695         }
696 }
697
698 /*
699  * Report an invalid input token.
700  *
701  * lex->token_start and lex->token_terminator must identify the token.
702  */
703 static void
704 report_invalid_token(JsonLexContext *lex)
705 {
706         char       *token;
707         int                     toklen;
708
709         /* Separate out the offending token. */
710         toklen = lex->token_terminator - lex->token_start;
711         token = palloc(toklen + 1);
712         memcpy(token, lex->token_start, toklen);
713         token[toklen] = '\0';
714
715         ereport(ERROR,
716                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
717                          errmsg("invalid input syntax for type json"),
718                          errdetail("Token \"%s\" is invalid.", token),
719                          report_json_context(lex)));
720 }
721
722 /*
723  * Report a CONTEXT line for bogus JSON input.
724  *
725  * lex->token_terminator must be set to identify the spot where we detected
726  * the error.  Note that lex->token_start might be NULL, in case we recognized
727  * error at EOF.
728  *
729  * The return value isn't meaningful, but we make it non-void so that this
730  * can be invoked inside ereport().
731  */
732 static int
733 report_json_context(JsonLexContext *lex)
734 {
735         const char *context_start;
736         const char *context_end;
737         const char *line_start;
738         int                     line_number;
739         char       *ctxt;
740         int                     ctxtlen;
741         const char *prefix;
742         const char *suffix;
743
744         /* Choose boundaries for the part of the input we will display */
745         context_start = lex->input;
746         context_end = lex->token_terminator;
747         line_start = context_start;
748         line_number = 1;
749         for (;;)
750         {
751                 /* Always advance over newlines (context_end test is just paranoia) */
752                 if (*context_start == '\n' && context_start < context_end)
753                 {
754                         context_start++;
755                         line_start = context_start;
756                         line_number++;
757                         continue;
758                 }
759                 /* Otherwise, done as soon as we are close enough to context_end */
760                 if (context_end - context_start < 50)
761                         break;
762                 /* Advance to next multibyte character */
763                 if (IS_HIGHBIT_SET(*context_start))
764                         context_start += pg_mblen(context_start);
765                 else
766                         context_start++;
767         }
768
769         /*
770          * We add "..." to indicate that the excerpt doesn't start at the
771          * beginning of the line ... but if we're within 3 characters of the
772          * beginning of the line, we might as well just show the whole line.
773          */
774         if (context_start - line_start <= 3)
775                 context_start = line_start;
776
777         /* Get a null-terminated copy of the data to present */
778         ctxtlen = context_end - context_start;
779         ctxt = palloc(ctxtlen + 1);
780         memcpy(ctxt, context_start, ctxtlen);
781         ctxt[ctxtlen] = '\0';
782
783         /*
784          * Show the context, prefixing "..." if not starting at start of line, and
785          * suffixing "..." if not ending at end of line.
786          */
787         prefix = (context_start > line_start) ? "..." : "";
788         suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." : "";
789
790         return errcontext("JSON data, line %d: %s%s%s",
791                                           line_number, prefix, ctxt, suffix);
792 }
793
/*
 * Return a palloc'd, NUL-terminated copy of the first character of "s".
 *
 * The character's byte length is taken from pg_mblen(), so a multibyte
 * character is copied as a unit.
 */
static char *
extract_mb_char(char *s)
{
    int         nbytes = pg_mblen(s);
    char       *copy = palloc(nbytes + 1);

    memcpy(copy, s, nbytes);
    copy[nbytes] = '\0';

    return copy;
}
810
811 /*
812  * Turn a scalar Datum into JSON, appending the string to "result".
813  *
814  * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
815  * as appropriate.
816  */
817 static void
818 datum_to_json(Datum val, bool is_null, StringInfo result,
819                           TYPCATEGORY tcategory, Oid typoutputfunc)
820 {
821         char       *outputstr;
822
823         if (is_null)
824         {
825                 appendStringInfoString(result, "null");
826                 return;
827         }
828
829         switch (tcategory)
830         {
831                 case TYPCATEGORY_ARRAY:
832                         array_to_json_internal(val, result, false);
833                         break;
834                 case TYPCATEGORY_COMPOSITE:
835                         composite_to_json(val, result, false);
836                         break;
837                 case TYPCATEGORY_BOOLEAN:
838                         if (DatumGetBool(val))
839                                 appendStringInfoString(result, "true");
840                         else
841                                 appendStringInfoString(result, "false");
842                         break;
843                 case TYPCATEGORY_NUMERIC:
844                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
845
846                         /*
847                          * Don't call escape_json here if it's a valid JSON number.
848                          * Numeric output should usually be a valid JSON number and JSON
849                          * numbers shouldn't be quoted. Quote cases like "Nan" and
850                          * "Infinity", however.
851                          */
852                         if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
853                                 appendStringInfoString(result, outputstr);
854                         else
855                                 escape_json(result, outputstr);
856                         pfree(outputstr);
857                         break;
858                 case TYPCATEGORY_JSON:
859                         /* JSON will already be escaped */
860                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
861                         appendStringInfoString(result, outputstr);
862                         pfree(outputstr);
863                         break;
864                 default:
865                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
866                         escape_json(result, outputstr);
867                         pfree(outputstr);
868                         break;
869         }
870 }
871
872 /*
873  * Process a single dimension of an array.
874  * If it's the innermost dimension, output the values, otherwise call
875  * ourselves recursively to process the next dimension.
876  */
877 static void
878 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
879                                   bool *nulls, int *valcount, TYPCATEGORY tcategory,
880                                   Oid typoutputfunc, bool use_line_feeds)
881 {
882         int                     i;
883         const char *sep;
884
885         Assert(dim < ndims);
886
887         sep = use_line_feeds ? ",\n " : ",";
888
889         appendStringInfoChar(result, '[');
890
891         for (i = 1; i <= dims[dim]; i++)
892         {
893                 if (i > 1)
894                         appendStringInfoString(result, sep);
895
896                 if (dim + 1 == ndims)
897                 {
898                         datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
899                                                   typoutputfunc);
900                         (*valcount)++;
901                 }
902                 else
903                 {
904                         /*
905                          * Do we want line feeds on inner dimensions of arrays? For now
906                          * we'll say no.
907                          */
908                         array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
909                                                           valcount, tcategory, typoutputfunc, false);
910                 }
911         }
912
913         appendStringInfoChar(result, ']');
914 }
915
916 /*
917  * Turn an array into JSON.
918  */
919 static void
920 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
921 {
922         ArrayType  *v = DatumGetArrayTypeP(array);
923         Oid                     element_type = ARR_ELEMTYPE(v);
924         int                *dim;
925         int                     ndim;
926         int                     nitems;
927         int                     count = 0;
928         Datum      *elements;
929         bool       *nulls;
930         int16           typlen;
931         bool            typbyval;
932         char            typalign,
933                                 typdelim;
934         Oid                     typioparam;
935         Oid                     typoutputfunc;
936         TYPCATEGORY tcategory;
937
938         ndim = ARR_NDIM(v);
939         dim = ARR_DIMS(v);
940         nitems = ArrayGetNItems(ndim, dim);
941
942         if (nitems <= 0)
943         {
944                 appendStringInfoString(result, "[]");
945                 return;
946         }
947
948         get_type_io_data(element_type, IOFunc_output,
949                                          &typlen, &typbyval, &typalign,
950                                          &typdelim, &typioparam, &typoutputfunc);
951
952         deconstruct_array(v, element_type, typlen, typbyval,
953                                           typalign, &elements, &nulls,
954                                           &nitems);
955
956         if (element_type == RECORDOID)
957                 tcategory = TYPCATEGORY_COMPOSITE;
958         else if (element_type == JSONOID)
959                 tcategory = TYPCATEGORY_JSON;
960         else
961                 tcategory = TypeCategory(element_type);
962
963         array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
964                                           typoutputfunc, use_line_feeds);
965
966         pfree(elements);
967         pfree(nulls);
968 }
969
970 /*
971  * Turn a composite / record into JSON.
972  */
973 static void
974 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
975 {
976         HeapTupleHeader td;
977         Oid                     tupType;
978         int32           tupTypmod;
979         TupleDesc       tupdesc;
980         HeapTupleData tmptup,
981                            *tuple;
982         int                     i;
983         bool            needsep = false;
984         const char *sep;
985
986         sep = use_line_feeds ? ",\n " : ",";
987
988         td = DatumGetHeapTupleHeader(composite);
989
990         /* Extract rowtype info and find a tupdesc */
991         tupType = HeapTupleHeaderGetTypeId(td);
992         tupTypmod = HeapTupleHeaderGetTypMod(td);
993         tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
994
995         /* Build a temporary HeapTuple control structure */
996         tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
997         tmptup.t_data = td;
998         tuple = &tmptup;
999
1000         appendStringInfoChar(result, '{');
1001
1002         for (i = 0; i < tupdesc->natts; i++)
1003         {
1004                 Datum           val,
1005                                         origval;
1006                 bool            isnull;
1007                 char       *attname;
1008                 TYPCATEGORY tcategory;
1009                 Oid                     typoutput;
1010                 bool            typisvarlena;
1011
1012                 if (tupdesc->attrs[i]->attisdropped)
1013                         continue;
1014
1015                 if (needsep)
1016                         appendStringInfoString(result, sep);
1017                 needsep = true;
1018
1019                 attname = NameStr(tupdesc->attrs[i]->attname);
1020                 escape_json(result, attname);
1021                 appendStringInfoChar(result, ':');
1022
1023                 origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
1024
1025                 if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
1026                         tcategory = TYPCATEGORY_ARRAY;
1027                 else if (tupdesc->attrs[i]->atttypid == RECORDOID)
1028                         tcategory = TYPCATEGORY_COMPOSITE;
1029                 else if (tupdesc->attrs[i]->atttypid == JSONOID)
1030                         tcategory = TYPCATEGORY_JSON;
1031                 else
1032                         tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
1033
1034                 getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
1035                                                   &typoutput, &typisvarlena);
1036
1037                 /*
1038                  * If we have a toasted datum, forcibly detoast it here to avoid
1039                  * memory leakage inside the type's output routine.
1040                  */
1041                 if (typisvarlena && !isnull)
1042                         val = PointerGetDatum(PG_DETOAST_DATUM(origval));
1043                 else
1044                         val = origval;
1045
1046                 datum_to_json(val, isnull, result, tcategory, typoutput);
1047
1048                 /* Clean up detoasted copy, if any */
1049                 if (val != origval)
1050                         pfree(DatumGetPointer(val));
1051         }
1052
1053         appendStringInfoChar(result, '}');
1054         ReleaseTupleDesc(tupdesc);
1055 }
1056
1057 /*
1058  * SQL function array_to_json(row)
1059  */
1060 extern Datum
1061 array_to_json(PG_FUNCTION_ARGS)
1062 {
1063         Datum           array = PG_GETARG_DATUM(0);
1064         StringInfo      result;
1065
1066         result = makeStringInfo();
1067
1068         array_to_json_internal(array, result, false);
1069
1070         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1071 }
1072
1073 /*
1074  * SQL function array_to_json(row, prettybool)
1075  */
1076 extern Datum
1077 array_to_json_pretty(PG_FUNCTION_ARGS)
1078 {
1079         Datum           array = PG_GETARG_DATUM(0);
1080         bool            use_line_feeds = PG_GETARG_BOOL(1);
1081         StringInfo      result;
1082
1083         result = makeStringInfo();
1084
1085         array_to_json_internal(array, result, use_line_feeds);
1086
1087         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1088 }
1089
1090 /*
1091  * SQL function row_to_json(row)
1092  */
1093 extern Datum
1094 row_to_json(PG_FUNCTION_ARGS)
1095 {
1096         Datum           array = PG_GETARG_DATUM(0);
1097         StringInfo      result;
1098
1099         result = makeStringInfo();
1100
1101         composite_to_json(array, result, false);
1102
1103         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1104 }
1105
1106 /*
1107  * SQL function row_to_json(row, prettybool)
1108  */
1109 extern Datum
1110 row_to_json_pretty(PG_FUNCTION_ARGS)
1111 {
1112         Datum           array = PG_GETARG_DATUM(0);
1113         bool            use_line_feeds = PG_GETARG_BOOL(1);
1114         StringInfo      result;
1115
1116         result = makeStringInfo();
1117
1118         composite_to_json(array, result, use_line_feeds);
1119
1120         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1121 }
1122
1123 /*
1124  * Produce a JSON string literal, properly escaping characters in the text.
1125  */
1126 void
1127 escape_json(StringInfo buf, const char *str)
1128 {
1129         const char *p;
1130
1131         appendStringInfoCharMacro(buf, '\"');
1132         for (p = str; *p; p++)
1133         {
1134                 switch (*p)
1135                 {
1136                         case '\b':
1137                                 appendStringInfoString(buf, "\\b");
1138                                 break;
1139                         case '\f':
1140                                 appendStringInfoString(buf, "\\f");
1141                                 break;
1142                         case '\n':
1143                                 appendStringInfoString(buf, "\\n");
1144                                 break;
1145                         case '\r':
1146                                 appendStringInfoString(buf, "\\r");
1147                                 break;
1148                         case '\t':
1149                                 appendStringInfoString(buf, "\\t");
1150                                 break;
1151                         case '"':
1152                                 appendStringInfoString(buf, "\\\"");
1153                                 break;
1154                         case '\\':
1155                                 appendStringInfoString(buf, "\\\\");
1156                                 break;
1157                         default:
1158                                 if ((unsigned char) *p < ' ')
1159                                         appendStringInfo(buf, "\\u%04x", (int) *p);
1160                                 else
1161                                         appendStringInfoCharMacro(buf, *p);
1162                                 break;
1163                 }
1164         }
1165         appendStringInfoCharMacro(buf, '\"');
1166 }