1 /*-------------------------------------------------------------------------
4 * JSON data type support.
6 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/utils/adt/json.c
12 *-------------------------------------------------------------------------
16 #include "access/htup_details.h"
17 #include "access/transam.h"
18 #include "catalog/pg_cast.h"
19 #include "catalog/pg_type.h"
20 #include "executor/spi.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/pqformat.h"
23 #include "mb/pg_wchar.h"
24 #include "parser/parse_coerce.h"
25 #include "utils/array.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/json.h"
29 #include "utils/jsonapi.h"
30 #include "utils/typcache.h"
31 #include "utils/syscache.h"
/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.
 */
typedef enum					/* contexts of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_STRING,			/* expecting a string (for a field name) */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA,	/* saw object ',', expecting next label */
	JSON_PARSE_END				/* saw the end of a document, expect nothing */
} JsonParseContext;
51 static inline void json_lex(JsonLexContext *lex);
52 static inline void json_lex_string(JsonLexContext *lex);
53 static inline void json_lex_number(JsonLexContext *lex, char *s);
54 static inline void parse_scalar(JsonLexContext *lex, JsonSemAction sem);
55 static void parse_object_field(JsonLexContext *lex, JsonSemAction sem);
56 static void parse_object(JsonLexContext *lex, JsonSemAction sem);
57 static void parse_array_element(JsonLexContext *lex, JsonSemAction sem);
58 static void parse_array(JsonLexContext *lex, JsonSemAction sem);
59 static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
60 static void report_invalid_token(JsonLexContext *lex);
61 static int report_json_context(JsonLexContext *lex);
62 static char *extract_mb_char(char *s);
63 static void composite_to_json(Datum composite, StringInfo result,
65 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
66 Datum *vals, bool *nulls, int *valcount,
67 TYPCATEGORY tcategory, Oid typoutputfunc,
69 static void array_to_json_internal(Datum array, StringInfo result,
72 /* the null action object used for pure validation */
73 static jsonSemAction nullSemAction =
75 NULL, NULL, NULL, NULL, NULL,
76 NULL, NULL, NULL, NULL, NULL
78 static JsonSemAction NullSemAction = &nullSemAction;
80 /* Recursive Descent parser support routines */
85 * what is the current look_ahead token?
87 static inline JsonTokenType
88 lex_peek(JsonLexContext *lex)
90 return lex->token_type;
96 * accept the look_ahead token and move the lexer to the next token if the
97 * look_ahead token matches the token parameter. In that case, and if required,
98 * also hand back the de-escaped lexeme.
100 * returns true if the token matched, false otherwise.
103 lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
105 if (lex->token_type == token)
109 if (lex->token_type == JSON_TOKEN_STRING)
111 if (lex->strval != NULL)
112 *lexeme = pstrdup(lex->strval->data);
116 int len = (lex->token_terminator - lex->token_start);
117 char *tokstr = palloc(len + 1);
119 memcpy(tokstr, lex->token_start, len);
133 * move the lexer to the next token if the current look_ahead token matches
134 * the parameter token. Otherwise, report an error.
137 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
139 if (!lex_accept(lex, token, NULL))
140 report_parse_error(ctx, lex);;
/*
 * All the defined type categories are upper case, so use lower case here
 * so we avoid any possible clash.
 */
/* fake type category for JSON so we can distinguish it in datum_to_json */
#define TYPCATEGORY_JSON 'j'
/* fake category for types that have a cast to json */
#define TYPCATEGORY_JSON_CAST 'c'
/* letters appearing in numeric output that aren't valid in a JSON number */
#define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
/* chars to consider as part of an alphanumeric token */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
165 json_in(PG_FUNCTION_ARGS)
167 char *json = PG_GETARG_CSTRING(0);
168 text *result = cstring_to_text(json);
172 lex = makeJsonLexContext(result, false);
173 pg_parse_json(lex, NullSemAction);
175 /* Internal representation is the same as text, for now */
176 PG_RETURN_TEXT_P(result);
183 json_out(PG_FUNCTION_ARGS)
185 /* we needn't detoast because text_to_cstring will handle that */
186 Datum txt = PG_GETARG_DATUM(0);
188 PG_RETURN_CSTRING(TextDatumGetCString(txt));
195 json_send(PG_FUNCTION_ARGS)
197 text *t = PG_GETARG_TEXT_PP(0);
200 pq_begintypsend(&buf);
201 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
202 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
209 json_recv(PG_FUNCTION_ARGS)
211 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
217 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
219 result = palloc(nbytes + VARHDRSZ);
220 SET_VARSIZE(result, nbytes + VARHDRSZ);
221 memcpy(VARDATA(result), str, nbytes);
224 lex = makeJsonLexContext(result, false);
225 pg_parse_json(lex, NullSemAction);
227 PG_RETURN_TEXT_P(result);
233 * lex constructor, with or without StringInfo object
234 * for de-escaped lexemes.
236 * Without is better as it makes the processing faster, so only make one
237 * if really required.
240 makeJsonLexContext(text *json, bool need_escapes)
242 JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
244 lex->input = lex->token_terminator = lex->line_start = VARDATA(json);
245 lex->line_number = 1;
246 lex->input_length = VARSIZE(json) - VARHDRSZ;
248 lex->strval = makeStringInfo();
255 * Publicly visible entry point for the JSON parser.
257 * lex is a lexing context, set up for the json to be processed by calling
258 * makeJsonLexContext(). sem is a strucure of function pointers to semantic
259 * action routines to be called at appropriate spots during parsing, and a
260 * pointer to a state object to be passed to those routines.
263 pg_parse_json(JsonLexContext *lex, JsonSemAction sem)
267 /* get the initial token */
272 /* parse by recursive descent */
275 case JSON_TOKEN_OBJECT_START:
276 parse_object(lex, sem);
278 case JSON_TOKEN_ARRAY_START:
279 parse_array(lex, sem);
282 parse_scalar(lex, sem); /* json can be a bare scalar */
285 lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
290 * Recursive Descent parse routines. There is one for each structural
291 * element in a json document:
292 * - scalar (string, number, true, false, null)
299 parse_scalar(JsonLexContext *lex, JsonSemAction sem)
302 json_scalar_action sfunc = sem->scalar;
304 JsonTokenType tok = lex_peek(lex);
306 valaddr = sfunc == NULL ? NULL : &val;
308 /* a scalar must be a string, a number, true, false, or null */
311 case JSON_TOKEN_TRUE:
312 lex_accept(lex, JSON_TOKEN_TRUE, valaddr);
314 case JSON_TOKEN_FALSE:
315 lex_accept(lex, JSON_TOKEN_FALSE, valaddr);
317 case JSON_TOKEN_NULL:
318 lex_accept(lex, JSON_TOKEN_NULL, valaddr);
320 case JSON_TOKEN_NUMBER:
321 lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);
323 case JSON_TOKEN_STRING:
324 lex_accept(lex, JSON_TOKEN_STRING, valaddr);
327 report_parse_error(JSON_PARSE_VALUE, lex);
331 (*sfunc) (sem->semstate, val, tok);
335 parse_object_field(JsonLexContext *lex, JsonSemAction sem)
338 * an object field is "fieldname" : value where value can be a scalar,
342 char *fname = NULL; /* keep compiler quiet */
343 json_ofield_action ostart = sem->object_field_start;
344 json_ofield_action oend = sem->object_field_end;
346 char **fnameaddr = NULL;
349 if (ostart != NULL || oend != NULL)
352 if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
353 report_parse_error(JSON_PARSE_STRING, lex);
355 lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
358 isnull = tok == JSON_TOKEN_NULL;
361 (*ostart) (sem->semstate, fname, isnull);
365 case JSON_TOKEN_OBJECT_START:
366 parse_object(lex, sem);
368 case JSON_TOKEN_ARRAY_START:
369 parse_array(lex, sem);
372 parse_scalar(lex, sem);
376 (*oend) (sem->semstate, fname, isnull);
383 parse_object(JsonLexContext *lex, JsonSemAction sem)
386 * an object is a possibly empty sequence of object fields, separated by
387 * commas and surrounde by curly braces.
389 json_struct_action ostart = sem->object_start;
390 json_struct_action oend = sem->object_end;
394 (*ostart) (sem->semstate);
397 * Data inside an object at at a higher nesting level than the object
398 * itself. Note that we increment this after we call the semantic routine
399 * for the object start and restore it before we call the routine for the
404 /* we know this will succeeed, just clearing the token */
405 lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START);
410 case JSON_TOKEN_STRING:
411 parse_object_field(lex, sem);
412 while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
413 parse_object_field(lex, sem);
415 case JSON_TOKEN_OBJECT_END:
418 /* case of an invalid initial token inside the object */
419 report_parse_error(JSON_PARSE_OBJECT_START, lex);
422 lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
427 (*oend) (sem->semstate);
431 parse_array_element(JsonLexContext *lex, JsonSemAction sem)
433 json_aelem_action astart = sem->array_element_start;
434 json_aelem_action aend = sem->array_element_end;
435 JsonTokenType tok = lex_peek(lex);
439 isnull = tok == JSON_TOKEN_NULL;
442 (*astart) (sem->semstate, isnull);
444 /* an array element is any object, array or scalar */
447 case JSON_TOKEN_OBJECT_START:
448 parse_object(lex, sem);
450 case JSON_TOKEN_ARRAY_START:
451 parse_array(lex, sem);
454 parse_scalar(lex, sem);
458 (*aend) (sem->semstate, isnull);
462 parse_array(JsonLexContext *lex, JsonSemAction sem)
465 * an array is a possibly empty sequence of array elements, separated by
466 * commas and surrounded by square brackets.
468 json_struct_action astart = sem->array_start;
469 json_struct_action aend = sem->array_end;
472 (*astart) (sem->semstate);
475 * Data inside an array at at a higher nesting level than the array
476 * itself. Note that we increment this after we call the semantic routine
477 * for the array start and restore it before we call the routine for the
482 lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
483 if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
486 parse_array_element(lex, sem);
488 while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
489 parse_array_element(lex, sem);
492 lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
497 (*aend) (sem->semstate);
/*
 * NOTE(review): this excerpt appears to be an incomplete extraction of the
 * lexer -- the function header keywords, local declarations, braces, the
 * switch head, and the "break" statements are missing.  Comments below
 * annotate only the visible logic; do not assume this compiles as shown.
 */
501 * Lex one token from the input stream.
504 json_lex(JsonLexContext *lex)
/* Start scanning from the end of the previous token. */
509 /* Skip leading whitespace. */
510 s = lex->token_terminator;
511 len = s - lex->input;
512 while (len < lex->input_length &&
513 (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
520 lex->token_start = s;
/* End-of-input: produce the sentinel END token with a NULL token_start. */
522 /* Determine token type. */
523 if (len >= lex->input_length)
525 lex->token_start = NULL;
526 lex->prev_token_terminator = lex->token_terminator;
527 lex->token_terminator = s;
528 lex->token_type = JSON_TOKEN_END;
/* The arms below each consume exactly one punctuation character. */
533 /* Single-character token, some kind of punctuation mark. */
535 lex->prev_token_terminator = lex->token_terminator;
536 lex->token_terminator = s + 1;
537 lex->token_type = JSON_TOKEN_OBJECT_START;
540 lex->prev_token_terminator = lex->token_terminator;
541 lex->token_terminator = s + 1;
542 lex->token_type = JSON_TOKEN_OBJECT_END;
545 lex->prev_token_terminator = lex->token_terminator;
546 lex->token_terminator = s + 1;
547 lex->token_type = JSON_TOKEN_ARRAY_START;
550 lex->prev_token_terminator = lex->token_terminator;
551 lex->token_terminator = s + 1;
552 lex->token_type = JSON_TOKEN_ARRAY_END;
555 lex->prev_token_terminator = lex->token_terminator;
556 lex->token_terminator = s + 1;
557 lex->token_type = JSON_TOKEN_COMMA;
560 lex->prev_token_terminator = lex->token_terminator;
561 lex->token_terminator = s + 1;
562 lex->token_type = JSON_TOKEN_COLON;
/* '"' introduces a string; json_lex_string sets the terminators itself. */
566 json_lex_string(lex);
567 lex->token_type = JSON_TOKEN_STRING;
570 /* Negative number. */
571 json_lex_number(lex, s + 1);
572 lex->token_type = JSON_TOKEN_NUMBER;
584 /* Positive number. */
585 json_lex_number(lex, s);
586 lex->token_type = JSON_TOKEN_NUMBER;
593 * We're not dealing with a string, number, legal
594 * punctuation mark, or end of string. The only legal
595 * tokens we might find here are true, false, and null,
596 * but for error reporting purposes we scan until we see a
597 * non-alphanumeric character. That way, we can report
598 * the whole word as an unexpected token, rather than just
599 * some unintuitive prefix thereof.
601 for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && p - s < lex->input_length - len; p++)
605 * We got some sort of unexpected punctuation or an
606 * otherwise unexpected character, so just complain about
607 * that one character.
611 lex->prev_token_terminator = lex->token_terminator;
612 lex->token_terminator = s + 1;
613 report_invalid_token(lex);
617 * We've got a real alphanumeric token here. If it
618 * happens to be true, false, or null, all is well. If
621 lex->prev_token_terminator = lex->token_terminator;
622 lex->token_terminator = p;
/*
 * NOTE(review): a length guard (presumably "if (p - s == 4)") appears to
 * be missing from this excerpt before the memcmp calls below -- without
 * it, memcmp could read past the token.  TODO confirm against the
 * complete source.
 */
625 if (memcmp(s, "true", 4) == 0)
626 lex->token_type = JSON_TOKEN_TRUE;
627 else if (memcmp(s, "null", 4) == 0)
628 lex->token_type = JSON_TOKEN_NULL;
630 report_invalid_token(lex);
632 else if (p - s == 5 && memcmp(s, "false", 5) == 0)
633 lex->token_type = JSON_TOKEN_FALSE;
635 report_invalid_token(lex);
638 } /* end of switch */
/*
 * NOTE(review): incomplete extraction -- declarations, braces, the escape
 * switch head, and several closing statements are missing.  Annotations
 * below cover only the visible logic.
 */
642 * The next token in the input stream is known to be a string; lex it.
645 json_lex_string(JsonLexContext *lex)
/* hi_surrogate holds a pending UTF-16 high surrogate, or -1 if none. */
649 int hi_surrogate = -1;
/* Reset the de-escape buffer if the caller asked for de-escaped lexemes. */
651 if (lex->strval != NULL)
652 resetStringInfo(lex->strval);
654 len = lex->token_start - lex->input;
/* Scan from just after the opening quote until the closing quote. */
656 for (s = lex->token_start + 1; *s != '"'; s++, len++)
658 /* Premature end of the string. */
659 if (len >= lex->input_length)
661 lex->token_terminator = s;
662 report_invalid_token(lex);
664 else if ((unsigned char) *s < 32)
666 /* Per RFC4627, these characters MUST be escaped. */
667 /* Since *s isn't printable, exclude it from the context string */
668 lex->token_terminator = s;
670 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
671 errmsg("invalid input syntax for type json"),
672 errdetail("Character with value 0x%02x must be escaped.",
674 report_json_context(lex)));
678 /* OK, we have an escape character. */
681 if (len >= lex->input_length)
683 lex->token_terminator = s;
684 report_invalid_token(lex);
/* \uXXXX escape: accumulate exactly four hex digits into ch. */
691 for (i = 1; i <= 4; i++)
695 if (len >= lex->input_length)
697 lex->token_terminator = s;
698 report_invalid_token(lex);
700 else if (*s >= '0' && *s <= '9')
701 ch = (ch * 16) + (*s - '0');
702 else if (*s >= 'a' && *s <= 'f')
703 ch = (ch * 16) + (*s - 'a') + 10;
704 else if (*s >= 'A' && *s <= 'F')
705 ch = (ch * 16) + (*s - 'A') + 10;
708 lex->token_terminator = s + pg_mblen(s);
710 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
711 errmsg("invalid input syntax for type json"),
712 errdetail("\"\\u\" must be followed by four hexadecimal digits."),
713 report_json_context(lex)));
/* Surrogate-pair bookkeeping only matters when de-escaping. */
716 if (lex->strval != NULL)
721 if (ch >= 0xd800 && ch <= 0xdbff)
723 if (hi_surrogate != -1)
725 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
726 errmsg("invalid input syntax for type json"),
727 errdetail("high order surrogate must not follow a high order surrogate."),
728 report_json_context(lex)));
729 hi_surrogate = (ch & 0x3ff) << 10;
732 else if (ch >= 0xdc00 && ch <= 0xdfff)
734 if (hi_surrogate == -1)
736 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
737 errmsg("invalid input syntax for type json"),
738 errdetail("low order surrogate must follow a high order surrogate."),
739 report_json_context(lex)));
/* Combine the pair into a single code point above U+FFFF. */
740 ch = 0x10000 + hi_surrogate + (ch & 0x3ff)
744 if (hi_surrogate != -1)
746 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
747 errmsg("invalid input syntax for type json"),
748 errdetail("low order surrogate must follow a high order surrogate."),
749 report_json_context(lex)));
752 * For UTF8, replace the escape sequence by the actual utf8
753 * character in lex->strval. Do this also for other encodings
754 * if the escape designates an ASCII character, otherwise
755 * raise an error. We don't ever unescape a \u0000, since that
756 * would result in an impermissible nul byte.
761 appendStringInfoString(lex->strval, "\\u0000");
763 else if (GetDatabaseEncoding() == PG_UTF8)
765 unicode_to_utf8(ch, (unsigned char *) utf8str);
766 utf8len = pg_utf_mblen((unsigned char *) utf8str);
767 appendBinaryStringInfo(lex->strval, utf8str, utf8len);
769 else if (ch <= 0x007f)
772 * This is the only way to designate things like a form feed
773 * character in JSON, so it's useful in all encodings.
775 appendStringInfoChar(lex->strval, (char) ch);
780 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
781 errmsg("invalid input syntax for type json"),
782 errdetail("Unicode escape for code points higher than U+007F not permitted in non-UTF8 encoding"),
783 report_json_context(lex)));
/* Non-\u escapes: translate the two-character sequence when de-escaping. */
788 else if (lex->strval != NULL)
790 if (hi_surrogate != -1)
792 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
793 errmsg("invalid input syntax for type json"),
794 errdetail("low order surrogate must follow a high order surrogate."),
795 report_json_context(lex)));
802 appendStringInfoChar(lex->strval, *s);
805 appendStringInfoChar(lex->strval, '\b');
808 appendStringInfoChar(lex->strval, '\f');
811 appendStringInfoChar(lex->strval, '\n');
814 appendStringInfoChar(lex->strval, '\r');
817 appendStringInfoChar(lex->strval, '\t');
820 /* Not a valid string escape, so error out. */
821 lex->token_terminator = s + pg_mblen(s);
823 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
824 errmsg("invalid input syntax for type json"),
825 errdetail("Escape sequence \"\\%s\" is invalid.",
827 report_json_context(lex)));
830 else if (strchr("\"\\/bfnrt", *s) == NULL)
833 * Simpler processing if we're not bothered about de-escaping
835 * It's very tempting to remove the strchr() call here and
836 * replace it with a switch statement, but testing so far has
837 * shown it's not a performance win.
839 lex->token_terminator = s + pg_mblen(s);
841 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
842 errmsg("invalid input syntax for type json"),
843 errdetail("Escape sequence \"\\%s\" is invalid.",
845 report_json_context(lex)));
/* Ordinary character inside the string: copy it when de-escaping. */
849 else if (lex->strval != NULL)
851 if (hi_surrogate != -1)
853 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
854 errmsg("invalid input syntax for type json"),
855 errdetail("low order surrogate must follow a high order surrogate."),
856 report_json_context(lex)));
858 appendStringInfoChar(lex->strval, *s);
/* A dangling high surrogate at end of string is an error. */
863 if (hi_surrogate != -1)
865 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
866 errmsg("invalid input syntax for type json"),
867 errdetail("low order surrogate must follow a high order surrogate."),
868 report_json_context(lex)));
870 /* Hooray, we found the end of the string! */
871 lex->prev_token_terminator = lex->token_terminator;
872 lex->token_terminator = s + 1;
/*
 * NOTE(review): incomplete extraction -- declarations, braces, increments
 * and the final error guard are missing.  Annotations cover visible logic.
 */
875 /*-------------------------------------------------------------------------
876 * The next token in the input stream is known to be a number; lex it.
878 * In JSON, a number consists of four parts:
880 * (1) An optional minus sign ('-').
882 * (2) Either a single '0', or a string of one or more digits that does not
885 * (3) An optional decimal part, consisting of a period ('.') followed by
886 * one or more digits. (Note: While this part can be omitted
887 * completely, it's not OK to have only the decimal point without
888 * any digits afterwards.)
890 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
891 * followed by '+' or '-', followed by one or more digits. (Note:
892 * As with the decimal part, if 'e' or 'E' is present, it must be
893 * followed by at least one digit.)
895 * The 's' argument to this function points to the ostensible beginning
896 * of part 2 - i.e. the character after any optional minus sign, and the
897 * first character of the string if there is none.
899 *-------------------------------------------------------------------------
902 json_lex_number(JsonLexContext *lex, char *s)
908 len = s - lex->input;
909 /* Part (1): leading sign indicator. */
910 /* Caller already did this for us; so do nothing. */
912 /* Part (2): parse main digit string. */
918 else if (*s >= '1' && *s <= '9')
/*
 * NOTE(review): in the do-while conditions below, *s is tested before the
 * len bound, which would read one byte past the input when the number runs
 * to the very end -- verify operand order against the complete source.
 */
924 } while (*s >= '0' && *s <= '9' && len < lex->input_length);
929 /* Part (3): parse optional decimal portion. */
930 if (len < lex->input_length && *s == '.')
934 if (len == lex->input_length || *s < '0' || *s > '9')
942 } while (*s >= '0' && *s <= '9' && len < lex->input_length);
946 /* Part (4): parse optional exponent. */
947 if (len < lex->input_length && (*s == 'e' || *s == 'E'))
951 if (len < lex->input_length && (*s == '+' || *s == '-'))
956 if (len == lex->input_length || *s < '0' || *s > '9')
964 } while (len < lex->input_length && *s >= '0' && *s <= '9');
969 * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
970 * here should be considered part of the token for error-reporting
973 for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && len < lex->input_length; p++, len++)
975 lex->prev_token_terminator = lex->token_terminator;
976 lex->token_terminator = p;
978 report_invalid_token(lex);
/*
 * NOTE(review): incomplete extraction -- the function header keywords,
 * declarations, ereport() call heads, braces and "break"s are missing.
 */
982 * Report a parse error.
984 * lex->token_start and lex->token_terminator must identify the current token.
987 report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
992 /* Handle case where the input ended prematurely. */
993 if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
995 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
996 errmsg("invalid input syntax for type json"),
997 errdetail("The input string ended unexpectedly."),
998 report_json_context(lex)));
/* Make a NUL-terminated copy of the offending token for the messages. */
1000 /* Separate out the current token. */
1001 toklen = lex->token_terminator - lex->token_start;
1002 token = palloc(toklen + 1);
1003 memcpy(token, lex->token_start, toklen);
1004 token[toklen] = '\0';
1006 /* Complain, with the appropriate detail message. */
1007 if (ctx == JSON_PARSE_END)
1009 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1010 errmsg("invalid input syntax for type json"),
1011 errdetail("Expected end of input, but found \"%s\".",
1013 report_json_context(lex)));
/* One detail message per parse context in which the error can occur. */
1018 case JSON_PARSE_VALUE:
1020 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1021 errmsg("invalid input syntax for type json"),
1022 errdetail("Expected JSON value, but found \"%s\".",
1024 report_json_context(lex)));
1026 case JSON_PARSE_STRING:
1028 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1029 errmsg("invalid input syntax for type json"),
1030 errdetail("Expected string, but found \"%s\".",
1032 report_json_context(lex)));
1034 case JSON_PARSE_ARRAY_START:
1036 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1037 errmsg("invalid input syntax for type json"),
1038 errdetail("Expected array element or \"]\", but found \"%s\".",
1040 report_json_context(lex)));
1042 case JSON_PARSE_ARRAY_NEXT:
1044 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1045 errmsg("invalid input syntax for type json"),
1046 errdetail("Expected \",\" or \"]\", but found \"%s\".",
1048 report_json_context(lex)));
1050 case JSON_PARSE_OBJECT_START:
1052 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1053 errmsg("invalid input syntax for type json"),
1054 errdetail("Expected string or \"}\", but found \"%s\".",
1056 report_json_context(lex)));
1058 case JSON_PARSE_OBJECT_LABEL:
1060 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1061 errmsg("invalid input syntax for type json"),
1062 errdetail("Expected \":\", but found \"%s\".",
1064 report_json_context(lex)));
1066 case JSON_PARSE_OBJECT_NEXT:
1068 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1069 errmsg("invalid input syntax for type json"),
1070 errdetail("Expected \",\" or \"}\", but found \"%s\".",
1072 report_json_context(lex)));
1074 case JSON_PARSE_OBJECT_COMMA:
1076 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1077 errmsg("invalid input syntax for type json"),
1078 errdetail("Expected string, but found \"%s\".",
1080 report_json_context(lex)));
/* Should be unreachable: all JsonParseContext values handled above. */
1083 elog(ERROR, "unexpected json parse state: %d", ctx);
1089 * Report an invalid input token.
1091 * lex->token_start and lex->token_terminator must identify the token.
1094 report_invalid_token(JsonLexContext *lex)
1099 /* Separate out the offending token. */
1100 toklen = lex->token_terminator - lex->token_start;
1101 token = palloc(toklen + 1);
1102 memcpy(token, lex->token_start, toklen);
1103 token[toklen] = '\0';
1106 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1107 errmsg("invalid input syntax for type json"),
1108 errdetail("Token \"%s\" is invalid.", token),
1109 report_json_context(lex)));
/*
 * NOTE(review): incomplete extraction -- declarations (line_number, ctxt,
 * ctxtlen, prefix, suffix), loop head, braces and some statements are
 * missing.  Annotations cover the visible logic only.
 */
1113 * Report a CONTEXT line for bogus JSON input.
1115 * lex->token_terminator must be set to identify the spot where we detected
1116 * the error. Note that lex->token_start might be NULL, in case we recognized
1119 * The return value isn't meaningful, but we make it non-void so that this
1120 * can be invoked inside ereport().
1123 report_json_context(JsonLexContext *lex)
1125 const char *context_start;
1126 const char *context_end;
1127 const char *line_start;
1134 /* Choose boundaries for the part of the input we will display */
1135 context_start = lex->input;
1136 context_end = lex->token_terminator;
1137 line_start = context_start;
/* Walk forward, remembering the start of the line containing the error. */
1141 /* Always advance over newlines (context_end test is just paranoia) */
1142 if (*context_start == '\n' && context_start < context_end)
1145 line_start = context_start;
1149 /* Otherwise, done as soon as we are close enough to context_end */
1150 if (context_end - context_start < 50)
1152 /* Advance to next multibyte character */
1153 if (IS_HIGHBIT_SET(*context_start))
1154 context_start += pg_mblen(context_start);
1160 * We add "..." to indicate that the excerpt doesn't start at the
1161 * beginning of the line ... but if we're within 3 characters of the
1162 * beginning of the line, we might as well just show the whole line.
1164 if (context_start - line_start <= 3)
1165 context_start = line_start;
1167 /* Get a null-terminated copy of the data to present */
1168 ctxtlen = context_end - context_start;
1169 ctxt = palloc(ctxtlen + 1);
1170 memcpy(ctxt, context_start, ctxtlen);
1171 ctxt[ctxtlen] = '\0';
1174 * Show the context, prefixing "..." if not starting at start of line, and
1175 * suffixing "..." if not ending at end of line.
1177 prefix = (context_start > line_start) ? "..." : "";
1178 suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
1180 return errcontext("JSON data, line %d: %s%s%s",
1181 line_number, prefix, ctxt, suffix);
/*
 * Extract a single, possibly multi-byte char from the input string,
 * returning it as a freshly palloc'd NUL-terminated string.
 */
static char *
extract_mb_char(char *s)
{
	char	   *res;
	int			len;

	len = pg_mblen(s);
	res = palloc(len + 1);
	memcpy(res, s, len);
	res[len] = '\0';

	return res;
}
/*
 * NOTE(review): incomplete extraction -- the switch head, local
 * declarations, braces and "break"s are missing.  Annotations cover
 * visible logic only.
 */
1202 * Turn a scalar Datum into JSON, appending the string to "result".
1204 * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
1208 datum_to_json(Datum val, bool is_null, StringInfo result,
1209 TYPCATEGORY tcategory, Oid typoutputfunc)
/* SQL NULL renders as the JSON literal null. */
1216 appendStringInfoString(result, "null");
1222 case TYPCATEGORY_ARRAY:
1223 array_to_json_internal(val, result, false);
1225 case TYPCATEGORY_COMPOSITE:
1226 composite_to_json(val, result, false);
1228 case TYPCATEGORY_BOOLEAN:
1229 if (DatumGetBool(val))
1230 appendStringInfoString(result, "true");
1232 appendStringInfoString(result, "false");
1234 case TYPCATEGORY_NUMERIC:
1235 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1238 * Don't call escape_json here if it's a valid JSON number.
1239 * Numeric output should usually be a valid JSON number and JSON
1240 * numbers shouldn't be quoted. Quote cases like "Nan" and
1241 * "Infinity", however.
1243 if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
1244 appendStringInfoString(result, outputstr);
1246 escape_json(result, outputstr);
1249 case TYPCATEGORY_JSON:
1250 /* JSON will already be escaped */
1251 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1252 appendStringInfoString(result, outputstr);
/* Type has a registered cast to json: typoutputfunc is the cast function. */
1255 case TYPCATEGORY_JSON_CAST:
1256 jsontext = DatumGetTextP(OidFunctionCall1(typoutputfunc, val));
1257 outputstr = text_to_cstring(jsontext);
1258 appendStringInfoString(result, outputstr);
/* Default: render via the type's output function as an escaped string. */
1263 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1264 escape_json(result, outputstr);
/*
 * NOTE(review): incomplete extraction -- declarations, braces and some
 * statements are missing from this excerpt.
 */
1271 * Process a single dimension of an array.
1272 * If it's the innermost dimension, output the values, otherwise call
1273 * ourselves recursively to process the next dimension.
1276 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
1277 bool *nulls, int *valcount, TYPCATEGORY tcategory,
1278 Oid typoutputfunc, bool use_line_feeds)
/* dim is 0-based; must be a valid dimension index. */
1283 Assert(dim < ndims);
1285 sep = use_line_feeds ? ",\n " : ",";
1287 appendStringInfoChar(result, '[');
1289 for (i = 1; i <= dims[dim]; i++)
1292 appendStringInfoString(result, sep);
/* Innermost dimension: emit the next value; *valcount tracks position. */
1294 if (dim + 1 == ndims)
1296 datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
1303 * Do we want line feeds on inner dimensions of arrays? For now
/* Recurse for the next dimension, without line feeds. */
1306 array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
1307 valcount, tcategory, typoutputfunc, false);
1311 appendStringInfoChar(result, ']');
/*
 * NOTE(review): incomplete extraction -- several declarations (ndim, dim,
 * nitems, elements, nulls, count, typlen, etc.), braces and cleanup calls
 * are missing from this excerpt.
 */
1315 * Turn an array into JSON.
1318 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
1320 ArrayType *v = DatumGetArrayTypeP(array);
1321 Oid element_type = ARR_ELEMTYPE(v);
1334 TYPCATEGORY tcategory;
1335 Oid castfunc = InvalidOid;
1339 nitems = ArrayGetNItems(ndim, dim);
/* Empty array short-circuits to the literal []. */
1343 appendStringInfoString(result, "[]");
1347 get_type_io_data(element_type, IOFunc_output,
1348 &typlen, &typbyval, &typalign,
1349 &typdelim, &typioparam, &typoutputfunc);
/*
 * For non-builtin element types, look for a function-based cast to json;
 * if found, use it instead of the regular output function.
 */
1351 if (element_type > FirstNormalObjectId)
1354 Form_pg_cast castForm;
1356 tuple = SearchSysCache2(CASTSOURCETARGET,
1357 ObjectIdGetDatum(element_type),
1358 ObjectIdGetDatum(JSONOID));
1359 if (HeapTupleIsValid(tuple))
1361 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1363 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1364 castfunc = typoutputfunc = castForm->castfunc;
1366 ReleaseSysCache(tuple);
1370 deconstruct_array(v, element_type, typlen, typbyval,
1371 typalign, &elements, &nulls,
/* Pick the fake/real type category that drives datum_to_json. */
1374 if (castfunc != InvalidOid)
1375 tcategory = TYPCATEGORY_JSON_CAST;
1376 else if (element_type == RECORDOID)
1377 tcategory = TYPCATEGORY_COMPOSITE;
1378 else if (element_type == JSONOID)
1379 tcategory = TYPCATEGORY_JSON;
1381 tcategory = TypeCategory(element_type);
1383 array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
1384 typoutputfunc, use_line_feeds);
/*
 * NOTE(review): incomplete extraction -- declarations (td, tupType,
 * tupTypmod, tupdesc, tuple, i, attname, origval, val, isnull, typoutput,
 * typisvarlena), braces and some statements are missing.
 */
1391 * Turn a composite / record into JSON.
1394 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
1400 HeapTupleData tmptup,
1403 bool needsep = false;
1406 sep = use_line_feeds ? ",\n " : ",";
1408 td = DatumGetHeapTupleHeader(composite);
1410 /* Extract rowtype info and find a tupdesc */
1411 tupType = HeapTupleHeaderGetTypeId(td);
1412 tupTypmod = HeapTupleHeaderGetTypMod(td);
1413 tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1415 /* Build a temporary HeapTuple control structure */
1416 tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
1420 appendStringInfoChar(result, '{');
/* Emit one "name":value pair per non-dropped attribute. */
1422 for (i = 0; i < tupdesc->natts; i++)
1428 TYPCATEGORY tcategory;
1431 Oid castfunc = InvalidOid;
1433 if (tupdesc->attrs[i]->attisdropped)
1437 appendStringInfoString(result, sep);
1440 attname = NameStr(tupdesc->attrs[i]->attname);
1441 escape_json(result, attname);
1442 appendStringInfoChar(result, ':');
1444 origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
1446 getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
1447 &typoutput, &typisvarlena);
/* For non-builtin column types, prefer a function-based cast to json. */
1449 if (tupdesc->attrs[i]->atttypid > FirstNormalObjectId)
1451 HeapTuple cast_tuple;
1452 Form_pg_cast castForm;
1454 cast_tuple = SearchSysCache2(CASTSOURCETARGET,
1455 ObjectIdGetDatum(tupdesc->attrs[i]->atttypid),
1456 ObjectIdGetDatum(JSONOID));
1457 if (HeapTupleIsValid(cast_tuple))
1459 castForm = (Form_pg_cast) GETSTRUCT(cast_tuple);
1461 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1462 castfunc = typoutput = castForm->castfunc;
1464 ReleaseSysCache(cast_tuple);
1468 if (castfunc != InvalidOid)
1469 tcategory = TYPCATEGORY_JSON_CAST;
1470 else if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
1471 tcategory = TYPCATEGORY_ARRAY;
1472 else if (tupdesc->attrs[i]->atttypid == RECORDOID)
1473 tcategory = TYPCATEGORY_COMPOSITE;
1474 else if (tupdesc->attrs[i]->atttypid == JSONOID)
1475 tcategory = TYPCATEGORY_JSON;
1477 tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
1480 * If we have a toasted datum, forcibly detoast it here to avoid
1481 * memory leakage inside the type's output routine.
1483 if (typisvarlena && !isnull)
1484 val = PointerGetDatum(PG_DETOAST_DATUM(origval));
1488 datum_to_json(val, isnull, result, tcategory, typoutput);
1490 /* Clean up detoasted copy, if any */
1492 pfree(DatumGetPointer(val));
1495 appendStringInfoChar(result, '}');
1496 ReleaseTupleDesc(tupdesc);
1500 * SQL function array_to_json(row)
1503 array_to_json(PG_FUNCTION_ARGS)
1505 Datum array = PG_GETARG_DATUM(0);
1508 result = makeStringInfo();
1510 array_to_json_internal(array, result, false);
1512 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1516 * SQL function array_to_json(row, prettybool)
1519 array_to_json_pretty(PG_FUNCTION_ARGS)
1521 Datum array = PG_GETARG_DATUM(0);
1522 bool use_line_feeds = PG_GETARG_BOOL(1);
1525 result = makeStringInfo();
1527 array_to_json_internal(array, result, use_line_feeds);
1529 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1533 * SQL function row_to_json(row)
1536 row_to_json(PG_FUNCTION_ARGS)
1538 Datum array = PG_GETARG_DATUM(0);
1541 result = makeStringInfo();
1543 composite_to_json(array, result, false);
1545 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1549 * SQL function row_to_json(row, prettybool)
1552 row_to_json_pretty(PG_FUNCTION_ARGS)
1554 Datum array = PG_GETARG_DATUM(0);
1555 bool use_line_feeds = PG_GETARG_BOOL(1);
1558 result = makeStringInfo();
1560 composite_to_json(array, result, use_line_feeds);
1562 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1566 * SQL function to_json(anyvalue)
1569 to_json(PG_FUNCTION_ARGS)
1571 Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
1575 TYPCATEGORY tcategory;
1578 Oid castfunc = InvalidOid;
1580 if (val_type == InvalidOid)
1582 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1583 errmsg("could not determine input data type")));
1586 result = makeStringInfo();
1588 orig_val = PG_ARGISNULL(0) ? (Datum) 0 : PG_GETARG_DATUM(0);
1590 getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
1592 if (val_type > FirstNormalObjectId)
1595 Form_pg_cast castForm;
1597 tuple = SearchSysCache2(CASTSOURCETARGET,
1598 ObjectIdGetDatum(val_type),
1599 ObjectIdGetDatum(JSONOID));
1600 if (HeapTupleIsValid(tuple))
1602 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1604 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1605 castfunc = typoutput = castForm->castfunc;
1607 ReleaseSysCache(tuple);
1611 if (castfunc != InvalidOid)
1612 tcategory = TYPCATEGORY_JSON_CAST;
1613 else if (val_type == RECORDARRAYOID)
1614 tcategory = TYPCATEGORY_ARRAY;
1615 else if (val_type == RECORDOID)
1616 tcategory = TYPCATEGORY_COMPOSITE;
1617 else if (val_type == JSONOID)
1618 tcategory = TYPCATEGORY_JSON;
1620 tcategory = TypeCategory(val_type);
1623 * If we have a toasted datum, forcibly detoast it here to avoid memory
1624 * leakage inside the type's output routine.
1626 if (typisvarlena && orig_val != (Datum) 0)
1627 val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
1631 datum_to_json(val, false, result, tcategory, typoutput);
1633 /* Clean up detoasted copy, if any */
1634 if (val != orig_val)
1635 pfree(DatumGetPointer(val));
1637 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1641 * json_agg transition function
1644 json_agg_transfn(PG_FUNCTION_ARGS)
1646 Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
1647 MemoryContext aggcontext,
1652 TYPCATEGORY tcategory;
1655 Oid castfunc = InvalidOid;
1657 if (val_type == InvalidOid)
1659 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1660 errmsg("could not determine input data type")));
1662 if (!AggCheckCallContext(fcinfo, &aggcontext))
1664 /* cannot be called directly because of internal-type argument */
1665 elog(ERROR, "json_agg_transfn called in non-aggregate context");
1668 if (PG_ARGISNULL(0))
1671 * Make this StringInfo in a context where it will persist for the
1672 * duration off the aggregate call. It's only needed for this initial
1673 * piece, as the StringInfo routines make sure they use the right
1674 * context to enlarge the object if necessary.
1676 oldcontext = MemoryContextSwitchTo(aggcontext);
1677 state = makeStringInfo();
1678 MemoryContextSwitchTo(oldcontext);
1680 appendStringInfoChar(state, '[');
1684 state = (StringInfo) PG_GETARG_POINTER(0);
1685 appendStringInfoString(state, ", ");
1688 /* fast path for NULLs */
1689 if (PG_ARGISNULL(1))
1691 orig_val = (Datum) 0;
1692 datum_to_json(orig_val, true, state, 0, InvalidOid);
1693 PG_RETURN_POINTER(state);
1697 orig_val = PG_GETARG_DATUM(1);
1699 getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
1701 if (val_type > FirstNormalObjectId)
1704 Form_pg_cast castForm;
1706 tuple = SearchSysCache2(CASTSOURCETARGET,
1707 ObjectIdGetDatum(val_type),
1708 ObjectIdGetDatum(JSONOID));
1709 if (HeapTupleIsValid(tuple))
1711 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1713 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1714 castfunc = typoutput = castForm->castfunc;
1716 ReleaseSysCache(tuple);
1720 if (castfunc != InvalidOid)
1721 tcategory = TYPCATEGORY_JSON_CAST;
1722 else if (val_type == RECORDARRAYOID)
1723 tcategory = TYPCATEGORY_ARRAY;
1724 else if (val_type == RECORDOID)
1725 tcategory = TYPCATEGORY_COMPOSITE;
1726 else if (val_type == JSONOID)
1727 tcategory = TYPCATEGORY_JSON;
1729 tcategory = TypeCategory(val_type);
1732 * If we have a toasted datum, forcibly detoast it here to avoid memory
1733 * leakage inside the type's output routine.
1736 val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
1740 if (!PG_ARGISNULL(0) &&
1741 (tcategory == TYPCATEGORY_ARRAY || tcategory == TYPCATEGORY_COMPOSITE))
1743 appendStringInfoString(state, "\n ");
1746 datum_to_json(val, false, state, tcategory, typoutput);
1748 /* Clean up detoasted copy, if any */
1749 if (val != orig_val)
1750 pfree(DatumGetPointer(val));
1753 * The transition type for array_agg() is declared to be "internal", which
1754 * is a pass-by-value type the same size as a pointer. So we can safely
1755 * pass the ArrayBuildState pointer through nodeAgg.c's machinations.
1757 PG_RETURN_POINTER(state);
1761 * json_agg final function
1764 json_agg_finalfn(PG_FUNCTION_ARGS)
1768 /* cannot be called directly because of internal-type argument */
1769 Assert(AggCheckCallContext(fcinfo, NULL));
1771 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
1776 appendStringInfoChar(state, ']');
1778 PG_RETURN_TEXT_P(cstring_to_text(state->data));
1782 * Produce a JSON string literal, properly escaping characters in the text.
1785 escape_json(StringInfo buf, const char *str)
1789 appendStringInfoCharMacro(buf, '\"');
1790 for (p = str; *p; p++)
1795 appendStringInfoString(buf, "\\b");
1798 appendStringInfoString(buf, "\\f");
1801 appendStringInfoString(buf, "\\n");
1804 appendStringInfoString(buf, "\\r");
1807 appendStringInfoString(buf, "\\t");
1810 appendStringInfoString(buf, "\\\"");
1813 appendStringInfoString(buf, "\\\\");
1816 if ((unsigned char) *p < ' ')
1817 appendStringInfo(buf, "\\u%04x", (int) *p);
1819 appendStringInfoCharMacro(buf, *p);
1823 appendStringInfoCharMacro(buf, '\"');