1 /*-------------------------------------------------------------------------
4 * JSON data type support.
6 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/utils/adt/json.c
12 *-------------------------------------------------------------------------
16 #include "access/htup_details.h"
17 #include "access/transam.h"
18 #include "catalog/pg_cast.h"
19 #include "catalog/pg_type.h"
20 #include "executor/spi.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/pqformat.h"
23 #include "mb/pg_wchar.h"
24 #include "parser/parse_coerce.h"
25 #include "utils/array.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/json.h"
29 #include "utils/jsonapi.h"
30 #include "utils/typcache.h"
31 #include "utils/syscache.h"
34 * The context of the parser is maintained by the recursive descent
35 * mechanism, but is passed explicitly to the error reporting routine
36 * for better diagnostics.
/* NOTE(review): the closing "} JsonParseContext;" line of this typedef is
 * not visible in this excerpt; the prototypes below use that name. */
38 typedef enum /* contexts of JSON parser */
40 JSON_PARSE_VALUE, /* expecting a value */
41 JSON_PARSE_STRING, /* expecting a string (for a field name) */
42 JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
43 JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
44 JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
45 JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
46 JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
47 JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */
48 JSON_PARSE_END /* saw the end of a document, expect nothing */
/* Forward declarations for the lexer, recursive-descent parser, error
 * reporting, and datum-to-JSON output routines defined below. */
51 static inline void json_lex(JsonLexContext *lex);
52 static inline void json_lex_string(JsonLexContext *lex);
53 static inline void json_lex_number(JsonLexContext *lex, char *s);
54 static inline void parse_scalar(JsonLexContext *lex, JsonSemAction sem);
55 static void parse_object_field(JsonLexContext *lex, JsonSemAction sem);
56 static void parse_object(JsonLexContext *lex, JsonSemAction sem);
57 static void parse_array_element(JsonLexContext *lex, JsonSemAction sem);
58 static void parse_array(JsonLexContext *lex, JsonSemAction sem);
59 static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
60 static void report_invalid_token(JsonLexContext *lex);
61 static int report_json_context(JsonLexContext *lex);
62 static char *extract_mb_char(char *s);
63 static void composite_to_json(Datum composite, StringInfo result,
65 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
66 Datum *vals, bool *nulls, int *valcount,
67 TYPCATEGORY tcategory, Oid typoutputfunc,
69 static void array_to_json_internal(Datum array, StringInfo result,
72 /* the null action object used for pure validation */
/* All ten semantic callbacks are NULL, so pg_parse_json with this action
 * validates syntax only.  NOTE(review): JsonSemAction appears to be a
 * pointer typedef over struct jsonSemAction -- confirm in utils/jsonapi.h. */
73 static jsonSemAction nullSemAction =
75 NULL, NULL, NULL, NULL, NULL,
76 NULL, NULL, NULL, NULL, NULL
78 static JsonSemAction NullSemAction = &nullSemAction;
80 /* Recursive Descent parser support routines */
85 * what is the current look_ahead token?
/* Returns the current token's type without consuming it. */
87 static inline JsonTokenType
88 lex_peek(JsonLexContext *lex)
90 return lex->token_type;
96 * accept the look_ahead token and move the lexer to the next token if the
97 * look_ahead token matches the token parameter. In that case, and if required,
98 * also hand back the de-escaped lexeme.
100 * returns true if the token matched, false otherwise.
103 lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
105 if (lex->token_type == token)
109 if (lex->token_type == JSON_TOKEN_STRING)
/* strings use the de-escaped value accumulated by json_lex_string */
111 if (lex->strval != NULL)
112 *lexeme = pstrdup(lex->strval->data);
/* non-string tokens: hand back a copy of the raw token text */
116 int len = (lex->token_terminator - lex->token_start);
117 char *tokstr = palloc(len + 1);
119 memcpy(tokstr, lex->token_start, len);
/* NOTE(review): the tail of this function (NUL-terminating tokstr,
 * advancing via json_lex(), and the true/false returns) is not visible
 * in this excerpt. */
133 * move the lexer to the next token if the current look_ahead token matches
134 * the parameter token. Otherwise, report an error.
137 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
/* report_parse_error ereports at ERROR level, so it does not return */
139 if (!lex_accept(lex, token, NULL))
140 report_parse_error(ctx, lex);
144 * All the defined type categories are upper case, so use lower case here
145 * so we avoid any possible clash.
147 /* fake type category for JSON so we can distinguish it in datum_to_json */
148 #define TYPCATEGORY_JSON 'j'
149 /* fake category for types that have a cast to json */
150 #define TYPCATEGORY_JSON_CAST 'c'
151 /* letters appearing in numeric output that aren't valid in a JSON number */
152 #define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
153 /* chars to consider as part of an alphanumeric token */
154 #define JSON_ALPHANUMERIC_CHAR(c) \
155 (((c) >= 'a' && (c) <= 'z') || \
156 ((c) >= 'A' && (c) <= 'Z') || \
157 ((c) >= '0' && (c) <= '9') || \
/* NOTE(review): the final alternative(s) of this macro (e.g. '_' and
 * high-bit-set bytes) are not visible in this excerpt -- confirm upstream. */
/*
 * json_in: input function for the json type.  Validates the cstring with
 * the recursive-descent parser (no semantic actions) and stores it as text.
 */
165 json_in(PG_FUNCTION_ARGS)
167 char *json = PG_GETARG_CSTRING(0);
168 text *result = cstring_to_text(json);
/* validate only; pg_parse_json ereports on invalid JSON */
172 lex = makeJsonLexContext(result, false);
173 pg_parse_json(lex, NullSemAction);
175 /* Internal representation is the same as text, for now */
176 PG_RETURN_TEXT_P(result);
/* json_out: output function -- json is stored as text, so just convert. */
183 json_out(PG_FUNCTION_ARGS)
185 /* we needn't detoast because text_to_cstring will handle that */
186 Datum txt = PG_GETARG_DATUM(0);
188 PG_RETURN_CSTRING(TextDatumGetCString(txt));
/* json_send: binary output -- send the json text payload verbatim. */
195 json_send(PG_FUNCTION_ARGS)
197 text *t = PG_GETARG_TEXT_PP(0);
200 pq_begintypsend(&buf);
201 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
202 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
/*
 * json_recv: binary input -- read the remaining message bytes as text,
 * then validate them as JSON before returning.
 */
209 json_recv(PG_FUNCTION_ARGS)
211 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
217 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
219 result = palloc(nbytes + VARHDRSZ);
220 SET_VARSIZE(result, nbytes + VARHDRSZ);
221 memcpy(VARDATA(result), str, nbytes);
/* validate only; errors are raised via ereport inside the parser */
224 lex = makeJsonLexContext(result, false);
225 pg_parse_json(lex, NullSemAction);
227 PG_RETURN_TEXT_P(result);
233 * lex constructor, with or without StringInfo object
234 * for de-escaped lexemes.
236 * Without is better as it makes the processing faster, so only make one
237 * if really required.
240 makeJsonLexContext(text *json, bool need_escapes)
242 JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
/* palloc0 zeroed the struct, so fields not set here start as 0/NULL */
244 lex->input = lex->token_terminator = lex->line_start = VARDATA(json);
245 lex->line_number = 1;
246 lex->input_length = VARSIZE(json) - VARHDRSZ;
/* NOTE(review): presumably guarded by "if (need_escapes)" -- that guard
 * line is not visible in this excerpt; confirm before relying on it. */
248 lex->strval = makeStringInfo();
255 * Publicly visible entry point for the JSON parser.
257 * lex is a lexing context, set up for the json to be processed by calling
258 * makeJsonLexContext(). sem is a structure of function pointers to semantic
259 * action routines to be called at appropriate spots during parsing, and a
260 * pointer to a state object to be passed to those routines.
263 pg_parse_json(JsonLexContext *lex, JsonSemAction sem)
267 /* get the initial token */
272 /* parse by recursive descent */
275 case JSON_TOKEN_OBJECT_START:
276 parse_object(lex, sem);
278 case JSON_TOKEN_ARRAY_START:
279 parse_array(lex, sem);
282 parse_scalar(lex, sem); /* json can be a bare scalar */
/* after the document, only end-of-input may follow */
285 lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
290 * Recursive Descent parse routines. There is one for each structural
291 * element in a json document:
292 * - scalar (string, number, true, false, null)
/*
 * parse_scalar: consume one scalar token, invoking the scalar semantic
 * action (if any) with the token's lexeme and type.
 */
299 parse_scalar(JsonLexContext *lex, JsonSemAction sem)
302 json_scalar_action sfunc = sem->scalar;
304 JsonTokenType tok = lex_peek(lex);
/* only capture the lexeme if a semantic callback wants it */
306 valaddr = sfunc == NULL ? NULL : &val;
308 /* a scalar must be a string, a number, true, false, or null */
311 case JSON_TOKEN_TRUE:
312 lex_accept(lex, JSON_TOKEN_TRUE, valaddr);
314 case JSON_TOKEN_FALSE:
315 lex_accept(lex, JSON_TOKEN_FALSE, valaddr);
317 case JSON_TOKEN_NULL:
318 lex_accept(lex, JSON_TOKEN_NULL, valaddr);
320 case JSON_TOKEN_NUMBER:
321 lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);
323 case JSON_TOKEN_STRING:
324 lex_accept(lex, JSON_TOKEN_STRING, valaddr);
/* anything else is not a valid scalar; does not return */
327 report_parse_error(JSON_PARSE_VALUE, lex);
331 (*sfunc) (sem->semstate, val, tok);
/*
 * parse_object_field: parse one "fieldname" : value pair, invoking the
 * object_field_start/end semantic actions around the value.
 */
335 parse_object_field(JsonLexContext *lex, JsonSemAction sem)
338 * an object field is "fieldname" : value where value can be a scalar,
342 char *fname = NULL; /* keep compiler quiet */
343 json_ofield_action ostart = sem->object_field_start;
344 json_ofield_action oend = sem->object_field_end;
346 char **fnameaddr = NULL;
/* only capture the field name if a callback will receive it */
349 if (ostart != NULL || oend != NULL)
352 if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
353 report_parse_error(JSON_PARSE_STRING, lex);
355 lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
358 isnull = tok == JSON_TOKEN_NULL;
361 (*ostart) (sem->semstate, fname, isnull);
/* the field value may itself be an object, array, or scalar */
365 case JSON_TOKEN_OBJECT_START:
366 parse_object(lex, sem);
368 case JSON_TOKEN_ARRAY_START:
369 parse_array(lex, sem);
372 parse_scalar(lex, sem);
376 (*oend) (sem->semstate, fname, isnull);
/*
 * parse_object: parse { field, field, ... }, invoking the object_start/end
 * semantic actions around the fields.
 */
383 parse_object(JsonLexContext *lex, JsonSemAction sem)
386 * an object is a possibly empty sequence of object fields, separated by
387 * commas and surrounded by curly braces.
389 json_struct_action ostart = sem->object_start;
390 json_struct_action oend = sem->object_end;
394 (*ostart) (sem->semstate);
397 * Data inside an object is at a higher nesting level than the object
398 * itself. Note that we increment this after we call the semantic routine
399 * for the object start and restore it before we call the routine for the
404 /* we know this will succeed, just clearing the token */
405 lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START);
410 case JSON_TOKEN_STRING:
/* first field, then any number of comma-separated fields */
411 parse_object_field(lex, sem);
412 while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
413 parse_object_field(lex, sem);
415 case JSON_TOKEN_OBJECT_END:
418 /* case of an invalid initial token inside the object */
419 report_parse_error(JSON_PARSE_OBJECT_START, lex);
422 lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
427 (*oend) (sem->semstate);
/*
 * parse_array_element: parse one array element, invoking the
 * array_element_start/end semantic actions around it.
 */
431 parse_array_element(JsonLexContext *lex, JsonSemAction sem)
433 json_aelem_action astart = sem->array_element_start;
434 json_aelem_action aend = sem->array_element_end;
435 JsonTokenType tok = lex_peek(lex);
439 isnull = tok == JSON_TOKEN_NULL;
442 (*astart) (sem->semstate, isnull);
444 /* an array element is any object, array or scalar */
447 case JSON_TOKEN_OBJECT_START:
448 parse_object(lex, sem);
450 case JSON_TOKEN_ARRAY_START:
451 parse_array(lex, sem);
454 parse_scalar(lex, sem);
458 (*aend) (sem->semstate, isnull);
/*
 * parse_array: parse [ elem, elem, ... ], invoking the array_start/end
 * semantic actions around the elements.
 */
462 parse_array(JsonLexContext *lex, JsonSemAction sem)
465 * an array is a possibly empty sequence of array elements, separated by
466 * commas and surrounded by square brackets.
468 json_struct_action astart = sem->array_start;
469 json_struct_action aend = sem->array_end;
472 (*astart) (sem->semstate);
475 * Data inside an array is at a higher nesting level than the array
476 * itself. Note that we increment this after we call the semantic routine
477 * for the array start and restore it before we call the routine for the
482 lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
483 if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
/* first element, then any number of comma-separated elements */
486 parse_array_element(lex, sem);
488 while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
489 parse_array_element(lex, sem);
492 lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
497 (*aend) (sem->semstate);
501 * Lex one token from the input stream.
/*
 * Sets lex->token_start/token_terminator/token_type for the next token,
 * dispatching to json_lex_string / json_lex_number for strings and numbers.
 * Errors are reported via report_invalid_token (ereport, does not return).
 */
504 json_lex(JsonLexContext *lex)
509 /* Skip leading whitespace. */
510 s = lex->token_terminator;
511 len = s - lex->input;
512 while (len < lex->input_length &&
513 (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
520 lex->token_start = s;
522 /* Determine token type. */
523 if (len >= lex->input_length)
/* end of input: token_start of NULL marks EOF for error reporting */
525 lex->token_start = NULL;
526 lex->prev_token_terminator = lex->token_terminator;
527 lex->token_terminator = s;
528 lex->token_type = JSON_TOKEN_END;
533 /* Single-character token, some kind of punctuation mark. */
535 lex->prev_token_terminator = lex->token_terminator;
536 lex->token_terminator = s + 1;
537 lex->token_type = JSON_TOKEN_OBJECT_START;
540 lex->prev_token_terminator = lex->token_terminator;
541 lex->token_terminator = s + 1;
542 lex->token_type = JSON_TOKEN_OBJECT_END;
545 lex->prev_token_terminator = lex->token_terminator;
546 lex->token_terminator = s + 1;
547 lex->token_type = JSON_TOKEN_ARRAY_START;
550 lex->prev_token_terminator = lex->token_terminator;
551 lex->token_terminator = s + 1;
552 lex->token_type = JSON_TOKEN_ARRAY_END;
555 lex->prev_token_terminator = lex->token_terminator;
556 lex->token_terminator = s + 1;
557 lex->token_type = JSON_TOKEN_COMMA;
560 lex->prev_token_terminator = lex->token_terminator;
561 lex->token_terminator = s + 1;
562 lex->token_type = JSON_TOKEN_COLON;
566 json_lex_string(lex);
567 lex->token_type = JSON_TOKEN_STRING;
570 /* Negative number. */
571 json_lex_number(lex, s + 1);
572 lex->token_type = JSON_TOKEN_NUMBER;
584 /* Positive number. */
585 json_lex_number(lex, s);
586 lex->token_type = JSON_TOKEN_NUMBER;
593 * We're not dealing with a string, number, legal
594 * punctuation mark, or end of string. The only legal
595 * tokens we might find here are true, false, and null,
596 * but for error reporting purposes we scan until we see a
597 * non-alphanumeric character. That way, we can report
598 * the whole word as an unexpected token, rather than just
599 * some unintuitive prefix thereof.
601 for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && p - s < lex->input_length - len; p++)
605 * We got some sort of unexpected punctuation or an
606 * otherwise unexpected character, so just complain about
607 * that one character.
611 lex->prev_token_terminator = lex->token_terminator;
612 lex->token_terminator = s + 1;
613 report_invalid_token(lex);
617 * We've got a real alphanumeric token here. If it
618 * happens to be true, false, or null, all is well. If
621 lex->prev_token_terminator = lex->token_terminator;
622 lex->token_terminator = p;
/* NOTE(review): an enclosing "if (p - s == 4)" length guard appears to be
 * elided here (line 624); without it these memcmp calls could over-read. */
625 if (memcmp(s, "true", 4) == 0)
626 lex->token_type = JSON_TOKEN_TRUE;
627 else if (memcmp(s, "null", 4) == 0)
628 lex->token_type = JSON_TOKEN_NULL;
630 report_invalid_token(lex);
632 else if (p - s == 5 && memcmp(s, "false", 5) == 0)
633 lex->token_type = JSON_TOKEN_FALSE;
635 report_invalid_token(lex);
638 } /* end of switch */
642 * The next token in the input stream is known to be a string; lex it.
/*
 * Scans from the opening '"' to the closing '"', validating escapes and,
 * when lex->strval is non-NULL, accumulating the de-escaped value there.
 * All errors are raised via ereport/report_invalid_token (no return).
 */
645 json_lex_string(JsonLexContext *lex)
650 if (lex->strval != NULL)
651 resetStringInfo(lex->strval);
653 len = lex->token_start - lex->input;
655 for (s = lex->token_start + 1; *s != '"'; s++, len++)
657 /* Premature end of the string. */
658 if (len >= lex->input_length)
660 lex->token_terminator = s;
661 report_invalid_token(lex);
663 else if ((unsigned char) *s < 32)
665 /* Per RFC4627, these characters MUST be escaped. */
666 /* Since *s isn't printable, exclude it from the context string */
667 lex->token_terminator = s;
669 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
670 errmsg("invalid input syntax for type json"),
671 errdetail("Character with value 0x%02x must be escaped.",
673 report_json_context(lex)));
677 /* OK, we have an escape character. */
680 if (len >= lex->input_length)
682 lex->token_terminator = s;
683 report_invalid_token(lex);
/* \uXXXX: accumulate exactly four hex digits into ch */
690 for (i = 1; i <= 4; i++)
694 if (len >= lex->input_length)
696 lex->token_terminator = s;
697 report_invalid_token(lex);
699 else if (*s >= '0' && *s <= '9')
700 ch = (ch * 16) + (*s - '0');
701 else if (*s >= 'a' && *s <= 'f')
702 ch = (ch * 16) + (*s - 'a') + 10;
703 else if (*s >= 'A' && *s <= 'F')
704 ch = (ch * 16) + (*s - 'A') + 10;
707 lex->token_terminator = s + pg_mblen(s);
709 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
710 errmsg("invalid input syntax for type json"),
711 errdetail("\"\\u\" must be followed by four hexadecimal digits."),
712 report_json_context(lex)));
715 if (lex->strval != NULL)
/* convert the code point to UTF8, then to the server encoding */
721 unicode_to_utf8(ch, (unsigned char *) utf8str);
722 utf8len = pg_utf_mblen((unsigned char *) utf8str);
723 utf8str[utf8len] = '\0';
/* BUGFIX: pass the actual byte length, not 1; a multi-byte escape
 * was previously truncated to its first byte */
724 converted = pg_any_to_server(utf8str, utf8len, PG_UTF8);
725 appendStringInfoString(lex->strval, converted);
/* pg_any_to_server may return its input unchanged; only free a copy */
726 if (converted != utf8str)
731 else if (lex->strval != NULL)
738 appendStringInfoChar(lex->strval, *s);
741 appendStringInfoChar(lex->strval, '\b');
744 appendStringInfoChar(lex->strval, '\f');
747 appendStringInfoChar(lex->strval, '\n');
750 appendStringInfoChar(lex->strval, '\r');
753 appendStringInfoChar(lex->strval, '\t');
756 /* Not a valid string escape, so error out. */
757 lex->token_terminator = s + pg_mblen(s);
759 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
760 errmsg("invalid input syntax for type json"),
761 errdetail("Escape sequence \"\\%s\" is invalid.",
763 report_json_context(lex)));
766 else if (strchr("\"\\/bfnrt", *s) == NULL)
769 * Simpler processing if we're not bothered about de-escaping
771 * It's very tempting to remove the strchr() call here and
772 * replace it with a switch statement, but testing so far has
773 * shown it's not a performance win.
775 lex->token_terminator = s + pg_mblen(s);
777 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
778 errmsg("invalid input syntax for type json"),
779 errdetail("Escape sequence \"\\%s\" is invalid.",
781 report_json_context(lex)));
785 else if (lex->strval != NULL)
787 appendStringInfoChar(lex->strval, *s);
792 /* Hooray, we found the end of the string! */
793 lex->prev_token_terminator = lex->token_terminator;
794 lex->token_terminator = s + 1;
797 /*-------------------------------------------------------------------------
798 * The next token in the input stream is known to be a number; lex it.
800 * In JSON, a number consists of four parts:
802 * (1) An optional minus sign ('-').
804 * (2) Either a single '0', or a string of one or more digits that does not
807 * (3) An optional decimal part, consisting of a period ('.') followed by
808 * one or more digits. (Note: While this part can be omitted
809 * completely, it's not OK to have only the decimal point without
810 * any digits afterwards.)
812 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
813 * followed by '+' or '-', followed by one or more digits. (Note:
814 * As with the decimal part, if 'e' or 'E' is present, it must be
815 * followed by at least one digit.)
817 * The 's' argument to this function points to the ostensible beginning
818 * of part 2 - i.e. the character after any optional minus sign, and the
819 * first character of the string if there is none.
821 *-------------------------------------------------------------------------
824 json_lex_number(JsonLexContext *lex, char *s)
830 len = s - lex->input;
831 /* Part (1): leading sign indicator. */
832 /* Caller already did this for us; so do nothing. */
834 /* Part (2): parse main digit string. */
840 else if (*s >= '1' && *s <= '9')
/* BUGFIX: test the bound before dereferencing *s -- the input is not
 * NUL-terminated, so the old operand order read one byte past the end.
 * This now matches the exponent loop below. */
846 } while (len < lex->input_length && *s >= '0' && *s <= '9');
851 /* Part (3): parse optional decimal portion. */
852 if (len < lex->input_length && *s == '.')
856 if (len == lex->input_length || *s < '0' || *s > '9')
/* BUGFIX: same bounds-before-dereference reordering as above */
864 } while (len < lex->input_length && *s >= '0' && *s <= '9');
868 /* Part (4): parse optional exponent. */
869 if (len < lex->input_length && (*s == 'e' || *s == 'E'))
873 if (len < lex->input_length && (*s == '+' || *s == '-'))
878 if (len == lex->input_length || *s < '0' || *s > '9')
886 } while (len < lex->input_length && *s >= '0' && *s <= '9');
891 * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
892 * here should be considered part of the token for error-reporting
895 for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && len < lex->input_length; p++, len++)
897 lex->prev_token_terminator = lex->token_terminator;
898 lex->token_terminator = p;
900 report_invalid_token(lex);
904 * Report a parse error.
906 * lex->token_start and lex->token_terminator must identify the current token.
/* ereport at ERROR level raises an error; this function never returns. */
909 report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
914 /* Handle case where the input ended prematurely. */
915 if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
917 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
918 errmsg("invalid input syntax for type json"),
919 errdetail("The input string ended unexpectedly."),
920 report_json_context(lex)));
922 /* Separate out the current token. */
923 toklen = lex->token_terminator - lex->token_start;
924 token = palloc(toklen + 1);
925 memcpy(token, lex->token_start, toklen);
926 token[toklen] = '\0';
928 /* Complain, with the appropriate detail message. */
929 if (ctx == JSON_PARSE_END)
931 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
932 errmsg("invalid input syntax for type json"),
933 errdetail("Expected end of input, but found \"%s\".",
935 report_json_context(lex)));
/* one detail message per JsonParseContext value */
940 case JSON_PARSE_VALUE:
942 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
943 errmsg("invalid input syntax for type json"),
944 errdetail("Expected JSON value, but found \"%s\".",
946 report_json_context(lex)));
948 case JSON_PARSE_STRING:
950 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
951 errmsg("invalid input syntax for type json"),
952 errdetail("Expected string, but found \"%s\".",
954 report_json_context(lex)));
956 case JSON_PARSE_ARRAY_START:
958 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
959 errmsg("invalid input syntax for type json"),
960 errdetail("Expected array element or \"]\", but found \"%s\".",
962 report_json_context(lex)));
964 case JSON_PARSE_ARRAY_NEXT:
966 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
967 errmsg("invalid input syntax for type json"),
968 errdetail("Expected \",\" or \"]\", but found \"%s\".",
970 report_json_context(lex)));
972 case JSON_PARSE_OBJECT_START:
974 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
975 errmsg("invalid input syntax for type json"),
976 errdetail("Expected string or \"}\", but found \"%s\".",
978 report_json_context(lex)));
980 case JSON_PARSE_OBJECT_LABEL:
982 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
983 errmsg("invalid input syntax for type json"),
984 errdetail("Expected \":\", but found \"%s\".",
986 report_json_context(lex)));
988 case JSON_PARSE_OBJECT_NEXT:
990 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
991 errmsg("invalid input syntax for type json"),
992 errdetail("Expected \",\" or \"}\", but found \"%s\".",
994 report_json_context(lex)));
996 case JSON_PARSE_OBJECT_COMMA:
998 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
999 errmsg("invalid input syntax for type json"),
1000 errdetail("Expected string, but found \"%s\".",
1002 report_json_context(lex)));
/* catch any JsonParseContext value added without a message above */
1005 elog(ERROR, "unexpected json parse state: %d", ctx);
1011 * Report an invalid input token.
1013 * lex->token_start and lex->token_terminator must identify the token.
/* ereport at ERROR level raises an error; this function never returns. */
1016 report_invalid_token(JsonLexContext *lex)
1021 /* Separate out the offending token. */
1022 toklen = lex->token_terminator - lex->token_start;
1023 token = palloc(toklen + 1);
1024 memcpy(token, lex->token_start, toklen);
1025 token[toklen] = '\0';
1028 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1029 errmsg("invalid input syntax for type json"),
1030 errdetail("Token \"%s\" is invalid.", token),
1031 report_json_context(lex)));
1035 * Report a CONTEXT line for bogus JSON input.
1037 * lex->token_terminator must be set to identify the spot where we detected
1038 * the error. Note that lex->token_start might be NULL, in case we recognized
1041 * The return value isn't meaningful, but we make it non-void so that this
1042 * can be invoked inside ereport().
1045 report_json_context(JsonLexContext *lex)
1047 const char *context_start;
1048 const char *context_end;
1049 const char *line_start;
1056 /* Choose boundaries for the part of the input we will display */
1057 context_start = lex->input;
1058 context_end = lex->token_terminator;
1059 line_start = context_start;
1063 /* Always advance over newlines (context_end test is just paranoia) */
1064 if (*context_start == '\n' && context_start < context_end)
1067 line_start = context_start;
1071 /* Otherwise, done as soon as we are close enough to context_end */
1072 if (context_end - context_start < 50)
1074 /* Advance to next multibyte character */
1075 if (IS_HIGHBIT_SET(*context_start))
1076 context_start += pg_mblen(context_start);
1082 * We add "..." to indicate that the excerpt doesn't start at the
1083 * beginning of the line ... but if we're within 3 characters of the
1084 * beginning of the line, we might as well just show the whole line.
1086 if (context_start - line_start <= 3)
1087 context_start = line_start;
1089 /* Get a null-terminated copy of the data to present */
1090 ctxtlen = context_end - context_start;
1091 ctxt = palloc(ctxtlen + 1);
1092 memcpy(ctxt, context_start, ctxtlen);
1093 ctxt[ctxtlen] = '\0';
1096 * Show the context, prefixing "..." if not starting at start of line, and
1097 * suffixing "..." if not ending at end of line.
1099 prefix = (context_start > line_start) ? "..." : "";
1100 suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
1102 return errcontext("JSON data, line %d: %s%s%s",
1103 line_number, prefix, ctxt, suffix);
1107 * Extract a single, possibly multi-byte char from the input string.
/* Returns a palloc'd, NUL-terminated copy of the first character at s.
 * NOTE(review): len is presumably set from pg_mblen(s) on an elided line. */
1110 extract_mb_char(char *s)
1116 res = palloc(len + 1);
1117 memcpy(res, s, len);
1124 * Turn a scalar Datum into JSON, appending the string to "result".
1126 * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
/* tcategory selects the rendering; typoutputfunc is the type's output
 * function (or, for TYPCATEGORY_JSON_CAST, its cast-to-json function). */
1130 datum_to_json(Datum val, bool is_null, StringInfo result,
1131 TYPCATEGORY tcategory, Oid typoutputfunc)
1138 appendStringInfoString(result, "null");
1144 case TYPCATEGORY_ARRAY:
1145 array_to_json_internal(val, result, false);
1147 case TYPCATEGORY_COMPOSITE:
1148 composite_to_json(val, result, false);
1150 case TYPCATEGORY_BOOLEAN:
1151 if (DatumGetBool(val))
1152 appendStringInfoString(result, "true");
1154 appendStringInfoString(result, "false");
1156 case TYPCATEGORY_NUMERIC:
1157 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1160 * Don't call escape_json here if it's a valid JSON number.
1161 * Numeric output should usually be a valid JSON number and JSON
1162 * numbers shouldn't be quoted. Quote cases like "NaN" and
1163 * "Infinity", however.
1165 if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
1166 appendStringInfoString(result, outputstr);
1168 escape_json(result, outputstr);
1171 case TYPCATEGORY_JSON:
1172 /* JSON will already be escaped */
1173 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1174 appendStringInfoString(result, outputstr);
/* here typoutputfunc is a cast function returning json (text) */
1177 case TYPCATEGORY_JSON_CAST:
1178 jsontext = DatumGetTextP(OidFunctionCall1(typoutputfunc, val));
1179 outputstr = text_to_cstring(jsontext);
1180 appendStringInfoString(result, outputstr);
/* default: render as an escaped JSON string */
1185 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1186 escape_json(result, outputstr);
1193 * Process a single dimension of an array.
1194 * If it's the innermost dimension, output the values, otherwise call
1195 * ourselves recursively to process the next dimension.
/* valcount walks the flattened vals/nulls arrays across the recursion. */
1198 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
1199 bool *nulls, int *valcount, TYPCATEGORY tcategory,
1200 Oid typoutputfunc, bool use_line_feeds)
1205 Assert(dim < ndims);
1207 sep = use_line_feeds ? ",\n " : ",";
1209 appendStringInfoChar(result, '[');
1211 for (i = 1; i <= dims[dim]; i++)
1214 appendStringInfoString(result, sep);
1216 if (dim + 1 == ndims)
1218 datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
1225 * Do we want line feeds on inner dimensions of arrays? For now
1228 array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
1229 valcount, tcategory, typoutputfunc, false);
1233 appendStringInfoChar(result, ']');
1237 * Turn an array into JSON.
/* Deconstructs the array and renders it (recursively, for multiple
 * dimensions) via array_dim_to_json. */
1240 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
1242 ArrayType *v = DatumGetArrayTypeP(array);
1243 Oid element_type = ARR_ELEMTYPE(v);
1256 TYPCATEGORY tcategory;
1257 Oid castfunc = InvalidOid;
1261 nitems = ArrayGetNItems(ndim, dim);
/* empty array renders as [] without further work */
1265 appendStringInfoString(result, "[]");
1269 get_type_io_data(element_type, IOFunc_output,
1270 &typlen, &typbyval, &typalign,
1271 &typdelim, &typioparam, &typoutputfunc);
/* for user-defined element types, prefer an explicit cast to json */
1273 if (element_type > FirstNormalObjectId)
1276 Form_pg_cast castForm;
1278 tuple = SearchSysCache2(CASTSOURCETARGET,
1279 ObjectIdGetDatum(element_type),
1280 ObjectIdGetDatum(JSONOID));
1281 if (HeapTupleIsValid(tuple))
1283 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1285 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1286 castfunc = typoutputfunc = castForm->castfunc;
1288 ReleaseSysCache(tuple);
1292 deconstruct_array(v, element_type, typlen, typbyval,
1293 typalign, &elements, &nulls,
1296 if (castfunc != InvalidOid)
1297 tcategory = TYPCATEGORY_JSON_CAST;
1298 else if (element_type == RECORDOID)
1299 tcategory = TYPCATEGORY_COMPOSITE;
1300 else if (element_type == JSONOID)
1301 tcategory = TYPCATEGORY_JSON;
1303 tcategory = TypeCategory(element_type);
1305 array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
1306 typoutputfunc, use_line_feeds);
1313 * Turn a composite / record into JSON.
/* Renders each non-dropped attribute as "name":value, dispatching values
 * through datum_to_json. */
1316 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
1322 HeapTupleData tmptup,
1325 bool needsep = false;
1328 sep = use_line_feeds ? ",\n " : ",";
1330 td = DatumGetHeapTupleHeader(composite);
1332 /* Extract rowtype info and find a tupdesc */
1333 tupType = HeapTupleHeaderGetTypeId(td);
1334 tupTypmod = HeapTupleHeaderGetTypMod(td);
1335 tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1337 /* Build a temporary HeapTuple control structure */
1338 tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
1342 appendStringInfoChar(result, '{');
1344 for (i = 0; i < tupdesc->natts; i++)
1350 TYPCATEGORY tcategory;
1353 Oid castfunc = InvalidOid;
1355 if (tupdesc->attrs[i]->attisdropped)
1359 appendStringInfoString(result, sep);
1362 attname = NameStr(tupdesc->attrs[i]->attname);
1363 escape_json(result, attname);
1364 appendStringInfoChar(result, ':');
1366 origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
1368 getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
1369 &typoutput, &typisvarlena);
/* for user-defined column types, prefer an explicit cast to json */
1371 if (tupdesc->attrs[i]->atttypid > FirstNormalObjectId)
1373 HeapTuple cast_tuple;
1374 Form_pg_cast castForm;
1376 cast_tuple = SearchSysCache2(CASTSOURCETARGET,
1377 ObjectIdGetDatum(tupdesc->attrs[i]->atttypid),
1378 ObjectIdGetDatum(JSONOID));
1379 if (HeapTupleIsValid(cast_tuple))
1381 castForm = (Form_pg_cast) GETSTRUCT(cast_tuple);
1383 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1384 castfunc = typoutput = castForm->castfunc;
1386 ReleaseSysCache(cast_tuple);
1390 if (castfunc != InvalidOid)
1391 tcategory = TYPCATEGORY_JSON_CAST;
1392 else if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
1393 tcategory = TYPCATEGORY_ARRAY;
1394 else if (tupdesc->attrs[i]->atttypid == RECORDOID)
1395 tcategory = TYPCATEGORY_COMPOSITE;
1396 else if (tupdesc->attrs[i]->atttypid == JSONOID)
1397 tcategory = TYPCATEGORY_JSON;
1399 tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
1402 * If we have a toasted datum, forcibly detoast it here to avoid
1403 * memory leakage inside the type's output routine.
1405 if (typisvarlena && !isnull)
1406 val = PointerGetDatum(PG_DETOAST_DATUM(origval));
1410 datum_to_json(val, isnull, result, tcategory, typoutput);
1412 /* Clean up detoasted copy, if any */
1414 pfree(DatumGetPointer(val));
1417 appendStringInfoChar(result, '}');
1418 ReleaseTupleDesc(tupdesc);
1422 * SQL function array_to_json(row)
/* Single-argument form: renders the array without pretty line feeds. */
1425 array_to_json(PG_FUNCTION_ARGS)
1427 Datum array = PG_GETARG_DATUM(0);
1430 result = makeStringInfo();
1432 array_to_json_internal(array, result, false);
1434 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1438 * SQL function array_to_json(row, prettybool)
/* Two-argument form: second argument enables line feeds between elements. */
1441 array_to_json_pretty(PG_FUNCTION_ARGS)
1443 Datum array = PG_GETARG_DATUM(0);
1444 bool use_line_feeds = PG_GETARG_BOOL(1);
1447 result = makeStringInfo();
1449 array_to_json_internal(array, result, use_line_feeds);
1451 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1455 * SQL function row_to_json(row)
/* Single-argument form: renders the record without pretty line feeds. */
1458 row_to_json(PG_FUNCTION_ARGS)
1460 Datum array = PG_GETARG_DATUM(0);
1463 result = makeStringInfo();
1465 composite_to_json(array, result, false);
1467 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1471 * SQL function row_to_json(row, prettybool)
1474 row_to_json_pretty(PG_FUNCTION_ARGS)
1476 Datum array = PG_GETARG_DATUM(0);
1477 bool use_line_feeds = PG_GETARG_BOOL(1);
1480 result = makeStringInfo();
1482 composite_to_json(array, result, use_line_feeds);
1484 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1488 * SQL function to_json(anyvalue)
1491 to_json(PG_FUNCTION_ARGS)
1493 Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
1497 TYPCATEGORY tcategory;
1500 Oid castfunc = InvalidOid;
1502 if (val_type == InvalidOid)
1504 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1505 errmsg("could not determine input data type")));
1508 result = makeStringInfo();
1510 orig_val = PG_ARGISNULL(0) ? (Datum) 0 : PG_GETARG_DATUM(0);
1512 getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
1514 if (val_type > FirstNormalObjectId)
1517 Form_pg_cast castForm;
1519 tuple = SearchSysCache2(CASTSOURCETARGET,
1520 ObjectIdGetDatum(val_type),
1521 ObjectIdGetDatum(JSONOID));
1522 if (HeapTupleIsValid(tuple))
1524 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1526 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1527 castfunc = typoutput = castForm->castfunc;
1529 ReleaseSysCache(tuple);
1533 if (castfunc != InvalidOid)
1534 tcategory = TYPCATEGORY_JSON_CAST;
1535 else if (val_type == RECORDARRAYOID)
1536 tcategory = TYPCATEGORY_ARRAY;
1537 else if (val_type == RECORDOID)
1538 tcategory = TYPCATEGORY_COMPOSITE;
1539 else if (val_type == JSONOID)
1540 tcategory = TYPCATEGORY_JSON;
1542 tcategory = TypeCategory(val_type);
1545 * If we have a toasted datum, forcibly detoast it here to avoid memory
1546 * leakage inside the type's output routine.
1548 if (typisvarlena && orig_val != (Datum) 0)
1549 val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
1553 datum_to_json(val, false, result, tcategory, typoutput);
1555 /* Clean up detoasted copy, if any */
1556 if (val != orig_val)
1557 pfree(DatumGetPointer(val));
1559 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1563 * json_agg transition function
1566 json_agg_transfn(PG_FUNCTION_ARGS)
1568 Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
1569 MemoryContext aggcontext,
1574 TYPCATEGORY tcategory;
1577 Oid castfunc = InvalidOid;
1579 if (val_type == InvalidOid)
1581 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1582 errmsg("could not determine input data type")));
1584 if (!AggCheckCallContext(fcinfo, &aggcontext))
1586 /* cannot be called directly because of internal-type argument */
1587 elog(ERROR, "json_agg_transfn called in non-aggregate context");
1590 if (PG_ARGISNULL(0))
1593 * Make this StringInfo in a context where it will persist for the
1594 * duration off the aggregate call. It's only needed for this initial
1595 * piece, as the StringInfo routines make sure they use the right
1596 * context to enlarge the object if necessary.
1598 oldcontext = MemoryContextSwitchTo(aggcontext);
1599 state = makeStringInfo();
1600 MemoryContextSwitchTo(oldcontext);
1602 appendStringInfoChar(state, '[');
1606 state = (StringInfo) PG_GETARG_POINTER(0);
1607 appendStringInfoString(state, ", ");
1610 /* fast path for NULLs */
1611 if (PG_ARGISNULL(1))
1613 orig_val = (Datum) 0;
1614 datum_to_json(orig_val, true, state, 0, InvalidOid);
1615 PG_RETURN_POINTER(state);
1619 orig_val = PG_GETARG_DATUM(1);
1621 getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
1623 if (val_type > FirstNormalObjectId)
1626 Form_pg_cast castForm;
1628 tuple = SearchSysCache2(CASTSOURCETARGET,
1629 ObjectIdGetDatum(val_type),
1630 ObjectIdGetDatum(JSONOID));
1631 if (HeapTupleIsValid(tuple))
1633 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1635 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1636 castfunc = typoutput = castForm->castfunc;
1638 ReleaseSysCache(tuple);
1642 if (castfunc != InvalidOid)
1643 tcategory = TYPCATEGORY_JSON_CAST;
1644 else if (val_type == RECORDARRAYOID)
1645 tcategory = TYPCATEGORY_ARRAY;
1646 else if (val_type == RECORDOID)
1647 tcategory = TYPCATEGORY_COMPOSITE;
1648 else if (val_type == JSONOID)
1649 tcategory = TYPCATEGORY_JSON;
1651 tcategory = TypeCategory(val_type);
1654 * If we have a toasted datum, forcibly detoast it here to avoid memory
1655 * leakage inside the type's output routine.
1658 val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
1662 if (!PG_ARGISNULL(0) &&
1663 (tcategory == TYPCATEGORY_ARRAY || tcategory == TYPCATEGORY_COMPOSITE))
1665 appendStringInfoString(state, "\n ");
1668 datum_to_json(val, false, state, tcategory, typoutput);
1670 /* Clean up detoasted copy, if any */
1671 if (val != orig_val)
1672 pfree(DatumGetPointer(val));
1675 * The transition type for array_agg() is declared to be "internal", which
1676 * is a pass-by-value type the same size as a pointer. So we can safely
1677 * pass the ArrayBuildState pointer through nodeAgg.c's machinations.
1679 PG_RETURN_POINTER(state);
1683 * json_agg final function
1686 json_agg_finalfn(PG_FUNCTION_ARGS)
1690 /* cannot be called directly because of internal-type argument */
1691 Assert(AggCheckCallContext(fcinfo, NULL));
1693 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
1698 appendStringInfoChar(state, ']');
1700 PG_RETURN_TEXT_P(cstring_to_text(state->data));
1704 * Produce a JSON string literal, properly escaping characters in the text.
1707 escape_json(StringInfo buf, const char *str)
1711 appendStringInfoCharMacro(buf, '\"');
1712 for (p = str; *p; p++)
1717 appendStringInfoString(buf, "\\b");
1720 appendStringInfoString(buf, "\\f");
1723 appendStringInfoString(buf, "\\n");
1726 appendStringInfoString(buf, "\\r");
1729 appendStringInfoString(buf, "\\t");
1732 appendStringInfoString(buf, "\\\"");
1735 appendStringInfoString(buf, "\\\\");
1738 if ((unsigned char) *p < ' ')
1739 appendStringInfo(buf, "\\u%04x", (int) *p);
1741 appendStringInfoCharMacro(buf, *p);
1745 appendStringInfoCharMacro(buf, '\"');