1 /*-------------------------------------------------------------------------
4 * JSON data type support.
6 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/utils/adt/json.c
12 *-------------------------------------------------------------------------
16 #include "access/htup_details.h"
17 #include "access/transam.h"
18 #include "catalog/pg_cast.h"
19 #include "catalog/pg_type.h"
20 #include "executor/spi.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/pqformat.h"
23 #include "mb/pg_wchar.h"
24 #include "parser/parse_coerce.h"
25 #include "utils/array.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/json.h"
29 #include "utils/jsonapi.h"
30 #include "utils/typcache.h"
31 #include "utils/syscache.h"
/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.
 */
typedef enum					/* contexts of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_STRING,			/* expecting a string (for a field name) */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA,	/* saw object ',', expecting next label */
	JSON_PARSE_END				/* saw the end of a document, expect nothing */
} JsonParseContext;
51 static inline void json_lex(JsonLexContext *lex);
52 static inline void json_lex_string(JsonLexContext *lex);
53 static inline void json_lex_number(JsonLexContext *lex, char *s);
54 static inline void parse_scalar(JsonLexContext *lex, JsonSemAction sem);
55 static void parse_object_field(JsonLexContext *lex, JsonSemAction sem);
56 static void parse_object(JsonLexContext *lex, JsonSemAction sem);
57 static void parse_array_element(JsonLexContext *lex, JsonSemAction sem);
58 static void parse_array(JsonLexContext *lex, JsonSemAction sem);
59 static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
60 static void report_invalid_token(JsonLexContext *lex);
61 static int report_json_context(JsonLexContext *lex);
62 static char *extract_mb_char(char *s);
63 static void composite_to_json(Datum composite, StringInfo result,
65 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
66 Datum *vals, bool *nulls, int *valcount,
67 TYPCATEGORY tcategory, Oid typoutputfunc,
69 static void array_to_json_internal(Datum array, StringInfo result,
72 /* the null action object used for pure validation */
73 static jsonSemAction nullSemAction =
75 NULL, NULL, NULL, NULL, NULL,
76 NULL, NULL, NULL, NULL, NULL
78 static JsonSemAction NullSemAction = &nullSemAction;
80 /* Recursive Descent parser support routines */
85 * what is the current look_ahead token?
87 static inline JsonTokenType
88 lex_peek(JsonLexContext *lex)
90 return lex->token_type;
96 * accept the look_ahead token and move the lexer to the next token if the
97 * look_ahead token matches the token parameter. In that case, and if required,
98 * also hand back the de-escaped lexeme.
100 * returns true if the token matched, false otherwise.
103 lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
105 if (lex->token_type == token)
109 if (lex->token_type == JSON_TOKEN_STRING)
111 if (lex->strval != NULL)
112 *lexeme = pstrdup(lex->strval->data);
116 int len = (lex->token_terminator - lex->token_start);
117 char *tokstr = palloc(len + 1);
119 memcpy(tokstr, lex->token_start, len);
133 * move the lexer to the next token if the current look_ahead token matches
134 * the parameter token. Otherwise, report an error.
137 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
139 if (!lex_accept(lex, token, NULL))
140 report_parse_error(ctx, lex);;
/*
 * All the defined type categories are upper case, so use lower case here
 * so we avoid any possible clash.
 */
/* fake type category for JSON so we can distinguish it in datum_to_json */
#define TYPCATEGORY_JSON 'j'
/* fake category for types that have a cast to json */
#define TYPCATEGORY_JSON_CAST 'c'
/* letters appearing in numeric output that aren't valid in a JSON number */
#define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
/* chars to consider as part of an alphanumeric token */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
165 json_in(PG_FUNCTION_ARGS)
167 char *json = PG_GETARG_CSTRING(0);
168 text *result = cstring_to_text(json);
172 lex = makeJsonLexContext(result, false);
173 pg_parse_json(lex, NullSemAction);
175 /* Internal representation is the same as text, for now */
176 PG_RETURN_TEXT_P(result);
183 json_out(PG_FUNCTION_ARGS)
185 /* we needn't detoast because text_to_cstring will handle that */
186 Datum txt = PG_GETARG_DATUM(0);
188 PG_RETURN_CSTRING(TextDatumGetCString(txt));
195 json_send(PG_FUNCTION_ARGS)
197 text *t = PG_GETARG_TEXT_PP(0);
200 pq_begintypsend(&buf);
201 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
202 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
209 json_recv(PG_FUNCTION_ARGS)
211 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
217 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
219 result = palloc(nbytes + VARHDRSZ);
220 SET_VARSIZE(result, nbytes + VARHDRSZ);
221 memcpy(VARDATA(result), str, nbytes);
224 lex = makeJsonLexContext(result, false);
225 pg_parse_json(lex, NullSemAction);
227 PG_RETURN_TEXT_P(result);
233 * lex constructor, with or without StringInfo object
234 * for de-escaped lexemes.
236 * Without is better as it makes the processing faster, so only make one
237 * if really required.
240 makeJsonLexContext(text *json, bool need_escapes)
242 JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
244 lex->input = lex->token_terminator = lex->line_start = VARDATA(json);
245 lex->line_number = 1;
246 lex->input_length = VARSIZE(json) - VARHDRSZ;
248 lex->strval = makeStringInfo();
255 * Publicly visible entry point for the JSON parser.
257 * lex is a lexing context, set up for the json to be processed by calling
258 * makeJsonLexContext(). sem is a strucure of function pointers to semantic
259 * action routines to be called at appropriate spots during parsing, and a
260 * pointer to a state object to be passed to those routines.
263 pg_parse_json(JsonLexContext *lex, JsonSemAction sem)
267 /* get the initial token */
272 /* parse by recursive descent */
275 case JSON_TOKEN_OBJECT_START:
276 parse_object(lex, sem);
278 case JSON_TOKEN_ARRAY_START:
279 parse_array(lex, sem);
282 parse_scalar(lex, sem); /* json can be a bare scalar */
285 lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
290 * Recursive Descent parse routines. There is one for each structural
291 * element in a json document:
292 * - scalar (string, number, true, false, null)
299 parse_scalar(JsonLexContext *lex, JsonSemAction sem)
302 json_scalar_action sfunc = sem->scalar;
304 JsonTokenType tok = lex_peek(lex);
306 valaddr = sfunc == NULL ? NULL : &val;
308 /* a scalar must be a string, a number, true, false, or null */
311 case JSON_TOKEN_TRUE:
312 lex_accept(lex, JSON_TOKEN_TRUE, valaddr);
314 case JSON_TOKEN_FALSE:
315 lex_accept(lex, JSON_TOKEN_FALSE, valaddr);
317 case JSON_TOKEN_NULL:
318 lex_accept(lex, JSON_TOKEN_NULL, valaddr);
320 case JSON_TOKEN_NUMBER:
321 lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);
323 case JSON_TOKEN_STRING:
324 lex_accept(lex, JSON_TOKEN_STRING, valaddr);
327 report_parse_error(JSON_PARSE_VALUE, lex);
331 (*sfunc) (sem->semstate, val, tok);
335 parse_object_field(JsonLexContext *lex, JsonSemAction sem)
338 * an object field is "fieldname" : value where value can be a scalar,
342 char *fname = NULL; /* keep compiler quiet */
343 json_ofield_action ostart = sem->object_field_start;
344 json_ofield_action oend = sem->object_field_end;
346 char **fnameaddr = NULL;
349 if (ostart != NULL || oend != NULL)
352 if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
353 report_parse_error(JSON_PARSE_STRING, lex);
355 lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
358 isnull = tok == JSON_TOKEN_NULL;
361 (*ostart) (sem->semstate, fname, isnull);
365 case JSON_TOKEN_OBJECT_START:
366 parse_object(lex, sem);
368 case JSON_TOKEN_ARRAY_START:
369 parse_array(lex, sem);
372 parse_scalar(lex, sem);
376 (*oend) (sem->semstate, fname, isnull);
383 parse_object(JsonLexContext *lex, JsonSemAction sem)
386 * an object is a possibly empty sequence of object fields, separated by
387 * commas and surrounde by curly braces.
389 json_struct_action ostart = sem->object_start;
390 json_struct_action oend = sem->object_end;
394 (*ostart) (sem->semstate);
397 * Data inside an object at at a higher nesting level than the object
398 * itself. Note that we increment this after we call the semantic routine
399 * for the object start and restore it before we call the routine for the
404 /* we know this will succeeed, just clearing the token */
405 lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START);
410 case JSON_TOKEN_STRING:
411 parse_object_field(lex, sem);
412 while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
413 parse_object_field(lex, sem);
415 case JSON_TOKEN_OBJECT_END:
418 /* case of an invalid initial token inside the object */
419 report_parse_error(JSON_PARSE_OBJECT_START, lex);
422 lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
427 (*oend) (sem->semstate);
431 parse_array_element(JsonLexContext *lex, JsonSemAction sem)
433 json_aelem_action astart = sem->array_element_start;
434 json_aelem_action aend = sem->array_element_end;
435 JsonTokenType tok = lex_peek(lex);
439 isnull = tok == JSON_TOKEN_NULL;
442 (*astart) (sem->semstate, isnull);
444 /* an array element is any object, array or scalar */
447 case JSON_TOKEN_OBJECT_START:
448 parse_object(lex, sem);
450 case JSON_TOKEN_ARRAY_START:
451 parse_array(lex, sem);
454 parse_scalar(lex, sem);
458 (*aend) (sem->semstate, isnull);
462 parse_array(JsonLexContext *lex, JsonSemAction sem)
465 * an array is a possibly empty sequence of array elements, separated by
466 * commas and surrounded by square brackets.
468 json_struct_action astart = sem->array_start;
469 json_struct_action aend = sem->array_end;
472 (*astart) (sem->semstate);
475 * Data inside an array at at a higher nesting level than the array
476 * itself. Note that we increment this after we call the semantic routine
477 * for the array start and restore it before we call the routine for the
482 lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
483 if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
486 parse_array_element(lex, sem);
488 while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
489 parse_array_element(lex, sem);
492 lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
497 (*aend) (sem->semstate);
501 * Lex one token from the input stream.
504 json_lex(JsonLexContext *lex)
509 /* Skip leading whitespace. */
510 s = lex->token_terminator;
511 len = s - lex->input;
512 while (len < lex->input_length &&
513 (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
520 lex->token_start = s;
522 /* Determine token type. */
523 if (len >= lex->input_length)
525 lex->token_start = NULL;
526 lex->prev_token_terminator = lex->token_terminator;
527 lex->token_terminator = s;
528 lex->token_type = JSON_TOKEN_END;
533 /* Single-character token, some kind of punctuation mark. */
535 lex->prev_token_terminator = lex->token_terminator;
536 lex->token_terminator = s + 1;
537 lex->token_type = JSON_TOKEN_OBJECT_START;
540 lex->prev_token_terminator = lex->token_terminator;
541 lex->token_terminator = s + 1;
542 lex->token_type = JSON_TOKEN_OBJECT_END;
545 lex->prev_token_terminator = lex->token_terminator;
546 lex->token_terminator = s + 1;
547 lex->token_type = JSON_TOKEN_ARRAY_START;
550 lex->prev_token_terminator = lex->token_terminator;
551 lex->token_terminator = s + 1;
552 lex->token_type = JSON_TOKEN_ARRAY_END;
555 lex->prev_token_terminator = lex->token_terminator;
556 lex->token_terminator = s + 1;
557 lex->token_type = JSON_TOKEN_COMMA;
560 lex->prev_token_terminator = lex->token_terminator;
561 lex->token_terminator = s + 1;
562 lex->token_type = JSON_TOKEN_COLON;
566 json_lex_string(lex);
567 lex->token_type = JSON_TOKEN_STRING;
570 /* Negative number. */
571 json_lex_number(lex, s + 1);
572 lex->token_type = JSON_TOKEN_NUMBER;
584 /* Positive number. */
585 json_lex_number(lex, s);
586 lex->token_type = JSON_TOKEN_NUMBER;
593 * We're not dealing with a string, number, legal
594 * punctuation mark, or end of string. The only legal
595 * tokens we might find here are true, false, and null,
596 * but for error reporting purposes we scan until we see a
597 * non-alphanumeric character. That way, we can report
598 * the whole word as an unexpected token, rather than just
599 * some unintuitive prefix thereof.
601 for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
605 * We got some sort of unexpected punctuation or an
606 * otherwise unexpected character, so just complain about
607 * that one character.
611 lex->prev_token_terminator = lex->token_terminator;
612 lex->token_terminator = s + 1;
613 report_invalid_token(lex);
617 * We've got a real alphanumeric token here. If it
618 * happens to be true, false, or null, all is well. If
621 lex->prev_token_terminator = lex->token_terminator;
622 lex->token_terminator = p;
625 if (memcmp(s, "true", 4) == 0)
626 lex->token_type = JSON_TOKEN_TRUE;
627 else if (memcmp(s, "null", 4) == 0)
628 lex->token_type = JSON_TOKEN_NULL;
630 report_invalid_token(lex);
632 else if (p - s == 5 && memcmp(s, "false", 5) == 0)
633 lex->token_type = JSON_TOKEN_FALSE;
635 report_invalid_token(lex);
638 } /* end of switch */
642 * The next token in the input stream is known to be a string; lex it.
645 json_lex_string(JsonLexContext *lex)
649 int hi_surrogate = -1;
651 if (lex->strval != NULL)
652 resetStringInfo(lex->strval);
654 Assert(lex->input_length > 0);
655 s = lex->token_start;
656 len = lex->token_start - lex->input;
661 /* Premature end of the string. */
662 if (len >= lex->input_length)
664 lex->token_terminator = s;
665 report_invalid_token(lex);
669 else if ((unsigned char) *s < 32)
671 /* Per RFC4627, these characters MUST be escaped. */
672 /* Since *s isn't printable, exclude it from the context string */
673 lex->token_terminator = s;
675 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
676 errmsg("invalid input syntax for type json"),
677 errdetail("Character with value 0x%02x must be escaped.",
679 report_json_context(lex)));
683 /* OK, we have an escape character. */
686 if (len >= lex->input_length)
688 lex->token_terminator = s;
689 report_invalid_token(lex);
696 for (i = 1; i <= 4; i++)
700 if (len >= lex->input_length)
702 lex->token_terminator = s;
703 report_invalid_token(lex);
705 else if (*s >= '0' && *s <= '9')
706 ch = (ch * 16) + (*s - '0');
707 else if (*s >= 'a' && *s <= 'f')
708 ch = (ch * 16) + (*s - 'a') + 10;
709 else if (*s >= 'A' && *s <= 'F')
710 ch = (ch * 16) + (*s - 'A') + 10;
713 lex->token_terminator = s + pg_mblen(s);
715 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
716 errmsg("invalid input syntax for type json"),
717 errdetail("\"\\u\" must be followed by four hexadecimal digits."),
718 report_json_context(lex)));
721 if (lex->strval != NULL)
726 if (ch >= 0xd800 && ch <= 0xdbff)
728 if (hi_surrogate != -1)
730 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
731 errmsg("invalid input syntax for type json"),
732 errdetail("high order surrogate must not follow a high order surrogate."),
733 report_json_context(lex)));
734 hi_surrogate = (ch & 0x3ff) << 10;
737 else if (ch >= 0xdc00 && ch <= 0xdfff)
739 if (hi_surrogate == -1)
741 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
742 errmsg("invalid input syntax for type json"),
743 errdetail("low order surrogate must follow a high order surrogate."),
744 report_json_context(lex)));
745 ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
749 if (hi_surrogate != -1)
751 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
752 errmsg("invalid input syntax for type json"),
753 errdetail("low order surrogate must follow a high order surrogate."),
754 report_json_context(lex)));
757 * For UTF8, replace the escape sequence by the actual utf8
758 * character in lex->strval. Do this also for other encodings
759 * if the escape designates an ASCII character, otherwise
760 * raise an error. We don't ever unescape a \u0000, since that
761 * would result in an impermissible nul byte.
766 appendStringInfoString(lex->strval, "\\u0000");
768 else if (GetDatabaseEncoding() == PG_UTF8)
770 unicode_to_utf8(ch, (unsigned char *) utf8str);
771 utf8len = pg_utf_mblen((unsigned char *) utf8str);
772 appendBinaryStringInfo(lex->strval, utf8str, utf8len);
774 else if (ch <= 0x007f)
777 * This is the only way to designate things like a form feed
778 * character in JSON, so it's useful in all encodings.
780 appendStringInfoChar(lex->strval, (char) ch);
785 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
786 errmsg("invalid input syntax for type json"),
787 errdetail("Unicode escape for code points higher than U+007F not permitted in non-UTF8 encoding"),
788 report_json_context(lex)));
793 else if (lex->strval != NULL)
795 if (hi_surrogate != -1)
797 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
798 errmsg("invalid input syntax for type json"),
799 errdetail("low order surrogate must follow a high order surrogate."),
800 report_json_context(lex)));
807 appendStringInfoChar(lex->strval, *s);
810 appendStringInfoChar(lex->strval, '\b');
813 appendStringInfoChar(lex->strval, '\f');
816 appendStringInfoChar(lex->strval, '\n');
819 appendStringInfoChar(lex->strval, '\r');
822 appendStringInfoChar(lex->strval, '\t');
825 /* Not a valid string escape, so error out. */
826 lex->token_terminator = s + pg_mblen(s);
828 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
829 errmsg("invalid input syntax for type json"),
830 errdetail("Escape sequence \"\\%s\" is invalid.",
832 report_json_context(lex)));
835 else if (strchr("\"\\/bfnrt", *s) == NULL)
838 * Simpler processing if we're not bothered about de-escaping
840 * It's very tempting to remove the strchr() call here and
841 * replace it with a switch statement, but testing so far has
842 * shown it's not a performance win.
844 lex->token_terminator = s + pg_mblen(s);
846 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
847 errmsg("invalid input syntax for type json"),
848 errdetail("Escape sequence \"\\%s\" is invalid.",
850 report_json_context(lex)));
854 else if (lex->strval != NULL)
856 if (hi_surrogate != -1)
858 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
859 errmsg("invalid input syntax for type json"),
860 errdetail("low order surrogate must follow a high order surrogate."),
861 report_json_context(lex)));
863 appendStringInfoChar(lex->strval, *s);
868 if (hi_surrogate != -1)
870 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
871 errmsg("invalid input syntax for type json"),
872 errdetail("low order surrogate must follow a high order surrogate."),
873 report_json_context(lex)));
875 /* Hooray, we found the end of the string! */
876 lex->prev_token_terminator = lex->token_terminator;
877 lex->token_terminator = s + 1;
880 /*-------------------------------------------------------------------------
881 * The next token in the input stream is known to be a number; lex it.
883 * In JSON, a number consists of four parts:
885 * (1) An optional minus sign ('-').
887 * (2) Either a single '0', or a string of one or more digits that does not
890 * (3) An optional decimal part, consisting of a period ('.') followed by
891 * one or more digits. (Note: While this part can be omitted
892 * completely, it's not OK to have only the decimal point without
893 * any digits afterwards.)
895 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
896 * followed by '+' or '-', followed by one or more digits. (Note:
897 * As with the decimal part, if 'e' or 'E' is present, it must be
898 * followed by at least one digit.)
900 * The 's' argument to this function points to the ostensible beginning
901 * of part 2 - i.e. the character after any optional minus sign, and the
902 * first character of the string if there is none.
904 *-------------------------------------------------------------------------
907 json_lex_number(JsonLexContext *lex, char *s)
913 len = s - lex->input;
914 /* Part (1): leading sign indicator. */
915 /* Caller already did this for us; so do nothing. */
917 /* Part (2): parse main digit string. */
923 else if (*s >= '1' && *s <= '9')
929 } while (len < lex->input_length && *s >= '0' && *s <= '9');
934 /* Part (3): parse optional decimal portion. */
935 if (len < lex->input_length && *s == '.')
939 if (len == lex->input_length || *s < '0' || *s > '9')
947 } while (len < lex->input_length && *s >= '0' && *s <= '9');
951 /* Part (4): parse optional exponent. */
952 if (len < lex->input_length && (*s == 'e' || *s == 'E'))
956 if (len < lex->input_length && (*s == '+' || *s == '-'))
961 if (len == lex->input_length || *s < '0' || *s > '9')
969 } while (len < lex->input_length && *s >= '0' && *s <= '9');
974 * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
975 * here should be considered part of the token for error-reporting
978 for (p = s; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*p); p++, len++)
980 lex->prev_token_terminator = lex->token_terminator;
981 lex->token_terminator = p;
983 report_invalid_token(lex);
987 * Report a parse error.
989 * lex->token_start and lex->token_terminator must identify the current token.
992 report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
997 /* Handle case where the input ended prematurely. */
998 if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
1000 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1001 errmsg("invalid input syntax for type json"),
1002 errdetail("The input string ended unexpectedly."),
1003 report_json_context(lex)));
1005 /* Separate out the current token. */
1006 toklen = lex->token_terminator - lex->token_start;
1007 token = palloc(toklen + 1);
1008 memcpy(token, lex->token_start, toklen);
1009 token[toklen] = '\0';
1011 /* Complain, with the appropriate detail message. */
1012 if (ctx == JSON_PARSE_END)
1014 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1015 errmsg("invalid input syntax for type json"),
1016 errdetail("Expected end of input, but found \"%s\".",
1018 report_json_context(lex)));
1023 case JSON_PARSE_VALUE:
1025 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1026 errmsg("invalid input syntax for type json"),
1027 errdetail("Expected JSON value, but found \"%s\".",
1029 report_json_context(lex)));
1031 case JSON_PARSE_STRING:
1033 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1034 errmsg("invalid input syntax for type json"),
1035 errdetail("Expected string, but found \"%s\".",
1037 report_json_context(lex)));
1039 case JSON_PARSE_ARRAY_START:
1041 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1042 errmsg("invalid input syntax for type json"),
1043 errdetail("Expected array element or \"]\", but found \"%s\".",
1045 report_json_context(lex)));
1047 case JSON_PARSE_ARRAY_NEXT:
1049 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1050 errmsg("invalid input syntax for type json"),
1051 errdetail("Expected \",\" or \"]\", but found \"%s\".",
1053 report_json_context(lex)));
1055 case JSON_PARSE_OBJECT_START:
1057 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1058 errmsg("invalid input syntax for type json"),
1059 errdetail("Expected string or \"}\", but found \"%s\".",
1061 report_json_context(lex)));
1063 case JSON_PARSE_OBJECT_LABEL:
1065 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1066 errmsg("invalid input syntax for type json"),
1067 errdetail("Expected \":\", but found \"%s\".",
1069 report_json_context(lex)));
1071 case JSON_PARSE_OBJECT_NEXT:
1073 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1074 errmsg("invalid input syntax for type json"),
1075 errdetail("Expected \",\" or \"}\", but found \"%s\".",
1077 report_json_context(lex)));
1079 case JSON_PARSE_OBJECT_COMMA:
1081 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1082 errmsg("invalid input syntax for type json"),
1083 errdetail("Expected string, but found \"%s\".",
1085 report_json_context(lex)));
1088 elog(ERROR, "unexpected json parse state: %d", ctx);
1094 * Report an invalid input token.
1096 * lex->token_start and lex->token_terminator must identify the token.
1099 report_invalid_token(JsonLexContext *lex)
1104 /* Separate out the offending token. */
1105 toklen = lex->token_terminator - lex->token_start;
1106 token = palloc(toklen + 1);
1107 memcpy(token, lex->token_start, toklen);
1108 token[toklen] = '\0';
1111 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1112 errmsg("invalid input syntax for type json"),
1113 errdetail("Token \"%s\" is invalid.", token),
1114 report_json_context(lex)));
1118 * Report a CONTEXT line for bogus JSON input.
1120 * lex->token_terminator must be set to identify the spot where we detected
1121 * the error. Note that lex->token_start might be NULL, in case we recognized
1124 * The return value isn't meaningful, but we make it non-void so that this
1125 * can be invoked inside ereport().
1128 report_json_context(JsonLexContext *lex)
1130 const char *context_start;
1131 const char *context_end;
1132 const char *line_start;
1139 /* Choose boundaries for the part of the input we will display */
1140 context_start = lex->input;
1141 context_end = lex->token_terminator;
1142 line_start = context_start;
1146 /* Always advance over newlines */
1147 if (context_start < context_end && *context_start == '\n')
1150 line_start = context_start;
1154 /* Otherwise, done as soon as we are close enough to context_end */
1155 if (context_end - context_start < 50)
1157 /* Advance to next multibyte character */
1158 if (IS_HIGHBIT_SET(*context_start))
1159 context_start += pg_mblen(context_start);
1165 * We add "..." to indicate that the excerpt doesn't start at the
1166 * beginning of the line ... but if we're within 3 characters of the
1167 * beginning of the line, we might as well just show the whole line.
1169 if (context_start - line_start <= 3)
1170 context_start = line_start;
1172 /* Get a null-terminated copy of the data to present */
1173 ctxtlen = context_end - context_start;
1174 ctxt = palloc(ctxtlen + 1);
1175 memcpy(ctxt, context_start, ctxtlen);
1176 ctxt[ctxtlen] = '\0';
1179 * Show the context, prefixing "..." if not starting at start of line, and
1180 * suffixing "..." if not ending at end of line.
1182 prefix = (context_start > line_start) ? "..." : "";
1183 suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
1185 return errcontext("JSON data, line %d: %s%s%s",
1186 line_number, prefix, ctxt, suffix);
/*
 * Extract a single, possibly multi-byte char from the input string.
 * Returns a palloc'd, null-terminated copy.
 */
static char *
extract_mb_char(char *s)
{
	char	   *res;
	int			len;

	len = pg_mblen(s);
	res = palloc(len + 1);
	memcpy(res, s, len);
	res[len] = '\0';

	return res;
}
1207 * Turn a scalar Datum into JSON, appending the string to "result".
1209 * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
1213 datum_to_json(Datum val, bool is_null, StringInfo result,
1214 TYPCATEGORY tcategory, Oid typoutputfunc)
1221 appendStringInfoString(result, "null");
1227 case TYPCATEGORY_ARRAY:
1228 array_to_json_internal(val, result, false);
1230 case TYPCATEGORY_COMPOSITE:
1231 composite_to_json(val, result, false);
1233 case TYPCATEGORY_BOOLEAN:
1234 if (DatumGetBool(val))
1235 appendStringInfoString(result, "true");
1237 appendStringInfoString(result, "false");
1239 case TYPCATEGORY_NUMERIC:
1240 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1243 * Don't call escape_json here if it's a valid JSON number.
1244 * Numeric output should usually be a valid JSON number and JSON
1245 * numbers shouldn't be quoted. Quote cases like "Nan" and
1246 * "Infinity", however.
1248 if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
1249 appendStringInfoString(result, outputstr);
1251 escape_json(result, outputstr);
1254 case TYPCATEGORY_JSON:
1255 /* JSON will already be escaped */
1256 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1257 appendStringInfoString(result, outputstr);
1260 case TYPCATEGORY_JSON_CAST:
1261 jsontext = DatumGetTextP(OidFunctionCall1(typoutputfunc, val));
1262 outputstr = text_to_cstring(jsontext);
1263 appendStringInfoString(result, outputstr);
1268 outputstr = OidOutputFunctionCall(typoutputfunc, val);
1269 escape_json(result, outputstr);
1276 * Process a single dimension of an array.
1277 * If it's the innermost dimension, output the values, otherwise call
1278 * ourselves recursively to process the next dimension.
1281 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
1282 bool *nulls, int *valcount, TYPCATEGORY tcategory,
1283 Oid typoutputfunc, bool use_line_feeds)
1288 Assert(dim < ndims);
1290 sep = use_line_feeds ? ",\n " : ",";
1292 appendStringInfoChar(result, '[');
1294 for (i = 1; i <= dims[dim]; i++)
1297 appendStringInfoString(result, sep);
1299 if (dim + 1 == ndims)
1301 datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
1308 * Do we want line feeds on inner dimensions of arrays? For now
1311 array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
1312 valcount, tcategory, typoutputfunc, false);
1316 appendStringInfoChar(result, ']');
1320 * Turn an array into JSON.
1323 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
1325 ArrayType *v = DatumGetArrayTypeP(array);
1326 Oid element_type = ARR_ELEMTYPE(v);
1339 TYPCATEGORY tcategory;
1340 Oid castfunc = InvalidOid;
1344 nitems = ArrayGetNItems(ndim, dim);
1348 appendStringInfoString(result, "[]");
1352 get_type_io_data(element_type, IOFunc_output,
1353 &typlen, &typbyval, &typalign,
1354 &typdelim, &typioparam, &typoutputfunc);
1356 if (element_type > FirstNormalObjectId)
1359 Form_pg_cast castForm;
1361 tuple = SearchSysCache2(CASTSOURCETARGET,
1362 ObjectIdGetDatum(element_type),
1363 ObjectIdGetDatum(JSONOID));
1364 if (HeapTupleIsValid(tuple))
1366 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1368 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1369 castfunc = typoutputfunc = castForm->castfunc;
1371 ReleaseSysCache(tuple);
1375 deconstruct_array(v, element_type, typlen, typbyval,
1376 typalign, &elements, &nulls,
1379 if (castfunc != InvalidOid)
1380 tcategory = TYPCATEGORY_JSON_CAST;
1381 else if (element_type == RECORDOID)
1382 tcategory = TYPCATEGORY_COMPOSITE;
1383 else if (element_type == JSONOID)
1384 tcategory = TYPCATEGORY_JSON;
1386 tcategory = TypeCategory(element_type);
1388 array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
1389 typoutputfunc, use_line_feeds);
1396 * Turn a composite / record into JSON.
/*
 * composite_to_json: append the "{...}" JSON object rendering of one
 * composite (row) Datum to *result.  use_line_feeds selects the pretty
 * separator (",\n " instead of ",").
 *
 * NOTE(review): this excerpt is non-contiguous (the embedded original line
 * numbers jump), so some declarations, braces and else-branches are not
 * visible here.
 */
1399 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
1405 HeapTupleData tmptup,
1408 bool needsep = false;
/* Field separator: pretty mode breaks the line and indents. */
1411 sep = use_line_feeds ? ",\n " : ",";
1413 td = DatumGetHeapTupleHeader(composite);
1415 /* Extract rowtype info and find a tupdesc */
1416 tupType = HeapTupleHeaderGetTypeId(td);
1417 tupTypmod = HeapTupleHeaderGetTypMod(td);
1418 tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1420 /* Build a temporary HeapTuple control structure */
1421 tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
1425 appendStringInfoChar(result, '{');
/* Emit one "name":value pair per live attribute. */
1427 for (i = 0; i < tupdesc->natts; i++)
1433 TYPCATEGORY tcategory;
1436 Oid castfunc = InvalidOid;
/* Skip columns removed by ALTER TABLE ... DROP COLUMN. */
1438 if (tupdesc->attrs[i]->attisdropped)
1442 appendStringInfoString(result, sep);
/* Field name is emitted as an escaped JSON string literal. */
1445 attname = NameStr(tupdesc->attrs[i]->attname);
1446 escape_json(result, attname);
1447 appendStringInfoChar(result, ':');
1449 origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
1451 getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
1452 &typoutput, &typisvarlena);
/*
 * For user-defined types (OID above FirstNormalObjectId), check the
 * syscache for a function-based cast to json; if one exists, use the
 * cast function in place of the type's regular output function.
 */
1454 if (tupdesc->attrs[i]->atttypid > FirstNormalObjectId)
1456 HeapTuple cast_tuple;
1457 Form_pg_cast castForm;
1459 cast_tuple = SearchSysCache2(CASTSOURCETARGET,
1460 ObjectIdGetDatum(tupdesc->attrs[i]->atttypid),
1461 ObjectIdGetDatum(JSONOID));
1462 if (HeapTupleIsValid(cast_tuple))
1464 castForm = (Form_pg_cast) GETSTRUCT(cast_tuple);
1466 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1467 castfunc = typoutput = castForm->castfunc;
1469 ReleaseSysCache(cast_tuple);
/* Classify the attribute type so datum_to_json renders it correctly. */
1473 if (castfunc != InvalidOid)
1474 tcategory = TYPCATEGORY_JSON_CAST;
1475 else if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
1476 tcategory = TYPCATEGORY_ARRAY;
1477 else if (tupdesc->attrs[i]->atttypid == RECORDOID)
1478 tcategory = TYPCATEGORY_COMPOSITE;
1479 else if (tupdesc->attrs[i]->atttypid == JSONOID)
1480 tcategory = TYPCATEGORY_JSON;
1482 tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
1485 * If we have a toasted datum, forcibly detoast it here to avoid
1486 * memory leakage inside the type's output routine.
1488 if (typisvarlena && !isnull)
1489 val = PointerGetDatum(PG_DETOAST_DATUM(origval));
1493 datum_to_json(val, isnull, result, tcategory, typoutput);
1495 /* Clean up detoasted copy, if any */
1497 pfree(DatumGetPointer(val));
1500 appendStringInfoChar(result, '}');
/* Release the refcount acquired by lookup_rowtype_tupdesc(). */
1501 ReleaseTupleDesc(tupdesc);
1505 * SQL function array_to_json(row)
1508 array_to_json(PG_FUNCTION_ARGS)
1510 Datum array = PG_GETARG_DATUM(0);
1513 result = makeStringInfo();
1515 array_to_json_internal(array, result, false);
1517 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1521 * SQL function array_to_json(row, prettybool)
1524 array_to_json_pretty(PG_FUNCTION_ARGS)
1526 Datum array = PG_GETARG_DATUM(0);
1527 bool use_line_feeds = PG_GETARG_BOOL(1);
1530 result = makeStringInfo();
1532 array_to_json_internal(array, result, use_line_feeds);
1534 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1538 * SQL function row_to_json(row)
1541 row_to_json(PG_FUNCTION_ARGS)
1543 Datum array = PG_GETARG_DATUM(0);
1546 result = makeStringInfo();
1548 composite_to_json(array, result, false);
1550 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1554 * SQL function row_to_json(row, prettybool)
1557 row_to_json_pretty(PG_FUNCTION_ARGS)
1559 Datum array = PG_GETARG_DATUM(0);
1560 bool use_line_feeds = PG_GETARG_BOOL(1);
1563 result = makeStringInfo();
1565 composite_to_json(array, result, use_line_feeds);
1567 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1571 * SQL function to_json(anyvalue)
/*
 * Convert any SQL value to json text.  The argument's type is discovered
 * from the function expression; errors out if it cannot be determined
 * (e.g. an unknown-type literal).
 *
 * NOTE(review): this excerpt is non-contiguous (the embedded original line
 * numbers jump), so some declarations and else-branches are not visible.
 */
1574 to_json(PG_FUNCTION_ARGS)
1576 Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
1580 TYPCATEGORY tcategory;
1583 Oid castfunc = InvalidOid;
1585 if (val_type == InvalidOid)
1587 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1588 errmsg("could not determine input data type")));
1591 result = makeStringInfo();
/* Use a zero Datum as a stand-in when the argument is SQL NULL. */
1593 orig_val = PG_ARGISNULL(0) ? (Datum) 0 : PG_GETARG_DATUM(0);
1595 getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
/*
 * For user-defined types (OID above FirstNormalObjectId), prefer a
 * function-based cast to json over the type's regular output function,
 * if such a cast is registered in the syscache.
 */
1597 if (val_type > FirstNormalObjectId)
1600 Form_pg_cast castForm;
1602 tuple = SearchSysCache2(CASTSOURCETARGET,
1603 ObjectIdGetDatum(val_type),
1604 ObjectIdGetDatum(JSONOID));
1605 if (HeapTupleIsValid(tuple))
1607 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1609 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1610 castfunc = typoutput = castForm->castfunc;
1612 ReleaseSysCache(tuple);
/* Classify the value so datum_to_json renders it appropriately. */
1616 if (castfunc != InvalidOid)
1617 tcategory = TYPCATEGORY_JSON_CAST;
1618 else if (val_type == RECORDARRAYOID)
1619 tcategory = TYPCATEGORY_ARRAY;
1620 else if (val_type == RECORDOID)
1621 tcategory = TYPCATEGORY_COMPOSITE;
1622 else if (val_type == JSONOID)
1623 tcategory = TYPCATEGORY_JSON;
1625 tcategory = TypeCategory(val_type);
1628 * If we have a toasted datum, forcibly detoast it here to avoid memory
1629 * leakage inside the type's output routine.
1631 if (typisvarlena && orig_val != (Datum) 0)
1632 val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
/*
 * NOTE(review): is_null is passed as false here even though orig_val may
 * be the NULL stand-in set above; presumably the function is marked
 * STRICT in the catalog so a true NULL never reaches this point -- confirm.
 */
1636 datum_to_json(val, false, result, tcategory, typoutput);
1638 /* Clean up detoasted copy, if any */
1639 if (val != orig_val)
1640 pfree(DatumGetPointer(val));
1642 PG_RETURN_TEXT_P(cstring_to_text(result->data));
1646 * json_agg transition function
/*
 * Append the json rendering of the second argument to the aggregate's
 * StringInfo state, creating the state (and the opening '[') on the
 * first call.  The state itself lives in the aggregate memory context.
 *
 * NOTE(review): this excerpt is non-contiguous (the embedded original line
 * numbers jump), so some declarations and branch lines are not visible.
 */
1649 json_agg_transfn(PG_FUNCTION_ARGS)
1651 Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
1652 MemoryContext aggcontext,
1657 TYPCATEGORY tcategory;
1660 Oid castfunc = InvalidOid;
1662 if (val_type == InvalidOid)
1664 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1665 errmsg("could not determine input data type")));
1667 if (!AggCheckCallContext(fcinfo, &aggcontext))
1669 /* cannot be called directly because of internal-type argument */
1670 elog(ERROR, "json_agg_transfn called in non-aggregate context");
/* First call: state is NULL, so build it and emit the array opener. */
1673 if (PG_ARGISNULL(0))
1676 * Make this StringInfo in a context where it will persist for the
1677 * duration of the aggregate call. It's only needed for this initial
1678 * piece, as the StringInfo routines make sure they use the right
1679 * context to enlarge the object if necessary.
1681 oldcontext = MemoryContextSwitchTo(aggcontext);
1682 state = makeStringInfo();
1683 MemoryContextSwitchTo(oldcontext);
1685 appendStringInfoChar(state, '[');
/* Subsequent calls: reuse the state and separate elements with ", ". */
1689 state = (StringInfo) PG_GETARG_POINTER(0);
1690 appendStringInfoString(state, ", ");
1693 /* fast path for NULLs */
1694 if (PG_ARGISNULL(1))
1696 orig_val = (Datum) 0;
1697 datum_to_json(orig_val, true, state, 0, InvalidOid);
1698 PG_RETURN_POINTER(state);
1702 orig_val = PG_GETARG_DATUM(1);
1704 getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
/*
 * For user-defined types (OID above FirstNormalObjectId), prefer a
 * function-based cast to json over the type's regular output function,
 * if such a cast is registered in the syscache.
 */
1706 if (val_type > FirstNormalObjectId)
1709 Form_pg_cast castForm;
1711 tuple = SearchSysCache2(CASTSOURCETARGET,
1712 ObjectIdGetDatum(val_type),
1713 ObjectIdGetDatum(JSONOID));
1714 if (HeapTupleIsValid(tuple))
1716 castForm = (Form_pg_cast) GETSTRUCT(tuple);
1718 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
1719 castfunc = typoutput = castForm->castfunc;
1721 ReleaseSysCache(tuple);
/* Classify the value so datum_to_json renders it appropriately. */
1725 if (castfunc != InvalidOid)
1726 tcategory = TYPCATEGORY_JSON_CAST;
1727 else if (val_type == RECORDARRAYOID)
1728 tcategory = TYPCATEGORY_ARRAY;
1729 else if (val_type == RECORDOID)
1730 tcategory = TYPCATEGORY_COMPOSITE;
1731 else if (val_type == JSONOID)
1732 tcategory = TYPCATEGORY_JSON;
1734 tcategory = TypeCategory(val_type);
1737 * If we have a toasted datum, forcibly detoast it here to avoid memory
1738 * leakage inside the type's output routine.
/* NOTE(review): the typisvarlena guard is not visible in this excerpt. */
1741 val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
/* Pretty touch: start arrays/composites on a fresh indented line. */
1745 if (!PG_ARGISNULL(0) &&
1746 (tcategory == TYPCATEGORY_ARRAY || tcategory == TYPCATEGORY_COMPOSITE))
1748 appendStringInfoString(state, "\n ");
1751 datum_to_json(val, false, state, tcategory, typoutput);
1753 /* Clean up detoasted copy, if any */
1754 if (val != orig_val)
1755 pfree(DatumGetPointer(val));
1758 * The transition type for json_agg() is declared to be "internal", which
1759 * is a pass-by-value type the same size as a pointer. So we can safely
1760 * pass the StringInfo pointer through nodeAgg.c's machinations.
1762 PG_RETURN_POINTER(state);
1766 * json_agg final function
1769 json_agg_finalfn(PG_FUNCTION_ARGS)
1773 /* cannot be called directly because of internal-type argument */
1774 Assert(AggCheckCallContext(fcinfo, NULL));
1776 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
1781 appendStringInfoChar(state, ']');
1783 PG_RETURN_TEXT_P(cstring_to_text(state->data));
1787 * Produce a JSON string literal, properly escaping characters in the text.
1790 escape_json(StringInfo buf, const char *str)
1794 appendStringInfoCharMacro(buf, '\"');
1795 for (p = str; *p; p++)
1800 appendStringInfoString(buf, "\\b");
1803 appendStringInfoString(buf, "\\f");
1806 appendStringInfoString(buf, "\\n");
1809 appendStringInfoString(buf, "\\r");
1812 appendStringInfoString(buf, "\\t");
1815 appendStringInfoString(buf, "\\\"");
1818 appendStringInfoString(buf, "\\\\");
1821 if ((unsigned char) *p < ' ')
1822 appendStringInfo(buf, "\\u%04x", (int) *p);
1824 appendStringInfoCharMacro(buf, *p);
1828 appendStringInfoCharMacro(buf, '\"');