1 /*-------------------------------------------------------------------------
4 * JSON data type support.
6 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/utils/adt/json.c
12 *-------------------------------------------------------------------------
16 #include "catalog/pg_type.h"
17 #include "executor/spi.h"
18 #include "lib/stringinfo.h"
19 #include "libpq/pqformat.h"
20 #include "mb/pg_wchar.h"
21 #include "parser/parse_coerce.h"
22 #include "utils/array.h"
23 #include "utils/builtins.h"
24 #include "utils/lsyscache.h"
25 #include "utils/json.h"
26 #include "utils/typcache.h"
44 char *token_terminator;
45 JsonValueType token_type;
52 JSON_PARSE_VALUE, /* expecting a value */
53 JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
54 JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
55 JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
56 JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
57 JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
58 JSON_PARSE_OBJECT_COMMA /* saw object ',', expecting next label */
61 typedef struct JsonParseStack
70 JSON_STACKOP_PUSH_WITH_PUSHBACK,
/*
 * Forward declarations of the file-local (static) helpers.  The first group
 * is the JSON validator: the parser driver, the lexer routines and the two
 * error reporters.  The second group converts Datums (composites and arrays)
 * into JSON text appended to a StringInfo.
 */
74 static void json_validate_cstring(char *input);
75 static void json_lex(JsonLexContext *lex);
76 static void json_lex_string(JsonLexContext *lex);
77 static void json_lex_number(JsonLexContext *lex, char *s);
78 static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);
79 static void report_invalid_token(JsonLexContext *lex);
80 static char *extract_mb_char(char *s);
81 static void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds);
82 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
83 Datum *vals, bool *nulls, int *valcount,
84 TYPCATEGORY tcategory, Oid typoutputfunc,
86 static void array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds);
88 /* fake type category for JSON so we can distinguish it in datum_to_json */
89 #define TYPCATEGORY_JSON 'j'
90 /* letters appearing in numeric output that aren't valid in a JSON number */
91 #define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
96 json_in(PG_FUNCTION_ARGS)
98 char *text = PG_GETARG_CSTRING(0);
100 json_validate_cstring(text);
102 PG_RETURN_TEXT_P(cstring_to_text(text));
109 json_out(PG_FUNCTION_ARGS)
111 Datum txt = PG_GETARG_DATUM(0);
113 PG_RETURN_CSTRING(TextDatumGetCString(txt));
120 json_send(PG_FUNCTION_ARGS)
123 text *t = PG_GETARG_TEXT_PP(0);
125 pq_begintypsend(&buf);
126 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
127 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
/*
 * json_recv: binary receive function for json.
 *
 * Reads the remainder of the incoming message as text, copies it into a
 * text value, validates the copy with json_validate_cstring(), and returns
 * the text value.
 */
134 json_recv(PG_FUNCTION_ARGS)
136 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
/* Consume every byte that is still unread in the message buffer. */
141 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
144 * We need a null-terminated string to pass to json_validate_cstring().
145 * Rather than make a separate copy, make the temporary result one byte
146 * bigger than it needs to be.
/*
 * The allocation has room for one byte beyond the varlena payload, but the
 * recorded size (SET_VARSIZE) excludes that spare byte.
 * NOTE(review): the statement that stores the terminating NUL in the spare
 * byte is not visible in this excerpt -- confirm it precedes validation.
 */
148 result = palloc(nbytes + 1 + VARHDRSZ);
149 SET_VARSIZE(result, nbytes + VARHDRSZ);
150 memcpy(VARDATA(result), str, nbytes);
/* From here on, str points into the result's own payload. */
151 str = VARDATA(result);
155 json_validate_cstring(str);
157 PG_RETURN_TEXT_P(result);
/*
 * json_validate_cstring: check that a C string is well-formed JSON.
 *
 * Calls json_lex() for one token at a time and tracks nesting with an
 * explicit, palloc'd array of JsonParseStack entries.  On each iteration the
 * state of the current stack entry and the token just read select the next
 * state and a stack operation (none, push, push-with-pushback, or pop).  A
 * token that does not fit the current state is passed to
 * report_parse_error().
 *
 * input: the string to validate.
 */
161 * Check whether supplied input is valid JSON.
164 json_validate_cstring(char *input)
167 JsonParseStack *stack,
171 /* Set up lexing context. */
173 lex.token_terminator = lex.input;
175 lex.line_start = input;
177 /* Set up parse stack. */
179 stacktop = palloc(sizeof(JsonParseStack) * stacksize);
181 stack->state = JSON_PARSE_VALUE;
183 /* Main parsing loop. */
188 /* Fetch next token. */
191 /* Check for unexpected end of input. */
192 if (lex.token_start == NULL)
193 report_parse_error(stack, &lex);
196 /* Figure out what to do with this token. */
197 op = JSON_STACKOP_NONE;
198 switch (stack->state)
/*
 * Throughout this switch, a token_type other than JSON_VALUE_INVALID means
 * a scalar token; punctuation marks carry JSON_VALUE_INVALID and are told
 * apart by their first character.
 */
200 case JSON_PARSE_VALUE:
/* A scalar completes this value; '[' or '{' opens a container in place. */
201 if (lex.token_type != JSON_VALUE_INVALID)
202 op = JSON_STACKOP_POP;
203 else if (lex.token_start[0] == '[')
204 stack->state = JSON_PARSE_ARRAY_START;
205 else if (lex.token_start[0] == '{')
206 stack->state = JSON_PARSE_OBJECT_START;
208 report_parse_error(stack, &lex);
210 case JSON_PARSE_ARRAY_START:
211 if (lex.token_type != JSON_VALUE_INVALID)
212 stack->state = JSON_PARSE_ARRAY_NEXT;
213 else if (lex.token_start[0] == ']')
214 op = JSON_STACKOP_POP;
215 else if (lex.token_start[0] == '['
216 || lex.token_start[0] == '{')
/*
 * First element is itself a container: mark that an element has been seen,
 * then push a fresh JSON_PARSE_VALUE entry to handle it.
 */
218 stack->state = JSON_PARSE_ARRAY_NEXT;
219 op = JSON_STACKOP_PUSH_WITH_PUSHBACK;
222 report_parse_error(stack, &lex);
224 case JSON_PARSE_ARRAY_NEXT:
225 if (lex.token_type != JSON_VALUE_INVALID)
226 report_parse_error(stack, &lex);
227 else if (lex.token_start[0] == ']')
228 op = JSON_STACKOP_POP;
229 else if (lex.token_start[0] == ',')
/* After a comma, a new JSON_PARSE_VALUE entry parses the next element. */
230 op = JSON_STACKOP_PUSH;
232 report_parse_error(stack, &lex);
234 case JSON_PARSE_OBJECT_START:
235 if (lex.token_type == JSON_VALUE_STRING)
236 stack->state = JSON_PARSE_OBJECT_LABEL;
237 else if (lex.token_type == JSON_VALUE_INVALID
238 && lex.token_start[0] == '}')
239 op = JSON_STACKOP_POP;
241 report_parse_error(stack, &lex);
243 case JSON_PARSE_OBJECT_LABEL:
244 if (lex.token_type == JSON_VALUE_INVALID
245 && lex.token_start[0] == ':')
/* After the ':', a new JSON_PARSE_VALUE entry parses the member's value. */
247 stack->state = JSON_PARSE_OBJECT_NEXT;
248 op = JSON_STACKOP_PUSH;
251 report_parse_error(stack, &lex);
253 case JSON_PARSE_OBJECT_NEXT:
254 if (lex.token_type != JSON_VALUE_INVALID)
255 report_parse_error(stack, &lex);
256 else if (lex.token_start[0] == '}')
257 op = JSON_STACKOP_POP;
258 else if (lex.token_start[0] == ',')
259 stack->state = JSON_PARSE_OBJECT_COMMA;
261 report_parse_error(stack, &lex);
263 case JSON_PARSE_OBJECT_COMMA:
/* Only another label may follow a comma inside an object. */
264 if (lex.token_type == JSON_VALUE_STRING)
265 stack->state = JSON_PARSE_OBJECT_LABEL;
267 report_parse_error(stack, &lex);
270 elog(ERROR, "unexpected json parse state: %d",
274 /* Push or pop the stack, if needed. */
277 case JSON_STACKOP_PUSH:
278 case JSON_STACKOP_PUSH_WITH_PUSHBACK:
/*
 * Enlarge the stack by 32 entries when it is full.  The current position is
 * saved as an offset and re-applied after repalloc() so that "stack" points
 * into the new array.
 */
280 if (stack >= &stacktop[stacksize])
282 int stackoffset = stack - stacktop;
283 stacksize = stacksize + 32;
284 stacktop = repalloc(stacktop,
285 sizeof(JsonParseStack) * stacksize);
286 stack = stacktop + stackoffset;
288 stack->state = JSON_PARSE_VALUE;
/*
 * NOTE(review): the statement guarded by this test is not visible in this
 * excerpt; presumably it rewinds the lexer so the '[' or '{' is read again
 * by the new stack entry -- confirm.
 */
289 if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK)
292 case JSON_STACKOP_POP:
/* Popping at the bottom of the stack: the top-level value is complete. */
293 if (stack == stacktop)
295 /* Expect end of input. */
297 if (lex.token_start != NULL)
298 report_parse_error(NULL, &lex);
303 case JSON_STACKOP_NONE:
/*
 * json_lex: advance the lexer by one token.
 *
 * Scanning resumes at lex->token_terminator.  Afterwards lex->token_start
 * and lex->token_terminator delimit the token and lex->token_type classifies
 * it.  Punctuation marks are given type JSON_VALUE_INVALID, so callers tell
 * them apart by inspecting token_start[0].  At end of input both pointers
 * are set to NULL.  Tokens that are not valid JSON are passed to
 * report_invalid_token().
 */
311 * Lex one token from the input stream.
314 json_lex(JsonLexContext *lex)
318 /* Skip leading whitespace. */
319 s = lex->token_terminator;
320 while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
326 lex->token_start = s;
328 /* Determine token type. */
/*
 * This test also succeeds when s[0] is the terminating NUL, because strchr()
 * can match the terminator of the search string; that is why end of input
 * is handled inside this branch.
 */
329 if (strchr("{}[],:", s[0]))
331 /* strchr() doesn't return false on a NUL input. */
335 lex->token_start = NULL;
336 lex->token_terminator = NULL;
340 /* Single-character token, some kind of punctuation mark. */
341 lex->token_terminator = s + 1;
343 lex->token_type = JSON_VALUE_INVALID;
348 json_lex_string(lex);
349 lex->token_type = JSON_VALUE_STRING;
353 /* Negative number. */
/* Skip the sign; json_lex_number() expects to start at the first digit. */
354 json_lex_number(lex, s + 1);
355 lex->token_type = JSON_VALUE_NUMBER;
357 else if (*s >= '0' && *s <= '9')
359 /* Positive number. */
360 json_lex_number(lex, s);
361 lex->token_type = JSON_VALUE_NUMBER;
368 * We're not dealing with a string, number, legal punctuation mark,
369 * or end of string. The only legal tokens we might find here are
370 * true, false, and null, but for error reporting purposes we scan
371 * until we see a non-alphanumeric character. That way, we can report
372 * the whole word as an unexpected token, rather than just some
373 * unintuitive prefix thereof.
375 for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
376 || (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p);
381 * We got some sort of unexpected punctuation or an otherwise
382 * unexpected character, so just complain about that one character.
386 lex->token_terminator = s + 1;
387 report_invalid_token(lex);
391 * We've got a real alphanumeric token here. If it happens to be
392 * true, false, or null, all is well. If not, error out.
394 lex->token_terminator = p;
/*
 * NOTE(review): the length test guarding the two 4-byte comparisons is not
 * visible in this excerpt; the 5-byte "false" case below checks p - s == 5
 * explicitly.
 */
397 if (memcmp(s, "true", 4) == 0)
398 lex->token_type = JSON_VALUE_TRUE;
399 else if (memcmp(s, "null", 4) == 0)
400 lex->token_type = JSON_VALUE_NULL;
402 report_invalid_token(lex);
404 else if (p - s == 5 && memcmp(s, "false", 5) == 0)
405 lex->token_type = JSON_VALUE_FALSE;
407 report_invalid_token(lex);
/*
 * json_lex_string: lex a string token.
 *
 * Scans from the character after lex->token_start up to the closing double
 * quote, checking the contents on the way.  Problems are reported through
 * report_invalid_token() or through an "invalid input syntax for type json"
 * error whose detail names the line and the problem.  On success
 * lex->token_terminator is left just past the closing quote.
 */
412 * The next token in the input stream is known to be a string; lex it.
415 json_lex_string(JsonLexContext *lex)
417 char *s = lex->token_start + 1;
419 for (s = lex->token_start + 1; *s != '"'; ++s)
421 /* Per RFC4627, these characters MUST be escaped. */
424 /* A NUL byte marks the (premature) end of the string. */
427 lex->token_terminator = s;
428 report_invalid_token(lex);
431 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
432 errmsg("invalid input syntax for type json"),
433 errdetail_internal("line %d: Character \"%c\" must be escaped.",
434 lex->line_number, *s)));
438 /* OK, we have an escape character. */
442 lex->token_terminator = s;
443 report_invalid_token(lex);
/*
 * Unicode escape: accumulate the value of the four hexadecimal digits that
 * follow into ch, one digit per iteration.
 */
450 for (i = 1; i <= 4; ++i)
454 lex->token_terminator = s + i;
455 report_invalid_token(lex);
457 else if (s[i] >= '0' && s[i] <= '9')
458 ch = (ch * 16) + (s[i] - '0');
459 else if (s[i] >= 'a' && s[i] <= 'f')
460 ch = (ch * 16) + (s[i] - 'a') + 10;
461 else if (s[i] >= 'A' && s[i] <= 'F')
462 ch = (ch * 16) + (s[i] - 'A') + 10;
466 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
467 errmsg("invalid input syntax for type json"),
468 errdetail_internal("line %d: \"\\u\" must be followed by four hexadecimal digits.",
473 /* Account for the four additional bytes we just parsed. */
/* Any escape character outside this set is rejected. */
476 else if (!strchr("\"\\/bfnrt", *s))
480 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
481 errmsg("invalid input syntax for type json"),
482 errdetail_internal("line %d: Invalid escape \"\\%s\".",
483 lex->line_number, extract_mb_char(s))));
488 /* Hooray, we found the end of the string! */
489 lex->token_terminator = s + 1;
492 /*-------------------------------------------------------------------------
493 * The next token in the input stream is known to be a number; lex it.
495 * In JSON, a number consists of four parts:
497 * (1) An optional minus sign ('-').
499 * (2) Either a single '0', or a string of one or more digits that does not
502 * (3) An optional decimal part, consisting of a period ('.') followed by
503 * one or more digits. (Note: While this part can be omitted
504 * completely, it's not OK to have only the decimal point without
505 * any digits afterwards.)
507 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
508 * followed by '+' or '-', followed by one or more digits. (Note:
509 * As with the decimal part, if 'e' or 'E' is present, it must be
510 * followed by at least one digit.)
512 * The 's' argument to this function points to the ostensible beginning
513 * of part 2 - i.e. the character after any optional minus sign, and the
514 * first character of the string if there is none.
516 *-------------------------------------------------------------------------
519 json_lex_number(JsonLexContext *lex, char *s)
524 /* Part (1): leading sign indicator. */
525 /* Caller already did this for us; so do nothing. */
527 /* Part (2): parse main digit string. */
530 else if (*s >= '1' && *s <= '9')
535 } while (*s >= '0' && *s <= '9');
540 /* Part (3): parse optional decimal portion. */
544 if (*s < '0' && *s > '9')
551 } while (*s >= '0' && *s <= '9');
555 /* Part (4): parse optional exponent. */
556 if (*s == 'e' || *s == 'E')
559 if (*s == '+' || *s == '-')
561 if (*s < '0' && *s > '9')
568 } while (*s >= '0' && *s <= '9');
572 /* Check for trailing garbage. */
573 for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
574 || (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p); ++p)
576 lex->token_terminator = p;
578 report_invalid_token(lex);
/*
 * report_parse_error: raise the error for a token the parser cannot accept.
 *
 * stack: the current parse stack entry; its state selects the detail
 *        message.  json_validate_cstring() passes NULL when it expected the
 *        end of the input.
 * lex:   lexer context; token_start == NULL means the input ended early.
 *
 * All errors use ERRCODE_INVALID_TEXT_REPRESENTATION.
 */
582 * Report a parse error.
585 report_parse_error(JsonParseStack *stack, JsonLexContext *lex)
591 /* Handle case where the input ended prematurely. */
592 if (lex->token_start == NULL)
594 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
595 errmsg("invalid input syntax for type json: \"%s\"",
597 errdetail_internal("The input string ended unexpectedly.")));
599 /* Work out the offending token. */
/* Copy the token into a NUL-terminated buffer so it can be printed with %s. */
600 toklen = lex->token_terminator - lex->token_start;
601 token = palloc(toklen + 1);
602 memcpy(token, lex->token_start, toklen);
603 token[toklen] = '\0';
605 /* Select correct detail message. */
607 detail = "line %d: Expected end of input, but found \"%s\".";
/* Each parse state has a message naming the tokens that would be accepted. */
610 switch (stack->state)
612 case JSON_PARSE_VALUE:
613 detail = "line %d: Expected string, number, object, array, true, false, or null, but found \"%s\".";
615 case JSON_PARSE_ARRAY_START:
616 detail = "line %d: Expected array element or \"]\", but found \"%s\".";
618 case JSON_PARSE_ARRAY_NEXT:
619 detail = "line %d: Expected \",\" or \"]\", but found \"%s\".";
621 case JSON_PARSE_OBJECT_START:
622 detail = "line %d: Expected string or \"}\", but found \"%s\".";
624 case JSON_PARSE_OBJECT_LABEL:
625 detail = "line %d: Expected \":\", but found \"%s\".";
627 case JSON_PARSE_OBJECT_NEXT:
628 detail = "line %d: Expected \",\" or \"}\", but found \"%s\".";
630 case JSON_PARSE_OBJECT_COMMA:
631 detail = "line %d: Expected string, but found \"%s\".";
637 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
638 errmsg("invalid input syntax for type json: \"%s\"",
640 errdetail_internal(detail, lex->line_number, token)));
644 * Report an invalid input token.
647 report_invalid_token(JsonLexContext *lex)
652 toklen = lex->token_terminator - lex->token_start;
653 token = palloc(toklen + 1);
654 memcpy(token, lex->token_start, toklen);
655 token[toklen] = '\0';
658 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
659 errmsg("invalid input syntax for type json"),
660 errdetail_internal("line %d: Token \"%s\" is invalid.",
661 lex->line_number, token)));
/*
 * extract_mb_char: return a palloc'd copy of the character at s.
 *
 * The result buffer is len + 1 bytes.
 * NOTE(review): how len is computed, and the copy / NUL-termination steps,
 * are not visible in this excerpt -- presumably len is the byte length of
 * the (possibly multi-byte) character at s; confirm.
 */
665 * Extract a single, possibly multi-byte char from the input string.
668 extract_mb_char(char *s)
674 res = palloc(len + 1);
/*
 * datum_to_json: append the JSON form of one Datum to result.
 *
 * val, is_null:  the value and its null flag.
 * result:        output buffer that is appended to.
 * tcategory:     type category used to select the output format.
 * typoutputfunc: the type's output function, called through
 *                OidOutputFunctionCall() for non-container values.
 */
682 * Turn a scalar Datum into JSON. Hand off a non-scalar datum to
683 * composite_to_json or array_to_json_internal as appropriate.
686 datum_to_json(Datum val, bool is_null, StringInfo result, TYPCATEGORY tcategory,
694 appendStringInfoString(result,"null");
700 case TYPCATEGORY_ARRAY:
/* Nested arrays and composites are always emitted without line feeds. */
701 array_to_json_internal(val, result, false);
703 case TYPCATEGORY_COMPOSITE:
704 composite_to_json(val, result, false);
706 case TYPCATEGORY_BOOLEAN:
707 if (DatumGetBool(val))
708 appendStringInfoString(result,"true");
710 appendStringInfoString(result,"false");
712 case TYPCATEGORY_NUMERIC:
713 outputstr = OidOutputFunctionCall(typoutputfunc, val);
715 * Don't call escape_json here if it's a valid JSON
716 * number. Numeric output should usually be a valid
717 * JSON number and JSON numbers shouldn't be quoted.
718 * Quote cases like "Nan" and "Infinity", however.
/* Output containing any letter listed in NON_NUMERIC_LETTER gets quoted. */
720 if (strpbrk(outputstr,NON_NUMERIC_LETTER) == NULL)
721 appendStringInfoString(result, outputstr);
723 escape_json(result, outputstr);
726 case TYPCATEGORY_JSON:
727 /* JSON will already be escaped */
728 outputstr = OidOutputFunctionCall(typoutputfunc, val);
729 appendStringInfoString(result, outputstr);
/* Every other category: emit the text output as a quoted JSON string. */
733 outputstr = OidOutputFunctionCall(typoutputfunc, val);
734 escape_json(result, outputstr);
/*
 * array_dim_to_json: append one array dimension to result as "[...]".
 *
 * dim, ndims:    index of the dimension being emitted and the total number
 *                of dimensions; dim + 1 == ndims marks the innermost one.
 * dims:          per-dimension lengths; dims[dim] items are emitted here.
 * vals, nulls:   element values and null flags, indexed by *valcount.
 * valcount:      index of the next element to emit.
 *                NOTE(review): the statement that advances it is not visible
 *                in this excerpt.
 * tcategory, typoutputfunc: passed through to datum_to_json().
 * use_line_feeds: selects ",\n " rather than "," as the item separator.
 */
740 * Process a single dimension of an array.
741 * If it's the innermost dimension, output the values, otherwise call
742 * ourselves recursively to process the next dimension.
745 array_dim_to_json(StringInfo result, int dim, int ndims,int * dims, Datum *vals,
746 bool *nulls, int * valcount, TYPCATEGORY tcategory,
747 Oid typoutputfunc, bool use_line_feeds)
755 sep = use_line_feeds ? ",\n " : ",";
757 appendStringInfoChar(result, '[');
759 for (i = 1; i <= dims[dim]; i++)
762 appendStringInfoString(result,sep);
764 if (dim + 1 == ndims)
766 datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
773 * Do we want line feeds on inner dimensions of arrays?
774 * For now we'll say no.
776 array_dim_to_json(result, dim+1, ndims, dims, vals, nulls,
777 valcount, tcategory, typoutputfunc, false);
781 appendStringInfoChar(result, ']');
/*
 * array_to_json_internal: append the JSON form of an array Datum to result.
 *
 * Looks up the element type's output information, deconstructs the array
 * into element values and null flags, chooses the type category for the
 * elements, and hands everything to array_dim_to_json() starting at
 * dimension 0.
 *
 * use_line_feeds is passed on to array_dim_to_json() for the outermost
 * dimension.
 */
785 * Turn an array into JSON.
788 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
790 ArrayType *v = DatumGetArrayTypeP(array);
791 Oid element_type = ARR_ELEMTYPE(v);
805 TYPCATEGORY tcategory;
809 nitems = ArrayGetNItems(ndim, dim);
/* NOTE(review): the condition guarding this "[]" output is not visible here. */
813 appendStringInfoString(result,"[]");
817 get_type_io_data(element_type, IOFunc_output,
818 &typlen, &typbyval, &typalign,
819 &typdelim, &typioparam, &typoutputfunc);
821 deconstruct_array(v, element_type, typlen, typbyval,
822 typalign, &elements, &nulls,
/*
 * RECORDOID and JSONOID elements get an explicit category; every other
 * element type is classified by TypeCategory().
 */
825 if (element_type == RECORDOID)
826 tcategory = TYPCATEGORY_COMPOSITE;
827 else if (element_type == JSONOID)
828 tcategory = TYPCATEGORY_JSON;
830 tcategory = TypeCategory(element_type);
832 array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
833 typoutputfunc, use_line_feeds);
/*
 * composite_to_json: append the JSON object form of a composite Datum.
 *
 * Emits '{', then one "name":value pair per attribute that is not dropped,
 * then '}'.  Attribute names are written with escape_json() and values with
 * datum_to_json().  The tuple descriptor obtained for the row type is
 * released before returning.
 *
 * use_line_feeds selects ",\n " rather than "," as the pair separator.
 */
840 * Turn a composite / record into JSON.
843 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
849 HeapTupleData tmptup, *tuple;
851 bool needsep = false;
854 sep = use_line_feeds ? ",\n " : ",";
856 td = DatumGetHeapTupleHeader(composite);
858 /* Extract rowtype info and find a tupdesc */
859 tupType = HeapTupleHeaderGetTypeId(td);
860 tupTypmod = HeapTupleHeaderGetTypMod(td);
861 tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
863 /* Build a temporary HeapTuple control structure */
864 tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
868 appendStringInfoChar(result,'{');
870 for (i = 0; i < tupdesc->natts; i++)
875 TYPCATEGORY tcategory;
879 if (tupdesc->attrs[i]->attisdropped)
883 appendStringInfoString(result,sep);
886 attname = NameStr(tupdesc->attrs[i]->attname);
887 escape_json(result,attname);
888 appendStringInfoChar(result,':');
/* Attribute numbers passed to heap_getattr() are 1-based, hence i + 1. */
890 origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
/*
 * RECORDARRAYOID, RECORDOID and JSONOID attributes get an explicit category;
 * every other attribute type is classified by TypeCategory().
 */
892 if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
893 tcategory = TYPCATEGORY_ARRAY;
894 else if (tupdesc->attrs[i]->atttypid == RECORDOID)
895 tcategory = TYPCATEGORY_COMPOSITE;
896 else if (tupdesc->attrs[i]->atttypid == JSONOID)
897 tcategory = TYPCATEGORY_JSON;
899 tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
901 getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
902 &typoutput, &typisvarlena);
905 * If we have a toasted datum, forcibly detoast it here to avoid memory
906 * leakage inside the type's output routine.
908 if (typisvarlena && ! isnull)
909 val = PointerGetDatum(PG_DETOAST_DATUM(origval));
913 datum_to_json(val, isnull, result, tcategory, typoutput);
915 /* Clean up detoasted copy, if any */
/* NOTE(review): the condition guarding this pfree() is not visible here. */
917 pfree(DatumGetPointer(val));
920 appendStringInfoChar(result,'}');
921 ReleaseTupleDesc(tupdesc);
925 * SQL function array_to_json(row)
928 array_to_json(PG_FUNCTION_ARGS)
930 Datum array = PG_GETARG_DATUM(0);
933 result = makeStringInfo();
935 array_to_json_internal(array, result, false);
937 PG_RETURN_TEXT_P(cstring_to_text(result->data));
941 * SQL function array_to_json(row, prettybool)
944 array_to_json_pretty(PG_FUNCTION_ARGS)
946 Datum array = PG_GETARG_DATUM(0);
947 bool use_line_feeds = PG_GETARG_BOOL(1);
950 result = makeStringInfo();
952 array_to_json_internal(array, result, use_line_feeds);
954 PG_RETURN_TEXT_P(cstring_to_text(result->data));
958 * SQL function row_to_json(row)
961 row_to_json(PG_FUNCTION_ARGS)
963 Datum array = PG_GETARG_DATUM(0);
966 result = makeStringInfo();
968 composite_to_json(array, result, false);
970 PG_RETURN_TEXT_P(cstring_to_text(result->data));
974 * SQL function row_to_json(row, prettybool)
977 row_to_json_pretty(PG_FUNCTION_ARGS)
979 Datum array = PG_GETARG_DATUM(0);
980 bool use_line_feeds = PG_GETARG_BOOL(1);
983 result = makeStringInfo();
985 composite_to_json(array, result, use_line_feeds);
987 PG_RETURN_TEXT_P(cstring_to_text(result->data));
/*
 * escape_json: append str to buf as a double-quoted JSON string literal.
 *
 * buf: output buffer that is appended to.
 * str: NUL-terminated input; it is read up to the terminator and not
 *      modified.
 *
 * Two-character escapes are emitted for backspace, form feed, newline,
 * carriage return, tab, double quote and backslash.  Any other byte below
 * 0x20 becomes a \u00XX escape, and everything else is copied unchanged.
 * NOTE(review): the case labels selecting each escape are not visible in
 * this excerpt; the mapping above is read off the emitted strings.
 */
991 * Produce a JSON string literal, properly escaping characters in the text.
994 escape_json(StringInfo buf, const char *str)
998 appendStringInfoCharMacro(buf, '\"');
999 for (p = str; *p; p++)
1004 appendStringInfoString(buf, "\\b");
1007 appendStringInfoString(buf, "\\f");
1010 appendStringInfoString(buf, "\\n");
1013 appendStringInfoString(buf, "\\r");
1016 appendStringInfoString(buf, "\\t");
1019 appendStringInfoString(buf, "\\\"");
1022 appendStringInfoString(buf, "\\\\");
/* The unsigned cast keeps bytes with the high bit set out of this branch. */
1025 if ((unsigned char) *p < ' ')
1026 appendStringInfo(buf, "\\u%04x", (int) *p);
1028 appendStringInfoCharMacro(buf, *p);
1032 appendStringInfoCharMacro(buf, '\"');