granicus.if.org Git - postgresql/blob - src/backend/utils/adt/json.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * json.c
   4  *              JSON data type support.
   5  *
   6  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  * IDENTIFICATION
  10  *        src/backend/utils/adt/json.c
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14 #include "postgres.h"
  15
  16 #include "access/htup_details.h"
  17 #include "catalog/pg_type.h"
  18 #include "executor/spi.h"
  19 #include "lib/stringinfo.h"
  20 #include "libpq/pqformat.h"
  21 #include "mb/pg_wchar.h"
  22 #include "parser/parse_coerce.h"
  23 #include "utils/array.h"
  24 #include "utils/builtins.h"
  25 #include "utils/lsyscache.h"
  26 #include "utils/json.h"
  27 #include "utils/typcache.h"
  28
  29 typedef enum                                    /* types of JSON values; json_lex() stores one in JsonLexContext.token_type */
  30 {
  31         JSON_VALUE_INVALID,                     /* non-value tokens (punctuation, end of input) are reported as this */
  32         JSON_VALUE_STRING,                      /* token began with a double quote */
  33         JSON_VALUE_NUMBER,                      /* token began with '-' or a digit */
  34         JSON_VALUE_OBJECT,                      /* not assigned by json_lex() in the code visible here */
  35         JSON_VALUE_ARRAY,                       /* not assigned by json_lex() in the code visible here */
  36         JSON_VALUE_TRUE,                        /* the bare word "true" */
  37         JSON_VALUE_FALSE,                       /* the bare word "false" */
  38         JSON_VALUE_NULL                         /* the bare word "null" */
  39 } JsonValueType;
  40
  41 typedef struct                                  /* state of JSON lexer, advanced one token at a time by json_lex() */
  42 {
  43         char       *input;                      /* whole NUL-terminated string being parsed */
  44         char       *token_start;        /* start of current token within input; json_lex() sets it to NULL at end of input */
  45         char       *token_terminator; /* end of previous or current token; json_lex() resumes scanning from here */
  46         JsonValueType token_type;       /* type of current token, once it's known */
  47 } JsonLexContext;
  48
  49 typedef enum                                    /* states of JSON parser; one per stack level in json_validate_cstring() */
  50 {
  51         JSON_PARSE_VALUE,                       /* expecting a value (also the initial, top-level state) */
  52         JSON_PARSE_ARRAY_START,         /* saw '[', expecting value or ']' */
  53         JSON_PARSE_ARRAY_NEXT,          /* saw array element, expecting ',' or ']' */
  54         JSON_PARSE_OBJECT_START,        /* saw '{', expecting label (a string) or '}' */
  55         JSON_PARSE_OBJECT_LABEL,        /* saw object label, expecting ':' */
  56         JSON_PARSE_OBJECT_NEXT,         /* saw object value, expecting ',' or '}' */
  57         JSON_PARSE_OBJECT_COMMA         /* saw object ',', expecting next label; '}' is not accepted here */
  58 } JsonParseState;
  59
  60 typedef struct JsonParseStack   /* one entry of the parse stack; json_validate_cstring() keeps a growable array of these */
  61 {
  62         JsonParseState state;           /* what the parser expects next at this nesting level */
  63         /* currently only need the state enum, but maybe someday more stuff */
  64 } JsonParseStack;
  65
  66 typedef enum                                    /* required operations on state stack, chosen per token by json_validate_cstring() */
  67 {
  68         JSON_STACKOP_NONE,                      /* no-op: stay at the current stack level */
  69         JSON_STACKOP_PUSH,                      /* push new JSON_PARSE_VALUE stack item */
  70         JSON_STACKOP_PUSH_WITH_PUSHBACK, /* push, then rescan current token against the new item without lexing again */
  71         JSON_STACKOP_POP                        /* pop, or expect end of input if no stack */
  72 } JsonStackOp;
  73
  74 static void json_validate_cstring(char *input);        /* raises an error unless input is well-formed JSON */
  75 static void json_lex(JsonLexContext *lex);             /* fetch the next token into *lex */
  76 static void json_lex_string(JsonLexContext *lex);      /* scan a string token starting at lex->token_start */
  77 static void json_lex_number(JsonLexContext *lex, char *s);     /* scan a number; s points just past any '-' */
  78 static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);    /* stack == NULL means "expected end of input" */
  79 static void report_invalid_token(JsonLexContext *lex);
  80 static int report_json_context(JsonLexContext *lex);
  81 static char *extract_mb_char(char *s);
  82 static void composite_to_json(Datum composite, StringInfo result,
  83                                                           bool use_line_feeds);
  84 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
  85                                   Datum *vals, bool *nulls, int *valcount,
  86                                   TYPCATEGORY tcategory, Oid typoutputfunc,
  87                                   bool use_line_feeds);
  88 static void array_to_json_internal(Datum array, StringInfo result,
  89                                                                    bool use_line_feeds);
  90
  91 /* fake type category for JSON so we can distinguish it in datum_to_json */
  92 #define TYPCATEGORY_JSON 'j'
  93 /* letters appearing in numeric output that aren't valid in a JSON number */
  94 #define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
  95 /* chars to consider as part of an alphanumeric token: ASCII letters, digits, '_' and any high-bit byte.  NB: evaluates (c) several times, so pass an expression without side effects. */
  96 #define JSON_ALPHANUMERIC_CHAR(c)  \
  97         (((c) >= 'a' && (c) <= 'z') || \
  98          ((c) >= 'A' && (c) <= 'Z') || \
  99          ((c) >= '0' && (c) <= '9') || \
 100          (c) == '_' || \
 101          IS_HIGHBIT_SET(c))
 102
 103
 104 /*
 105  * Input.
 106  */
 107 Datum
 108 json_in(PG_FUNCTION_ARGS)
 109 {
 110         char       *text = PG_GETARG_CSTRING(0);
 111
 112         json_validate_cstring(text);
 113
 114         /* Internal representation is the same as text, for now */
 115         PG_RETURN_TEXT_P(cstring_to_text(text));
 116 }
 117
 118 /*
 119  * Output.
 120  */
 121 Datum
 122 json_out(PG_FUNCTION_ARGS)
 123 {
 124         /* we needn't detoast because text_to_cstring will handle that */
 125         Datum           txt = PG_GETARG_DATUM(0);
 126
 127         PG_RETURN_CSTRING(TextDatumGetCString(txt));
 128 }
 129
 130 /*
 131  * Binary send.
 132  */
 133 Datum
 134 json_send(PG_FUNCTION_ARGS)
 135 {
 136         text       *t = PG_GETARG_TEXT_PP(0);
 137         StringInfoData buf;
 138
 139         pq_begintypsend(&buf);
 140         pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
 141         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 142 }
 143
 144 /*
 145  * Binary receive.
 146  */
 147 Datum
 148 json_recv(PG_FUNCTION_ARGS)
 149 {
 150         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 151         text       *result;
 152         char       *str;
 153         int                     nbytes;
 154
 155         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 156
 157         /*
 158          * We need a null-terminated string to pass to json_validate_cstring().
 159          * Rather than make a separate copy, make the temporary result one byte
 160          * bigger than it needs to be.
 161          */
 162         result = palloc(nbytes + 1 + VARHDRSZ);
 163         SET_VARSIZE(result, nbytes + VARHDRSZ);
 164         memcpy(VARDATA(result), str, nbytes);
 165         str = VARDATA(result);
 166         str[nbytes] = '\0';
 167
 168         /* Validate it. */
 169         json_validate_cstring(str);
 170
 171         PG_RETURN_TEXT_P(result);
 172 }
 173
 174 /*
 175  * Check whether supplied input is valid JSON.
 176  */
 177 static void
 178 json_validate_cstring(char *input)
 179 {
 180         JsonLexContext lex;
 181         JsonParseStack *stack,
 182                            *stacktop;
 183         int                     stacksize;
 184
 185         /* Set up lexing context. */
 186         lex.input = input;
 187         lex.token_terminator = lex.input;
 188
 189         /* Set up parse stack. */
 190         stacksize = 32;
 191         stacktop = (JsonParseStack *) palloc(sizeof(JsonParseStack) * stacksize);
 192         stack = stacktop;
 193         stack->state = JSON_PARSE_VALUE;
 194
 195         /* Main parsing loop. */
 196         for (;;)
 197         {
 198                 JsonStackOp op;
 199
 200                 /* Fetch next token. */
 201                 json_lex(&lex);
 202
 203                 /* Check for unexpected end of input. */
 204                 if (lex.token_start == NULL)
 205                         report_parse_error(stack, &lex);
 206
 207 redo:
 208                 /* Figure out what to do with this token. */
 209                 op = JSON_STACKOP_NONE;
 210                 switch (stack->state)
 211                 {
 212                         case JSON_PARSE_VALUE:
 213                                 if (lex.token_type != JSON_VALUE_INVALID)
 214                                         op = JSON_STACKOP_POP;
 215                                 else if (lex.token_start[0] == '[')
 216                                         stack->state = JSON_PARSE_ARRAY_START;
 217                                 else if (lex.token_start[0] == '{')
 218                                         stack->state = JSON_PARSE_OBJECT_START;
 219                                 else
 220                                         report_parse_error(stack, &lex);
 221                                 break;
 222                         case JSON_PARSE_ARRAY_START:
 223                                 if (lex.token_type != JSON_VALUE_INVALID)
 224                                         stack->state = JSON_PARSE_ARRAY_NEXT;
 225                                 else if (lex.token_start[0] == ']')
 226                                         op = JSON_STACKOP_POP;
 227                                 else if (lex.token_start[0] == '[' ||
 228                                                  lex.token_start[0] == '{')
 229                                 {
 230                                         stack->state = JSON_PARSE_ARRAY_NEXT;
 231                                         op = JSON_STACKOP_PUSH_WITH_PUSHBACK;
 232                                 }
 233                                 else
 234                                         report_parse_error(stack, &lex);
 235                                 break;
 236                         case JSON_PARSE_ARRAY_NEXT:
 237                                 if (lex.token_type != JSON_VALUE_INVALID)
 238                                         report_parse_error(stack, &lex);
 239                                 else if (lex.token_start[0] == ']')
 240                                         op = JSON_STACKOP_POP;
 241                                 else if (lex.token_start[0] == ',')
 242                                         op = JSON_STACKOP_PUSH;
 243                                 else
 244                                         report_parse_error(stack, &lex);
 245                                 break;
 246                         case JSON_PARSE_OBJECT_START:
 247                                 if (lex.token_type == JSON_VALUE_STRING)
 248                                         stack->state = JSON_PARSE_OBJECT_LABEL;
 249                                 else if (lex.token_type == JSON_VALUE_INVALID &&
 250                                                  lex.token_start[0] == '}')
 251                                         op = JSON_STACKOP_POP;
 252                                 else
 253                                         report_parse_error(stack, &lex);
 254                                 break;
 255                         case JSON_PARSE_OBJECT_LABEL:
 256                                 if (lex.token_type == JSON_VALUE_INVALID &&
 257                                         lex.token_start[0] == ':')
 258                                 {
 259                                         stack->state = JSON_PARSE_OBJECT_NEXT;
 260                                         op = JSON_STACKOP_PUSH;
 261                                 }
 262                                 else
 263                                         report_parse_error(stack, &lex);
 264                                 break;
 265                         case JSON_PARSE_OBJECT_NEXT:
 266                                 if (lex.token_type != JSON_VALUE_INVALID)
 267                                         report_parse_error(stack, &lex);
 268                                 else if (lex.token_start[0] == '}')
 269                                         op = JSON_STACKOP_POP;
 270                                 else if (lex.token_start[0] == ',')
 271                                         stack->state = JSON_PARSE_OBJECT_COMMA;
 272                                 else
 273                                         report_parse_error(stack, &lex);
 274                                 break;
 275                         case JSON_PARSE_OBJECT_COMMA:
 276                                 if (lex.token_type == JSON_VALUE_STRING)
 277                                         stack->state = JSON_PARSE_OBJECT_LABEL;
 278                                 else
 279                                         report_parse_error(stack, &lex);
 280                                 break;
 281                         default:
 282                                 elog(ERROR, "unexpected json parse state: %d",
 283                                          (int) stack->state);
 284                 }
 285
 286                 /* Push or pop the state stack, if needed. */
 287                 switch (op)
 288                 {
 289                         case JSON_STACKOP_PUSH:
 290                         case JSON_STACKOP_PUSH_WITH_PUSHBACK:
 291                                 stack++;
 292                                 if (stack >= &stacktop[stacksize])
 293                                 {
 294                                         /* Need to enlarge the stack. */
 295                                         int                     stackoffset = stack - stacktop;
 296
 297                                         stacksize += 32;
 298                                         stacktop = (JsonParseStack *)
 299                                                 repalloc(stacktop,
 300                                                                  sizeof(JsonParseStack) * stacksize);
 301                                         stack = stacktop + stackoffset;
 302                                 }
 303                                 stack->state = JSON_PARSE_VALUE;
 304                                 if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK)
 305                                         goto redo;
 306                                 break;
 307                         case JSON_STACKOP_POP:
 308                                 if (stack == stacktop)
 309                                 {
 310                                         /* Expect end of input. */
 311                                         json_lex(&lex);
 312                                         if (lex.token_start != NULL)
 313                                                 report_parse_error(NULL, &lex);
 314                                         return;
 315                                 }
 316                                 stack--;
 317                                 break;
 318                         case JSON_STACKOP_NONE:
 319                                 /* nothing to do */
 320                                 break;
 321                 }
 322         }
 323 }
 324
 325 /*
 326  * Lex one token from the input stream.
 327  */
 328 static void
 329 json_lex(JsonLexContext *lex)
 330 {
 331         char       *s;
 332
 333         /* Skip leading whitespace. */
 334         s = lex->token_terminator;
 335         while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
 336                 s++;
 337         lex->token_start = s;
 338
 339         /* Determine token type. */
 340         if (strchr("{}[],:", s[0]) != NULL)
 341         {
 342                 /* strchr() is willing to match a zero byte, so test for that. */
 343                 if (s[0] == '\0')
 344                 {
 345                         /* End of string. */
 346                         lex->token_start = NULL;
 347                         lex->token_terminator = s;
 348                 }
 349                 else
 350                 {
 351                         /* Single-character token, some kind of punctuation mark. */
 352                         lex->token_terminator = s + 1;
 353                 }
 354                 lex->token_type = JSON_VALUE_INVALID;
 355         }
 356         else if (*s == '"')
 357         {
 358                 /* String. */
 359                 json_lex_string(lex);
 360                 lex->token_type = JSON_VALUE_STRING;
 361         }
 362         else if (*s == '-')
 363         {
 364                 /* Negative number. */
 365                 json_lex_number(lex, s + 1);
 366                 lex->token_type = JSON_VALUE_NUMBER;
 367         }
 368         else if (*s >= '0' && *s <= '9')
 369         {
 370                 /* Positive number. */
 371                 json_lex_number(lex, s);
 372                 lex->token_type = JSON_VALUE_NUMBER;
 373         }
 374         else
 375         {
 376                 char       *p;
 377
 378                 /*
 379                  * We're not dealing with a string, number, legal punctuation mark, or
 380                  * end of string.  The only legal tokens we might find here are true,
 381                  * false, and null, but for error reporting purposes we scan until we
 382                  * see a non-alphanumeric character.  That way, we can report the
 383                  * whole word as an unexpected token, rather than just some
 384                  * unintuitive prefix thereof.
 385                  */
 386                 for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
 387                         /* skip */ ;
 388
 389                 if (p == s)
 390                 {
 391                         /*
 392                          * We got some sort of unexpected punctuation or an otherwise
 393                          * unexpected character, so just complain about that one
 394                          * character.  (It can't be multibyte because the above loop
 395                          * will advance over any multibyte characters.)
 396                          */
 397                         lex->token_terminator = s + 1;
 398                         report_invalid_token(lex);
 399                 }
 400
 401                 /*
 402                  * We've got a real alphanumeric token here.  If it happens to be
 403                  * true, false, or null, all is well.  If not, error out.
 404                  */
 405                 lex->token_terminator = p;
 406                 if (p - s == 4)
 407                 {
 408                         if (memcmp(s, "true", 4) == 0)
 409                                 lex->token_type = JSON_VALUE_TRUE;
 410                         else if (memcmp(s, "null", 4) == 0)
 411                                 lex->token_type = JSON_VALUE_NULL;
 412                         else
 413                                 report_invalid_token(lex);
 414                 }
 415                 else if (p - s == 5 && memcmp(s, "false", 5) == 0)
 416                         lex->token_type = JSON_VALUE_FALSE;
 417                 else
 418                         report_invalid_token(lex);
 419         }
 420 }
 421
 422 /*
 423  * The next token in the input stream is known to be a string; lex it.
 424  */
 425 static void
 426 json_lex_string(JsonLexContext *lex)
 427 {
 428         char       *s;
 429
 430         for (s = lex->token_start + 1; *s != '"'; s++)
 431         {
 432                 /* Per RFC4627, these characters MUST be escaped. */
 433                 if ((unsigned char) *s < 32)
 434                 {
 435                         /* A NUL byte marks the (premature) end of the string. */
 436                         if (*s == '\0')
 437                         {
 438                                 lex->token_terminator = s;
 439                                 report_invalid_token(lex);
 440                         }
 441                         /* Since *s isn't printable, exclude it from the context string */
 442                         lex->token_terminator = s;
 443                         ereport(ERROR,
 444                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 445                                          errmsg("invalid input syntax for type json"),
 446                                          errdetail("Character with value 0x%02x must be escaped.",
 447                                                            (unsigned char) *s),
 448                                          report_json_context(lex)));
 449                 }
 450                 else if (*s == '\\')
 451                 {
 452                         /* OK, we have an escape character. */
 453                         s++;
 454                         if (*s == '\0')
 455                         {
 456                                 lex->token_terminator = s;
 457                                 report_invalid_token(lex);
 458                         }
 459                         else if (*s == 'u')
 460                         {
 461                                 int                     i;
 462                                 int                     ch = 0;
 463
 464                                 for (i = 1; i <= 4; i++)
 465                                 {
 466                                         s++;
 467                                         if (*s == '\0')
 468                                         {
 469                                                 lex->token_terminator = s;
 470                                                 report_invalid_token(lex);
 471                                         }
 472                                         else if (*s >= '0' && *s <= '9')
 473                                                 ch = (ch * 16) + (*s - '0');
 474                                         else if (*s >= 'a' && *s <= 'f')
 475                                                 ch = (ch * 16) + (*s - 'a') + 10;
 476                                         else if (*s >= 'A' && *s <= 'F')
 477                                                 ch = (ch * 16) + (*s - 'A') + 10;
 478                                         else
 479                                         {
 480                                                 lex->token_terminator = s + pg_mblen(s);
 481                                                 ereport(ERROR,
 482                                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 483                                                                  errmsg("invalid input syntax for type json"),
 484                                                                  errdetail("\"\\u\" must be followed by four hexadecimal digits."),
 485                                                                  report_json_context(lex)));
 486                                         }
 487                                 }
 488                         }
 489                         else if (strchr("\"\\/bfnrt", *s) == NULL)
 490                         {
 491                                 /* Not a valid string escape, so error out. */
 492                                 lex->token_terminator = s + pg_mblen(s);
 493                                 ereport(ERROR,
 494                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 495                                                  errmsg("invalid input syntax for type json"),
 496                                                  errdetail("Escape sequence \"\\%s\" is invalid.",
 497                                                                    extract_mb_char(s)),
 498                                                  report_json_context(lex)));
 499                         }
 500                 }
 501         }
 502
 503         /* Hooray, we found the end of the string! */
 504         lex->token_terminator = s + 1;
 505 }
 506
 507 /*-------------------------------------------------------------------------
 508  * The next token in the input stream is known to be a number; lex it.
 509  *
 510  * In JSON, a number consists of four parts:
 511  *
 512  * (1) An optional minus sign ('-').
 513  *
 514  * (2) Either a single '0', or a string of one or more digits that does not
 515  *         begin with a '0'.
 516  *
 517  * (3) An optional decimal part, consisting of a period ('.') followed by
 518  *         one or more digits.  (Note: While this part can be omitted
 519  *         completely, it's not OK to have only the decimal point without
 520  *         any digits afterwards.)
 521  *
 522  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
 523  *         followed by '+' or '-', followed by one or more digits.      (Note:
 524  *         As with the decimal part, if 'e' or 'E' is present, it must be
 525  *         followed by at least one digit.)
 526  *
 527  * The 's' argument to this function points to the ostensible beginning
 528  * of part 2 - i.e. the character after any optional minus sign, and the
 529  * first character of the string if there is none.
 530  *
 531  *-------------------------------------------------------------------------
 532  */
 533 static void
 534 json_lex_number(JsonLexContext *lex, char *s)
 535 {
 536         bool            error = false;
 537         char       *p;
 538
 539         /* Part (1): leading sign indicator. */
 540         /* Caller already did this for us; so do nothing. */
 541
 542         /* Part (2): parse main digit string. */
 543         if (*s == '0')
 544                 s++;
 545         else if (*s >= '1' && *s <= '9')
 546         {
 547                 do
 548                 {
 549                         s++;
 550                 } while (*s >= '0' && *s <= '9');
 551         }
 552         else
 553                 error = true;
 554
 555         /* Part (3): parse optional decimal portion. */
 556         if (*s == '.')
 557         {
 558                 s++;
 559                 if (*s < '0' || *s > '9')
 560                         error = true;
 561                 else
 562                 {
 563                         do
 564                         {
 565                                 s++;
 566                         } while (*s >= '0' && *s <= '9');
 567                 }
 568         }
 569
 570         /* Part (4): parse optional exponent. */
 571         if (*s == 'e' || *s == 'E')
 572         {
 573                 s++;
 574                 if (*s == '+' || *s == '-')
 575                         s++;
 576                 if (*s < '0' || *s > '9')
 577                         error = true;
 578                 else
 579                 {
 580                         do
 581                         {
 582                                 s++;
 583                         } while (*s >= '0' && *s <= '9');
 584                 }
 585         }
 586
 587         /*
 588          * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
 589          * here should be considered part of the token for error-reporting
 590          * purposes.
 591          */
 592         for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
 593                 error = true;
 594         lex->token_terminator = p;
 595         if (error)
 596                 report_invalid_token(lex);
 597 }
 598
 599 /*
 600  * Report a parse error.
 601  *
 602  * lex->token_start and lex->token_terminator must identify the current token.
 603  */
 604 static void
 605 report_parse_error(JsonParseStack *stack, JsonLexContext *lex)
 606 {
 607         char       *token;
 608         int                     toklen;
 609
 610         /* Handle case where the input ended prematurely. */
 611         if (lex->token_start == NULL)
 612                 ereport(ERROR,
 613                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 614                                  errmsg("invalid input syntax for type json"),
 615                                  errdetail("The input string ended unexpectedly."),
 616                                  report_json_context(lex)));
 617
 618         /* Separate out the current token. */
 619         toklen = lex->token_terminator - lex->token_start;
 620         token = palloc(toklen + 1);
 621         memcpy(token, lex->token_start, toklen);
 622         token[toklen] = '\0';
 623
 624         /* Complain, with the appropriate detail message. */
 625         if (stack == NULL)
 626                 ereport(ERROR,
 627                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 628                                  errmsg("invalid input syntax for type json"),
 629                                  errdetail("Expected end of input, but found \"%s\".",
 630                                                    token),
 631                                  report_json_context(lex)));
 632         else
 633         {
 634                 switch (stack->state)
 635                 {
 636                         case JSON_PARSE_VALUE:
 637                                 ereport(ERROR,
 638                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 639                                                  errmsg("invalid input syntax for type json"),
 640                                                  errdetail("Expected JSON value, but found \"%s\".",
 641                                                                    token),
 642                                                  report_json_context(lex)));
 643                                 break;
 644                         case JSON_PARSE_ARRAY_START:
 645                                 ereport(ERROR,
 646                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 647                                                  errmsg("invalid input syntax for type json"),
 648                                                  errdetail("Expected array element or \"]\", but found \"%s\".",
 649                                                                    token),
 650                                                  report_json_context(lex)));
 651                                 break;
 652                         case JSON_PARSE_ARRAY_NEXT:
 653                                 ereport(ERROR,
 654                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 655                                                  errmsg("invalid input syntax for type json"),
 656                                                  errdetail("Expected \",\" or \"]\", but found \"%s\".",
 657                                                                    token),
 658                                                  report_json_context(lex)));
 659                                 break;
 660                         case JSON_PARSE_OBJECT_START:
 661                                 ereport(ERROR,
 662                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 663                                                  errmsg("invalid input syntax for type json"),
 664                                                  errdetail("Expected string or \"}\", but found \"%s\".",
 665                                                                    token),
 666                                                  report_json_context(lex)));
 667                                 break;
 668                         case JSON_PARSE_OBJECT_LABEL:
 669                                 ereport(ERROR,
 670                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 671                                                  errmsg("invalid input syntax for type json"),
 672                                                  errdetail("Expected \":\", but found \"%s\".",
 673                                                                    token),
 674                                                  report_json_context(lex)));
 675                                 break;
 676                         case JSON_PARSE_OBJECT_NEXT:
 677                                 ereport(ERROR,
 678                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 679                                                  errmsg("invalid input syntax for type json"),
 680                                                  errdetail("Expected \",\" or \"}\", but found \"%s\".",
 681                                                                    token),
 682                                                  report_json_context(lex)));
 683                                 break;
 684                         case JSON_PARSE_OBJECT_COMMA:
 685                                 ereport(ERROR,
 686                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 687                                                  errmsg("invalid input syntax for type json"),
 688                                                  errdetail("Expected string, but found \"%s\".",
 689                                                                    token),
 690                                                  report_json_context(lex)));
 691                                 break;
 692                         default:
 693                                 elog(ERROR, "unexpected json parse state: %d",
 694                                          (int) stack->state);
 695                 }
 696         }
 697 }
 698
 699 /*
 700  * Report an invalid input token.
 701  *
 702  * lex->token_start and lex->token_terminator must identify the token.
 703  */
 704 static void
 705 report_invalid_token(JsonLexContext *lex)
 706 {
 707         char       *token;
 708         int                     toklen;
 709
 710         /* Separate out the offending token. */
 711         toklen = lex->token_terminator - lex->token_start;
 712         token = palloc(toklen + 1);
 713         memcpy(token, lex->token_start, toklen);
 714         token[toklen] = '\0';
 715
 716         ereport(ERROR,
 717                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 718                          errmsg("invalid input syntax for type json"),
 719                          errdetail("Token \"%s\" is invalid.", token),
 720                          report_json_context(lex)));
 721 }
 722
 723 /*
 724  * Report a CONTEXT line for bogus JSON input.
 725  *
 726  * lex->token_terminator must be set to identify the spot where we detected
 727  * the error.  Note that lex->token_start might be NULL, in case we recognized
 728  * error at EOF.
 729  *
 730  * The return value isn't meaningful, but we make it non-void so that this
 731  * can be invoked inside ereport().
 732  */
 733 static int
 734 report_json_context(JsonLexContext *lex)
 735 {
 736         const char *context_start;
 737         const char *context_end;
 738         const char *line_start;
 739         int                     line_number;
 740         char       *ctxt;
 741         int                     ctxtlen;
 742         const char *prefix;
 743         const char *suffix;
 744
 745         /* Choose boundaries for the part of the input we will display */
 746         context_start = lex->input;
 747         context_end = lex->token_terminator;
 748         line_start = context_start;
 749         line_number = 1;
 750         for (;;)
 751         {
 752                 /* Always advance over newlines (context_end test is just paranoia) */
 753                 if (*context_start == '\n' && context_start < context_end)
 754                 {
 755                         context_start++;
 756                         line_start = context_start;
 757                         line_number++;
 758                         continue;
 759                 }
 760                 /* Otherwise, done as soon as we are close enough to context_end */
 761                 if (context_end - context_start < 50)
 762                         break;
 763                 /* Advance to next multibyte character */
 764                 if (IS_HIGHBIT_SET(*context_start))
 765                         context_start += pg_mblen(context_start);
 766                 else
 767                         context_start++;
 768         }
 769
 770         /*
 771          * We add "..." to indicate that the excerpt doesn't start at the
 772          * beginning of the line ... but if we're within 3 characters of the
 773          * beginning of the line, we might as well just show the whole line.
 774          */
 775         if (context_start - line_start <= 3)
 776                 context_start = line_start;
 777
 778         /* Get a null-terminated copy of the data to present */
 779         ctxtlen = context_end - context_start;
 780         ctxt = palloc(ctxtlen + 1);
 781         memcpy(ctxt, context_start, ctxtlen);
 782         ctxt[ctxtlen] = '\0';
 783
 784         /*
 785          * Show the context, prefixing "..." if not starting at start of line, and
 786          * suffixing "..." if not ending at end of line.
 787          */
 788         prefix = (context_start > line_start) ? "..." : "";
 789         suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." : "";
 790
 791         return errcontext("JSON data, line %d: %s%s%s",
 792                                           line_number, prefix, ctxt, suffix);
 793 }
 794
 795 /*
 796  * Extract a single, possibly multi-byte char from the input string.
 797  */
 798 static char *
 799 extract_mb_char(char *s)
 800 {
 801         char       *res;
 802         int                     len;
 803
 804         len = pg_mblen(s);
 805         res = palloc(len + 1);
 806         memcpy(res, s, len);
 807         res[len] = '\0';
 808
 809         return res;
 810 }
 811
 812 /*
 813  * Turn a scalar Datum into JSON, appending the string to "result".
 814  *
 815  * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
 816  * as appropriate.
 817  */
 818 static void
 819 datum_to_json(Datum val, bool is_null, StringInfo result,
 820                           TYPCATEGORY tcategory, Oid typoutputfunc)
 821 {
 822         char       *outputstr;
 823
 824         if (is_null)
 825         {
 826                 appendStringInfoString(result, "null");
 827                 return;
 828         }
 829
 830         switch (tcategory)
 831         {
 832                 case TYPCATEGORY_ARRAY:
 833                         array_to_json_internal(val, result, false);
 834                         break;
 835                 case TYPCATEGORY_COMPOSITE:
 836                         composite_to_json(val, result, false);
 837                         break;
 838                 case TYPCATEGORY_BOOLEAN:
 839                         if (DatumGetBool(val))
 840                                 appendStringInfoString(result, "true");
 841                         else
 842                                 appendStringInfoString(result, "false");
 843                         break;
 844                 case TYPCATEGORY_NUMERIC:
 845                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
 846
 847                         /*
 848                          * Don't call escape_json here if it's a valid JSON number.
 849                          * Numeric output should usually be a valid JSON number and JSON
 850                          * numbers shouldn't be quoted. Quote cases like "Nan" and
 851                          * "Infinity", however.
 852                          */
 853                         if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
 854                                 appendStringInfoString(result, outputstr);
 855                         else
 856                                 escape_json(result, outputstr);
 857                         pfree(outputstr);
 858                         break;
 859                 case TYPCATEGORY_JSON:
 860                         /* JSON will already be escaped */
 861                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
 862                         appendStringInfoString(result, outputstr);
 863                         pfree(outputstr);
 864                         break;
 865                 default:
 866                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
 867                         escape_json(result, outputstr);
 868                         pfree(outputstr);
 869                         break;
 870         }
 871 }
 872
 873 /*
 874  * Process a single dimension of an array.
 875  * If it's the innermost dimension, output the values, otherwise call
 876  * ourselves recursively to process the next dimension.
 877  */
 878 static void
 879 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
 880                                   bool *nulls, int *valcount, TYPCATEGORY tcategory,
 881                                   Oid typoutputfunc, bool use_line_feeds)
 882 {
 883         int                     i;
 884         const char *sep;
 885
 886         Assert(dim < ndims);
 887
 888         sep = use_line_feeds ? ",\n " : ",";
 889
 890         appendStringInfoChar(result, '[');
 891
 892         for (i = 1; i <= dims[dim]; i++)
 893         {
 894                 if (i > 1)
 895                         appendStringInfoString(result, sep);
 896
 897                 if (dim + 1 == ndims)
 898                 {
 899                         datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
 900                                                   typoutputfunc);
 901                         (*valcount)++;
 902                 }
 903                 else
 904                 {
 905                         /*
 906                          * Do we want line feeds on inner dimensions of arrays? For now
 907                          * we'll say no.
 908                          */
 909                         array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
 910                                                           valcount, tcategory, typoutputfunc, false);
 911                 }
 912         }
 913
 914         appendStringInfoChar(result, ']');
 915 }
 916
 917 /*
 918  * Turn an array into JSON.
 919  */
 920 static void
 921 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
 922 {
 923         ArrayType  *v = DatumGetArrayTypeP(array);
 924         Oid                     element_type = ARR_ELEMTYPE(v);
 925         int                *dim;
 926         int                     ndim;
 927         int                     nitems;
 928         int                     count = 0;
 929         Datum      *elements;
 930         bool       *nulls;
 931         int16           typlen;
 932         bool            typbyval;
 933         char            typalign,
 934                                 typdelim;
 935         Oid                     typioparam;
 936         Oid                     typoutputfunc;
 937         TYPCATEGORY tcategory;
 938
 939         ndim = ARR_NDIM(v);
 940         dim = ARR_DIMS(v);
 941         nitems = ArrayGetNItems(ndim, dim);
 942
 943         if (nitems <= 0)
 944         {
 945                 appendStringInfoString(result, "[]");
 946                 return;
 947         }
 948
 949         get_type_io_data(element_type, IOFunc_output,
 950                                          &typlen, &typbyval, &typalign,
 951                                          &typdelim, &typioparam, &typoutputfunc);
 952
 953         deconstruct_array(v, element_type, typlen, typbyval,
 954                                           typalign, &elements, &nulls,
 955                                           &nitems);
 956
 957         if (element_type == RECORDOID)
 958                 tcategory = TYPCATEGORY_COMPOSITE;
 959         else if (element_type == JSONOID)
 960                 tcategory = TYPCATEGORY_JSON;
 961         else
 962                 tcategory = TypeCategory(element_type);
 963
 964         array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
 965                                           typoutputfunc, use_line_feeds);
 966
 967         pfree(elements);
 968         pfree(nulls);
 969 }
 970
 971 /*
 972  * Turn a composite / record into JSON.
 973  */
 974 static void
 975 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
 976 {
 977         HeapTupleHeader td;
 978         Oid                     tupType;
 979         int32           tupTypmod;
 980         TupleDesc       tupdesc;
 981         HeapTupleData tmptup,
 982                            *tuple;
 983         int                     i;
 984         bool            needsep = false;
 985         const char *sep;
 986
 987         sep = use_line_feeds ? ",\n " : ",";
 988
 989         td = DatumGetHeapTupleHeader(composite);
 990
 991         /* Extract rowtype info and find a tupdesc */
 992         tupType = HeapTupleHeaderGetTypeId(td);
 993         tupTypmod = HeapTupleHeaderGetTypMod(td);
 994         tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
 995
 996         /* Build a temporary HeapTuple control structure */
 997         tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
 998         tmptup.t_data = td;
 999         tuple = &tmptup;
1000
1001         appendStringInfoChar(result, '{');
1002
1003         for (i = 0; i < tupdesc->natts; i++)
1004         {
1005                 Datum           val,
1006                                         origval;
1007                 bool            isnull;
1008                 char       *attname;
1009                 TYPCATEGORY tcategory;
1010                 Oid                     typoutput;
1011                 bool            typisvarlena;
1012
1013                 if (tupdesc->attrs[i]->attisdropped)
1014                         continue;
1015
1016                 if (needsep)
1017                         appendStringInfoString(result, sep);
1018                 needsep = true;
1019
1020                 attname = NameStr(tupdesc->attrs[i]->attname);
1021                 escape_json(result, attname);
1022                 appendStringInfoChar(result, ':');
1023
1024                 origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
1025
1026                 if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
1027                         tcategory = TYPCATEGORY_ARRAY;
1028                 else if (tupdesc->attrs[i]->atttypid == RECORDOID)
1029                         tcategory = TYPCATEGORY_COMPOSITE;
1030                 else if (tupdesc->attrs[i]->atttypid == JSONOID)
1031                         tcategory = TYPCATEGORY_JSON;
1032                 else
1033                         tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
1034
1035                 getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
1036                                                   &typoutput, &typisvarlena);
1037
1038                 /*
1039                  * If we have a toasted datum, forcibly detoast it here to avoid
1040                  * memory leakage inside the type's output routine.
1041                  */
1042                 if (typisvarlena && !isnull)
1043                         val = PointerGetDatum(PG_DETOAST_DATUM(origval));
1044                 else
1045                         val = origval;
1046
1047                 datum_to_json(val, isnull, result, tcategory, typoutput);
1048
1049                 /* Clean up detoasted copy, if any */
1050                 if (val != origval)
1051                         pfree(DatumGetPointer(val));
1052         }
1053
1054         appendStringInfoChar(result, '}');
1055         ReleaseTupleDesc(tupdesc);
1056 }
1057
1058 /*
1059  * SQL function array_to_json(row)
1060  */
1061 extern Datum
1062 array_to_json(PG_FUNCTION_ARGS)
1063 {
1064         Datum           array = PG_GETARG_DATUM(0);
1065         StringInfo      result;
1066
1067         result = makeStringInfo();
1068
1069         array_to_json_internal(array, result, false);
1070
1071         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1072 }
1073
1074 /*
1075  * SQL function array_to_json(row, prettybool)
1076  */
1077 extern Datum
1078 array_to_json_pretty(PG_FUNCTION_ARGS)
1079 {
1080         Datum           array = PG_GETARG_DATUM(0);
1081         bool            use_line_feeds = PG_GETARG_BOOL(1);
1082         StringInfo      result;
1083
1084         result = makeStringInfo();
1085
1086         array_to_json_internal(array, result, use_line_feeds);
1087
1088         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1089 }
1090
1091 /*
1092  * SQL function row_to_json(row)
1093  */
1094 extern Datum
1095 row_to_json(PG_FUNCTION_ARGS)
1096 {
1097         Datum           array = PG_GETARG_DATUM(0);
1098         StringInfo      result;
1099
1100         result = makeStringInfo();
1101
1102         composite_to_json(array, result, false);
1103
1104         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1105 }
1106
1107 /*
1108  * SQL function row_to_json(row, prettybool)
1109  */
1110 extern Datum
1111 row_to_json_pretty(PG_FUNCTION_ARGS)
1112 {
1113         Datum           array = PG_GETARG_DATUM(0);
1114         bool            use_line_feeds = PG_GETARG_BOOL(1);
1115         StringInfo      result;
1116
1117         result = makeStringInfo();
1118
1119         composite_to_json(array, result, use_line_feeds);
1120
1121         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1122 }
1123
1124 /*
1125  * Produce a JSON string literal, properly escaping characters in the text.
1126  */
1127 void
1128 escape_json(StringInfo buf, const char *str)
1129 {
1130         const char *p;
1131
1132         appendStringInfoCharMacro(buf, '\"');
1133         for (p = str; *p; p++)
1134         {
1135                 switch (*p)
1136                 {
1137                         case '\b':
1138                                 appendStringInfoString(buf, "\\b");
1139                                 break;
1140                         case '\f':
1141                                 appendStringInfoString(buf, "\\f");
1142                                 break;
1143                         case '\n':
1144                                 appendStringInfoString(buf, "\\n");
1145                                 break;
1146                         case '\r':
1147                                 appendStringInfoString(buf, "\\r");
1148                                 break;
1149                         case '\t':
1150                                 appendStringInfoString(buf, "\\t");
1151                                 break;
1152                         case '"':
1153                                 appendStringInfoString(buf, "\\\"");
1154                                 break;
1155                         case '\\':
1156                                 appendStringInfoString(buf, "\\\\");
1157                                 break;
1158                         default:
1159                                 if ((unsigned char) *p < ' ')
1160                                         appendStringInfo(buf, "\\u%04x", (int) *p);
1161                                 else
1162                                         appendStringInfoCharMacro(buf, *p);
1163                                 break;
1164                 }
1165         }
1166         appendStringInfoCharMacro(buf, '\"');
1167 }