granicus.if.org Git - postgresql/blob - src/backend/utils/adt/json.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * json.c
   4  *              JSON data type support.
   5  *
   6  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  * IDENTIFICATION
  10  *        src/backend/utils/adt/json.c
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14 #include "postgres.h"
  15
  16 #include "catalog/pg_type.h"
  17 #include "executor/spi.h"
  18 #include "lib/stringinfo.h"
  19 #include "libpq/pqformat.h"
  20 #include "mb/pg_wchar.h"
  21 #include "parser/parse_coerce.h"
  22 #include "utils/array.h"
  23 #include "utils/builtins.h"
  24 #include "utils/lsyscache.h"
  25 #include "utils/json.h"
  26 #include "utils/typcache.h"
  27
  28 typedef enum                                    /* types of JSON values */
  29 {
  30         JSON_VALUE_INVALID,                     /* non-value tokens are reported as this */
  31         JSON_VALUE_STRING,
  32         JSON_VALUE_NUMBER,
  33         JSON_VALUE_OBJECT,
  34         JSON_VALUE_ARRAY,
  35         JSON_VALUE_TRUE,
  36         JSON_VALUE_FALSE,
  37         JSON_VALUE_NULL
  38 } JsonValueType;
  39
  40 typedef struct                                  /* state of JSON lexer */
  41 {
  42         char       *input;                      /* whole string being parsed */
  43         char       *token_start;        /* start of current token within input */
  44         char       *token_terminator; /* end of previous or current token */
  45         JsonValueType token_type;       /* type of current token, once it's known */
  46 } JsonLexContext;
  47
  48 typedef enum                                    /* states of JSON parser */
  49 {
  50         JSON_PARSE_VALUE,                       /* expecting a value */
  51         JSON_PARSE_ARRAY_START,         /* saw '[', expecting value or ']' */
  52         JSON_PARSE_ARRAY_NEXT,          /* saw array element, expecting ',' or ']' */
  53         JSON_PARSE_OBJECT_START,        /* saw '{', expecting label or '}' */
  54         JSON_PARSE_OBJECT_LABEL,        /* saw object label, expecting ':' */
  55         JSON_PARSE_OBJECT_NEXT,         /* saw object value, expecting ',' or '}' */
  56         JSON_PARSE_OBJECT_COMMA         /* saw object ',', expecting next label */
  57 } JsonParseState;
  58
  59 typedef struct JsonParseStack   /* the parser state has to be stackable */
  60 {
  61         JsonParseState state;
  62         /* currently only need the state enum, but maybe someday more stuff */
  63 } JsonParseStack;
  64
  65 typedef enum                                    /* required operations on state stack */
  66 {
  67         JSON_STACKOP_NONE,                      /* no-op */
  68         JSON_STACKOP_PUSH,                      /* push new JSON_PARSE_VALUE stack item */
  69         JSON_STACKOP_PUSH_WITH_PUSHBACK, /* push, then rescan current token */
  70         JSON_STACKOP_POP                        /* pop, or expect end of input if no stack */
  71 } JsonStackOp;
  72
  73 static void json_validate_cstring(char *input);
  74 static void json_lex(JsonLexContext *lex);
  75 static void json_lex_string(JsonLexContext *lex);
  76 static void json_lex_number(JsonLexContext *lex, char *s);
  77 static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);
  78 static void report_invalid_token(JsonLexContext *lex);
  79 static int report_json_context(JsonLexContext *lex);
  80 static char *extract_mb_char(char *s);
  81 static void composite_to_json(Datum composite, StringInfo result,
  82                                                           bool use_line_feeds);
  83 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
  84                                   Datum *vals, bool *nulls, int *valcount,
  85                                   TYPCATEGORY tcategory, Oid typoutputfunc,
  86                                   bool use_line_feeds);
  87 static void array_to_json_internal(Datum array, StringInfo result,
  88                                                                    bool use_line_feeds);
  89
  90 /* fake type category for JSON so we can distinguish it in datum_to_json */
  91 #define TYPCATEGORY_JSON 'j'
  92 /* letters appearing in numeric output that aren't valid in a JSON number */
  93 #define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
  94 /* chars to consider as part of an alphanumeric token */
  95 #define JSON_ALPHANUMERIC_CHAR(c)  \
  96         (((c) >= 'a' && (c) <= 'z') || \
  97          ((c) >= 'A' && (c) <= 'Z') || \
  98          ((c) >= '0' && (c) <= '9') || \
  99          (c) == '_' || \
 100          IS_HIGHBIT_SET(c))
 101
 102
 103 /*
 104  * Input.
 105  */
 106 Datum
 107 json_in(PG_FUNCTION_ARGS)
 108 {
 109         char       *text = PG_GETARG_CSTRING(0);
 110
 111         json_validate_cstring(text);
 112
 113         /* Internal representation is the same as text, for now */
 114         PG_RETURN_TEXT_P(cstring_to_text(text));
 115 }
 116
 117 /*
 118  * Output.
 119  */
 120 Datum
 121 json_out(PG_FUNCTION_ARGS)
 122 {
 123         /* we needn't detoast because text_to_cstring will handle that */
 124         Datum           txt = PG_GETARG_DATUM(0);
 125
 126         PG_RETURN_CSTRING(TextDatumGetCString(txt));
 127 }
 128
 129 /*
 130  * Binary send.
 131  */
 132 Datum
 133 json_send(PG_FUNCTION_ARGS)
 134 {
 135         text       *t = PG_GETARG_TEXT_PP(0);
 136         StringInfoData buf;
 137
 138         pq_begintypsend(&buf);
 139         pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
 140         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 141 }
 142
 143 /*
 144  * Binary receive.
 145  */
 146 Datum
 147 json_recv(PG_FUNCTION_ARGS)
 148 {
 149         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 150         text       *result;
 151         char       *str;
 152         int                     nbytes;
 153
 154         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 155
 156         /*
 157          * We need a null-terminated string to pass to json_validate_cstring().
 158          * Rather than make a separate copy, make the temporary result one byte
 159          * bigger than it needs to be.
 160          */
 161         result = palloc(nbytes + 1 + VARHDRSZ);
 162         SET_VARSIZE(result, nbytes + VARHDRSZ);
 163         memcpy(VARDATA(result), str, nbytes);
 164         str = VARDATA(result);
 165         str[nbytes] = '\0';
 166
 167         /* Validate it. */
 168         json_validate_cstring(str);
 169
 170         PG_RETURN_TEXT_P(result);
 171 }
 172
 173 /*
 174  * Check whether supplied input is valid JSON.
 175  */
 176 static void
 177 json_validate_cstring(char *input)
 178 {
 179         JsonLexContext lex;
 180         JsonParseStack *stack,
 181                            *stacktop;
 182         int                     stacksize;
 183
 184         /* Set up lexing context. */
 185         lex.input = input;
 186         lex.token_terminator = lex.input;
 187
 188         /* Set up parse stack. */
 189         stacksize = 32;
 190         stacktop = (JsonParseStack *) palloc(sizeof(JsonParseStack) * stacksize);
 191         stack = stacktop;
 192         stack->state = JSON_PARSE_VALUE;
 193
 194         /* Main parsing loop. */
 195         for (;;)
 196         {
 197                 JsonStackOp op;
 198
 199                 /* Fetch next token. */
 200                 json_lex(&lex);
 201
 202                 /* Check for unexpected end of input. */
 203                 if (lex.token_start == NULL)
 204                         report_parse_error(stack, &lex);
 205
 206 redo:
 207                 /* Figure out what to do with this token. */
 208                 op = JSON_STACKOP_NONE;
 209                 switch (stack->state)
 210                 {
 211                         case JSON_PARSE_VALUE:
 212                                 if (lex.token_type != JSON_VALUE_INVALID)
 213                                         op = JSON_STACKOP_POP;
 214                                 else if (lex.token_start[0] == '[')
 215                                         stack->state = JSON_PARSE_ARRAY_START;
 216                                 else if (lex.token_start[0] == '{')
 217                                         stack->state = JSON_PARSE_OBJECT_START;
 218                                 else
 219                                         report_parse_error(stack, &lex);
 220                                 break;
 221                         case JSON_PARSE_ARRAY_START:
 222                                 if (lex.token_type != JSON_VALUE_INVALID)
 223                                         stack->state = JSON_PARSE_ARRAY_NEXT;
 224                                 else if (lex.token_start[0] == ']')
 225                                         op = JSON_STACKOP_POP;
 226                                 else if (lex.token_start[0] == '[' ||
 227                                                  lex.token_start[0] == '{')
 228                                 {
 229                                         stack->state = JSON_PARSE_ARRAY_NEXT;
 230                                         op = JSON_STACKOP_PUSH_WITH_PUSHBACK;
 231                                 }
 232                                 else
 233                                         report_parse_error(stack, &lex);
 234                                 break;
 235                         case JSON_PARSE_ARRAY_NEXT:
 236                                 if (lex.token_type != JSON_VALUE_INVALID)
 237                                         report_parse_error(stack, &lex);
 238                                 else if (lex.token_start[0] == ']')
 239                                         op = JSON_STACKOP_POP;
 240                                 else if (lex.token_start[0] == ',')
 241                                         op = JSON_STACKOP_PUSH;
 242                                 else
 243                                         report_parse_error(stack, &lex);
 244                                 break;
 245                         case JSON_PARSE_OBJECT_START:
 246                                 if (lex.token_type == JSON_VALUE_STRING)
 247                                         stack->state = JSON_PARSE_OBJECT_LABEL;
 248                                 else if (lex.token_type == JSON_VALUE_INVALID &&
 249                                                  lex.token_start[0] == '}')
 250                                         op = JSON_STACKOP_POP;
 251                                 else
 252                                         report_parse_error(stack, &lex);
 253                                 break;
 254                         case JSON_PARSE_OBJECT_LABEL:
 255                                 if (lex.token_type == JSON_VALUE_INVALID &&
 256                                         lex.token_start[0] == ':')
 257                                 {
 258                                         stack->state = JSON_PARSE_OBJECT_NEXT;
 259                                         op = JSON_STACKOP_PUSH;
 260                                 }
 261                                 else
 262                                         report_parse_error(stack, &lex);
 263                                 break;
 264                         case JSON_PARSE_OBJECT_NEXT:
 265                                 if (lex.token_type != JSON_VALUE_INVALID)
 266                                         report_parse_error(stack, &lex);
 267                                 else if (lex.token_start[0] == '}')
 268                                         op = JSON_STACKOP_POP;
 269                                 else if (lex.token_start[0] == ',')
 270                                         stack->state = JSON_PARSE_OBJECT_COMMA;
 271                                 else
 272                                         report_parse_error(stack, &lex);
 273                                 break;
 274                         case JSON_PARSE_OBJECT_COMMA:
 275                                 if (lex.token_type == JSON_VALUE_STRING)
 276                                         stack->state = JSON_PARSE_OBJECT_LABEL;
 277                                 else
 278                                         report_parse_error(stack, &lex);
 279                                 break;
 280                         default:
 281                                 elog(ERROR, "unexpected json parse state: %d",
 282                                          (int) stack->state);
 283                 }
 284
 285                 /* Push or pop the state stack, if needed. */
 286                 switch (op)
 287                 {
 288                         case JSON_STACKOP_PUSH:
 289                         case JSON_STACKOP_PUSH_WITH_PUSHBACK:
 290                                 stack++;
 291                                 if (stack >= &stacktop[stacksize])
 292                                 {
 293                                         /* Need to enlarge the stack. */
 294                                         int                     stackoffset = stack - stacktop;
 295
 296                                         stacksize += 32;
 297                                         stacktop = (JsonParseStack *)
 298                                                 repalloc(stacktop,
 299                                                                  sizeof(JsonParseStack) * stacksize);
 300                                         stack = stacktop + stackoffset;
 301                                 }
 302                                 stack->state = JSON_PARSE_VALUE;
 303                                 if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK)
 304                                         goto redo;
 305                                 break;
 306                         case JSON_STACKOP_POP:
 307                                 if (stack == stacktop)
 308                                 {
 309                                         /* Expect end of input. */
 310                                         json_lex(&lex);
 311                                         if (lex.token_start != NULL)
 312                                                 report_parse_error(NULL, &lex);
 313                                         return;
 314                                 }
 315                                 stack--;
 316                                 break;
 317                         case JSON_STACKOP_NONE:
 318                                 /* nothing to do */
 319                                 break;
 320                 }
 321         }
 322 }
 323
 324 /*
 325  * Lex one token from the input stream.
 326  */
 327 static void
 328 json_lex(JsonLexContext *lex)
 329 {
 330         char       *s;
 331
 332         /* Skip leading whitespace. */
 333         s = lex->token_terminator;
 334         while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
 335                 s++;
 336         lex->token_start = s;
 337
 338         /* Determine token type. */
 339         if (strchr("{}[],:", s[0]) != NULL)
 340         {
 341                 /* strchr() is willing to match a zero byte, so test for that. */
 342                 if (s[0] == '\0')
 343                 {
 344                         /* End of string. */
 345                         lex->token_start = NULL;
 346                         lex->token_terminator = s;
 347                 }
 348                 else
 349                 {
 350                         /* Single-character token, some kind of punctuation mark. */
 351                         lex->token_terminator = s + 1;
 352                 }
 353                 lex->token_type = JSON_VALUE_INVALID;
 354         }
 355         else if (*s == '"')
 356         {
 357                 /* String. */
 358                 json_lex_string(lex);
 359                 lex->token_type = JSON_VALUE_STRING;
 360         }
 361         else if (*s == '-')
 362         {
 363                 /* Negative number. */
 364                 json_lex_number(lex, s + 1);
 365                 lex->token_type = JSON_VALUE_NUMBER;
 366         }
 367         else if (*s >= '0' && *s <= '9')
 368         {
 369                 /* Positive number. */
 370                 json_lex_number(lex, s);
 371                 lex->token_type = JSON_VALUE_NUMBER;
 372         }
 373         else
 374         {
 375                 char       *p;
 376
 377                 /*
 378                  * We're not dealing with a string, number, legal punctuation mark, or
 379                  * end of string.  The only legal tokens we might find here are true,
 380                  * false, and null, but for error reporting purposes we scan until we
 381                  * see a non-alphanumeric character.  That way, we can report the
 382                  * whole word as an unexpected token, rather than just some
 383                  * unintuitive prefix thereof.
 384                  */
 385                 for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
 386                         /* skip */ ;
 387
 388                 if (p == s)
 389                 {
 390                         /*
 391                          * We got some sort of unexpected punctuation or an otherwise
 392                          * unexpected character, so just complain about that one
 393                          * character.  (It can't be multibyte because the above loop
 394                          * will advance over any multibyte characters.)
 395                          */
 396                         lex->token_terminator = s + 1;
 397                         report_invalid_token(lex);
 398                 }
 399
 400                 /*
 401                  * We've got a real alphanumeric token here.  If it happens to be
 402                  * true, false, or null, all is well.  If not, error out.
 403                  */
 404                 lex->token_terminator = p;
 405                 if (p - s == 4)
 406                 {
 407                         if (memcmp(s, "true", 4) == 0)
 408                                 lex->token_type = JSON_VALUE_TRUE;
 409                         else if (memcmp(s, "null", 4) == 0)
 410                                 lex->token_type = JSON_VALUE_NULL;
 411                         else
 412                                 report_invalid_token(lex);
 413                 }
 414                 else if (p - s == 5 && memcmp(s, "false", 5) == 0)
 415                         lex->token_type = JSON_VALUE_FALSE;
 416                 else
 417                         report_invalid_token(lex);
 418         }
 419 }
 420
 421 /*
 422  * The next token in the input stream is known to be a string; lex it.
 423  */
 424 static void
 425 json_lex_string(JsonLexContext *lex)
 426 {
 427         char       *s;
 428
 429         for (s = lex->token_start + 1; *s != '"'; s++)
 430         {
 431                 /* Per RFC4627, these characters MUST be escaped. */
 432                 if ((unsigned char) *s < 32)
 433                 {
 434                         /* A NUL byte marks the (premature) end of the string. */
 435                         if (*s == '\0')
 436                         {
 437                                 lex->token_terminator = s;
 438                                 report_invalid_token(lex);
 439                         }
 440                         /* Since *s isn't printable, exclude it from the context string */
 441                         lex->token_terminator = s;
 442                         ereport(ERROR,
 443                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 444                                          errmsg("invalid input syntax for type json"),
 445                                          errdetail("Character with value 0x%02x must be escaped.",
 446                                                            (unsigned char) *s),
 447                                          report_json_context(lex)));
 448                 }
 449                 else if (*s == '\\')
 450                 {
 451                         /* OK, we have an escape character. */
 452                         s++;
 453                         if (*s == '\0')
 454                         {
 455                                 lex->token_terminator = s;
 456                                 report_invalid_token(lex);
 457                         }
 458                         else if (*s == 'u')
 459                         {
 460                                 int                     i;
 461                                 int                     ch = 0;
 462
 463                                 for (i = 1; i <= 4; i++)
 464                                 {
 465                                         s++;
 466                                         if (*s == '\0')
 467                                         {
 468                                                 lex->token_terminator = s;
 469                                                 report_invalid_token(lex);
 470                                         }
 471                                         else if (*s >= '0' && *s <= '9')
 472                                                 ch = (ch * 16) + (*s - '0');
 473                                         else if (*s >= 'a' && *s <= 'f')
 474                                                 ch = (ch * 16) + (*s - 'a') + 10;
 475                                         else if (*s >= 'A' && *s <= 'F')
 476                                                 ch = (ch * 16) + (*s - 'A') + 10;
 477                                         else
 478                                         {
 479                                                 lex->token_terminator = s + pg_mblen(s);
 480                                                 ereport(ERROR,
 481                                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 482                                                                  errmsg("invalid input syntax for type json"),
 483                                                                  errdetail("\"\\u\" must be followed by four hexadecimal digits."),
 484                                                                  report_json_context(lex)));
 485                                         }
 486                                 }
 487                         }
 488                         else if (strchr("\"\\/bfnrt", *s) == NULL)
 489                         {
 490                                 /* Not a valid string escape, so error out. */
 491                                 lex->token_terminator = s + pg_mblen(s);
 492                                 ereport(ERROR,
 493                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 494                                                  errmsg("invalid input syntax for type json"),
 495                                                  errdetail("Escape sequence \"\\%s\" is invalid.",
 496                                                                    extract_mb_char(s)),
 497                                                  report_json_context(lex)));
 498                         }
 499                 }
 500         }
 501
 502         /* Hooray, we found the end of the string! */
 503         lex->token_terminator = s + 1;
 504 }
 505
 506 /*-------------------------------------------------------------------------
 507  * The next token in the input stream is known to be a number; lex it.
 508  *
 509  * In JSON, a number consists of four parts:
 510  *
 511  * (1) An optional minus sign ('-').
 512  *
 513  * (2) Either a single '0', or a string of one or more digits that does not
 514  *         begin with a '0'.
 515  *
 516  * (3) An optional decimal part, consisting of a period ('.') followed by
 517  *         one or more digits.  (Note: While this part can be omitted
 518  *         completely, it's not OK to have only the decimal point without
 519  *         any digits afterwards.)
 520  *
 521  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
 522  *         followed by '+' or '-', followed by one or more digits.      (Note:
 523  *         As with the decimal part, if 'e' or 'E' is present, it must be
 524  *         followed by at least one digit.)
 525  *
 526  * The 's' argument to this function points to the ostensible beginning
 527  * of part 2 - i.e. the character after any optional minus sign, and the
 528  * first character of the string if there is none.
 529  *
 530  *-------------------------------------------------------------------------
 531  */
 532 static void
 533 json_lex_number(JsonLexContext *lex, char *s)
 534 {
 535         bool            error = false;
 536         char       *p;
 537
 538         /* Part (1): leading sign indicator. */
 539         /* Caller already did this for us; so do nothing. */
 540
 541         /* Part (2): parse main digit string. */
 542         if (*s == '0')
 543                 s++;
 544         else if (*s >= '1' && *s <= '9')
 545         {
 546                 do
 547                 {
 548                         s++;
 549                 } while (*s >= '0' && *s <= '9');
 550         }
 551         else
 552                 error = true;
 553
 554         /* Part (3): parse optional decimal portion. */
 555         if (*s == '.')
 556         {
 557                 s++;
 558                 if (*s < '0' || *s > '9')
 559                         error = true;
 560                 else
 561                 {
 562                         do
 563                         {
 564                                 s++;
 565                         } while (*s >= '0' && *s <= '9');
 566                 }
 567         }
 568
 569         /* Part (4): parse optional exponent. */
 570         if (*s == 'e' || *s == 'E')
 571         {
 572                 s++;
 573                 if (*s == '+' || *s == '-')
 574                         s++;
 575                 if (*s < '0' || *s > '9')
 576                         error = true;
 577                 else
 578                 {
 579                         do
 580                         {
 581                                 s++;
 582                         } while (*s >= '0' && *s <= '9');
 583                 }
 584         }
 585
 586         /*
 587          * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
 588          * here should be considered part of the token for error-reporting
 589          * purposes.
 590          */
 591         for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
 592                 error = true;
 593         lex->token_terminator = p;
 594         if (error)
 595                 report_invalid_token(lex);
 596 }
 597
 598 /*
 599  * Report a parse error.
 600  *
 601  * lex->token_start and lex->token_terminator must identify the current token.
 602  */
 603 static void
 604 report_parse_error(JsonParseStack *stack, JsonLexContext *lex)
 605 {
 606         char       *token;
 607         int                     toklen;
 608
 609         /* Handle case where the input ended prematurely. */
 610         if (lex->token_start == NULL)
 611                 ereport(ERROR,
 612                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 613                                  errmsg("invalid input syntax for type json"),
 614                                  errdetail("The input string ended unexpectedly."),
 615                                  report_json_context(lex)));
 616
 617         /* Separate out the current token. */
 618         toklen = lex->token_terminator - lex->token_start;
 619         token = palloc(toklen + 1);
 620         memcpy(token, lex->token_start, toklen);
 621         token[toklen] = '\0';
 622
 623         /* Complain, with the appropriate detail message. */
 624         if (stack == NULL)
 625                 ereport(ERROR,
 626                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 627                                  errmsg("invalid input syntax for type json"),
 628                                  errdetail("Expected end of input, but found \"%s\".",
 629                                                    token),
 630                                  report_json_context(lex)));
 631         else
 632         {
 633                 switch (stack->state)
 634                 {
 635                         case JSON_PARSE_VALUE:
 636                                 ereport(ERROR,
 637                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 638                                                  errmsg("invalid input syntax for type json"),
 639                                                  errdetail("Expected JSON value, but found \"%s\".",
 640                                                                    token),
 641                                                  report_json_context(lex)));
 642                                 break;
 643                         case JSON_PARSE_ARRAY_START:
 644                                 ereport(ERROR,
 645                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 646                                                  errmsg("invalid input syntax for type json"),
 647                                                  errdetail("Expected array element or \"]\", but found \"%s\".",
 648                                                                    token),
 649                                                  report_json_context(lex)));
 650                                 break;
 651                         case JSON_PARSE_ARRAY_NEXT:
 652                                 ereport(ERROR,
 653                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 654                                                  errmsg("invalid input syntax for type json"),
 655                                                  errdetail("Expected \",\" or \"]\", but found \"%s\".",
 656                                                                    token),
 657                                                  report_json_context(lex)));
 658                                 break;
 659                         case JSON_PARSE_OBJECT_START:
 660                                 ereport(ERROR,
 661                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 662                                                  errmsg("invalid input syntax for type json"),
 663                                                  errdetail("Expected string or \"}\", but found \"%s\".",
 664                                                                    token),
 665                                                  report_json_context(lex)));
 666                                 break;
 667                         case JSON_PARSE_OBJECT_LABEL:
 668                                 ereport(ERROR,
 669                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 670                                                  errmsg("invalid input syntax for type json"),
 671                                                  errdetail("Expected \":\", but found \"%s\".",
 672                                                                    token),
 673                                                  report_json_context(lex)));
 674                                 break;
 675                         case JSON_PARSE_OBJECT_NEXT:
 676                                 ereport(ERROR,
 677                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 678                                                  errmsg("invalid input syntax for type json"),
 679                                                  errdetail("Expected \",\" or \"}\", but found \"%s\".",
 680                                                                    token),
 681                                                  report_json_context(lex)));
 682                                 break;
 683                         case JSON_PARSE_OBJECT_COMMA:
 684                                 ereport(ERROR,
 685                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 686                                                  errmsg("invalid input syntax for type json"),
 687                                                  errdetail("Expected string, but found \"%s\".",
 688                                                                    token),
 689                                                  report_json_context(lex)));
 690                                 break;
 691                         default:
 692                                 elog(ERROR, "unexpected json parse state: %d",
 693                                          (int) stack->state);
 694                 }
 695         }
 696 }
 697
 698 /*
 699  * Report an invalid input token.
 700  *
 701  * lex->token_start and lex->token_terminator must identify the token.
 702  */
 703 static void
 704 report_invalid_token(JsonLexContext *lex)
 705 {
 706         char       *token;
 707         int                     toklen;
 708
 709         /* Separate out the offending token. */
 710         toklen = lex->token_terminator - lex->token_start;
 711         token = palloc(toklen + 1);
 712         memcpy(token, lex->token_start, toklen);
 713         token[toklen] = '\0';
 714
 715         ereport(ERROR,
 716                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 717                          errmsg("invalid input syntax for type json"),
 718                          errdetail("Token \"%s\" is invalid.", token),
 719                          report_json_context(lex)));
 720 }
 721
 722 /*
 723  * Report a CONTEXT line for bogus JSON input.
 724  *
 725  * lex->token_terminator must be set to identify the spot where we detected
 726  * the error.  Note that lex->token_start might be NULL, in case we recognized
 727  * error at EOF.
 728  *
 729  * The return value isn't meaningful, but we make it non-void so that this
 730  * can be invoked inside ereport().
 731  */
 732 static int
 733 report_json_context(JsonLexContext *lex)
 734 {
 735         const char *context_start;
 736         const char *context_end;
 737         const char *line_start;
 738         int                     line_number;
 739         char       *ctxt;
 740         int                     ctxtlen;
 741         const char *prefix;
 742         const char *suffix;
 743
 744         /* Choose boundaries for the part of the input we will display */
 745         context_start = lex->input;
 746         context_end = lex->token_terminator;
 747         line_start = context_start;
 748         line_number = 1;
 749         for (;;)
 750         {
 751                 /* Always advance over newlines (context_end test is just paranoia) */
 752                 if (*context_start == '\n' && context_start < context_end)
 753                 {
 754                         context_start++;
 755                         line_start = context_start;
 756                         line_number++;
 757                         continue;
 758                 }
 759                 /* Otherwise, done as soon as we are close enough to context_end */
 760                 if (context_end - context_start < 50)
 761                         break;
 762                 /* Advance to next multibyte character */
 763                 if (IS_HIGHBIT_SET(*context_start))
 764                         context_start += pg_mblen(context_start);
 765                 else
 766                         context_start++;
 767         }
 768
 769         /*
 770          * We add "..." to indicate that the excerpt doesn't start at the
 771          * beginning of the line ... but if we're within 3 characters of the
 772          * beginning of the line, we might as well just show the whole line.
 773          */
 774         if (context_start - line_start <= 3)
 775                 context_start = line_start;
 776
 777         /* Get a null-terminated copy of the data to present */
 778         ctxtlen = context_end - context_start;
 779         ctxt = palloc(ctxtlen + 1);
 780         memcpy(ctxt, context_start, ctxtlen);
 781         ctxt[ctxtlen] = '\0';
 782
 783         /*
 784          * Show the context, prefixing "..." if not starting at start of line, and
 785          * suffixing "..." if not ending at end of line.
 786          */
 787         prefix = (context_start > line_start) ? "..." : "";
 788         suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." : "";
 789
 790         return errcontext("JSON data, line %d: %s%s%s",
 791                                           line_number, prefix, ctxt, suffix);
 792 }
 793
 794 /*
 795  * Extract a single, possibly multi-byte char from the input string.
 796  */
 797 static char *
 798 extract_mb_char(char *s)
 799 {
 800         char       *res;
 801         int                     len;
 802
 803         len = pg_mblen(s);
 804         res = palloc(len + 1);
 805         memcpy(res, s, len);
 806         res[len] = '\0';
 807
 808         return res;
 809 }
 810
 811 /*
 812  * Turn a scalar Datum into JSON, appending the string to "result".
 813  *
 814  * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
 815  * as appropriate.
 816  */
 817 static void
 818 datum_to_json(Datum val, bool is_null, StringInfo result,
 819                           TYPCATEGORY tcategory, Oid typoutputfunc)
 820 {
 821         char       *outputstr;
 822
 823         if (is_null)
 824         {
 825                 appendStringInfoString(result, "null");
 826                 return;
 827         }
 828
 829         switch (tcategory)
 830         {
 831                 case TYPCATEGORY_ARRAY:
 832                         array_to_json_internal(val, result, false);
 833                         break;
 834                 case TYPCATEGORY_COMPOSITE:
 835                         composite_to_json(val, result, false);
 836                         break;
 837                 case TYPCATEGORY_BOOLEAN:
 838                         if (DatumGetBool(val))
 839                                 appendStringInfoString(result, "true");
 840                         else
 841                                 appendStringInfoString(result, "false");
 842                         break;
 843                 case TYPCATEGORY_NUMERIC:
 844                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
 845
 846                         /*
 847                          * Don't call escape_json here if it's a valid JSON number.
 848                          * Numeric output should usually be a valid JSON number and JSON
 849                          * numbers shouldn't be quoted. Quote cases like "Nan" and
 850                          * "Infinity", however.
 851                          */
 852                         if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
 853                                 appendStringInfoString(result, outputstr);
 854                         else
 855                                 escape_json(result, outputstr);
 856                         pfree(outputstr);
 857                         break;
 858                 case TYPCATEGORY_JSON:
 859                         /* JSON will already be escaped */
 860                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
 861                         appendStringInfoString(result, outputstr);
 862                         pfree(outputstr);
 863                         break;
 864                 default:
 865                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
 866                         escape_json(result, outputstr);
 867                         pfree(outputstr);
 868                         break;
 869         }
 870 }
 871
 872 /*
 873  * Process a single dimension of an array.
 874  * If it's the innermost dimension, output the values, otherwise call
 875  * ourselves recursively to process the next dimension.
 876  */
 877 static void
 878 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
 879                                   bool *nulls, int *valcount, TYPCATEGORY tcategory,
 880                                   Oid typoutputfunc, bool use_line_feeds)
 881 {
 882         int                     i;
 883         const char *sep;
 884
 885         Assert(dim < ndims);
 886
 887         sep = use_line_feeds ? ",\n " : ",";
 888
 889         appendStringInfoChar(result, '[');
 890
 891         for (i = 1; i <= dims[dim]; i++)
 892         {
 893                 if (i > 1)
 894                         appendStringInfoString(result, sep);
 895
 896                 if (dim + 1 == ndims)
 897                 {
 898                         datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
 899                                                   typoutputfunc);
 900                         (*valcount)++;
 901                 }
 902                 else
 903                 {
 904                         /*
 905                          * Do we want line feeds on inner dimensions of arrays? For now
 906                          * we'll say no.
 907                          */
 908                         array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
 909                                                           valcount, tcategory, typoutputfunc, false);
 910                 }
 911         }
 912
 913         appendStringInfoChar(result, ']');
 914 }
 915
 916 /*
 917  * Turn an array into JSON.
 918  */
 919 static void
 920 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
 921 {
 922         ArrayType  *v = DatumGetArrayTypeP(array);
 923         Oid                     element_type = ARR_ELEMTYPE(v);
 924         int                *dim;
 925         int                     ndim;
 926         int                     nitems;
 927         int                     count = 0;
 928         Datum      *elements;
 929         bool       *nulls;
 930         int16           typlen;
 931         bool            typbyval;
 932         char            typalign,
 933                                 typdelim;
 934         Oid                     typioparam;
 935         Oid                     typoutputfunc;
 936         TYPCATEGORY tcategory;
 937
 938         ndim = ARR_NDIM(v);
 939         dim = ARR_DIMS(v);
 940         nitems = ArrayGetNItems(ndim, dim);
 941
 942         if (nitems <= 0)
 943         {
 944                 appendStringInfoString(result, "[]");
 945                 return;
 946         }
 947
 948         get_type_io_data(element_type, IOFunc_output,
 949                                          &typlen, &typbyval, &typalign,
 950                                          &typdelim, &typioparam, &typoutputfunc);
 951
 952         deconstruct_array(v, element_type, typlen, typbyval,
 953                                           typalign, &elements, &nulls,
 954                                           &nitems);
 955
 956         if (element_type == RECORDOID)
 957                 tcategory = TYPCATEGORY_COMPOSITE;
 958         else if (element_type == JSONOID)
 959                 tcategory = TYPCATEGORY_JSON;
 960         else
 961                 tcategory = TypeCategory(element_type);
 962
 963         array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
 964                                           typoutputfunc, use_line_feeds);
 965
 966         pfree(elements);
 967         pfree(nulls);
 968 }
 969
 970 /*
 971  * Turn a composite / record into JSON.
 972  */
 973 static void
 974 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
 975 {
 976         HeapTupleHeader td;
 977         Oid                     tupType;
 978         int32           tupTypmod;
 979         TupleDesc       tupdesc;
 980         HeapTupleData tmptup,
 981                            *tuple;
 982         int                     i;
 983         bool            needsep = false;
 984         const char *sep;
 985
 986         sep = use_line_feeds ? ",\n " : ",";
 987
 988         td = DatumGetHeapTupleHeader(composite);
 989
 990         /* Extract rowtype info and find a tupdesc */
 991         tupType = HeapTupleHeaderGetTypeId(td);
 992         tupTypmod = HeapTupleHeaderGetTypMod(td);
 993         tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
 994
 995         /* Build a temporary HeapTuple control structure */
 996         tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
 997         tmptup.t_data = td;
 998         tuple = &tmptup;
 999
1000         appendStringInfoChar(result, '{');
1001
1002         for (i = 0; i < tupdesc->natts; i++)
1003         {
1004                 Datum           val,
1005                                         origval;
1006                 bool            isnull;
1007                 char       *attname;
1008                 TYPCATEGORY tcategory;
1009                 Oid                     typoutput;
1010                 bool            typisvarlena;
1011
1012                 if (tupdesc->attrs[i]->attisdropped)
1013                         continue;
1014
1015                 if (needsep)
1016                         appendStringInfoString(result, sep);
1017                 needsep = true;
1018
1019                 attname = NameStr(tupdesc->attrs[i]->attname);
1020                 escape_json(result, attname);
1021                 appendStringInfoChar(result, ':');
1022
1023                 origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
1024
1025                 if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
1026                         tcategory = TYPCATEGORY_ARRAY;
1027                 else if (tupdesc->attrs[i]->atttypid == RECORDOID)
1028                         tcategory = TYPCATEGORY_COMPOSITE;
1029                 else if (tupdesc->attrs[i]->atttypid == JSONOID)
1030                         tcategory = TYPCATEGORY_JSON;
1031                 else
1032                         tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
1033
1034                 getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
1035                                                   &typoutput, &typisvarlena);
1036
1037                 /*
1038                  * If we have a toasted datum, forcibly detoast it here to avoid
1039                  * memory leakage inside the type's output routine.
1040                  */
1041                 if (typisvarlena && !isnull)
1042                         val = PointerGetDatum(PG_DETOAST_DATUM(origval));
1043                 else
1044                         val = origval;
1045
1046                 datum_to_json(val, isnull, result, tcategory, typoutput);
1047
1048                 /* Clean up detoasted copy, if any */
1049                 if (val != origval)
1050                         pfree(DatumGetPointer(val));
1051         }
1052
1053         appendStringInfoChar(result, '}');
1054         ReleaseTupleDesc(tupdesc);
1055 }
1056
1057 /*
1058  * SQL function array_to_json(row)
1059  */
1060 extern Datum
1061 array_to_json(PG_FUNCTION_ARGS)
1062 {
1063         Datum           array = PG_GETARG_DATUM(0);
1064         StringInfo      result;
1065
1066         result = makeStringInfo();
1067
1068         array_to_json_internal(array, result, false);
1069
1070         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1071 }
1072
1073 /*
1074  * SQL function array_to_json(row, prettybool)
1075  */
1076 extern Datum
1077 array_to_json_pretty(PG_FUNCTION_ARGS)
1078 {
1079         Datum           array = PG_GETARG_DATUM(0);
1080         bool            use_line_feeds = PG_GETARG_BOOL(1);
1081         StringInfo      result;
1082
1083         result = makeStringInfo();
1084
1085         array_to_json_internal(array, result, use_line_feeds);
1086
1087         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1088 }
1089
1090 /*
1091  * SQL function row_to_json(row)
1092  */
1093 extern Datum
1094 row_to_json(PG_FUNCTION_ARGS)
1095 {
1096         Datum           array = PG_GETARG_DATUM(0);
1097         StringInfo      result;
1098
1099         result = makeStringInfo();
1100
1101         composite_to_json(array, result, false);
1102
1103         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1104 }
1105
1106 /*
1107  * SQL function row_to_json(row, prettybool)
1108  */
1109 extern Datum
1110 row_to_json_pretty(PG_FUNCTION_ARGS)
1111 {
1112         Datum           array = PG_GETARG_DATUM(0);
1113         bool            use_line_feeds = PG_GETARG_BOOL(1);
1114         StringInfo      result;
1115
1116         result = makeStringInfo();
1117
1118         composite_to_json(array, result, use_line_feeds);
1119
1120         PG_RETURN_TEXT_P(cstring_to_text(result->data));
1121 }
1122
1123 /*
1124  * Produce a JSON string literal, properly escaping characters in the text.
1125  */
1126 void
1127 escape_json(StringInfo buf, const char *str)
1128 {
1129         const char *p;
1130
1131         appendStringInfoCharMacro(buf, '\"');
1132         for (p = str; *p; p++)
1133         {
1134                 switch (*p)
1135                 {
1136                         case '\b':
1137                                 appendStringInfoString(buf, "\\b");
1138                                 break;
1139                         case '\f':
1140                                 appendStringInfoString(buf, "\\f");
1141                                 break;
1142                         case '\n':
1143                                 appendStringInfoString(buf, "\\n");
1144                                 break;
1145                         case '\r':
1146                                 appendStringInfoString(buf, "\\r");
1147                                 break;
1148                         case '\t':
1149                                 appendStringInfoString(buf, "\\t");
1150                                 break;
1151                         case '"':
1152                                 appendStringInfoString(buf, "\\\"");
1153                                 break;
1154                         case '\\':
1155                                 appendStringInfoString(buf, "\\\\");
1156                                 break;
1157                         default:
1158                                 if ((unsigned char) *p < ' ')
1159                                         appendStringInfo(buf, "\\u%04x", (int) *p);
1160                                 else
1161                                         appendStringInfoCharMacro(buf, *p);
1162                                 break;
1163                 }
1164         }
1165         appendStringInfoCharMacro(buf, '\"');
1166 }