granicus.if.org Git - postgresql/blob - src/backend/utils/adt/jsonfuncs.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * jsonfuncs.c
   4  *              Functions to process JSON data type.
   5  *
   6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  * IDENTIFICATION
  10  *        src/backend/utils/adt/jsonfuncs.c
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14
#include "postgres.h"

#include <errno.h>
#include <limits.h>

#include "fmgr.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
#include "utils/json.h"
#include "utils/jsonapi.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/typcache.h"
  34
/* semantic action functions for json_object_keys */
static void okeys_object_field_start(void *state, char *fname, bool isnull);
static void okeys_array_start(void *state);
static void okeys_scalar(void *state, char *token, JsonTokenType tokentype);

/* semantic action functions for json_get* functions */
static void get_object_start(void *state);
static void get_object_field_start(void *state, char *fname, bool isnull);
static void get_object_field_end(void *state, char *fname, bool isnull);
static void get_array_start(void *state);
static void get_array_element_start(void *state, bool isnull);
static void get_array_element_end(void *state, bool isnull);
static void get_scalar(void *state, char *token, JsonTokenType tokentype);

/*
 * common worker functions for the json getter functions:
 * get_path_all handles the path-array variants, get_worker does the parse
 * for all getters.
 */
static inline Datum get_path_all(PG_FUNCTION_ARGS, bool as_text);
static inline text *get_worker(text *json, char *field, int elem_index,
		   char **tpath, int *ipath, int npath,
		   bool normalize_results);

/* semantic action functions for json_array_length */
static void alen_object_start(void *state);
static void alen_scalar(void *state, char *token, JsonTokenType tokentype);
static void alen_array_element_start(void *state, bool isnull);

/* common worker for json_each* functions */
static inline Datum each_worker(PG_FUNCTION_ARGS, bool as_text);

/* semantic action functions for json_each */
static void each_object_field_start(void *state, char *fname, bool isnull);
static void each_object_field_end(void *state, char *fname, bool isnull);
static void each_array_start(void *state);
static void each_scalar(void *state, char *token, JsonTokenType tokentype);

/* common worker for json_array_elements* functions */
static inline Datum elements_worker(PG_FUNCTION_ARGS, bool as_text);

/* semantic action functions for json_array_elements */
static void elements_object_start(void *state);
static void elements_array_element_start(void *state, bool isnull);
static void elements_array_element_end(void *state, bool isnull);
static void elements_scalar(void *state, char *token, JsonTokenType tokentype);

/* turn a json object into a hash table */
static HTAB *get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text);

/* common worker for populate_record and to_record */
static inline Datum populate_record_worker(PG_FUNCTION_ARGS,
					   bool have_record_arg);

/* semantic action functions for get_json_object_as_hash */
static void hash_object_field_start(void *state, char *fname, bool isnull);
static void hash_object_field_end(void *state, char *fname, bool isnull);
static void hash_array_start(void *state);
static void hash_scalar(void *state, char *token, JsonTokenType tokentype);

/* semantic action functions for populate_recordset */
static void populate_recordset_object_field_start(void *state, char *fname, bool isnull);
static void populate_recordset_object_field_end(void *state, char *fname, bool isnull);
static void populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype);
static void populate_recordset_object_start(void *state);
static void populate_recordset_object_end(void *state);
static void populate_recordset_array_start(void *state);
static void populate_recordset_array_element_start(void *state, bool isnull);

/* worker function for populate_recordset and to_recordset */
static inline Datum populate_recordset_worker(PG_FUNCTION_ARGS,
						  bool have_record_arg);
 103
 104 /* search type classification for json_get* functions */
 105 typedef enum
 106 {
 107         JSON_SEARCH_OBJECT = 1,
 108         JSON_SEARCH_ARRAY,
 109         JSON_SEARCH_PATH
 110 } JsonSearch;
 111
 112 /* state for json_object_keys */
typedef struct OkeysState
{
	/*
	 * lexer context, consulted by the callbacks for the nesting level.
	 * json_object_keys frees it at the end of the first call, so it must
	 * not be dereferenced on later calls.
	 */
	JsonLexContext *lex;
	char	  **result;			/* array of copied top-level key names */
	int			result_size;	/* allocated length of result, in entries */
	int			result_count;	/* number of entries of result in use */
	int			sent_count;		/* number of keys already returned */
} OkeysState;
 121
 122 /* state for json_get* functions */
typedef struct GetState
{
	JsonLexContext *lex;		/* lexer context; supplies lex_level and
								 * token positions to the callbacks */
	JsonSearch	search_type;	/* which kind of lookup is being done */
	int			search_index;	/* JSON_SEARCH_ARRAY: wanted element index */
	int			array_index;	/* JSON_SEARCH_ARRAY: index of the level-1
								 * element currently being seen; starts at -1 */
	char	   *search_term;	/* JSON_SEARCH_OBJECT: wanted field name */
	char	   *result_start;	/* start of the matched value in the json
								 * text, or NULL if none has been noted */
	text	   *tresult;		/* the result handed back by get_worker;
								 * NULL means no result */
	bool		result_is_null; /* NOTE(review): not referenced by any code
								 * visible in this part of the file */
	bool		normalize_results;		/* true for the "_text" variants */
	bool		next_scalar;	/* tells get_scalar to store the next scalar
								 * token as the result */
	char	  **path;			/* JSON_SEARCH_PATH: path elements as strings */
	int			npath;			/* JSON_SEARCH_PATH: number of path elements */
	char	  **current_path;	/* allocated by get_worker; NOTE(review): not
								 * otherwise used in the code visible here */
	bool	   *pathok;			/* pathok[n] is true while the path has
								 * matched down to nesting level n */
	int		   *array_level_index;		/* per nesting level: index of the
										 * array element currently seen */
	int		   *path_level_index;		/* per path element: its value as an
										 * array index, or -1 if not usable */
} GetState;
 142
 143 /* state for json_array_length */
typedef struct AlenState
{
	JsonLexContext *lex;		/* lexer context; callbacks check lex_level */
	int			count;			/* value returned by json_array_length */
} AlenState;
 149
 150 /* state for json_each */
/*
 * NOTE(review): the each_* functions that use this struct are not visible
 * in this part of the file; the field notes below are inferred from the
 * field names and types only and should be confirmed against each_worker.
 */
typedef struct EachState
{
	JsonLexContext *lex;		/* lexer context */
	Tuplestorestate *tuple_store;		/* presumably collects result rows */
	TupleDesc	ret_tdesc;		/* presumably describes the result rows */
	MemoryContext tmp_cxt;		/* presumably short-lived working memory */
	char	   *result_start;	/* presumably start of the current value in
								 * the json text, as in GetState */
	bool		normalize_results;		/* presumably true for the text variant */
	bool		next_scalar;	/* presumably: capture next scalar token */
	char	   *normalized_scalar;		/* presumably the captured scalar */
} EachState;
 162
 163 /* state for json_array_elements */
/*
 * NOTE(review): the elements_* functions that use this struct are not
 * visible in this part of the file; the field notes below are inferred from
 * the field names and types only and should be confirmed against
 * elements_worker.
 */
typedef struct ElementsState
{
	JsonLexContext *lex;		/* lexer context */
	Tuplestorestate *tuple_store;		/* presumably collects result rows */
	TupleDesc	ret_tdesc;		/* presumably describes the result rows */
	MemoryContext tmp_cxt;		/* presumably short-lived working memory */
	char	   *result_start;	/* presumably start of the current element in
								 * the json text, as in GetState */
	bool		normalize_results;		/* presumably true for the text variant */
	bool		next_scalar;	/* presumably: capture next scalar token */
	char	   *normalized_scalar;		/* presumably the captured scalar */
} ElementsState;
 175
 176 /* state for get_json_object_as_hash */
/*
 * Note that the struct tag is spelled JhashState while the typedef name is
 * JHashState.
 *
 * NOTE(review): the hash_* functions that use this struct are not visible
 * in this part of the file; field notes are inferred from names and types.
 */
typedef struct JhashState
{
	JsonLexContext *lex;		/* lexer context */
	HTAB	   *hash;			/* presumably the hash table being built */
	char	   *saved_scalar;	/* presumably the most recent scalar token */
	char	   *save_json_start;	/* presumably start of a nested json value */
	bool		use_json_as_text;	/* same name as the flag passed to
									 * get_json_object_as_hash */
	char	   *function_name;	/* presumably the caller's name, for messages */
} JHashState;
 186
 187 /* used to build the hashtable */
/*
 * NOTE(review): the code that fills and reads these entries is not visible
 * in this part of the file; field notes are inferred from names and types.
 */
typedef struct JsonHashEntry
{
	char		fname[NAMEDATALEN];		/* fixed-size name buffer; presumably
										 * the hash key */
	char	   *val;			/* presumably the field's value as a string */
	char	   *json;			/* presumably the field's raw json text */
	bool		isnull;			/* presumably true for a json null value */
} JsonHashEntry;
 195
 196 /* these two are stolen from hstore / record_out, used in populate_record* */
/*
 * Per-column I/O lookup data.
 *
 * NOTE(review): the populate_record* code that uses this struct is not
 * visible in this part of the file; field notes are inferred from names.
 */
typedef struct ColumnIOData
{
	Oid			column_type;	/* presumably the column's type OID */
	Oid			typiofunc;		/* presumably the type's I/O function OID */
	Oid			typioparam;		/* presumably the type's I/O parameter OID */
	FmgrInfo	proc;			/* presumably lookup info for typiofunc */
} ColumnIOData;
 204
/*
 * Per-record I/O lookup data, with one ColumnIOData per column.
 *
 * The trailing array is declared with length 1 but is marked as variable
 * length, so the struct must not be sized with plain sizeof() for more than
 * one column.
 *
 * NOTE(review): the code that allocates and uses this struct is not visible
 * in this part of the file; field notes are inferred from names.
 */
typedef struct RecordIOData
{
	Oid			record_type;	/* presumably the record's type OID */
	int32		record_typmod;	/* presumably the record's typmod */
	int			ncolumns;		/* presumably the length of columns[] */
	ColumnIOData columns[1];	/* VARIABLE LENGTH ARRAY */
} RecordIOData;
 212
 213 /* state for populate_recordset */
/*
 * NOTE(review): the populate_recordset_* functions that use this struct are
 * not visible in this part of the file; field notes are inferred from names,
 * types and the parallel fields of the other state structs.
 */
typedef struct PopulateRecordsetState
{
	JsonLexContext *lex;		/* lexer context */
	HTAB	   *json_hash;		/* presumably the fields of the current object */
	char	   *saved_scalar;	/* presumably the most recent scalar token */
	char	   *save_json_start;	/* presumably start of a nested json value */
	bool		use_json_as_text;	/* presumably: accept nested json as text */
	Tuplestorestate *tuple_store;		/* presumably collects result rows */
	TupleDesc	ret_tdesc;		/* presumably describes the result rows */
	HeapTupleHeader rec;		/* presumably the base record argument */
	RecordIOData *my_extra;		/* presumably cached per-column I/O data */
	MemoryContext fn_mcxt;		/* used to stash IO funcs */
} PopulateRecordsetState;
 227
 228 /*
 229  * SQL function json_object-keys
 230  *
 231  * Returns the set of keys for the object argument.
 232  *
 233  * This SRF operates in value-per-call mode. It processes the
 234  * object during the first call, and the keys are simply stashed
 235  * in an array, whise size is expanded as necessary. This is probably
 236  * safe enough for a list of keys of a single object, since they are
 237  * limited in size to NAMEDATALEN and the number of keys is unlikely to
 238  * be so huge that it has major memory implications.
 239  */
 240
 241
 242 Datum
 243 json_object_keys(PG_FUNCTION_ARGS)
 244 {
 245         FuncCallContext *funcctx;
 246         OkeysState *state;
 247         int                     i;
 248
 249         if (SRF_IS_FIRSTCALL())
 250         {
 251                 text       *json = PG_GETARG_TEXT_P(0);
 252                 JsonLexContext *lex = makeJsonLexContext(json, true);
 253                 JsonSemAction *sem;
 254
 255                 MemoryContext oldcontext;
 256
 257                 funcctx = SRF_FIRSTCALL_INIT();
 258                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
 259
 260                 state = palloc(sizeof(OkeysState));
 261                 sem = palloc0(sizeof(JsonSemAction));
 262
 263                 state->lex = lex;
 264                 state->result_size = 256;
 265                 state->result_count = 0;
 266                 state->sent_count = 0;
 267                 state->result = palloc(256 * sizeof(char *));
 268
 269                 sem->semstate = (void *) state;
 270                 sem->array_start = okeys_array_start;
 271                 sem->scalar = okeys_scalar;
 272                 sem->object_field_start = okeys_object_field_start;
 273                 /* remainder are all NULL, courtesy of palloc0 above */
 274
 275                 pg_parse_json(lex, sem);
 276                 /* keys are now in state->result */
 277
 278                 pfree(lex->strval->data);
 279                 pfree(lex->strval);
 280                 pfree(lex);
 281                 pfree(sem);
 282
 283                 MemoryContextSwitchTo(oldcontext);
 284                 funcctx->user_fctx = (void *) state;
 285
 286         }
 287
 288         funcctx = SRF_PERCALL_SETUP();
 289         state = (OkeysState *) funcctx->user_fctx;
 290
 291         if (state->sent_count < state->result_count)
 292         {
 293                 char       *nxt = state->result[state->sent_count++];
 294
 295                 SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
 296         }
 297
 298         /* cleanup to reduce or eliminate memory leaks */
 299         for (i = 0; i < state->result_count; i++)
 300                 pfree(state->result[i]);
 301         pfree(state->result);
 302         pfree(state);
 303
 304         SRF_RETURN_DONE(funcctx);
 305 }
 306
 307 static void
 308 okeys_object_field_start(void *state, char *fname, bool isnull)
 309 {
 310         OkeysState *_state = (OkeysState *) state;
 311
 312         /* only collecting keys for the top level object */
 313         if (_state->lex->lex_level != 1)
 314                 return;
 315
 316         /* enlarge result array if necessary */
 317         if (_state->result_count >= _state->result_size)
 318         {
 319                 _state->result_size *= 2;
 320                 _state->result =
 321                         repalloc(_state->result, sizeof(char *) * _state->result_size);
 322         }
 323
 324         /* save a copy of the field name */
 325         _state->result[_state->result_count++] = pstrdup(fname);
 326 }
 327
 328 static void
 329 okeys_array_start(void *state)
 330 {
 331         OkeysState *_state = (OkeysState *) state;
 332
 333         /* top level must be a json object */
 334         if (_state->lex->lex_level == 0)
 335                 ereport(ERROR,
 336                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 337                                  errmsg("cannot call json_object_keys on an array")));
 338 }
 339
 340 static void
 341 okeys_scalar(void *state, char *token, JsonTokenType tokentype)
 342 {
 343         OkeysState *_state = (OkeysState *) state;
 344
 345         /* top level must be a json object */
 346         if (_state->lex->lex_level == 0)
 347                 ereport(ERROR,
 348                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 349                                  errmsg("cannot call json_object_keys on a scalar")));
 350 }
 351
 352 /*
 353  * json getter functions
 354  * these implement the -> ->> #> and #>> operators
 355  * and the json_extract_path*(json, text, ...) functions
 356  */
 357
 358
 359 Datum
 360 json_object_field(PG_FUNCTION_ARGS)
 361 {
 362         text       *json = PG_GETARG_TEXT_P(0);
 363         text       *result;
 364         text       *fname = PG_GETARG_TEXT_P(1);
 365         char       *fnamestr = text_to_cstring(fname);
 366
 367         result = get_worker(json, fnamestr, -1, NULL, NULL, -1, false);
 368
 369         if (result != NULL)
 370                 PG_RETURN_TEXT_P(result);
 371         else
 372                 PG_RETURN_NULL();
 373 }
 374
 375 Datum
 376 json_object_field_text(PG_FUNCTION_ARGS)
 377 {
 378         text       *json = PG_GETARG_TEXT_P(0);
 379         text       *result;
 380         text       *fname = PG_GETARG_TEXT_P(1);
 381         char       *fnamestr = text_to_cstring(fname);
 382
 383         result = get_worker(json, fnamestr, -1, NULL, NULL, -1, true);
 384
 385         if (result != NULL)
 386                 PG_RETURN_TEXT_P(result);
 387         else
 388                 PG_RETURN_NULL();
 389 }
 390
 391 Datum
 392 json_array_element(PG_FUNCTION_ARGS)
 393 {
 394         text       *json = PG_GETARG_TEXT_P(0);
 395         text       *result;
 396         int                     element = PG_GETARG_INT32(1);
 397
 398         result = get_worker(json, NULL, element, NULL, NULL, -1, false);
 399
 400         if (result != NULL)
 401                 PG_RETURN_TEXT_P(result);
 402         else
 403                 PG_RETURN_NULL();
 404 }
 405
 406 Datum
 407 json_array_element_text(PG_FUNCTION_ARGS)
 408 {
 409         text       *json = PG_GETARG_TEXT_P(0);
 410         text       *result;
 411         int                     element = PG_GETARG_INT32(1);
 412
 413         result = get_worker(json, NULL, element, NULL, NULL, -1, true);
 414
 415         if (result != NULL)
 416                 PG_RETURN_TEXT_P(result);
 417         else
 418                 PG_RETURN_NULL();
 419 }
 420
 421 Datum
 422 json_extract_path(PG_FUNCTION_ARGS)
 423 {
 424         return get_path_all(fcinfo, false);
 425 }
 426
 427 Datum
 428 json_extract_path_text(PG_FUNCTION_ARGS)
 429 {
 430         return get_path_all(fcinfo, true);
 431 }
 432
 433 /*
 434  * common routine for extract_path functions
 435  */
 436 static inline Datum
 437 get_path_all(PG_FUNCTION_ARGS, bool as_text)
 438 {
 439         text       *json = PG_GETARG_TEXT_P(0);
 440         ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
 441         text       *result;
 442         Datum      *pathtext;
 443         bool       *pathnulls;
 444         int                     npath;
 445         char      **tpath;
 446         int                *ipath;
 447         int                     i;
 448         long            ind;
 449         char       *endptr;
 450
 451         if (array_contains_nulls(path))
 452                 ereport(ERROR,
 453                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 454                                  errmsg("cannot call function with null path elements")));
 455
 456
 457         deconstruct_array(path, TEXTOID, -1, false, 'i',
 458                                           &pathtext, &pathnulls, &npath);
 459
 460         tpath = palloc(npath * sizeof(char *));
 461         ipath = palloc(npath * sizeof(int));
 462
 463
 464         for (i = 0; i < npath; i++)
 465         {
 466                 tpath[i] = TextDatumGetCString(pathtext[i]);
 467                 if (*tpath[i] == '\0')
 468                         ereport(
 469                                         ERROR,
 470                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 471                                    errmsg("cannot call function with empty path elements")));
 472
 473                 /*
 474                  * we have no idea at this stage what structure the document is so
 475                  * just convert anything in the path that we can to an integer and set
 476                  * all the other integers to -1 which will never match.
 477                  */
 478                 ind = strtol(tpath[i], &endptr, 10);
 479                 if (*endptr == '\0' && ind <= INT_MAX && ind >= 0)
 480                         ipath[i] = (int) ind;
 481                 else
 482                         ipath[i] = -1;
 483         }
 484
 485
 486         result = get_worker(json, NULL, -1, tpath, ipath, npath, as_text);
 487
 488         if (result != NULL)
 489                 PG_RETURN_TEXT_P(result);
 490         else
 491                 PG_RETURN_NULL();
 492 }
 493
 494 /*
 495  * get_worker
 496  *
 497  * common worker for all the json getter functions
 498  */
 499 static inline text *
 500 get_worker(text *json,
 501                    char *field,
 502                    int elem_index,
 503                    char **tpath,
 504                    int *ipath,
 505                    int npath,
 506                    bool normalize_results)
 507 {
 508         GetState   *state;
 509         JsonLexContext *lex = makeJsonLexContext(json, true);
 510         JsonSemAction *sem;
 511
 512         /* only allowed to use one of these */
 513         Assert(elem_index < 0 || (tpath == NULL && ipath == NULL && field == NULL));
 514         Assert(tpath == NULL || field == NULL);
 515
 516         state = palloc0(sizeof(GetState));
 517         sem = palloc0(sizeof(JsonSemAction));
 518
 519         state->lex = lex;
 520         /* is it "_as_text" variant? */
 521         state->normalize_results = normalize_results;
 522         if (field != NULL)
 523         {
 524                 /* single text argument */
 525                 state->search_type = JSON_SEARCH_OBJECT;
 526                 state->search_term = field;
 527         }
 528         else if (tpath != NULL)
 529         {
 530                 /* path array argument */
 531                 state->search_type = JSON_SEARCH_PATH;
 532                 state->path = tpath;
 533                 state->npath = npath;
 534                 state->current_path = palloc(sizeof(char *) * npath);
 535                 state->pathok = palloc0(sizeof(bool) * npath);
 536                 state->pathok[0] = true;
 537                 state->array_level_index = palloc(sizeof(int) * npath);
 538                 state->path_level_index = ipath;
 539
 540         }
 541         else
 542         {
 543                 /* single integer argument */
 544                 state->search_type = JSON_SEARCH_ARRAY;
 545                 state->search_index = elem_index;
 546                 state->array_index = -1;
 547         }
 548
 549         sem->semstate = (void *) state;
 550
 551         /*
 552          * Not all      variants need all the semantic routines. only set the ones
 553          * that are actually needed for maximum efficiency.
 554          */
 555         sem->object_start = get_object_start;
 556         sem->array_start = get_array_start;
 557         sem->scalar = get_scalar;
 558         if (field != NULL || tpath != NULL)
 559         {
 560                 sem->object_field_start = get_object_field_start;
 561                 sem->object_field_end = get_object_field_end;
 562         }
 563         if (field == NULL)
 564         {
 565                 sem->array_element_start = get_array_element_start;
 566                 sem->array_element_end = get_array_element_end;
 567         }
 568
 569         pg_parse_json(lex, sem);
 570
 571         return state->tresult;
 572 }
 573
 574 static void
 575 get_object_start(void *state)
 576 {
 577         GetState   *_state = (GetState *) state;
 578
 579         /* json structure check */
 580         if (_state->lex->lex_level == 0 && _state->search_type == JSON_SEARCH_ARRAY)
 581                 ereport(ERROR,
 582                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 583                                  errmsg("cannot extract array element from a non-array")));
 584 }
 585
 586 static void
 587 get_object_field_start(void *state, char *fname, bool isnull)
 588 {
 589         GetState   *_state = (GetState *) state;
 590         bool            get_next = false;
 591         int                     lex_level = _state->lex->lex_level;
 592
 593         if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
 594                 strcmp(fname, _state->search_term) == 0)
 595         {
 596
 597                 _state->tresult = NULL;
 598                 _state->result_start = NULL;
 599                 get_next = true;
 600         }
 601         else if (_state->search_type == JSON_SEARCH_PATH &&
 602                          lex_level <= _state->npath &&
 603                          _state->pathok[_state->lex->lex_level - 1] &&
 604                          strcmp(fname, _state->path[lex_level - 1]) == 0)
 605         {
 606                 /* path search, path so far is ok,      and we have a match */
 607
 608                 /* this object overrides any previous matching object */
 609
 610                 _state->tresult = NULL;
 611                 _state->result_start = NULL;
 612
 613                 /* if not at end of path just mark path ok */
 614                 if (lex_level < _state->npath)
 615                         _state->pathok[lex_level] = true;
 616
 617                 /* end of path, so we want this value */
 618                 if (lex_level == _state->npath)
 619                         get_next = true;
 620         }
 621
 622         if (get_next)
 623         {
 624                 if (_state->normalize_results &&
 625                         _state->lex->token_type == JSON_TOKEN_STRING)
 626                 {
 627                         /* for as_text variants, tell get_scalar to set it for us */
 628                         _state->next_scalar = true;
 629                 }
 630                 else
 631                 {
 632                         /* for non-as_text variants, just note the json starting point */
 633                         _state->result_start = _state->lex->token_start;
 634                 }
 635         }
 636 }
 637
 638 static void
 639 get_object_field_end(void *state, char *fname, bool isnull)
 640 {
 641         GetState   *_state = (GetState *) state;
 642         bool            get_last = false;
 643         int                     lex_level = _state->lex->lex_level;
 644
 645
 646         /* same tests as in get_object_field_start, mutatis mutandis */
 647         if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
 648                 strcmp(fname, _state->search_term) == 0)
 649         {
 650                 get_last = true;
 651         }
 652         else if (_state->search_type == JSON_SEARCH_PATH &&
 653                          lex_level <= _state->npath &&
 654                          _state->pathok[lex_level - 1] &&
 655                          strcmp(fname, _state->path[lex_level - 1]) == 0)
 656         {
 657                 /* done with this field so reset pathok */
 658                 if (lex_level < _state->npath)
 659                         _state->pathok[lex_level] = false;
 660
 661                 if (lex_level == _state->npath)
 662                         get_last = true;
 663         }
 664
 665         /* for as_test variants our work is already done */
 666         if (get_last && _state->result_start != NULL)
 667         {
 668                 /*
 669                  * make a text object from the string from the prevously noted json
 670                  * start up to the end of the previous token (the lexer is by now
 671                  * ahead of us on whatevere came after what we're interested in).
 672                  */
 673                 int                     len = _state->lex->prev_token_terminator - _state->result_start;
 674
 675                 if (isnull && _state->normalize_results)
 676                         _state->tresult = (text *) NULL;
 677                 else
 678                         _state->tresult = cstring_to_text_with_len(_state->result_start, len);
 679         }
 680
 681         /*
 682          * don't need to reset _state->result_start b/c we're only returning one
 683          * datum, the conditions should not occur more than once, and this lets us
 684          * check cheaply that they don't (see object_field_start() )
 685          */
 686 }
 687
 688 static void
 689 get_array_start(void *state)
 690 {
 691         GetState   *_state = (GetState *) state;
 692         int                     lex_level = _state->lex->lex_level;
 693
 694         /* json structure check */
 695         if (lex_level == 0 && _state->search_type == JSON_SEARCH_OBJECT)
 696                 ereport(ERROR,
 697                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 698                                  errmsg("cannot extract field from a non-object")));
 699
 700         /*
 701          * initialize array count for this nesting level Note: the lex_level seen
 702          * by array_start is one less than that seen by the elements of the array.
 703          */
 704         if (_state->search_type == JSON_SEARCH_PATH &&
 705                 lex_level < _state->npath)
 706                 _state->array_level_index[lex_level] = -1;
 707 }
 708
 709 static void
 710 get_array_element_start(void *state, bool isnull)
 711 {
 712         GetState   *_state = (GetState *) state;
 713         bool            get_next = false;
 714         int                     lex_level = _state->lex->lex_level;
 715
 716         if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY)
 717         {
 718                 /* single integer search */
 719                 _state->array_index++;
 720                 if (_state->array_index == _state->search_index)
 721                         get_next = true;
 722         }
 723         else if (_state->search_type == JSON_SEARCH_PATH &&
 724                          lex_level <= _state->npath &&
 725                          _state->pathok[lex_level - 1])
 726         {
 727                 /*
 728                  * path search, path so far is ok
 729                  *
 730                  * increment the array counter. no point doing this if we already know
 731                  * the path is bad.
 732                  *
 733                  * then check if we have a match.
 734                  */
 735
 736                 if (++_state->array_level_index[lex_level - 1] ==
 737                         _state->path_level_index[lex_level - 1])
 738                 {
 739                         if (lex_level == _state->npath)
 740                         {
 741                                 /* match and at end of path, so get value */
 742                                 get_next = true;
 743                         }
 744                         else
 745                         {
 746                                 /* not at end of path just mark path ok */
 747                                 _state->pathok[lex_level] = true;
 748                         }
 749                 }
 750
 751         }
 752
 753         /* same logic as for objects */
 754         if (get_next)
 755         {
 756                 if (_state->normalize_results &&
 757                         _state->lex->token_type == JSON_TOKEN_STRING)
 758                 {
 759                         _state->next_scalar = true;
 760                 }
 761                 else
 762                 {
 763                         _state->result_start = _state->lex->token_start;
 764                 }
 765         }
 766 }
 767
 768 static void
 769 get_array_element_end(void *state, bool isnull)
 770 {
 771         GetState   *_state = (GetState *) state;
 772         bool            get_last = false;
 773         int                     lex_level = _state->lex->lex_level;
 774
 775         /* same logic as in get_object_end, modified for arrays */
 776
 777         if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY &&
 778                 _state->array_index == _state->search_index)
 779         {
 780                 get_last = true;
 781         }
 782         else if (_state->search_type == JSON_SEARCH_PATH &&
 783                          lex_level <= _state->npath &&
 784                          _state->pathok[lex_level - 1] &&
 785                          _state->array_level_index[lex_level - 1] ==
 786                          _state->path_level_index[lex_level - 1])
 787         {
 788                 /* done with this element so reset pathok */
 789                 if (lex_level < _state->npath)
 790                         _state->pathok[lex_level] = false;
 791
 792                 if (lex_level == _state->npath)
 793                         get_last = true;
 794         }
 795         if (get_last && _state->result_start != NULL)
 796         {
 797                 int                     len = _state->lex->prev_token_terminator - _state->result_start;
 798
 799                 if (isnull && _state->normalize_results)
 800                         _state->tresult = (text *) NULL;
 801                 else
 802                         _state->tresult = cstring_to_text_with_len(_state->result_start, len);
 803         }
 804 }
 805
 806 static void
 807 get_scalar(void *state, char *token, JsonTokenType tokentype)
 808 {
 809         GetState   *_state = (GetState *) state;
 810
 811         if (_state->lex->lex_level == 0 && _state->search_type != JSON_SEARCH_PATH)
 812                 ereport(ERROR,
 813                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 814                                  errmsg("cannot extract element from a scalar")));
 815         if (_state->next_scalar)
 816         {
 817                 /* a de-escaped text value is wanted, so supply it */
 818                 _state->tresult = cstring_to_text(token);
 819                 /* make sure the next call to get_scalar doesn't overwrite it */
 820                 _state->next_scalar = false;
 821         }
 822
 823 }
 824
 825 /*
 826  * SQL function json_array_length(json) -> int
 827  */
 828 Datum
 829 json_array_length(PG_FUNCTION_ARGS)
 830 {
 831         text       *json = PG_GETARG_TEXT_P(0);
 832
 833         AlenState  *state;
 834         JsonLexContext *lex = makeJsonLexContext(json, false);
 835         JsonSemAction *sem;
 836
 837         state = palloc0(sizeof(AlenState));
 838         sem = palloc0(sizeof(JsonSemAction));
 839
 840         /* palloc0 does this for us */
 841 #if 0
 842         state->count = 0;
 843 #endif
 844         state->lex = lex;
 845
 846         sem->semstate = (void *) state;
 847         sem->object_start = alen_object_start;
 848         sem->scalar = alen_scalar;
 849         sem->array_element_start = alen_array_element_start;
 850
 851         pg_parse_json(lex, sem);
 852
 853         PG_RETURN_INT32(state->count);
 854 }
 855
/*
 * These next two checks ensure that the json is an array (since it can't be
 * a scalar or an object).
 */
 860
 861 static void
 862 alen_object_start(void *state)
 863 {
 864         AlenState  *_state = (AlenState *) state;
 865
 866         /* json structure check */
 867         if (_state->lex->lex_level == 0)
 868                 ereport(ERROR,
 869                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 870                                  errmsg("cannot get array length of a non-array")));
 871 }
 872
 873 static void
 874 alen_scalar(void *state, char *token, JsonTokenType tokentype)
 875 {
 876         AlenState  *_state = (AlenState *) state;
 877
 878         /* json structure check */
 879         if (_state->lex->lex_level == 0)
 880                 ereport(ERROR,
 881                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 882                                  errmsg("cannot get array length of a scalar")));
 883 }
 884
 885 static void
 886 alen_array_element_start(void *state, bool isnull)
 887 {
 888         AlenState  *_state = (AlenState *) state;
 889
 890         /* just count up all the level 1 elements */
 891         if (_state->lex->lex_level == 1)
 892                 _state->count++;
 893 }
 894
 895 /*
 896  * SQL function json_each and json_each_text
 897  *
 898  * decompose a json object into key value pairs.
 899  *
 900  * Unlike json_object_keys() these SRFs operate in materialize mode,
 901  * stashing results into a Tuplestore object as they go.
 902  * The construction of tuples is done using a temporary memory context
 903  * that is cleared out after each tuple is built.
 904  */
 905 Datum
 906 json_each(PG_FUNCTION_ARGS)
 907 {
 908         return each_worker(fcinfo, false);
 909 }
 910
 911 Datum
 912 json_each_text(PG_FUNCTION_ARGS)
 913 {
 914         return each_worker(fcinfo, true);
 915 }
 916
 917 static inline Datum
 918 each_worker(PG_FUNCTION_ARGS, bool as_text)
 919 {
 920         text       *json = PG_GETARG_TEXT_P(0);
 921         JsonLexContext *lex = makeJsonLexContext(json, true);
 922         JsonSemAction *sem;
 923         ReturnSetInfo *rsi;
 924         MemoryContext old_cxt;
 925         TupleDesc       tupdesc;
 926         EachState  *state;
 927
 928         state = palloc0(sizeof(EachState));
 929         sem = palloc0(sizeof(JsonSemAction));
 930
 931         rsi = (ReturnSetInfo *) fcinfo->resultinfo;
 932
 933         if (!rsi || !IsA(rsi, ReturnSetInfo) ||
 934                 (rsi->allowedModes & SFRM_Materialize) == 0 ||
 935                 rsi->expectedDesc == NULL)
 936                 ereport(ERROR,
 937                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 938                                  errmsg("set-valued function called in context that "
 939                                                 "cannot accept a set")));
 940
 941
 942         rsi->returnMode = SFRM_Materialize;
 943
 944         (void) get_call_result_type(fcinfo, NULL, &tupdesc);
 945
 946         /* make these in a sufficiently long-lived memory context */
 947         old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
 948
 949         state->ret_tdesc = CreateTupleDescCopy(tupdesc);
 950         BlessTupleDesc(state->ret_tdesc);
 951         state->tuple_store =
 952                 tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
 953                                                           false, work_mem);
 954
 955         MemoryContextSwitchTo(old_cxt);
 956
 957         sem->semstate = (void *) state;
 958         sem->array_start = each_array_start;
 959         sem->scalar = each_scalar;
 960         sem->object_field_start = each_object_field_start;
 961         sem->object_field_end = each_object_field_end;
 962
 963         state->normalize_results = as_text;
 964         state->next_scalar = false;
 965
 966         state->lex = lex;
 967         state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
 968                                                                                    "json_each temporary cxt",
 969                                                                                    ALLOCSET_DEFAULT_MINSIZE,
 970                                                                                    ALLOCSET_DEFAULT_INITSIZE,
 971                                                                                    ALLOCSET_DEFAULT_MAXSIZE);
 972
 973         pg_parse_json(lex, sem);
 974
 975         rsi->setResult = state->tuple_store;
 976         rsi->setDesc = state->ret_tdesc;
 977
 978         PG_RETURN_NULL();
 979 }
 980
 981
 982 static void
 983 each_object_field_start(void *state, char *fname, bool isnull)
 984 {
 985         EachState  *_state = (EachState *) state;
 986
 987         /* save a pointer to where the value starts */
 988         if (_state->lex->lex_level == 1)
 989         {
 990                 /*
 991                  * next_scalar will be reset in the object_field_end handler, and
 992                  * since we know the value is a scalar there is no danger of it being
 993                  * on while recursing down the tree.
 994                  */
 995                 if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
 996                         _state->next_scalar = true;
 997                 else
 998                         _state->result_start = _state->lex->token_start;
 999         }
1000 }
1001
1002 static void
1003 each_object_field_end(void *state, char *fname, bool isnull)
1004 {
1005         EachState  *_state = (EachState *) state;
1006         MemoryContext old_cxt;
1007         int                     len;
1008         text       *val;
1009         HeapTuple       tuple;
1010         Datum           values[2];
1011         bool            nulls[2] = {false, false};
1012
1013         /* skip over nested objects */
1014         if (_state->lex->lex_level != 1)
1015                 return;
1016
1017         /* use the tmp context so we can clean up after each tuple is done */
1018         old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
1019
1020         values[0] = CStringGetTextDatum(fname);
1021
1022         if (isnull && _state->normalize_results)
1023         {
1024                 nulls[1] = true;
1025                 values[1] = (Datum) NULL;
1026         }
1027         else if (_state->next_scalar)
1028         {
1029                 values[1] = CStringGetTextDatum(_state->normalized_scalar);
1030                 _state->next_scalar = false;
1031         }
1032         else
1033         {
1034                 len = _state->lex->prev_token_terminator - _state->result_start;
1035                 val = cstring_to_text_with_len(_state->result_start, len);
1036                 values[1] = PointerGetDatum(val);
1037         }
1038
1039
1040         tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
1041
1042         tuplestore_puttuple(_state->tuple_store, tuple);
1043
1044         /* clean up and switch back */
1045         MemoryContextSwitchTo(old_cxt);
1046         MemoryContextReset(_state->tmp_cxt);
1047 }
1048
1049 static void
1050 each_array_start(void *state)
1051 {
1052         EachState  *_state = (EachState *) state;
1053
1054         /* json structure check */
1055         if (_state->lex->lex_level == 0)
1056                 ereport(ERROR,
1057                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1058                                  errmsg("cannot deconstruct an array as an object")));
1059 }
1060
1061 static void
1062 each_scalar(void *state, char *token, JsonTokenType tokentype)
1063 {
1064         EachState  *_state = (EachState *) state;
1065
1066         /* json structure check */
1067         if (_state->lex->lex_level == 0)
1068                 ereport(ERROR,
1069                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1070                                  errmsg("cannot deconstruct a scalar")));
1071
1072         /* supply de-escaped value if required */
1073         if (_state->next_scalar)
1074                 _state->normalized_scalar = token;
1075 }
1076
1077 /*
1078  * SQL functions json_array_elements and json_array_elements_text
1079  *
1080  * get the elements from a json array
1081  *
1082  * a lot of this processing is similar to the json_each* functions
1083  */
1084 Datum
1085 json_array_elements(PG_FUNCTION_ARGS)
1086 {
1087         return elements_worker(fcinfo, false);
1088 }
1089
1090 Datum
1091 json_array_elements_text(PG_FUNCTION_ARGS)
1092 {
1093         return elements_worker(fcinfo, true);
1094 }
1095
1096 static inline Datum
1097 elements_worker(PG_FUNCTION_ARGS, bool as_text)
1098 {
1099         text       *json = PG_GETARG_TEXT_P(0);
1100
1101         /* elements only needs escaped strings when as_text */
1102         JsonLexContext *lex = makeJsonLexContext(json, as_text);
1103         JsonSemAction *sem;
1104         ReturnSetInfo *rsi;
1105         MemoryContext old_cxt;
1106         TupleDesc       tupdesc;
1107         ElementsState *state;
1108
1109         state = palloc0(sizeof(ElementsState));
1110         sem = palloc0(sizeof(JsonSemAction));
1111
1112         rsi = (ReturnSetInfo *) fcinfo->resultinfo;
1113
1114         if (!rsi || !IsA(rsi, ReturnSetInfo) ||
1115                 (rsi->allowedModes & SFRM_Materialize) == 0 ||
1116                 rsi->expectedDesc == NULL)
1117                 ereport(ERROR,
1118                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1119                                  errmsg("set-valued function called in context that "
1120                                                 "cannot accept a set")));
1121
1122
1123         rsi->returnMode = SFRM_Materialize;
1124
1125         /* it's a simple type, so don't use get_call_result_type() */
1126         tupdesc = rsi->expectedDesc;
1127
1128         /* make these in a sufficiently long-lived memory context */
1129         old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
1130
1131         state->ret_tdesc = CreateTupleDescCopy(tupdesc);
1132         BlessTupleDesc(state->ret_tdesc);
1133         state->tuple_store =
1134                 tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
1135                                                           false, work_mem);
1136
1137         MemoryContextSwitchTo(old_cxt);
1138
1139         sem->semstate = (void *) state;
1140         sem->object_start = elements_object_start;
1141         sem->scalar = elements_scalar;
1142         sem->array_element_start = elements_array_element_start;
1143         sem->array_element_end = elements_array_element_end;
1144
1145         state->normalize_results = as_text;
1146         state->next_scalar = false;
1147
1148         state->lex = lex;
1149         state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
1150                                                                                  "json_array_elements temporary cxt",
1151                                                                                    ALLOCSET_DEFAULT_MINSIZE,
1152                                                                                    ALLOCSET_DEFAULT_INITSIZE,
1153                                                                                    ALLOCSET_DEFAULT_MAXSIZE);
1154
1155         pg_parse_json(lex, sem);
1156
1157         rsi->setResult = state->tuple_store;
1158         rsi->setDesc = state->ret_tdesc;
1159
1160         PG_RETURN_NULL();
1161 }
1162
1163 static void
1164 elements_array_element_start(void *state, bool isnull)
1165 {
1166         ElementsState *_state = (ElementsState *) state;
1167
1168         /* save a pointer to where the value starts */
1169         if (_state->lex->lex_level == 1)
1170         {
1171                 /*
1172                  * next_scalar will be reset in the array_element_end handler, and
1173                  * since we know the value is a scalar there is no danger of it being
1174                  * on while recursing down the tree.
1175                  */
1176                 if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
1177                         _state->next_scalar = true;
1178                 else
1179                         _state->result_start = _state->lex->token_start;
1180         }
1181 }
1182
1183 static void
1184 elements_array_element_end(void *state, bool isnull)
1185 {
1186         ElementsState *_state = (ElementsState *) state;
1187         MemoryContext old_cxt;
1188         int                     len;
1189         text       *val;
1190         HeapTuple       tuple;
1191         Datum           values[1];
1192         bool nulls[1] = {false};
1193
1194         /* skip over nested objects */
1195         if (_state->lex->lex_level != 1)
1196                 return;
1197
1198         /* use the tmp context so we can clean up after each tuple is done */
1199         old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
1200
1201         if (isnull && _state->normalize_results)
1202         {
1203                 nulls[0] = true;
1204                 values[0] = (Datum) NULL;
1205         }
1206         else if (_state->next_scalar)
1207         {
1208                 values[0] = CStringGetTextDatum(_state->normalized_scalar);
1209                 _state->next_scalar = false;
1210         }
1211         else
1212         {
1213                 len = _state->lex->prev_token_terminator - _state->result_start;
1214                 val = cstring_to_text_with_len(_state->result_start, len);
1215                 values[0] = PointerGetDatum(val);
1216         }
1217
1218
1219         tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
1220
1221         tuplestore_puttuple(_state->tuple_store, tuple);
1222
1223         /* clean up and switch back */
1224         MemoryContextSwitchTo(old_cxt);
1225         MemoryContextReset(_state->tmp_cxt);
1226 }
1227
1228 static void
1229 elements_object_start(void *state)
1230 {
1231         ElementsState *_state = (ElementsState *) state;
1232
1233         /* json structure check */
1234         if (_state->lex->lex_level == 0)
1235                 ereport(ERROR,
1236                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1237                                  errmsg("cannot call json_array_elements on a non-array")));
1238 }
1239
1240 static void
1241 elements_scalar(void *state, char *token, JsonTokenType tokentype)
1242 {
1243         ElementsState *_state = (ElementsState *) state;
1244
1245         /* json structure check */
1246         if (_state->lex->lex_level == 0)
1247                 ereport(ERROR,
1248                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1249                                  errmsg("cannot call json_array_elements on a scalar")));
1250
1251         /* supply de-escaped value if required */
1252         if (_state->next_scalar)
1253                 _state->normalized_scalar = token;
1254 }
1255
1256 /*
1257  * SQL function json_populate_record
1258  *
1259  * set fields in a record from the argument json
1260  *
1261  * Code adapted shamelessly from hstore's populate_record
1262  * which is in turn partly adapted from record_out.
1263  *
1264  * The json is decomposed into a hash table, in which each
1265  * field in the record is then looked up by name.
1266  */
1267 Datum
1268 json_populate_record(PG_FUNCTION_ARGS)
1269 {
1270         return populate_record_worker(fcinfo, true);
1271 }
1272
1273 Datum
1274 json_to_record(PG_FUNCTION_ARGS)
1275 {
1276         return populate_record_worker(fcinfo, false);
1277 }
1278
1279 static inline Datum
1280 populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg)
1281 {
1282         text       *json;
1283         bool            use_json_as_text;
1284         HTAB       *json_hash;
1285         HeapTupleHeader rec = NULL;
1286         Oid                     tupType = InvalidOid;
1287         int32           tupTypmod = -1;
1288         TupleDesc       tupdesc;
1289         HeapTupleData tuple;
1290         HeapTuple       rettuple;
1291         RecordIOData *my_extra;
1292         int                     ncolumns;
1293         int                     i;
1294         Datum      *values;
1295         bool       *nulls;
1296         char            fname[NAMEDATALEN];
1297         JsonHashEntry *hashentry;
1298
1299         if (have_record_arg)
1300         {
1301                 Oid                     argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
1302
1303                 use_json_as_text = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2);
1304
1305                 if (!type_is_rowtype(argtype))
1306                         ereport(ERROR,
1307                                         (errcode(ERRCODE_DATATYPE_MISMATCH),
1308                                          errmsg("first argument of json_populate_record must be a row type")));
1309
1310                 if (PG_ARGISNULL(0))
1311                 {
1312                         if (PG_ARGISNULL(1))
1313                                 PG_RETURN_NULL();
1314
1315                         /*
1316                          * have no tuple to look at, so the only source of type info is
1317                          * the argtype. The lookup_rowtype_tupdesc call below will error
1318                          * out if we don't have a known composite type oid here.
1319                          */
1320                         tupType = argtype;
1321                         tupTypmod = -1;
1322                 }
1323                 else
1324                 {
1325                         rec = PG_GETARG_HEAPTUPLEHEADER(0);
1326
1327                         if (PG_ARGISNULL(1))
1328                                 PG_RETURN_POINTER(rec);
1329
1330                         /* Extract type info from the tuple itself */
1331                         tupType = HeapTupleHeaderGetTypeId(rec);
1332                         tupTypmod = HeapTupleHeaderGetTypMod(rec);
1333                 }
1334
1335                 json = PG_GETARG_TEXT_P(1);
1336         }
1337         else
1338         {
1339                 /* json_to_record case */
1340
1341                 use_json_as_text = PG_ARGISNULL(1) ? false : PG_GETARG_BOOL(1);
1342
1343                 if (PG_ARGISNULL(0))
1344                         PG_RETURN_NULL();
1345
1346                 json = PG_GETARG_TEXT_P(0);
1347
1348                 get_call_result_type(fcinfo, NULL, &tupdesc);
1349         }
1350
1351         json_hash = get_json_object_as_hash(json, "json_populate_record",
1352                                                                                 use_json_as_text);
1353
1354         if (have_record_arg)
1355         {
1356                 /*
1357                  * if the input json is empty, we can only skip the rest if we were
1358                  * passed in a non-null record, since otherwise there may be issues
1359                  * with domain nulls.
1360                  */
1361                 if (hash_get_num_entries(json_hash) == 0 && rec)
1362                         PG_RETURN_POINTER(rec);
1363
1364
1365                 tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1366         }
1367
1368         ncolumns = tupdesc->natts;
1369
1370         if (rec)
1371         {
1372                 /* Build a temporary HeapTuple control structure */
1373                 tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
1374                 ItemPointerSetInvalid(&(tuple.t_self));
1375                 tuple.t_tableOid = InvalidOid;
1376                 tuple.t_data = rec;
1377         }
1378
1379         /*
1380          * We arrange to look up the needed I/O info just once per series of
1381          * calls, assuming the record type doesn't change underneath us.
1382          */
1383         my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
1384         if (my_extra == NULL ||
1385                 my_extra->ncolumns != ncolumns)
1386         {
1387                 fcinfo->flinfo->fn_extra =
1388                         MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
1389                                                            sizeof(RecordIOData) - sizeof(ColumnIOData)
1390                                                            + ncolumns * sizeof(ColumnIOData));
1391                 my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
1392                 my_extra->record_type = InvalidOid;
1393                 my_extra->record_typmod = 0;
1394         }
1395
1396         if (have_record_arg && (my_extra->record_type != tupType ||
1397                                                         my_extra->record_typmod != tupTypmod))
1398         {
1399                 MemSet(my_extra, 0,
1400                            sizeof(RecordIOData) - sizeof(ColumnIOData)
1401                            + ncolumns * sizeof(ColumnIOData));
1402                 my_extra->record_type = tupType;
1403                 my_extra->record_typmod = tupTypmod;
1404                 my_extra->ncolumns = ncolumns;
1405         }
1406
1407         values = (Datum *) palloc(ncolumns * sizeof(Datum));
1408         nulls = (bool *) palloc(ncolumns * sizeof(bool));
1409
1410         if (rec)
1411         {
1412                 /* Break down the tuple into fields */
1413                 heap_deform_tuple(&tuple, tupdesc, values, nulls);
1414         }
1415         else
1416         {
1417                 for (i = 0; i < ncolumns; ++i)
1418                 {
1419                         values[i] = (Datum) 0;
1420                         nulls[i] = true;
1421                 }
1422         }
1423
1424         for (i = 0; i < ncolumns; ++i)
1425         {
1426                 ColumnIOData *column_info = &my_extra->columns[i];
1427                 Oid                     column_type = tupdesc->attrs[i]->atttypid;
1428                 char       *value;
1429
1430                 /* Ignore dropped columns in datatype */
1431                 if (tupdesc->attrs[i]->attisdropped)
1432                 {
1433                         nulls[i] = true;
1434                         continue;
1435                 }
1436
1437                 memset(fname, 0, NAMEDATALEN);
1438                 strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN);
1439                 hashentry = hash_search(json_hash, fname, HASH_FIND, NULL);
1440
1441                 /*
1442                  * we can't just skip here if the key wasn't found since we might have
1443                  * a domain to deal with. If we were passed in a non-null record
1444                  * datum, we assume that the existing values are valid (if they're
1445                  * not, then it's not our fault), but if we were passed in a null,
1446                  * then every field which we don't populate needs to be run through
1447                  * the input function just in case it's a domain type.
1448                  */
1449                 if (hashentry == NULL && rec)
1450                         continue;
1451
1452                 /*
1453                  * Prepare to convert the column value from text
1454                  */
1455                 if (column_info->column_type != column_type)
1456                 {
1457                         getTypeInputInfo(column_type,
1458                                                          &column_info->typiofunc,
1459                                                          &column_info->typioparam);
1460                         fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
1461                                                   fcinfo->flinfo->fn_mcxt);
1462                         column_info->column_type = column_type;
1463                 }
1464                 if (hashentry == NULL || hashentry->isnull)
1465                 {
1466                         /*
1467                          * need InputFunctionCall to happen even for nulls, so that domain
1468                          * checks are done
1469                          */
1470                         values[i] = InputFunctionCall(&column_info->proc, NULL,
1471                                                                                   column_info->typioparam,
1472                                                                                   tupdesc->attrs[i]->atttypmod);
1473                         nulls[i] = true;
1474                 }
1475                 else
1476                 {
1477                         value = hashentry->val;
1478
1479                         values[i] = InputFunctionCall(&column_info->proc, value,
1480                                                                                   column_info->typioparam,
1481                                                                                   tupdesc->attrs[i]->atttypmod);
1482                         nulls[i] = false;
1483                 }
1484         }
1485
1486         rettuple = heap_form_tuple(tupdesc, values, nulls);
1487
1488         ReleaseTupleDesc(tupdesc);
1489
1490         PG_RETURN_DATUM(HeapTupleGetDatum(rettuple));
1491 }
1492
1493 /*
1494  * get_json_object_as_hash
1495  *
1496  * decompose a json object into a hash table.
1497  *
1498  * Currently doesn't allow anything but a flat object. Should this
1499  * change?
1500  *
1501  * funcname argument allows caller to pass in its name for use in
1502  * error messages.
1503  */
1504 static HTAB *
1505 get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text)
1506 {
1507         HASHCTL         ctl;
1508         HTAB       *tab;
1509         JHashState *state;
1510         JsonLexContext *lex = makeJsonLexContext(json, true);
1511         JsonSemAction *sem;
1512
1513         memset(&ctl, 0, sizeof(ctl));
1514         ctl.keysize = NAMEDATALEN;
1515         ctl.entrysize = sizeof(JsonHashEntry);
1516         ctl.hcxt = CurrentMemoryContext;
1517         tab = hash_create("json object hashtable",
1518                                           100,
1519                                           &ctl,
1520                                           HASH_ELEM | HASH_CONTEXT);
1521
1522         state = palloc0(sizeof(JHashState));
1523         sem = palloc0(sizeof(JsonSemAction));
1524
1525         state->function_name = funcname;
1526         state->hash = tab;
1527         state->lex = lex;
1528         state->use_json_as_text = use_json_as_text;
1529
1530         sem->semstate = (void *) state;
1531         sem->array_start = hash_array_start;
1532         sem->scalar = hash_scalar;
1533         sem->object_field_start = hash_object_field_start;
1534         sem->object_field_end = hash_object_field_end;
1535
1536         pg_parse_json(lex, sem);
1537
1538         return tab;
1539 }
1540
1541 static void
1542 hash_object_field_start(void *state, char *fname, bool isnull)
1543 {
1544         JHashState *_state = (JHashState *) state;
1545
1546         if (_state->lex->lex_level > 1)
1547                 return;
1548
1549         if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
1550                 _state->lex->token_type == JSON_TOKEN_OBJECT_START)
1551         {
1552                 if (!_state->use_json_as_text)
1553                         ereport(ERROR,
1554                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1555                                          errmsg("cannot call %s on a nested object",
1556                                                         _state->function_name)));
1557                 _state->save_json_start = _state->lex->token_start;
1558         }
1559         else
1560         {
1561                 /* must be a scalar */
1562                 _state->save_json_start = NULL;
1563         }
1564 }
1565
1566 static void
1567 hash_object_field_end(void *state, char *fname, bool isnull)
1568 {
1569         JHashState *_state = (JHashState *) state;
1570         JsonHashEntry *hashentry;
1571         bool            found;
1572         char            name[NAMEDATALEN];
1573
1574         /*
1575          * ignore field names >= NAMEDATALEN - they can't match a record field
1576          * ignore nested fields.
1577          */
1578         if (_state->lex->lex_level > 2 || strlen(fname) >= NAMEDATALEN)
1579                 return;
1580
1581         memset(name, 0, NAMEDATALEN);
1582         strncpy(name, fname, NAMEDATALEN);
1583
1584         hashentry = hash_search(_state->hash, name, HASH_ENTER, &found);
1585
1586         /*
1587          * found being true indicates a duplicate. We don't do anything about
1588          * that, a later field with the same name overrides the earlier field.
1589          */
1590
1591         hashentry->isnull = isnull;
1592         if (_state->save_json_start != NULL)
1593         {
1594                 int                     len = _state->lex->prev_token_terminator - _state->save_json_start;
1595                 char       *val = palloc((len + 1) * sizeof(char));
1596
1597                 memcpy(val, _state->save_json_start, len);
1598                 val[len] = '\0';
1599                 hashentry->val = val;
1600         }
1601         else
1602         {
1603                 /* must have had a scalar instead */
1604                 hashentry->val = _state->saved_scalar;
1605         }
1606 }
1607
1608 static void
1609 hash_array_start(void *state)
1610 {
1611         JHashState *_state = (JHashState *) state;
1612
1613         if (_state->lex->lex_level == 0)
1614                 ereport(ERROR,
1615                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1616                            errmsg("cannot call %s on an array", _state->function_name)));
1617 }
1618
1619 static void
1620 hash_scalar(void *state, char *token, JsonTokenType tokentype)
1621 {
1622         JHashState *_state = (JHashState *) state;
1623
1624         if (_state->lex->lex_level == 0)
1625                 ereport(ERROR,
1626                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1627                            errmsg("cannot call %s on a scalar", _state->function_name)));
1628
1629         if (_state->lex->lex_level == 1)
1630                 _state->saved_scalar = token;
1631 }
1632
1633
1634 /*
1635  * SQL function json_populate_recordset
1636  *
1637  * set fields in a set of records from the argument json,
1638  * which must be an array of objects.
1639  *
1640  * similar to json_populate_record, but the tuple-building code
1641  * is pushed down into the semantic action handlers so it's done
1642  * per object in the array.
1643  */
1644 Datum
1645 json_populate_recordset(PG_FUNCTION_ARGS)
1646 {
1647         return populate_recordset_worker(fcinfo, true);
1648 }
1649
1650 Datum
1651 json_to_recordset(PG_FUNCTION_ARGS)
1652 {
1653         return populate_recordset_worker(fcinfo, false);
1654 }
1655
1656 /*
1657  * common worker for json_populate_recordset() and json_to_recordset()
1658  */
1659 static inline Datum
1660 populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg)
1661 {
1662         Oid                     argtype;
1663         text       *json;
1664         bool            use_json_as_text;
1665         ReturnSetInfo *rsi;
1666         MemoryContext old_cxt;
1667         Oid                     tupType;
1668         int32           tupTypmod;
1669         HeapTupleHeader rec;
1670         TupleDesc       tupdesc;
1671         RecordIOData *my_extra;
1672         int                     ncolumns;
1673         JsonLexContext *lex;
1674         JsonSemAction *sem;
1675         PopulateRecordsetState *state;
1676
1677         if (have_record_arg)
1678         {
1679                 argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
1680
1681                 use_json_as_text = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2);
1682
1683                 if (!type_is_rowtype(argtype))
1684                         ereport(ERROR,
1685                                         (errcode(ERRCODE_DATATYPE_MISMATCH),
1686                                          errmsg("first argument of json_populate_recordset must be a row type")));
1687         }
1688         else
1689         {
1690                 argtype = InvalidOid;
1691
1692                 use_json_as_text = PG_ARGISNULL(1) ? false : PG_GETARG_BOOL(1);
1693         }
1694
1695         rsi = (ReturnSetInfo *) fcinfo->resultinfo;
1696
1697         if (!rsi || !IsA(rsi, ReturnSetInfo) ||
1698                 (rsi->allowedModes & SFRM_Materialize) == 0 ||
1699                 rsi->expectedDesc == NULL)
1700                 ereport(ERROR,
1701                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1702                                  errmsg("set-valued function called in context that "
1703                                                 "cannot accept a set")));
1704
1705
1706         rsi->returnMode = SFRM_Materialize;
1707
1708         /*
1709          * get the tupdesc from the result set info - it must be a record type
1710          * because we already checked that arg1 is a record type.
1711          */
1712         (void) get_call_result_type(fcinfo, NULL, &tupdesc);
1713
1714         state = palloc0(sizeof(PopulateRecordsetState));
1715         sem = palloc0(sizeof(JsonSemAction));
1716
1717
1718         /* make these in a sufficiently long-lived memory context */
1719         old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
1720
1721         state->ret_tdesc = CreateTupleDescCopy(tupdesc);
1722         BlessTupleDesc(state->ret_tdesc);
1723         state->tuple_store =
1724                 tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
1725                                                           false, work_mem);
1726
1727         MemoryContextSwitchTo(old_cxt);
1728
1729         /* if the json is null send back an empty set */
1730         if (have_record_arg)
1731         {
1732                 if (PG_ARGISNULL(1))
1733                         PG_RETURN_NULL();
1734
1735                 json = PG_GETARG_TEXT_P(1);
1736
1737                 if (PG_ARGISNULL(0))
1738                         rec = NULL;
1739                 else
1740                         rec = PG_GETARG_HEAPTUPLEHEADER(0);
1741         }
1742         else
1743         {
1744                 if (PG_ARGISNULL(0))
1745                         PG_RETURN_NULL();
1746
1747                 json = PG_GETARG_TEXT_P(0);
1748
1749                 rec = NULL;
1750         }
1751
1752         tupType = tupdesc->tdtypeid;
1753         tupTypmod = tupdesc->tdtypmod;
1754         ncolumns = tupdesc->natts;
1755
1756         lex = makeJsonLexContext(json, true);
1757
1758         /*
1759          * We arrange to look up the needed I/O info just once per series of
1760          * calls, assuming the record type doesn't change underneath us.
1761          */
1762         my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
1763         if (my_extra == NULL ||
1764                 my_extra->ncolumns != ncolumns)
1765         {
1766                 fcinfo->flinfo->fn_extra =
1767                         MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
1768                                                            sizeof(RecordIOData) - sizeof(ColumnIOData)
1769                                                            + ncolumns * sizeof(ColumnIOData));
1770                 my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
1771                 my_extra->record_type = InvalidOid;
1772                 my_extra->record_typmod = 0;
1773         }
1774
1775         if (my_extra->record_type != tupType ||
1776                 my_extra->record_typmod != tupTypmod)
1777         {
1778                 MemSet(my_extra, 0,
1779                            sizeof(RecordIOData) - sizeof(ColumnIOData)
1780                            + ncolumns * sizeof(ColumnIOData));
1781                 my_extra->record_type = tupType;
1782                 my_extra->record_typmod = tupTypmod;
1783                 my_extra->ncolumns = ncolumns;
1784         }
1785
1786         sem->semstate = (void *) state;
1787         sem->array_start = populate_recordset_array_start;
1788         sem->array_element_start = populate_recordset_array_element_start;
1789         sem->scalar = populate_recordset_scalar;
1790         sem->object_field_start = populate_recordset_object_field_start;
1791         sem->object_field_end = populate_recordset_object_field_end;
1792         sem->object_start = populate_recordset_object_start;
1793         sem->object_end = populate_recordset_object_end;
1794
1795         state->lex = lex;
1796
1797         state->my_extra = my_extra;
1798         state->rec = rec;
1799         state->use_json_as_text = use_json_as_text;
1800         state->fn_mcxt = fcinfo->flinfo->fn_mcxt;
1801
1802         pg_parse_json(lex, sem);
1803
1804         rsi->setResult = state->tuple_store;
1805         rsi->setDesc = state->ret_tdesc;
1806
1807         PG_RETURN_NULL();
1808
1809 }
1810
1811 static void
1812 populate_recordset_object_start(void *state)
1813 {
1814         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1815         int                     lex_level = _state->lex->lex_level;
1816         HASHCTL         ctl;
1817
1818         if (lex_level == 0)
1819                 ereport(ERROR,
1820                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1821                                  errmsg("cannot call json_populate_recordset on an object")));
1822         else if (lex_level > 1 && !_state->use_json_as_text)
1823                 ereport(ERROR,
1824                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1825                  errmsg("cannot call json_populate_recordset with nested objects")));
1826
1827         /* set up a new hash for this entry */
1828         memset(&ctl, 0, sizeof(ctl));
1829         ctl.keysize = NAMEDATALEN;
1830         ctl.entrysize = sizeof(JsonHashEntry);
1831         ctl.hcxt = CurrentMemoryContext;
1832         _state->json_hash = hash_create("json object hashtable",
1833                                                                         100,
1834                                                                         &ctl,
1835                                                                         HASH_ELEM | HASH_CONTEXT);
1836 }
1837
1838 static void
1839 populate_recordset_object_end(void *state)
1840 {
1841         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1842         HTAB       *json_hash = _state->json_hash;
1843         Datum      *values;
1844         bool       *nulls;
1845         char            fname[NAMEDATALEN];
1846         int                     i;
1847         RecordIOData *my_extra = _state->my_extra;
1848         int                     ncolumns = my_extra->ncolumns;
1849         TupleDesc       tupdesc = _state->ret_tdesc;
1850         JsonHashEntry *hashentry;
1851         HeapTupleHeader rec = _state->rec;
1852         HeapTuple       rettuple;
1853
1854         if (_state->lex->lex_level > 1)
1855                 return;
1856
1857         values = (Datum *) palloc(ncolumns * sizeof(Datum));
1858         nulls = (bool *) palloc(ncolumns * sizeof(bool));
1859
1860         if (_state->rec)
1861         {
1862                 HeapTupleData tuple;
1863
1864                 /* Build a temporary HeapTuple control structure */
1865                 tuple.t_len = HeapTupleHeaderGetDatumLength(_state->rec);
1866                 ItemPointerSetInvalid(&(tuple.t_self));
1867                 tuple.t_tableOid = InvalidOid;
1868                 tuple.t_data = _state->rec;
1869
1870                 /* Break down the tuple into fields */
1871                 heap_deform_tuple(&tuple, tupdesc, values, nulls);
1872         }
1873         else
1874         {
1875                 for (i = 0; i < ncolumns; ++i)
1876                 {
1877                         values[i] = (Datum) 0;
1878                         nulls[i] = true;
1879                 }
1880         }
1881
1882         for (i = 0; i < ncolumns; ++i)
1883         {
1884                 ColumnIOData *column_info = &my_extra->columns[i];
1885                 Oid                     column_type = tupdesc->attrs[i]->atttypid;
1886                 char       *value;
1887
1888                 /* Ignore dropped columns in datatype */
1889                 if (tupdesc->attrs[i]->attisdropped)
1890                 {
1891                         nulls[i] = true;
1892                         continue;
1893                 }
1894
1895                 memset(fname, 0, NAMEDATALEN);
1896                 strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN);
1897                 hashentry = hash_search(json_hash, fname, HASH_FIND, NULL);
1898
1899                 /*
1900                  * we can't just skip here if the key wasn't found since we might have
1901                  * a domain to deal with. If we were passed in a non-null record
1902                  * datum, we assume that the existing values are valid (if they're
1903                  * not, then it's not our fault), but if we were passed in a null,
1904                  * then every field which we don't populate needs to be run through
1905                  * the input function just in case it's a domain type.
1906                  */
1907                 if (hashentry == NULL && rec)
1908                         continue;
1909
1910                 /*
1911                  * Prepare to convert the column value from text
1912                  */
1913                 if (column_info->column_type != column_type)
1914                 {
1915                         getTypeInputInfo(column_type,
1916                                                          &column_info->typiofunc,
1917                                                          &column_info->typioparam);
1918                         fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
1919                                                   _state->fn_mcxt);
1920                         column_info->column_type = column_type;
1921                 }
1922                 if (hashentry == NULL || hashentry->isnull)
1923                 {
1924                         /*
1925                          * need InputFunctionCall to happen even for nulls, so that domain
1926                          * checks are done
1927                          */
1928                         values[i] = InputFunctionCall(&column_info->proc, NULL,
1929                                                                                   column_info->typioparam,
1930                                                                                   tupdesc->attrs[i]->atttypmod);
1931                         nulls[i] = true;
1932                 }
1933                 else
1934                 {
1935                         value = hashentry->val;
1936
1937                         values[i] = InputFunctionCall(&column_info->proc, value,
1938                                                                                   column_info->typioparam,
1939                                                                                   tupdesc->attrs[i]->atttypmod);
1940                         nulls[i] = false;
1941                 }
1942         }
1943
1944         rettuple = heap_form_tuple(tupdesc, values, nulls);
1945
1946         tuplestore_puttuple(_state->tuple_store, rettuple);
1947
1948         hash_destroy(json_hash);
1949 }
1950
1951 static void
1952 populate_recordset_array_element_start(void *state, bool isnull)
1953 {
1954         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1955
1956         if (_state->lex->lex_level == 1 &&
1957                 _state->lex->token_type != JSON_TOKEN_OBJECT_START)
1958                 ereport(ERROR,
1959                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1960                 errmsg("must call json_populate_recordset on an array of objects")));
1961 }
1962
1963 static void
1964 populate_recordset_array_start(void *state)
1965 {
1966         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1967
1968         if (_state->lex->lex_level != 0 && !_state->use_json_as_text)
1969                 ereport(ERROR,
1970                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1971                   errmsg("cannot call json_populate_recordset with nested arrays")));
1972 }
1973
1974 static void
1975 populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype)
1976 {
1977         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1978
1979         if (_state->lex->lex_level == 0)
1980                 ereport(ERROR,
1981                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1982                                  errmsg("cannot call json_populate_recordset on a scalar")));
1983
1984         if (_state->lex->lex_level == 2)
1985                 _state->saved_scalar = token;
1986 }
1987
1988 static void
1989 populate_recordset_object_field_start(void *state, char *fname, bool isnull)
1990 {
1991         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1992
1993         if (_state->lex->lex_level > 2)
1994                 return;
1995
1996         if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
1997                 _state->lex->token_type == JSON_TOKEN_OBJECT_START)
1998         {
1999                 if (!_state->use_json_as_text)
2000                         ereport(ERROR,
2001                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2002                                          errmsg("cannot call json_populate_recordset on a nested object")));
2003                 _state->save_json_start = _state->lex->token_start;
2004         }
2005         else
2006         {
2007                 _state->save_json_start = NULL;
2008         }
2009 }
2010
2011 static void
2012 populate_recordset_object_field_end(void *state, char *fname, bool isnull)
2013 {
2014         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
2015         JsonHashEntry *hashentry;
2016         bool            found;
2017         char            name[NAMEDATALEN];
2018
2019         /*
2020          * ignore field names >= NAMEDATALEN - they can't match a record field
2021          * ignore nested fields.
2022          */
2023         if (_state->lex->lex_level > 2 || strlen(fname) >= NAMEDATALEN)
2024                 return;
2025
2026         memset(name, 0, NAMEDATALEN);
2027         strncpy(name, fname, NAMEDATALEN);
2028
2029         hashentry = hash_search(_state->json_hash, name, HASH_ENTER, &found);
2030
2031         /*
2032          * found being true indicates a duplicate. We don't do anything about
2033          * that, a later field with the same name overrides the earlier field.
2034          */
2035
2036         hashentry->isnull = isnull;
2037         if (_state->save_json_start != NULL)
2038         {
2039                 int                     len = _state->lex->prev_token_terminator - _state->save_json_start;
2040                 char       *val = palloc((len + 1) * sizeof(char));
2041
2042                 memcpy(val, _state->save_json_start, len);
2043                 val[len] = '\0';
2044                 hashentry->val = val;
2045         }
2046         else
2047         {
2048                 /* must have had a scalar instead */
2049                 hashentry->val = _state->saved_scalar;
2050         }
2051 }