]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/jsonfuncs.c
Silence compiler warnings about possibly unset variables.
[postgresql] / src / backend / utils / adt / jsonfuncs.c
1 /*-------------------------------------------------------------------------
2  *
3  * jsonfuncs.c
4  *              Functions to process JSON data type.
5  *
6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/jsonfuncs.c
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include <limits.h>
18
19 #include "fmgr.h"
20 #include "funcapi.h"
21 #include "miscadmin.h"
22 #include "access/htup_details.h"
23 #include "catalog/pg_type.h"
24 #include "lib/stringinfo.h"
25 #include "mb/pg_wchar.h"
26 #include "utils/array.h"
27 #include "utils/builtins.h"
28 #include "utils/hsearch.h"
29 #include "utils/json.h"
30 #include "utils/jsonapi.h"
31 #include "utils/lsyscache.h"
32 #include "utils/memutils.h"
33 #include "utils/typcache.h"
34
35 /* semantic action functions for json_object_keys */
36 static void okeys_object_field_start(void *state, char *fname, bool isnull);
37 static void okeys_array_start(void *state);
38 static void okeys_scalar(void *state, char *token, JsonTokenType tokentype);
39
40 /* semantic action functions for json_get* functions */
41 static void get_object_start(void *state);
42 static void get_object_field_start(void *state, char *fname, bool isnull);
43 static void get_object_field_end(void *state, char *fname, bool isnull);
44 static void get_array_start(void *state);
45 static void get_array_element_start(void *state, bool isnull);
46 static void get_array_element_end(void *state, bool isnull);
47 static void get_scalar(void *state, char *token, JsonTokenType tokentype);
48
49 /* common worker functions for the json getter functions */
50 static inline Datum get_path_all(PG_FUNCTION_ARGS, bool as_text);
51 static inline text *get_worker(text *json, char *field, int elem_index,
52                    char **tpath, int *ipath, int npath,
53                    bool normalize_results);
54
55 /* semantic action functions for json_array_length */
56 static void alen_object_start(void *state);
57 static void alen_scalar(void *state, char *token, JsonTokenType tokentype);
58 static void alen_array_element_start(void *state, bool isnull);
59
60 /* common worker for json_each* functions */
61 static inline Datum each_worker(PG_FUNCTION_ARGS, bool as_text);
62
63 /* semantic action functions for json_each */
64 static void each_object_field_start(void *state, char *fname, bool isnull);
65 static void each_object_field_end(void *state, char *fname, bool isnull);
66 static void each_array_start(void *state);
67 static void each_scalar(void *state, char *token, JsonTokenType tokentype);
68
69 /* common worker for json_array_elements* functions */
70 static inline Datum elements_worker(PG_FUNCTION_ARGS, bool as_text);
71
72 /* semantic action functions for json_array_elements */
73 static void elements_object_start(void *state);
74 static void elements_array_element_start(void *state, bool isnull);
75 static void elements_array_element_end(void *state, bool isnull);
76 static void elements_scalar(void *state, char *token, JsonTokenType tokentype);
77
78 /* turn a json object into a hash table */
79 static HTAB *get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text);
80
81 /* common worker for populate_record and to_record */
82 static inline Datum populate_record_worker(PG_FUNCTION_ARGS,
83                                            bool have_record_arg);
84
85 /* semantic action functions for get_json_object_as_hash */
86 static void hash_object_field_start(void *state, char *fname, bool isnull);
87 static void hash_object_field_end(void *state, char *fname, bool isnull);
88 static void hash_array_start(void *state);
89 static void hash_scalar(void *state, char *token, JsonTokenType tokentype);
90
91 /* semantic action functions for populate_recordset */
92 static void populate_recordset_object_field_start(void *state, char *fname, bool isnull);
93 static void populate_recordset_object_field_end(void *state, char *fname, bool isnull);
94 static void populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype);
95 static void populate_recordset_object_start(void *state);
96 static void populate_recordset_object_end(void *state);
97 static void populate_recordset_array_start(void *state);
98 static void populate_recordset_array_element_start(void *state, bool isnull);
99
100 /* worker function for populate_recordset and to_recordset */
101 static inline Datum populate_recordset_worker(PG_FUNCTION_ARGS,
102                                                   bool have_record_arg);
103
104 /* search type classification for json_get* functions; chosen by get_worker from its arguments */
105 typedef enum
106 {
107         JSON_SEARCH_OBJECT = 1,     /* look up one field name in the top-level object */
108         JSON_SEARCH_ARRAY,          /* look up one element index in the top-level array */
109         JSON_SEARCH_PATH            /* follow an array of path elements through nested values */
110 } JsonSearch;
111
112 /* state for json_object_keys; kept in the SRF's user_fctx between calls */
113 typedef struct OkeysState
114 {
115         JsonLexContext *lex;        /* lexer; callbacks read lex_level from it */
116         char      **result;         /* array of copied top-level key names */
117         int                     result_size;    /* allocated slots in result */
118         int                     result_count;   /* slots of result currently filled */
119         int                     sent_count;     /* keys already returned to the caller */
120 } OkeysState;
121
122 /* state for json_get* functions; built by get_worker and updated by the get_* callbacks */
123 typedef struct GetState
124 {
125         JsonLexContext *lex;        /* lexer; callbacks read lex_level and token positions */
126         JsonSearch      search_type;        /* which kind of lookup is in progress */
127         int                     search_index;   /* wanted element index (JSON_SEARCH_ARRAY) */
128         int                     array_index;    /* index of the latest top-level element seen; starts at -1 */
129         char       *search_term;    /* wanted field name (JSON_SEARCH_OBJECT) */
130         char       *result_start;   /* start of the matched value's raw json text, or NULL */
131         text       *tresult;        /* the result; NULL while nothing has matched */
132         bool            result_is_null;     /* NOTE(review): not referenced in the visible code */
133         bool            normalize_results;  /* true for the "_text" variants */
134         bool            next_scalar;        /* asks get_scalar to store the next scalar token in tresult */
135         char      **path;           /* path elements as strings (JSON_SEARCH_PATH) */
136         int                     npath;          /* number of path elements */
137         char      **current_path;   /* allocated by get_worker; NOTE(review): otherwise unused in the visible code */
138         bool       *pathok;         /* pathok[i]: the path has matched down to nesting level i */
139         int                *array_level_index;      /* per-level count of array elements seen so far */
140         int                *path_level_index;       /* path elements as array indexes; -1 if not usable as one */
141 } GetState;
142
143 /* state for json_array_length */
144 typedef struct AlenState
145 {
146         JsonLexContext *lex;        /* lexer; callbacks read lex_level from it */
147         int                     count;          /* number of level-1 array elements seen so far */
148 } AlenState;
149
150 /* state for json_each (the callbacks that use it are not in the visible part of the file) */
151 typedef struct EachState
152 {
153         JsonLexContext *lex;        /* lexer context for the document being parsed */
154         Tuplestorestate *tuple_store;       /* presumably where result tuples are stashed -- confirm in each_worker */
155         TupleDesc       ret_tdesc;  /* presumably the descriptor of the returned rows -- confirm */
156         MemoryContext tmp_cxt;      /* presumably the per-tuple temporary context -- confirm */
157         char       *result_start;   /* NOTE(review): looks analogous to GetState.result_start -- confirm */
158         bool            normalize_results;  /* NOTE(review): looks like the as_text flag -- confirm */
159         bool            next_scalar;        /* NOTE(review): looks analogous to GetState.next_scalar -- confirm */
160         char       *normalized_scalar;      /* NOTE(review): usage not visible here */
161 } EachState;
162
163 /* state for json_array_elements (same field layout as EachState; users are not visible here) */
164 typedef struct ElementsState
165 {
166         JsonLexContext *lex;        /* lexer context for the document being parsed */
167         Tuplestorestate *tuple_store;       /* presumably where result tuples are stashed -- confirm in elements_worker */
168         TupleDesc       ret_tdesc;  /* presumably the descriptor of the returned rows -- confirm */
169         MemoryContext tmp_cxt;      /* presumably a per-tuple temporary context -- confirm */
170         char       *result_start;   /* NOTE(review): looks analogous to GetState.result_start -- confirm */
171         bool            normalize_results;  /* NOTE(review): looks like the as_text flag -- confirm */
172         bool            next_scalar;        /* NOTE(review): looks analogous to GetState.next_scalar -- confirm */
173         char       *normalized_scalar;      /* NOTE(review): usage not visible here */
174 } ElementsState;
175
176 /* state for get_json_object_as_hash (note: the struct tag is JhashState, the typedef JHashState) */
177 typedef struct JhashState
178 {
179         JsonLexContext *lex;        /* lexer context for the document being parsed */
180         HTAB       *hash;           /* presumably the hash table being filled -- confirm in get_json_object_as_hash */
181         char       *saved_scalar;   /* NOTE(review): usage not visible here */
182         char       *save_json_start;        /* NOTE(review): usage not visible here */
183         bool            use_json_as_text;   /* mirrors the parameter of the same name on get_json_object_as_hash -- confirm */
184         char       *function_name;  /* presumably the caller's name for error messages -- confirm */
185 } JHashState;
186
187 /* used to build the hashtable (entries are filled by code not visible here) */
188 typedef struct JsonHashEntry
189 {
190         char            fname[NAMEDATALEN];     /* fixed-size name buffer; presumably the hash key -- confirm */
191         char       *val;            /* NOTE(review): presumably the field's value as a string -- confirm */
192         char       *json;           /* NOTE(review): presumably the field's raw json text -- confirm */
193         bool            isnull;             /* NOTE(review): presumably set for json null values -- confirm */
194 } JsonHashEntry;
195
196 /* these two are stolen from hstore / record_out, used in populate_record* */
197 typedef struct ColumnIOData
198 {
199         Oid                     column_type;    /* NOTE(review): presumably the column's type OID -- confirm */
200         Oid                     typiofunc;      /* NOTE(review): presumably the type's I/O function OID -- confirm */
201         Oid                     typioparam;     /* NOTE(review): presumably the I/O parameter type OID -- confirm */
202         FmgrInfo        proc;               /* NOTE(review): presumably lookup info for typiofunc -- confirm */
203 } ColumnIOData;
204
205 typedef struct RecordIOData         /* per-record I/O data; one ColumnIOData per column */
206 {
207         Oid                     record_type;    /* NOTE(review): presumably the composite type's OID -- confirm */
208         int32           record_typmod;      /* NOTE(review): presumably the composite type's typmod -- confirm */
209         int                     ncolumns;       /* NOTE(review): presumably the number of entries in columns -- confirm */
210         ColumnIOData columns[1];        /* VARIABLE LENGTH ARRAY */
211 } RecordIOData;
212
213 /* state for populate_recordset (the callbacks that use it are not in the visible part of the file) */
214 typedef struct PopulateRecordsetState
215 {
216         JsonLexContext *lex;        /* lexer context for the document being parsed */
217         HTAB       *json_hash;      /* NOTE(review): presumably the hash of the current object's fields -- confirm */
218         char       *saved_scalar;   /* NOTE(review): usage not visible here */
219         char       *save_json_start;        /* NOTE(review): usage not visible here */
220         bool            use_json_as_text;   /* NOTE(review): usage not visible here */
221         Tuplestorestate *tuple_store;       /* NOTE(review): presumably collects the result rows -- confirm */
222         TupleDesc       ret_tdesc;  /* NOTE(review): presumably the descriptor of the result rows -- confirm */
223         HeapTupleHeader rec;        /* NOTE(review): presumably the optional record argument -- confirm */
224         RecordIOData *my_extra;     /* NOTE(review): presumably cached per-column I/O data -- confirm */
225         MemoryContext fn_mcxt;          /* used to stash IO funcs */
226 } PopulateRecordsetState;
227
228 /*
229  * SQL function json_object-keys
230  *
231  * Returns the set of keys for the object argument.
232  *
233  * This SRF operates in value-per-call mode. It processes the
234  * object during the first call, and the keys are simply stashed
235  * in an array, whise size is expanded as necessary. This is probably
236  * safe enough for a list of keys of a single object, since they are
237  * limited in size to NAMEDATALEN and the number of keys is unlikely to
238  * be so huge that it has major memory implications.
239  */
240
241
242 Datum
243 json_object_keys(PG_FUNCTION_ARGS)
244 {
245         FuncCallContext *funcctx;
246         OkeysState *state;
247         int                     i;
248
249         if (SRF_IS_FIRSTCALL())
250         {
251                 text       *json = PG_GETARG_TEXT_P(0);
252                 JsonLexContext *lex = makeJsonLexContext(json, true);
253                 JsonSemAction *sem;
254
255                 MemoryContext oldcontext;
256
257                 funcctx = SRF_FIRSTCALL_INIT();
258                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
259
260                 state = palloc(sizeof(OkeysState));
261                 sem = palloc0(sizeof(JsonSemAction));
262
263                 state->lex = lex;
264                 state->result_size = 256;
265                 state->result_count = 0;
266                 state->sent_count = 0;
267                 state->result = palloc(256 * sizeof(char *));
268
269                 sem->semstate = (void *) state;
270                 sem->array_start = okeys_array_start;
271                 sem->scalar = okeys_scalar;
272                 sem->object_field_start = okeys_object_field_start;
273                 /* remainder are all NULL, courtesy of palloc0 above */
274
275                 pg_parse_json(lex, sem);
276                 /* keys are now in state->result */
277
278                 pfree(lex->strval->data);
279                 pfree(lex->strval);
280                 pfree(lex);
281                 pfree(sem);
282
283                 MemoryContextSwitchTo(oldcontext);
284                 funcctx->user_fctx = (void *) state;
285
286         }
287
288         funcctx = SRF_PERCALL_SETUP();
289         state = (OkeysState *) funcctx->user_fctx;
290
291         if (state->sent_count < state->result_count)
292         {
293                 char       *nxt = state->result[state->sent_count++];
294
295                 SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
296         }
297
298         /* cleanup to reduce or eliminate memory leaks */
299         for (i = 0; i < state->result_count; i++)
300                 pfree(state->result[i]);
301         pfree(state->result);
302         pfree(state);
303
304         SRF_RETURN_DONE(funcctx);
305 }
306
307 static void
308 okeys_object_field_start(void *state, char *fname, bool isnull)
309 {
310         OkeysState *_state = (OkeysState *) state;
311
312         /* only collecting keys for the top level object */
313         if (_state->lex->lex_level != 1)
314                 return;
315
316         /* enlarge result array if necessary */
317         if (_state->result_count >= _state->result_size)
318         {
319                 _state->result_size *= 2;
320                 _state->result =
321                         repalloc(_state->result, sizeof(char *) * _state->result_size);
322         }
323
324         /* save a copy of the field name */
325         _state->result[_state->result_count++] = pstrdup(fname);
326 }
327
328 static void
329 okeys_array_start(void *state)
330 {
331         OkeysState *_state = (OkeysState *) state;
332
333         /* top level must be a json object */
334         if (_state->lex->lex_level == 0)
335                 ereport(ERROR,
336                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
337                                  errmsg("cannot call json_object_keys on an array")));
338 }
339
340 static void
341 okeys_scalar(void *state, char *token, JsonTokenType tokentype)
342 {
343         OkeysState *_state = (OkeysState *) state;
344
345         /* top level must be a json object */
346         if (_state->lex->lex_level == 0)
347                 ereport(ERROR,
348                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
349                                  errmsg("cannot call json_object_keys on a scalar")));
350 }
351
352 /*
353  * json getter functions
354  * these implement the -> ->> #> and #>> operators
355  * and the json_extract_path*(json, text, ...) functions
356  */
357
358
359 Datum
360 json_object_field(PG_FUNCTION_ARGS)
361 {
362         text       *json = PG_GETARG_TEXT_P(0);
363         text       *result;
364         text       *fname = PG_GETARG_TEXT_P(1);
365         char       *fnamestr = text_to_cstring(fname);
366
367         result = get_worker(json, fnamestr, -1, NULL, NULL, -1, false);
368
369         if (result != NULL)
370                 PG_RETURN_TEXT_P(result);
371         else
372                 PG_RETURN_NULL();
373 }
374
375 Datum
376 json_object_field_text(PG_FUNCTION_ARGS)
377 {
378         text       *json = PG_GETARG_TEXT_P(0);
379         text       *result;
380         text       *fname = PG_GETARG_TEXT_P(1);
381         char       *fnamestr = text_to_cstring(fname);
382
383         result = get_worker(json, fnamestr, -1, NULL, NULL, -1, true);
384
385         if (result != NULL)
386                 PG_RETURN_TEXT_P(result);
387         else
388                 PG_RETURN_NULL();
389 }
390
391 Datum
392 json_array_element(PG_FUNCTION_ARGS)
393 {
394         text       *json = PG_GETARG_TEXT_P(0);
395         text       *result;
396         int                     element = PG_GETARG_INT32(1);
397
398         result = get_worker(json, NULL, element, NULL, NULL, -1, false);
399
400         if (result != NULL)
401                 PG_RETURN_TEXT_P(result);
402         else
403                 PG_RETURN_NULL();
404 }
405
406 Datum
407 json_array_element_text(PG_FUNCTION_ARGS)
408 {
409         text       *json = PG_GETARG_TEXT_P(0);
410         text       *result;
411         int                     element = PG_GETARG_INT32(1);
412
413         result = get_worker(json, NULL, element, NULL, NULL, -1, true);
414
415         if (result != NULL)
416                 PG_RETURN_TEXT_P(result);
417         else
418                 PG_RETURN_NULL();
419 }
420
421 Datum
422 json_extract_path(PG_FUNCTION_ARGS)
423 {
424         return get_path_all(fcinfo, false);
425 }
426
427 Datum
428 json_extract_path_text(PG_FUNCTION_ARGS)
429 {
430         return get_path_all(fcinfo, true);
431 }
432
433 /*
434  * common routine for extract_path functions
435  */
436 static inline Datum
437 get_path_all(PG_FUNCTION_ARGS, bool as_text)
438 {
439         text       *json = PG_GETARG_TEXT_P(0);
440         ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
441         text       *result;
442         Datum      *pathtext;
443         bool       *pathnulls;
444         int                     npath;
445         char      **tpath;
446         int                *ipath;
447         int                     i;
448         long            ind;
449         char       *endptr;
450
451         if (array_contains_nulls(path))
452                 ereport(ERROR,
453                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
454                                  errmsg("cannot call function with null path elements")));
455
456
457         deconstruct_array(path, TEXTOID, -1, false, 'i',
458                                           &pathtext, &pathnulls, &npath);
459
460         tpath = palloc(npath * sizeof(char *));
461         ipath = palloc(npath * sizeof(int));
462
463
464         for (i = 0; i < npath; i++)
465         {
466                 tpath[i] = TextDatumGetCString(pathtext[i]);
467                 if (*tpath[i] == '\0')
468                         ereport(
469                                         ERROR,
470                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
471                                    errmsg("cannot call function with empty path elements")));
472
473                 /*
474                  * we have no idea at this stage what structure the document is so
475                  * just convert anything in the path that we can to an integer and set
476                  * all the other integers to -1 which will never match.
477                  */
478                 ind = strtol(tpath[i], &endptr, 10);
479                 if (*endptr == '\0' && ind <= INT_MAX && ind >= 0)
480                         ipath[i] = (int) ind;
481                 else
482                         ipath[i] = -1;
483         }
484
485
486         result = get_worker(json, NULL, -1, tpath, ipath, npath, as_text);
487
488         if (result != NULL)
489                 PG_RETURN_TEXT_P(result);
490         else
491                 PG_RETURN_NULL();
492 }
493
494 /*
495  * get_worker
496  *
497  * common worker for all the json getter functions
498  */
499 static inline text *
500 get_worker(text *json,
501                    char *field,
502                    int elem_index,
503                    char **tpath,
504                    int *ipath,
505                    int npath,
506                    bool normalize_results)
507 {
508         GetState   *state;
509         JsonLexContext *lex = makeJsonLexContext(json, true);
510         JsonSemAction *sem;
511
512         /* only allowed to use one of these */
513         Assert(elem_index < 0 || (tpath == NULL && ipath == NULL && field == NULL));
514         Assert(tpath == NULL || field == NULL);
515
516         state = palloc0(sizeof(GetState));
517         sem = palloc0(sizeof(JsonSemAction));
518
519         state->lex = lex;
520         /* is it "_as_text" variant? */
521         state->normalize_results = normalize_results;
522         if (field != NULL)
523         {
524                 /* single text argument */
525                 state->search_type = JSON_SEARCH_OBJECT;
526                 state->search_term = field;
527         }
528         else if (tpath != NULL)
529         {
530                 /* path array argument */
531                 state->search_type = JSON_SEARCH_PATH;
532                 state->path = tpath;
533                 state->npath = npath;
534                 state->current_path = palloc(sizeof(char *) * npath);
535                 state->pathok = palloc0(sizeof(bool) * npath);
536                 state->pathok[0] = true;
537                 state->array_level_index = palloc(sizeof(int) * npath);
538                 state->path_level_index = ipath;
539
540         }
541         else
542         {
543                 /* single integer argument */
544                 state->search_type = JSON_SEARCH_ARRAY;
545                 state->search_index = elem_index;
546                 state->array_index = -1;
547         }
548
549         sem->semstate = (void *) state;
550
551         /*
552          * Not all      variants need all the semantic routines. only set the ones
553          * that are actually needed for maximum efficiency.
554          */
555         sem->object_start = get_object_start;
556         sem->array_start = get_array_start;
557         sem->scalar = get_scalar;
558         if (field != NULL || tpath != NULL)
559         {
560                 sem->object_field_start = get_object_field_start;
561                 sem->object_field_end = get_object_field_end;
562         }
563         if (field == NULL)
564         {
565                 sem->array_element_start = get_array_element_start;
566                 sem->array_element_end = get_array_element_end;
567         }
568
569         pg_parse_json(lex, sem);
570
571         return state->tresult;
572 }
573
574 static void
575 get_object_start(void *state)
576 {
577         GetState   *_state = (GetState *) state;
578
579         /* json structure check */
580         if (_state->lex->lex_level == 0 && _state->search_type == JSON_SEARCH_ARRAY)
581                 ereport(ERROR,
582                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
583                                  errmsg("cannot extract array element from a non-array")));
584 }
585
586 static void
587 get_object_field_start(void *state, char *fname, bool isnull)
588 {
589         GetState   *_state = (GetState *) state;
590         bool            get_next = false;
591         int                     lex_level = _state->lex->lex_level;
592
593         if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
594                 strcmp(fname, _state->search_term) == 0)
595         {
596
597                 _state->tresult = NULL;
598                 _state->result_start = NULL;
599                 get_next = true;
600         }
601         else if (_state->search_type == JSON_SEARCH_PATH &&
602                          lex_level <= _state->npath &&
603                          _state->pathok[_state->lex->lex_level - 1] &&
604                          strcmp(fname, _state->path[lex_level - 1]) == 0)
605         {
606                 /* path search, path so far is ok,      and we have a match */
607
608                 /* this object overrides any previous matching object */
609
610                 _state->tresult = NULL;
611                 _state->result_start = NULL;
612
613                 /* if not at end of path just mark path ok */
614                 if (lex_level < _state->npath)
615                         _state->pathok[lex_level] = true;
616
617                 /* end of path, so we want this value */
618                 if (lex_level == _state->npath)
619                         get_next = true;
620         }
621
622         if (get_next)
623         {
624                 if (_state->normalize_results &&
625                         _state->lex->token_type == JSON_TOKEN_STRING)
626                 {
627                         /* for as_text variants, tell get_scalar to set it for us */
628                         _state->next_scalar = true;
629                 }
630                 else
631                 {
632                         /* for non-as_text variants, just note the json starting point */
633                         _state->result_start = _state->lex->token_start;
634                 }
635         }
636 }
637
638 static void
639 get_object_field_end(void *state, char *fname, bool isnull)
640 {
641         GetState   *_state = (GetState *) state;
642         bool            get_last = false;
643         int                     lex_level = _state->lex->lex_level;
644
645
646         /* same tests as in get_object_field_start, mutatis mutandis */
647         if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
648                 strcmp(fname, _state->search_term) == 0)
649         {
650                 get_last = true;
651         }
652         else if (_state->search_type == JSON_SEARCH_PATH &&
653                          lex_level <= _state->npath &&
654                          _state->pathok[lex_level - 1] &&
655                          strcmp(fname, _state->path[lex_level - 1]) == 0)
656         {
657                 /* done with this field so reset pathok */
658                 if (lex_level < _state->npath)
659                         _state->pathok[lex_level] = false;
660
661                 if (lex_level == _state->npath)
662                         get_last = true;
663         }
664
665         /* for as_test variants our work is already done */
666         if (get_last && _state->result_start != NULL)
667         {
668                 /*
669                  * make a text object from the string from the prevously noted json
670                  * start up to the end of the previous token (the lexer is by now
671                  * ahead of us on whatevere came after what we're interested in).
672                  */
673                 int                     len = _state->lex->prev_token_terminator - _state->result_start;
674
675                 if (isnull && _state->normalize_results)
676                         _state->tresult = (text *) NULL;
677                 else
678                         _state->tresult = cstring_to_text_with_len(_state->result_start, len);
679         }
680
681         /*
682          * don't need to reset _state->result_start b/c we're only returning one
683          * datum, the conditions should not occur more than once, and this lets us
684          * check cheaply that they don't (see object_field_start() )
685          */
686 }
687
688 static void
689 get_array_start(void *state)
690 {
691         GetState   *_state = (GetState *) state;
692         int                     lex_level = _state->lex->lex_level;
693
694         /* json structure check */
695         if (lex_level == 0 && _state->search_type == JSON_SEARCH_OBJECT)
696                 ereport(ERROR,
697                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
698                                  errmsg("cannot extract field from a non-object")));
699
700         /*
701          * initialize array count for this nesting level Note: the lex_level seen
702          * by array_start is one less than that seen by the elements of the array.
703          */
704         if (_state->search_type == JSON_SEARCH_PATH &&
705                 lex_level < _state->npath)
706                 _state->array_level_index[lex_level] = -1;
707 }
708
709 static void
710 get_array_element_start(void *state, bool isnull)
711 {
712         GetState   *_state = (GetState *) state;
713         bool            get_next = false;
714         int                     lex_level = _state->lex->lex_level;
715
716         if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY)
717         {
718                 /* single integer search */
719                 _state->array_index++;
720                 if (_state->array_index == _state->search_index)
721                         get_next = true;
722         }
723         else if (_state->search_type == JSON_SEARCH_PATH &&
724                          lex_level <= _state->npath &&
725                          _state->pathok[lex_level - 1])
726         {
727                 /*
728                  * path search, path so far is ok
729                  *
730                  * increment the array counter. no point doing this if we already know
731                  * the path is bad.
732                  *
733                  * then check if we have a match.
734                  */
735
736                 if (++_state->array_level_index[lex_level - 1] ==
737                         _state->path_level_index[lex_level - 1])
738                 {
739                         if (lex_level == _state->npath)
740                         {
741                                 /* match and at end of path, so get value */
742                                 get_next = true;
743                         }
744                         else
745                         {
746                                 /* not at end of path just mark path ok */
747                                 _state->pathok[lex_level] = true;
748                         }
749                 }
750
751         }
752
753         /* same logic as for objects */
754         if (get_next)
755         {
756                 if (_state->normalize_results &&
757                         _state->lex->token_type == JSON_TOKEN_STRING)
758                 {
759                         _state->next_scalar = true;
760                 }
761                 else
762                 {
763                         _state->result_start = _state->lex->token_start;
764                 }
765         }
766 }
767
768 static void
769 get_array_element_end(void *state, bool isnull)
770 {
771         GetState   *_state = (GetState *) state;
772         bool            get_last = false;
773         int                     lex_level = _state->lex->lex_level;
774
775         /* same logic as in get_object_end, modified for arrays */
776
777         if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY &&
778                 _state->array_index == _state->search_index)
779         {
780                 get_last = true;
781         }
782         else if (_state->search_type == JSON_SEARCH_PATH &&
783                          lex_level <= _state->npath &&
784                          _state->pathok[lex_level - 1] &&
785                          _state->array_level_index[lex_level - 1] ==
786                          _state->path_level_index[lex_level - 1])
787         {
788                 /* done with this element so reset pathok */
789                 if (lex_level < _state->npath)
790                         _state->pathok[lex_level] = false;
791
792                 if (lex_level == _state->npath)
793                         get_last = true;
794         }
795         if (get_last && _state->result_start != NULL)
796         {
797                 int                     len = _state->lex->prev_token_terminator - _state->result_start;
798
799                 if (isnull && _state->normalize_results)
800                         _state->tresult = (text *) NULL;
801                 else
802                         _state->tresult = cstring_to_text_with_len(_state->result_start, len);
803         }
804 }
805
806 static void
807 get_scalar(void *state, char *token, JsonTokenType tokentype)
808 {
809         GetState   *_state = (GetState *) state;
810
811         if (_state->lex->lex_level == 0 && _state->search_type != JSON_SEARCH_PATH)
812                 ereport(ERROR,
813                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
814                                  errmsg("cannot extract element from a scalar")));
815         if (_state->next_scalar)
816         {
817                 /* a de-escaped text value is wanted, so supply it */
818                 _state->tresult = cstring_to_text(token);
819                 /* make sure the next call to get_scalar doesn't overwrite it */
820                 _state->next_scalar = false;
821         }
822
823 }
824
825 /*
826  * SQL function json_array_length(json) -> int
827  */
828 Datum
829 json_array_length(PG_FUNCTION_ARGS)
830 {
831         text       *json = PG_GETARG_TEXT_P(0);
832
833         AlenState  *state;
834         JsonLexContext *lex = makeJsonLexContext(json, false);
835         JsonSemAction *sem;
836
837         state = palloc0(sizeof(AlenState));
838         sem = palloc0(sizeof(JsonSemAction));
839
840         /* palloc0 does this for us */
841 #if 0
842         state->count = 0;
843 #endif
844         state->lex = lex;
845
846         sem->semstate = (void *) state;
847         sem->object_start = alen_object_start;
848         sem->scalar = alen_scalar;
849         sem->array_element_start = alen_array_element_start;
850
851         pg_parse_json(lex, sem);
852
853         PG_RETURN_INT32(state->count);
854 }
855
856 /*
857  * These next two checks ensure that the json is an array (since it can't be
858  * a scalar or an object).
859  */
860
861 static void
862 alen_object_start(void *state)
863 {
864         AlenState  *_state = (AlenState *) state;
865
866         /* json structure check */
867         if (_state->lex->lex_level == 0)
868                 ereport(ERROR,
869                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
870                                  errmsg("cannot get array length of a non-array")));
871 }
872
873 static void
874 alen_scalar(void *state, char *token, JsonTokenType tokentype)
875 {
876         AlenState  *_state = (AlenState *) state;
877
878         /* json structure check */
879         if (_state->lex->lex_level == 0)
880                 ereport(ERROR,
881                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
882                                  errmsg("cannot get array length of a scalar")));
883 }
884
885 static void
886 alen_array_element_start(void *state, bool isnull)
887 {
888         AlenState  *_state = (AlenState *) state;
889
890         /* just count up all the level 1 elements */
891         if (_state->lex->lex_level == 1)
892                 _state->count++;
893 }
894
895 /*
896  * SQL function json_each and json_each_text
897  *
898  * decompose a json object into key value pairs.
899  *
900  * Unlike json_object_keys() these SRFs operate in materialize mode,
901  * stashing results into a Tuplestore object as they go.
902  * The construction of tuples is done using a temporary memory context
903  * that is cleared out after each tuple is built.
904  */
905 Datum
906 json_each(PG_FUNCTION_ARGS)
907 {
908         return each_worker(fcinfo, false);
909 }
910
911 Datum
912 json_each_text(PG_FUNCTION_ARGS)
913 {
914         return each_worker(fcinfo, true);
915 }
916
917 static inline Datum
918 each_worker(PG_FUNCTION_ARGS, bool as_text)
919 {
920         text       *json = PG_GETARG_TEXT_P(0);
921         JsonLexContext *lex = makeJsonLexContext(json, true);
922         JsonSemAction *sem;
923         ReturnSetInfo *rsi;
924         MemoryContext old_cxt;
925         TupleDesc       tupdesc;
926         EachState  *state;
927
928         state = palloc0(sizeof(EachState));
929         sem = palloc0(sizeof(JsonSemAction));
930
931         rsi = (ReturnSetInfo *) fcinfo->resultinfo;
932
933         if (!rsi || !IsA(rsi, ReturnSetInfo) ||
934                 (rsi->allowedModes & SFRM_Materialize) == 0 ||
935                 rsi->expectedDesc == NULL)
936                 ereport(ERROR,
937                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
938                                  errmsg("set-valued function called in context that "
939                                                 "cannot accept a set")));
940
941
942         rsi->returnMode = SFRM_Materialize;
943
944         (void) get_call_result_type(fcinfo, NULL, &tupdesc);
945
946         /* make these in a sufficiently long-lived memory context */
947         old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
948
949         state->ret_tdesc = CreateTupleDescCopy(tupdesc);
950         BlessTupleDesc(state->ret_tdesc);
951         state->tuple_store =
952                 tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
953                                                           false, work_mem);
954
955         MemoryContextSwitchTo(old_cxt);
956
957         sem->semstate = (void *) state;
958         sem->array_start = each_array_start;
959         sem->scalar = each_scalar;
960         sem->object_field_start = each_object_field_start;
961         sem->object_field_end = each_object_field_end;
962
963         state->normalize_results = as_text;
964         state->next_scalar = false;
965
966         state->lex = lex;
967         state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
968                                                                                    "json_each temporary cxt",
969                                                                                    ALLOCSET_DEFAULT_MINSIZE,
970                                                                                    ALLOCSET_DEFAULT_INITSIZE,
971                                                                                    ALLOCSET_DEFAULT_MAXSIZE);
972
973         pg_parse_json(lex, sem);
974
975         rsi->setResult = state->tuple_store;
976         rsi->setDesc = state->ret_tdesc;
977
978         PG_RETURN_NULL();
979 }
980
981
982 static void
983 each_object_field_start(void *state, char *fname, bool isnull)
984 {
985         EachState  *_state = (EachState *) state;
986
987         /* save a pointer to where the value starts */
988         if (_state->lex->lex_level == 1)
989         {
990                 /*
991                  * next_scalar will be reset in the object_field_end handler, and
992                  * since we know the value is a scalar there is no danger of it being
993                  * on while recursing down the tree.
994                  */
995                 if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
996                         _state->next_scalar = true;
997                 else
998                         _state->result_start = _state->lex->token_start;
999         }
1000 }
1001
1002 static void
1003 each_object_field_end(void *state, char *fname, bool isnull)
1004 {
1005         EachState  *_state = (EachState *) state;
1006         MemoryContext old_cxt;
1007         int                     len;
1008         text       *val;
1009         HeapTuple       tuple;
1010         Datum           values[2];
1011         bool            nulls[2] = {false, false};
1012
1013         /* skip over nested objects */
1014         if (_state->lex->lex_level != 1)
1015                 return;
1016
1017         /* use the tmp context so we can clean up after each tuple is done */
1018         old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
1019
1020         values[0] = CStringGetTextDatum(fname);
1021
1022         if (isnull && _state->normalize_results)
1023         {
1024                 nulls[1] = true;
1025                 values[1] = (Datum) NULL;
1026         }
1027         else if (_state->next_scalar)
1028         {
1029                 values[1] = CStringGetTextDatum(_state->normalized_scalar);
1030                 _state->next_scalar = false;
1031         }
1032         else
1033         {
1034                 len = _state->lex->prev_token_terminator - _state->result_start;
1035                 val = cstring_to_text_with_len(_state->result_start, len);
1036                 values[1] = PointerGetDatum(val);
1037         }
1038
1039
1040         tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
1041
1042         tuplestore_puttuple(_state->tuple_store, tuple);
1043
1044         /* clean up and switch back */
1045         MemoryContextSwitchTo(old_cxt);
1046         MemoryContextReset(_state->tmp_cxt);
1047 }
1048
1049 static void
1050 each_array_start(void *state)
1051 {
1052         EachState  *_state = (EachState *) state;
1053
1054         /* json structure check */
1055         if (_state->lex->lex_level == 0)
1056                 ereport(ERROR,
1057                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1058                                  errmsg("cannot deconstruct an array as an object")));
1059 }
1060
1061 static void
1062 each_scalar(void *state, char *token, JsonTokenType tokentype)
1063 {
1064         EachState  *_state = (EachState *) state;
1065
1066         /* json structure check */
1067         if (_state->lex->lex_level == 0)
1068                 ereport(ERROR,
1069                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1070                                  errmsg("cannot deconstruct a scalar")));
1071
1072         /* supply de-escaped value if required */
1073         if (_state->next_scalar)
1074                 _state->normalized_scalar = token;
1075 }
1076
1077 /*
1078  * SQL functions json_array_elements and json_array_elements_text
1079  *
1080  * get the elements from a json array
1081  *
1082  * a lot of this processing is similar to the json_each* functions
1083  */
1084 Datum
1085 json_array_elements(PG_FUNCTION_ARGS)
1086 {
1087         return elements_worker(fcinfo, false);
1088 }
1089
1090 Datum
1091 json_array_elements_text(PG_FUNCTION_ARGS)
1092 {
1093         return elements_worker(fcinfo, true);
1094 }
1095
1096 static inline Datum
1097 elements_worker(PG_FUNCTION_ARGS, bool as_text)
1098 {
1099         text       *json = PG_GETARG_TEXT_P(0);
1100
1101         /* elements only needs escaped strings when as_text */
1102         JsonLexContext *lex = makeJsonLexContext(json, as_text);
1103         JsonSemAction *sem;
1104         ReturnSetInfo *rsi;
1105         MemoryContext old_cxt;
1106         TupleDesc       tupdesc;
1107         ElementsState *state;
1108
1109         state = palloc0(sizeof(ElementsState));
1110         sem = palloc0(sizeof(JsonSemAction));
1111
1112         rsi = (ReturnSetInfo *) fcinfo->resultinfo;
1113
1114         if (!rsi || !IsA(rsi, ReturnSetInfo) ||
1115                 (rsi->allowedModes & SFRM_Materialize) == 0 ||
1116                 rsi->expectedDesc == NULL)
1117                 ereport(ERROR,
1118                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1119                                  errmsg("set-valued function called in context that "
1120                                                 "cannot accept a set")));
1121
1122
1123         rsi->returnMode = SFRM_Materialize;
1124
1125         /* it's a simple type, so don't use get_call_result_type() */
1126         tupdesc = rsi->expectedDesc;
1127
1128         /* make these in a sufficiently long-lived memory context */
1129         old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
1130
1131         state->ret_tdesc = CreateTupleDescCopy(tupdesc);
1132         BlessTupleDesc(state->ret_tdesc);
1133         state->tuple_store =
1134                 tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
1135                                                           false, work_mem);
1136
1137         MemoryContextSwitchTo(old_cxt);
1138
1139         sem->semstate = (void *) state;
1140         sem->object_start = elements_object_start;
1141         sem->scalar = elements_scalar;
1142         sem->array_element_start = elements_array_element_start;
1143         sem->array_element_end = elements_array_element_end;
1144
1145         state->normalize_results = as_text;
1146         state->next_scalar = false;
1147
1148         state->lex = lex;
1149         state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
1150                                                                                  "json_array_elements temporary cxt",
1151                                                                                    ALLOCSET_DEFAULT_MINSIZE,
1152                                                                                    ALLOCSET_DEFAULT_INITSIZE,
1153                                                                                    ALLOCSET_DEFAULT_MAXSIZE);
1154
1155         pg_parse_json(lex, sem);
1156
1157         rsi->setResult = state->tuple_store;
1158         rsi->setDesc = state->ret_tdesc;
1159
1160         PG_RETURN_NULL();
1161 }
1162
1163 static void
1164 elements_array_element_start(void *state, bool isnull)
1165 {
1166         ElementsState *_state = (ElementsState *) state;
1167
1168         /* save a pointer to where the value starts */
1169         if (_state->lex->lex_level == 1)
1170         {
1171                 /*
1172                  * next_scalar will be reset in the array_element_end handler, and
1173                  * since we know the value is a scalar there is no danger of it being
1174                  * on while recursing down the tree.
1175                  */
1176                 if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
1177                         _state->next_scalar = true;
1178                 else
1179                         _state->result_start = _state->lex->token_start;
1180         }
1181 }
1182
1183 static void
1184 elements_array_element_end(void *state, bool isnull)
1185 {
1186         ElementsState *_state = (ElementsState *) state;
1187         MemoryContext old_cxt;
1188         int                     len;
1189         text       *val;
1190         HeapTuple       tuple;
1191         Datum           values[1];
1192         bool nulls[1] = {false};
1193
1194         /* skip over nested objects */
1195         if (_state->lex->lex_level != 1)
1196                 return;
1197
1198         /* use the tmp context so we can clean up after each tuple is done */
1199         old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
1200
1201         if (isnull && _state->normalize_results)
1202         {
1203                 nulls[0] = true;
1204                 values[0] = (Datum) NULL;
1205         }
1206         else if (_state->next_scalar)
1207         {
1208                 values[0] = CStringGetTextDatum(_state->normalized_scalar);
1209                 _state->next_scalar = false;
1210         }
1211         else
1212         {
1213                 len = _state->lex->prev_token_terminator - _state->result_start;
1214                 val = cstring_to_text_with_len(_state->result_start, len);
1215                 values[0] = PointerGetDatum(val);
1216         }
1217
1218
1219         tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
1220
1221         tuplestore_puttuple(_state->tuple_store, tuple);
1222
1223         /* clean up and switch back */
1224         MemoryContextSwitchTo(old_cxt);
1225         MemoryContextReset(_state->tmp_cxt);
1226 }
1227
1228 static void
1229 elements_object_start(void *state)
1230 {
1231         ElementsState *_state = (ElementsState *) state;
1232
1233         /* json structure check */
1234         if (_state->lex->lex_level == 0)
1235                 ereport(ERROR,
1236                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1237                                  errmsg("cannot call json_array_elements on a non-array")));
1238 }
1239
1240 static void
1241 elements_scalar(void *state, char *token, JsonTokenType tokentype)
1242 {
1243         ElementsState *_state = (ElementsState *) state;
1244
1245         /* json structure check */
1246         if (_state->lex->lex_level == 0)
1247                 ereport(ERROR,
1248                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1249                                  errmsg("cannot call json_array_elements on a scalar")));
1250
1251         /* supply de-escaped value if required */
1252         if (_state->next_scalar)
1253                 _state->normalized_scalar = token;
1254 }
1255
1256 /*
1257  * SQL function json_populate_record
1258  *
1259  * set fields in a record from the argument json
1260  *
1261  * Code adapted shamelessly from hstore's populate_record
1262  * which is in turn partly adapted from record_out.
1263  *
1264  * The json is decomposed into a hash table, in which each
1265  * field in the record is then looked up by name.
1266  */
1267 Datum
1268 json_populate_record(PG_FUNCTION_ARGS)
1269 {
1270         return populate_record_worker(fcinfo, true);
1271 }
1272
1273 Datum
1274 json_to_record(PG_FUNCTION_ARGS)
1275 {
1276         return populate_record_worker(fcinfo, false);
1277 }
1278
1279 static inline Datum
1280 populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg)
1281 {
1282         text       *json;
1283         bool            use_json_as_text;
1284         HTAB       *json_hash;
1285         HeapTupleHeader rec = NULL;
1286         Oid                     tupType = InvalidOid;
1287         int32           tupTypmod = -1;
1288         TupleDesc       tupdesc;
1289         HeapTupleData tuple;
1290         HeapTuple       rettuple;
1291         RecordIOData *my_extra;
1292         int                     ncolumns;
1293         int                     i;
1294         Datum      *values;
1295         bool       *nulls;
1296         char            fname[NAMEDATALEN];
1297         JsonHashEntry *hashentry;
1298
1299         if (have_record_arg)
1300         {
1301                 Oid                     argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
1302
1303                 use_json_as_text = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2);
1304
1305                 if (!type_is_rowtype(argtype))
1306                         ereport(ERROR,
1307                                         (errcode(ERRCODE_DATATYPE_MISMATCH),
1308                                          errmsg("first argument of json_populate_record must be a row type")));
1309
1310                 if (PG_ARGISNULL(0))
1311                 {
1312                         if (PG_ARGISNULL(1))
1313                                 PG_RETURN_NULL();
1314
1315                         /*
1316                          * have no tuple to look at, so the only source of type info is
1317                          * the argtype. The lookup_rowtype_tupdesc call below will error
1318                          * out if we don't have a known composite type oid here.
1319                          */
1320                         tupType = argtype;
1321                         tupTypmod = -1;
1322                 }
1323                 else
1324                 {
1325                         rec = PG_GETARG_HEAPTUPLEHEADER(0);
1326
1327                         if (PG_ARGISNULL(1))
1328                                 PG_RETURN_POINTER(rec);
1329
1330                         /* Extract type info from the tuple itself */
1331                         tupType = HeapTupleHeaderGetTypeId(rec);
1332                         tupTypmod = HeapTupleHeaderGetTypMod(rec);
1333                 }
1334
1335                 json = PG_GETARG_TEXT_P(1);
1336         }
1337         else
1338         {
1339                 /* json_to_record case */
1340
1341                 use_json_as_text = PG_ARGISNULL(1) ? false : PG_GETARG_BOOL(1);
1342
1343                 if (PG_ARGISNULL(0))
1344                         PG_RETURN_NULL();
1345
1346                 json = PG_GETARG_TEXT_P(0);
1347
1348                 get_call_result_type(fcinfo, NULL, &tupdesc);
1349         }
1350
1351         json_hash = get_json_object_as_hash(json, "json_populate_record",
1352                                                                                 use_json_as_text);
1353
1354         if (have_record_arg)
1355         {
1356                 /*
1357                  * if the input json is empty, we can only skip the rest if we were
1358                  * passed in a non-null record, since otherwise there may be issues
1359                  * with domain nulls.
1360                  */
1361                 if (hash_get_num_entries(json_hash) == 0 && rec)
1362                         PG_RETURN_POINTER(rec);
1363
1364
1365                 tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1366         }
1367
1368         ncolumns = tupdesc->natts;
1369
1370         if (rec)
1371         {
1372                 /* Build a temporary HeapTuple control structure */
1373                 tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
1374                 ItemPointerSetInvalid(&(tuple.t_self));
1375                 tuple.t_tableOid = InvalidOid;
1376                 tuple.t_data = rec;
1377         }
1378
1379         /*
1380          * We arrange to look up the needed I/O info just once per series of
1381          * calls, assuming the record type doesn't change underneath us.
1382          */
1383         my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
1384         if (my_extra == NULL ||
1385                 my_extra->ncolumns != ncolumns)
1386         {
1387                 fcinfo->flinfo->fn_extra =
1388                         MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
1389                                                            sizeof(RecordIOData) - sizeof(ColumnIOData)
1390                                                            + ncolumns * sizeof(ColumnIOData));
1391                 my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
1392                 my_extra->record_type = InvalidOid;
1393                 my_extra->record_typmod = 0;
1394         }
1395
1396         if (have_record_arg && (my_extra->record_type != tupType ||
1397                                                         my_extra->record_typmod != tupTypmod))
1398         {
1399                 MemSet(my_extra, 0,
1400                            sizeof(RecordIOData) - sizeof(ColumnIOData)
1401                            + ncolumns * sizeof(ColumnIOData));
1402                 my_extra->record_type = tupType;
1403                 my_extra->record_typmod = tupTypmod;
1404                 my_extra->ncolumns = ncolumns;
1405         }
1406
1407         values = (Datum *) palloc(ncolumns * sizeof(Datum));
1408         nulls = (bool *) palloc(ncolumns * sizeof(bool));
1409
1410         if (rec)
1411         {
1412                 /* Break down the tuple into fields */
1413                 heap_deform_tuple(&tuple, tupdesc, values, nulls);
1414         }
1415         else
1416         {
1417                 for (i = 0; i < ncolumns; ++i)
1418                 {
1419                         values[i] = (Datum) 0;
1420                         nulls[i] = true;
1421                 }
1422         }
1423
1424         for (i = 0; i < ncolumns; ++i)
1425         {
1426                 ColumnIOData *column_info = &my_extra->columns[i];
1427                 Oid                     column_type = tupdesc->attrs[i]->atttypid;
1428                 char       *value;
1429
1430                 /* Ignore dropped columns in datatype */
1431                 if (tupdesc->attrs[i]->attisdropped)
1432                 {
1433                         nulls[i] = true;
1434                         continue;
1435                 }
1436
1437                 memset(fname, 0, NAMEDATALEN);
1438                 strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN);
1439                 hashentry = hash_search(json_hash, fname, HASH_FIND, NULL);
1440
1441                 /*
1442                  * we can't just skip here if the key wasn't found since we might have
1443                  * a domain to deal with. If we were passed in a non-null record
1444                  * datum, we assume that the existing values are valid (if they're
1445                  * not, then it's not our fault), but if we were passed in a null,
1446                  * then every field which we don't populate needs to be run through
1447                  * the input function just in case it's a domain type.
1448                  */
1449                 if (hashentry == NULL && rec)
1450                         continue;
1451
1452                 /*
1453                  * Prepare to convert the column value from text
1454                  */
1455                 if (column_info->column_type != column_type)
1456                 {
1457                         getTypeInputInfo(column_type,
1458                                                          &column_info->typiofunc,
1459                                                          &column_info->typioparam);
1460                         fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
1461                                                   fcinfo->flinfo->fn_mcxt);
1462                         column_info->column_type = column_type;
1463                 }
1464                 if (hashentry == NULL || hashentry->isnull)
1465                 {
1466                         /*
1467                          * need InputFunctionCall to happen even for nulls, so that domain
1468                          * checks are done
1469                          */
1470                         values[i] = InputFunctionCall(&column_info->proc, NULL,
1471                                                                                   column_info->typioparam,
1472                                                                                   tupdesc->attrs[i]->atttypmod);
1473                         nulls[i] = true;
1474                 }
1475                 else
1476                 {
1477                         value = hashentry->val;
1478
1479                         values[i] = InputFunctionCall(&column_info->proc, value,
1480                                                                                   column_info->typioparam,
1481                                                                                   tupdesc->attrs[i]->atttypmod);
1482                         nulls[i] = false;
1483                 }
1484         }
1485
1486         rettuple = heap_form_tuple(tupdesc, values, nulls);
1487
1488         ReleaseTupleDesc(tupdesc);
1489
1490         PG_RETURN_DATUM(HeapTupleGetDatum(rettuple));
1491 }
1492
1493 /*
1494  * get_json_object_as_hash
1495  *
1496  * decompose a json object into a hash table.
1497  *
1498  * Currently doesn't allow anything but a flat object. Should this
1499  * change?
1500  *
1501  * funcname argument allows caller to pass in its name for use in
1502  * error messages.
1503  */
1504 static HTAB *
1505 get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text)
1506 {
1507         HASHCTL         ctl;
1508         HTAB       *tab;
1509         JHashState *state;
1510         JsonLexContext *lex = makeJsonLexContext(json, true);
1511         JsonSemAction *sem;
1512
1513         memset(&ctl, 0, sizeof(ctl));
1514         ctl.keysize = NAMEDATALEN;
1515         ctl.entrysize = sizeof(JsonHashEntry);
1516         ctl.hcxt = CurrentMemoryContext;
1517         tab = hash_create("json object hashtable",
1518                                           100,
1519                                           &ctl,
1520                                           HASH_ELEM | HASH_CONTEXT);
1521
1522         state = palloc0(sizeof(JHashState));
1523         sem = palloc0(sizeof(JsonSemAction));
1524
1525         state->function_name = funcname;
1526         state->hash = tab;
1527         state->lex = lex;
1528         state->use_json_as_text = use_json_as_text;
1529
1530         sem->semstate = (void *) state;
1531         sem->array_start = hash_array_start;
1532         sem->scalar = hash_scalar;
1533         sem->object_field_start = hash_object_field_start;
1534         sem->object_field_end = hash_object_field_end;
1535
1536         pg_parse_json(lex, sem);
1537
1538         return tab;
1539 }
1540
1541 static void
1542 hash_object_field_start(void *state, char *fname, bool isnull)
1543 {
1544         JHashState *_state = (JHashState *) state;
1545
1546         if (_state->lex->lex_level > 1)
1547                 return;
1548
1549         if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
1550                 _state->lex->token_type == JSON_TOKEN_OBJECT_START)
1551         {
1552                 if (!_state->use_json_as_text)
1553                         ereport(ERROR,
1554                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1555                                          errmsg("cannot call %s on a nested object",
1556                                                         _state->function_name)));
1557                 _state->save_json_start = _state->lex->token_start;
1558         }
1559         else
1560         {
1561                 /* must be a scalar */
1562                 _state->save_json_start = NULL;
1563         }
1564 }
1565
1566 static void
1567 hash_object_field_end(void *state, char *fname, bool isnull)
1568 {
1569         JHashState *_state = (JHashState *) state;
1570         JsonHashEntry *hashentry;
1571         bool            found;
1572         char            name[NAMEDATALEN];
1573
1574         /*
1575          * ignore field names >= NAMEDATALEN - they can't match a record field
1576          * ignore nested fields.
1577          */
1578         if (_state->lex->lex_level > 2 || strlen(fname) >= NAMEDATALEN)
1579                 return;
1580
1581         memset(name, 0, NAMEDATALEN);
1582         strncpy(name, fname, NAMEDATALEN);
1583
1584         hashentry = hash_search(_state->hash, name, HASH_ENTER, &found);
1585
1586         /*
1587          * found being true indicates a duplicate. We don't do anything about
1588          * that, a later field with the same name overrides the earlier field.
1589          */
1590
1591         hashentry->isnull = isnull;
1592         if (_state->save_json_start != NULL)
1593         {
1594                 int                     len = _state->lex->prev_token_terminator - _state->save_json_start;
1595                 char       *val = palloc((len + 1) * sizeof(char));
1596
1597                 memcpy(val, _state->save_json_start, len);
1598                 val[len] = '\0';
1599                 hashentry->val = val;
1600         }
1601         else
1602         {
1603                 /* must have had a scalar instead */
1604                 hashentry->val = _state->saved_scalar;
1605         }
1606 }
1607
1608 static void
1609 hash_array_start(void *state)
1610 {
1611         JHashState *_state = (JHashState *) state;
1612
1613         if (_state->lex->lex_level == 0)
1614                 ereport(ERROR,
1615                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1616                            errmsg("cannot call %s on an array", _state->function_name)));
1617 }
1618
1619 static void
1620 hash_scalar(void *state, char *token, JsonTokenType tokentype)
1621 {
1622         JHashState *_state = (JHashState *) state;
1623
1624         if (_state->lex->lex_level == 0)
1625                 ereport(ERROR,
1626                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1627                            errmsg("cannot call %s on a scalar", _state->function_name)));
1628
1629         if (_state->lex->lex_level == 1)
1630                 _state->saved_scalar = token;
1631 }
1632
1633
1634 /*
1635  * SQL function json_populate_recordset
1636  *
1637  * set fields in a set of records from the argument json,
1638  * which must be an array of objects.
1639  *
1640  * similar to json_populate_record, but the tuple-building code
1641  * is pushed down into the semantic action handlers so it's done
1642  * per object in the array.
1643  */
1644 Datum
1645 json_populate_recordset(PG_FUNCTION_ARGS)
1646 {
1647         return populate_recordset_worker(fcinfo, true);
1648 }
1649
1650 Datum
1651 json_to_recordset(PG_FUNCTION_ARGS)
1652 {
1653         return populate_recordset_worker(fcinfo, false);
1654 }
1655
1656 /*
1657  * common worker for json_populate_recordset() and json_to_recordset()
1658  */
1659 static inline Datum
1660 populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg)
1661 {
1662         Oid                     argtype;
1663         text       *json;
1664         bool            use_json_as_text;
1665         ReturnSetInfo *rsi;
1666         MemoryContext old_cxt;
1667         Oid                     tupType;
1668         int32           tupTypmod;
1669         HeapTupleHeader rec;
1670         TupleDesc       tupdesc;
1671         RecordIOData *my_extra;
1672         int                     ncolumns;
1673         JsonLexContext *lex;
1674         JsonSemAction *sem;
1675         PopulateRecordsetState *state;
1676
1677         if (have_record_arg)
1678         {
1679                 argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
1680
1681                 use_json_as_text = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2);
1682
1683                 if (!type_is_rowtype(argtype))
1684                         ereport(ERROR,
1685                                         (errcode(ERRCODE_DATATYPE_MISMATCH),
1686                                          errmsg("first argument of json_populate_recordset must be a row type")));
1687         }
1688         else
1689         {
1690                 argtype = InvalidOid;
1691
1692                 use_json_as_text = PG_ARGISNULL(1) ? false : PG_GETARG_BOOL(1);
1693         }
1694
1695         rsi = (ReturnSetInfo *) fcinfo->resultinfo;
1696
1697         if (!rsi || !IsA(rsi, ReturnSetInfo) ||
1698                 (rsi->allowedModes & SFRM_Materialize) == 0 ||
1699                 rsi->expectedDesc == NULL)
1700                 ereport(ERROR,
1701                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1702                                  errmsg("set-valued function called in context that "
1703                                                 "cannot accept a set")));
1704
1705
1706         rsi->returnMode = SFRM_Materialize;
1707
1708         /*
1709          * get the tupdesc from the result set info - it must be a record type
1710          * because we already checked that arg1 is a record type.
1711          */
1712         (void) get_call_result_type(fcinfo, NULL, &tupdesc);
1713
1714         state = palloc0(sizeof(PopulateRecordsetState));
1715         sem = palloc0(sizeof(JsonSemAction));
1716
1717
1718         /* make these in a sufficiently long-lived memory context */
1719         old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
1720
1721         state->ret_tdesc = CreateTupleDescCopy(tupdesc);
1722         BlessTupleDesc(state->ret_tdesc);
1723         state->tuple_store =
1724                 tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
1725                                                           false, work_mem);
1726
1727         MemoryContextSwitchTo(old_cxt);
1728
1729         /* if the json is null send back an empty set */
1730         if (have_record_arg)
1731         {
1732                 if (PG_ARGISNULL(1))
1733                         PG_RETURN_NULL();
1734
1735                 json = PG_GETARG_TEXT_P(1);
1736
1737                 if (PG_ARGISNULL(0))
1738                         rec = NULL;
1739                 else
1740                         rec = PG_GETARG_HEAPTUPLEHEADER(0);
1741         }
1742         else
1743         {
1744                 if (PG_ARGISNULL(0))
1745                         PG_RETURN_NULL();
1746
1747                 json = PG_GETARG_TEXT_P(0);
1748
1749                 rec = NULL;
1750         }
1751
1752         tupType = tupdesc->tdtypeid;
1753         tupTypmod = tupdesc->tdtypmod;
1754         ncolumns = tupdesc->natts;
1755
1756         lex = makeJsonLexContext(json, true);
1757
1758         /*
1759          * We arrange to look up the needed I/O info just once per series of
1760          * calls, assuming the record type doesn't change underneath us.
1761          */
1762         my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
1763         if (my_extra == NULL ||
1764                 my_extra->ncolumns != ncolumns)
1765         {
1766                 fcinfo->flinfo->fn_extra =
1767                         MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
1768                                                            sizeof(RecordIOData) - sizeof(ColumnIOData)
1769                                                            + ncolumns * sizeof(ColumnIOData));
1770                 my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
1771                 my_extra->record_type = InvalidOid;
1772                 my_extra->record_typmod = 0;
1773         }
1774
1775         if (my_extra->record_type != tupType ||
1776                 my_extra->record_typmod != tupTypmod)
1777         {
1778                 MemSet(my_extra, 0,
1779                            sizeof(RecordIOData) - sizeof(ColumnIOData)
1780                            + ncolumns * sizeof(ColumnIOData));
1781                 my_extra->record_type = tupType;
1782                 my_extra->record_typmod = tupTypmod;
1783                 my_extra->ncolumns = ncolumns;
1784         }
1785
1786         sem->semstate = (void *) state;
1787         sem->array_start = populate_recordset_array_start;
1788         sem->array_element_start = populate_recordset_array_element_start;
1789         sem->scalar = populate_recordset_scalar;
1790         sem->object_field_start = populate_recordset_object_field_start;
1791         sem->object_field_end = populate_recordset_object_field_end;
1792         sem->object_start = populate_recordset_object_start;
1793         sem->object_end = populate_recordset_object_end;
1794
1795         state->lex = lex;
1796
1797         state->my_extra = my_extra;
1798         state->rec = rec;
1799         state->use_json_as_text = use_json_as_text;
1800         state->fn_mcxt = fcinfo->flinfo->fn_mcxt;
1801
1802         pg_parse_json(lex, sem);
1803
1804         rsi->setResult = state->tuple_store;
1805         rsi->setDesc = state->ret_tdesc;
1806
1807         PG_RETURN_NULL();
1808
1809 }
1810
1811 static void
1812 populate_recordset_object_start(void *state)
1813 {
1814         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1815         int                     lex_level = _state->lex->lex_level;
1816         HASHCTL         ctl;
1817
1818         if (lex_level == 0)
1819                 ereport(ERROR,
1820                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1821                                  errmsg("cannot call json_populate_recordset on an object")));
1822         else if (lex_level > 1 && !_state->use_json_as_text)
1823                 ereport(ERROR,
1824                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1825                  errmsg("cannot call json_populate_recordset with nested objects")));
1826
1827         /* set up a new hash for this entry */
1828         memset(&ctl, 0, sizeof(ctl));
1829         ctl.keysize = NAMEDATALEN;
1830         ctl.entrysize = sizeof(JsonHashEntry);
1831         ctl.hcxt = CurrentMemoryContext;
1832         _state->json_hash = hash_create("json object hashtable",
1833                                                                         100,
1834                                                                         &ctl,
1835                                                                         HASH_ELEM | HASH_CONTEXT);
1836 }
1837
1838 static void
1839 populate_recordset_object_end(void *state)
1840 {
1841         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1842         HTAB       *json_hash = _state->json_hash;
1843         Datum      *values;
1844         bool       *nulls;
1845         char            fname[NAMEDATALEN];
1846         int                     i;
1847         RecordIOData *my_extra = _state->my_extra;
1848         int                     ncolumns = my_extra->ncolumns;
1849         TupleDesc       tupdesc = _state->ret_tdesc;
1850         JsonHashEntry *hashentry;
1851         HeapTupleHeader rec = _state->rec;
1852         HeapTuple       rettuple;
1853
1854         if (_state->lex->lex_level > 1)
1855                 return;
1856
1857         values = (Datum *) palloc(ncolumns * sizeof(Datum));
1858         nulls = (bool *) palloc(ncolumns * sizeof(bool));
1859
1860         if (_state->rec)
1861         {
1862                 HeapTupleData tuple;
1863
1864                 /* Build a temporary HeapTuple control structure */
1865                 tuple.t_len = HeapTupleHeaderGetDatumLength(_state->rec);
1866                 ItemPointerSetInvalid(&(tuple.t_self));
1867                 tuple.t_tableOid = InvalidOid;
1868                 tuple.t_data = _state->rec;
1869
1870                 /* Break down the tuple into fields */
1871                 heap_deform_tuple(&tuple, tupdesc, values, nulls);
1872         }
1873         else
1874         {
1875                 for (i = 0; i < ncolumns; ++i)
1876                 {
1877                         values[i] = (Datum) 0;
1878                         nulls[i] = true;
1879                 }
1880         }
1881
1882         for (i = 0; i < ncolumns; ++i)
1883         {
1884                 ColumnIOData *column_info = &my_extra->columns[i];
1885                 Oid                     column_type = tupdesc->attrs[i]->atttypid;
1886                 char       *value;
1887
1888                 /* Ignore dropped columns in datatype */
1889                 if (tupdesc->attrs[i]->attisdropped)
1890                 {
1891                         nulls[i] = true;
1892                         continue;
1893                 }
1894
1895                 memset(fname, 0, NAMEDATALEN);
1896                 strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN);
1897                 hashentry = hash_search(json_hash, fname, HASH_FIND, NULL);
1898
1899                 /*
1900                  * we can't just skip here if the key wasn't found since we might have
1901                  * a domain to deal with. If we were passed in a non-null record
1902                  * datum, we assume that the existing values are valid (if they're
1903                  * not, then it's not our fault), but if we were passed in a null,
1904                  * then every field which we don't populate needs to be run through
1905                  * the input function just in case it's a domain type.
1906                  */
1907                 if (hashentry == NULL && rec)
1908                         continue;
1909
1910                 /*
1911                  * Prepare to convert the column value from text
1912                  */
1913                 if (column_info->column_type != column_type)
1914                 {
1915                         getTypeInputInfo(column_type,
1916                                                          &column_info->typiofunc,
1917                                                          &column_info->typioparam);
1918                         fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
1919                                                   _state->fn_mcxt);
1920                         column_info->column_type = column_type;
1921                 }
1922                 if (hashentry == NULL || hashentry->isnull)
1923                 {
1924                         /*
1925                          * need InputFunctionCall to happen even for nulls, so that domain
1926                          * checks are done
1927                          */
1928                         values[i] = InputFunctionCall(&column_info->proc, NULL,
1929                                                                                   column_info->typioparam,
1930                                                                                   tupdesc->attrs[i]->atttypmod);
1931                         nulls[i] = true;
1932                 }
1933                 else
1934                 {
1935                         value = hashentry->val;
1936
1937                         values[i] = InputFunctionCall(&column_info->proc, value,
1938                                                                                   column_info->typioparam,
1939                                                                                   tupdesc->attrs[i]->atttypmod);
1940                         nulls[i] = false;
1941                 }
1942         }
1943
1944         rettuple = heap_form_tuple(tupdesc, values, nulls);
1945
1946         tuplestore_puttuple(_state->tuple_store, rettuple);
1947
1948         hash_destroy(json_hash);
1949 }
1950
1951 static void
1952 populate_recordset_array_element_start(void *state, bool isnull)
1953 {
1954         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1955
1956         if (_state->lex->lex_level == 1 &&
1957                 _state->lex->token_type != JSON_TOKEN_OBJECT_START)
1958                 ereport(ERROR,
1959                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1960                 errmsg("must call json_populate_recordset on an array of objects")));
1961 }
1962
1963 static void
1964 populate_recordset_array_start(void *state)
1965 {
1966         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1967
1968         if (_state->lex->lex_level != 0 && !_state->use_json_as_text)
1969                 ereport(ERROR,
1970                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1971                   errmsg("cannot call json_populate_recordset with nested arrays")));
1972 }
1973
1974 static void
1975 populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype)
1976 {
1977         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1978
1979         if (_state->lex->lex_level == 0)
1980                 ereport(ERROR,
1981                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1982                                  errmsg("cannot call json_populate_recordset on a scalar")));
1983
1984         if (_state->lex->lex_level == 2)
1985                 _state->saved_scalar = token;
1986 }
1987
1988 static void
1989 populate_recordset_object_field_start(void *state, char *fname, bool isnull)
1990 {
1991         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
1992
1993         if (_state->lex->lex_level > 2)
1994                 return;
1995
1996         if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
1997                 _state->lex->token_type == JSON_TOKEN_OBJECT_START)
1998         {
1999                 if (!_state->use_json_as_text)
2000                         ereport(ERROR,
2001                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2002                                          errmsg("cannot call json_populate_recordset on a nested object")));
2003                 _state->save_json_start = _state->lex->token_start;
2004         }
2005         else
2006         {
2007                 _state->save_json_start = NULL;
2008         }
2009 }
2010
2011 static void
2012 populate_recordset_object_field_end(void *state, char *fname, bool isnull)
2013 {
2014         PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
2015         JsonHashEntry *hashentry;
2016         bool            found;
2017         char            name[NAMEDATALEN];
2018
2019         /*
2020          * ignore field names >= NAMEDATALEN - they can't match a record field
2021          * ignore nested fields.
2022          */
2023         if (_state->lex->lex_level > 2 || strlen(fname) >= NAMEDATALEN)
2024                 return;
2025
2026         memset(name, 0, NAMEDATALEN);
2027         strncpy(name, fname, NAMEDATALEN);
2028
2029         hashentry = hash_search(_state->json_hash, name, HASH_ENTER, &found);
2030
2031         /*
2032          * found being true indicates a duplicate. We don't do anything about
2033          * that, a later field with the same name overrides the earlier field.
2034          */
2035
2036         hashentry->isnull = isnull;
2037         if (_state->save_json_start != NULL)
2038         {
2039                 int                     len = _state->lex->prev_token_terminator - _state->save_json_start;
2040                 char       *val = palloc((len + 1) * sizeof(char));
2041
2042                 memcpy(val, _state->save_json_start, len);
2043                 val[len] = '\0';
2044                 hashentry->val = val;
2045         }
2046         else
2047         {
2048                 /* must have had a scalar instead */
2049                 hashentry->val = _state->saved_scalar;
2050         }
2051 }