]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/jsonb_util.c
hash_any returns Datum, not uint32 (and definitely not "int").
[postgresql] / src / backend / utils / adt / jsonb_util.c
1 /*-------------------------------------------------------------------------
2  *
3  * jsonb_util.c
4  *        Utilities for jsonb datatype
5  *
6  * Copyright (c) 2014, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/jsonb_util.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "access/hash.h"
17 #include "catalog/pg_collation.h"
18 #include "catalog/pg_type.h"
19 #include "miscadmin.h"
20 #include "utils/builtins.h"
21 #include "utils/jsonb.h"
22 #include "utils/memutils.h"
23
24 /*
25  * Twice as many values may be stored within pairs (for an Object) than within
26  * elements (for an Array), modulo the current MaxAllocSize limitation.  Note
27  * that JSONB_MAX_PAIRS is derived from the number of possible pairs, not
28  * values (as is the case for arrays and their elements), because we're
29  * concerned about limitations on the representation of the number of pairs.
30  * Over twice the memory is required to store n JsonbPairs as n JsonbValues.
31  * It only takes exactly twice as much disk space for storage, though.  The
32  * JsonbPair (not an actual pair of values) representation is used here because
33  * that is what is subject to the MaxAllocSize restriction when building an
34  * object.
35  */
36 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JENTRY_POSMASK))
37 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), \
38                                                          JENTRY_POSMASK))
39
40 /*
41  * State used while converting an arbitrary JsonbValue into a Jsonb value
42  * (4-byte varlena uncompressed representation of a Jsonb)
43  *
44  * ConvertLevel:  Bookkeeping around particular level when converting.
45  */
46 typedef struct convertLevel
47 {
48         uint32          i;                              /* Iterates once per element, or once per pair */
49         uint32     *header;                     /* Pointer to current container header */
50         JEntry     *meta;                       /* This level's metadata */
51         char       *begin;                      /* Pointer into convertState.buffer */
52 } convertLevel;
53
54 /*
55  * convertState:  Overall bookkeeping state for conversion
56  */
57 typedef struct convertState
58 {
59         /* Preallocated buffer in which to form varlena/Jsonb value */
60         Jsonb      *buffer;
61         /* Pointer into buffer */
62         char       *ptr;
63
64         /* State for  */
65         convertLevel *allState,         /* Overall state array */
66                            *contPtr;            /* Cur container pointer (in allState) */
67
68         /* Current size of buffer containing allState array */
69         Size            levelSz;
70
71 } convertState;
72
73 static int      compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
74 static int      lexicalCompareJsonbStringValue(const void *a, const void *b);
75 static Size convertJsonb(JsonbValue *val, Jsonb *buffer);
76 static inline short addPaddingInt(convertState *cstate);
77 static void walkJsonbValueConversion(JsonbValue *val, convertState *cstate,
78                                                  uint32 nestlevel);
79 static void putJsonbValueConversion(convertState *cstate, JsonbValue *val,
80                                                 uint32 flags, uint32 level);
81 static void putScalarConversion(convertState *cstate, JsonbValue *scalarVal,
82                                         uint32 level, uint32 i);
83 static void iteratorFromContainerBuf(JsonbIterator *it, char *buffer);
84 static bool formIterIsContainer(JsonbIterator **it, JsonbValue *val,
85                                         JEntry *ent, bool skipNested);
86 static JsonbIterator *freeAndGetParent(JsonbIterator *it);
87 static JsonbParseState *pushState(JsonbParseState **pstate);
88 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
89 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
90 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
91 static int      lengthCompareJsonbStringValue(const void *a, const void *b, void *arg);
92 static int      lengthCompareJsonbPair(const void *a, const void *b, void *arg);
93 static void uniqueifyJsonbObject(JsonbValue *object);
94 static void uniqueifyJsonbArray(JsonbValue *array);
95
96 /*
97  * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
98  *
99  * There isn't a JsonbToJsonbValue(), because generally we find it more
100  * convenient to directly iterate through the Jsonb representation and only
101  * really convert nested scalar values.  formIterIsContainer() does this, so
102  * that clients of the iteration code don't have to directly deal with the
103  * binary representation (JsonbDeepContains() is a notable exception, although
104  * all exceptions are internal to this module).  In general, functions that
105  * accept a JsonbValue argument are concerned with the manipulation of scalar
106  * values, or simple containers of scalar values, where it would be
107  * inconvenient to deal with a great amount of other state.
108  */
109 Jsonb *
110 JsonbValueToJsonb(JsonbValue *val)
111 {
112         Jsonb      *out;
113         Size            sz;
114
115         if (IsAJsonbScalar(val))
116         {
117                 /* Scalar value */
118                 JsonbParseState *pstate = NULL;
119                 JsonbValue *res;
120                 JsonbValue      scalarArray;
121
122                 scalarArray.type = jbvArray;
123                 scalarArray.val.array.rawScalar = true;
124                 scalarArray.val.array.nElems = 1;
125
126                 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
127                 pushJsonbValue(&pstate, WJB_ELEM, val);
128                 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
129
130                 out = palloc(VARHDRSZ + res->estSize);
131                 sz = convertJsonb(res, out);
132                 Assert(sz <= res->estSize);
133                 SET_VARSIZE(out, sz + VARHDRSZ);
134         }
135         else if (val->type == jbvObject || val->type == jbvArray)
136         {
137                 out = palloc(VARHDRSZ + val->estSize);
138                 sz = convertJsonb(val, out);
139                 Assert(sz <= val->estSize);
140                 SET_VARSIZE(out, VARHDRSZ + sz);
141         }
142         else
143         {
144                 Assert(val->type == jbvBinary);
145                 out = palloc(VARHDRSZ + val->val.binary.len);
146                 SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
147                 memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
148         }
149
150         return out;
151 }
152
153 /*
154  * BT comparator worker function.  Returns an integer less than, equal to, or
155  * greater than zero, indicating whether a is less than, equal to, or greater
156  * than b.  Consistent with the requirements for a B-Tree operator class
157  *
158  * Strings are compared lexically, in contrast with other places where we use a
159  * much simpler comparator logic for searching through Strings.  Since this is
160  * called from B-Tree support function 1, we're careful about not leaking
161  * memory here.
162  */
163 int
164 compareJsonbSuperHeaderValue(JsonbSuperHeader a, JsonbSuperHeader b)
165 {
166         JsonbIterator *ita,
167                            *itb;
168         int                     res = 0;
169
170         ita = JsonbIteratorInit(a);
171         itb = JsonbIteratorInit(b);
172
173         do
174         {
175                 JsonbValue      va,
176                                         vb;
177                 int                     ra,
178                                         rb;
179
180                 ra = JsonbIteratorNext(&ita, &va, false);
181                 rb = JsonbIteratorNext(&itb, &vb, false);
182
183                 /*
184                  * To a limited extent we'll redundantly iterate over an array/object
185                  * while re-performing the same test without any reasonable
186                  * expectation of the same container types having differing lengths
187                  * (as when we process a WJB_BEGIN_OBJECT, and later the corresponding
188                  * WJB_END_OBJECT), but no matter.
189                  */
190                 if (ra == rb)
191                 {
192                         if (ra == WJB_DONE)
193                         {
194                                 /* Decisively equal */
195                                 break;
196                         }
197
198                         if (va.type == vb.type)
199                         {
200                                 switch (va.type)
201                                 {
202                                         case jbvString:
203                                                 res = lexicalCompareJsonbStringValue(&va, &vb);
204                                                 break;
205                                         case jbvNull:
206                                         case jbvNumeric:
207                                         case jbvBool:
208                                                 res = compareJsonbScalarValue(&va, &vb);
209                                                 break;
210                                         case jbvArray:
211
212                                                 /*
213                                                  * This could be a "raw scalar" pseudo array.  That's
214                                                  * a special case here though, since we still want the
215                                                  * general type-based comparisons to apply, and as far
216                                                  * as we're concerned a pseudo array is just a scalar.
217                                                  */
218                                                 if (va.val.array.rawScalar != vb.val.array.rawScalar)
219                                                         res = (va.val.array.rawScalar) ? -1 : 1;
220                                                 if (va.val.array.nElems != vb.val.array.nElems)
221                                                         res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
222                                                 break;
223                                         case jbvObject:
224                                                 if (va.val.object.nPairs != vb.val.object.nPairs)
225                                                         res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
226                                                 break;
227                                         case jbvBinary:
228                                                 elog(ERROR, "unexpected jbvBinary value");
229                                 }
230                         }
231                         else
232                         {
233                                 /* Type-defined order */
234                                 res = (va.type > vb.type) ? 1 : -1;
235                         }
236                 }
237                 else
238                 {
239                         /*
240                          * It's safe to assume that the types differed.
241                          *
242                          * If the two values were the same container type, then there'd
243                          * have been a chance to observe the variation in the number of
244                          * elements/pairs (when processing WJB_BEGIN_OBJECT, say).  They
245                          * can't be scalar types either, because then they'd have to be
246                          * contained in containers already ruled unequal due to differing
247                          * numbers of pairs/elements, or already directly ruled unequal
248                          * with a call to the underlying type's comparator.
249                          */
250                         Assert(va.type != vb.type);
251                         Assert(va.type == jbvArray || va.type == jbvObject);
252                         Assert(vb.type == jbvArray || vb.type == jbvObject);
253                         /* Type-defined order */
254                         res = (va.type > vb.type) ? 1 : -1;
255                 }
256         }
257         while (res == 0);
258
259         while (ita != NULL)
260         {
261                 JsonbIterator *i = ita->parent;
262
263                 pfree(ita);
264                 ita = i;
265         }
266         while (itb != NULL)
267         {
268                 JsonbIterator *i = itb->parent;
269
270                 pfree(itb);
271                 itb = i;
272         }
273
274         return res;
275 }
276
277 /*
278  * Find value in object (i.e. the "value" part of some key/value pair in an
279  * object), or find a matching element if we're looking through an array.  Do
280  * so on the basis of equality of the object keys only, or alternatively
281  * element values only, with a caller-supplied value "key".  The "flags"
282  * argument allows the caller to specify which container types are of interest.
283  *
284  * This exported utility function exists to facilitate various cases concerned
285  * with "containment".  If asked to look through an object, the caller had
286  * better pass a Jsonb String, because their keys can only be strings.
287  * Otherwise, for an array, any type of JsonbValue will do.
288  *
289  * In order to proceed with the search, it is necessary for callers to have
290  * both specified an interest in exactly one particular container type with an
291  * appropriate flag, as well as having the pointed-to Jsonb superheader be of
292  * one of those same container types at the top level. (Actually, we just do
293  * whichever makes sense to save callers the trouble of figuring it out - at
294  * most one can make sense, because the super header either points to an array
295  * (possible a "raw scalar" pseudo array) or an object.)
296  *
297  * Note that we can return a jbvBinary JsonbValue if this is called on an
298  * object, but we never do so on an array.  If the caller asks to look through
299  * a container type that is not of the type pointed to by the superheader,
300  * immediately fall through and return NULL.  If we cannot find the value,
301  * return NULL.  Otherwise, return palloc()'d copy of value.
302  *
303  * lowbound can be NULL, but if not it's used to establish a point at which to
304  * start searching.  If the value searched for is found, then lowbound is then
305  * set to an offset into the array or object.  Typically, this is used to
306  * exploit the ordering of objects to avoid redundant work, by also sorting a
307  * list of items to be checked using the internal sort criteria for objects
308  * (object pair keys), and then, when searching for the second or subsequent
309  * item, picking it up where we left off knowing that the second or subsequent
310  * item can not be at a point below the low bound set when the first was found.
311  * This is only useful for objects, not arrays (which have a user-defined
312  * order), so array superheader Jsonbs should just pass NULL.  Moreover, it's
313  * only useful because we only match object pairs on the basis of their key, so
314  * presumably anyone exploiting this is only interested in matching Object keys
315  * with a String.  lowbound is given in units of pairs, not underlying values.
316  */
317 JsonbValue *
318 findJsonbValueFromSuperHeader(JsonbSuperHeader sheader, uint32 flags,
319                                                           uint32 *lowbound, JsonbValue *key)
320 {
321         uint32          superheader = *(uint32 *) sheader;
322         JEntry     *array = (JEntry *) (sheader + sizeof(uint32));
323         int                     count = (superheader & JB_CMASK);
324         JsonbValue *result = palloc(sizeof(JsonbValue));
325
326         Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
327
328         if (flags & JB_FARRAY & superheader)
329         {
330                 char       *data = (char *) (array + (superheader & JB_CMASK));
331                 int                     i;
332
333                 for (i = 0; i < count; i++)
334                 {
335                         JEntry     *e = array + i;
336
337                         if (JBE_ISNULL(*e) && key->type == jbvNull)
338                         {
339                                 result->type = jbvNull;
340                                 result->estSize = sizeof(JEntry);
341                         }
342                         else if (JBE_ISSTRING(*e) && key->type == jbvString)
343                         {
344                                 result->type = jbvString;
345                                 result->val.string.val = data + JBE_OFF(*e);
346                                 result->val.string.len = JBE_LEN(*e);
347                                 result->estSize = sizeof(JEntry) + result->val.string.len;
348                         }
349                         else if (JBE_ISNUMERIC(*e) && key->type == jbvNumeric)
350                         {
351                                 result->type = jbvNumeric;
352                                 result->val.numeric = (Numeric) (data + INTALIGN(JBE_OFF(*e)));
353
354                                 result->estSize = 2 * sizeof(JEntry) +
355                                         VARSIZE_ANY(result->val.numeric);
356                         }
357                         else if (JBE_ISBOOL(*e) && key->type == jbvBool)
358                         {
359                                 result->type = jbvBool;
360                                 result->val.boolean = JBE_ISBOOL_TRUE(*e) != 0;
361                                 result->estSize = sizeof(JEntry);
362                         }
363                         else
364                                 continue;
365
366                         if (compareJsonbScalarValue(key, result) == 0)
367                                 return result;
368                 }
369         }
370         else if (flags & JB_FOBJECT & superheader)
371         {
372                 /* Since this is an object, account for *Pairs* of Jentrys */
373                 char       *data = (char *) (array + (superheader & JB_CMASK) * 2);
374                 uint32          stopLow = lowbound ? *lowbound : 0,
375                                         stopMiddle;
376
377                 /* Object key past by caller must be a string */
378                 Assert(key->type == jbvString);
379
380                 /* Binary search on object/pair keys *only* */
381                 while (stopLow < count)
382                 {
383                         JEntry     *entry;
384                         int                     difference;
385                         JsonbValue      candidate;
386
387                         /*
388                          * Note how we compensate for the fact that we're iterating
389                          * through pairs (not entries) throughout.
390                          */
391                         stopMiddle = stopLow + (count - stopLow) / 2;
392
393                         entry = array + stopMiddle * 2;
394
395                         candidate.type = jbvString;
396                         candidate.val.string.val = data + JBE_OFF(*entry);
397                         candidate.val.string.len = JBE_LEN(*entry);
398                         candidate.estSize = sizeof(JEntry) + candidate.val.string.len;
399
400                         difference = lengthCompareJsonbStringValue(&candidate, key, NULL);
401
402                         if (difference == 0)
403                         {
404                                 /* Found our value (from key/value pair) */
405                                 JEntry     *v = entry + 1;
406
407                                 if (lowbound)
408                                         *lowbound = stopMiddle + 1;
409
410                                 if (JBE_ISNULL(*v))
411                                 {
412                                         result->type = jbvNull;
413                                         result->estSize = sizeof(JEntry);
414                                 }
415                                 else if (JBE_ISSTRING(*v))
416                                 {
417                                         result->type = jbvString;
418                                         result->val.string.val = data + JBE_OFF(*v);
419                                         result->val.string.len = JBE_LEN(*v);
420                                         result->estSize = sizeof(JEntry) + result->val.string.len;
421                                 }
422                                 else if (JBE_ISNUMERIC(*v))
423                                 {
424                                         result->type = jbvNumeric;
425                                         result->val.numeric = (Numeric) (data + INTALIGN(JBE_OFF(*v)));
426
427                                         result->estSize = 2 * sizeof(JEntry) +
428                                                 VARSIZE_ANY(result->val.numeric);
429                                 }
430                                 else if (JBE_ISBOOL(*v))
431                                 {
432                                         result->type = jbvBool;
433                                         result->val.boolean = JBE_ISBOOL_TRUE(*v) != 0;
434                                         result->estSize = sizeof(JEntry);
435                                 }
436                                 else
437                                 {
438                                         /*
439                                          * See header comments to understand why this never
440                                          * happens with arrays
441                                          */
442                                         result->type = jbvBinary;
443                                         result->val.binary.data = data + INTALIGN(JBE_OFF(*v));
444                                         result->val.binary.len = JBE_LEN(*v) -
445                                                 (INTALIGN(JBE_OFF(*v)) - JBE_OFF(*v));
446                                         result->estSize = 2 * sizeof(JEntry) + result->val.binary.len;
447                                 }
448
449                                 return result;
450                         }
451                         else
452                         {
453                                 if (difference < 0)
454                                         stopLow = stopMiddle + 1;
455                                 else
456                                         count = stopMiddle;
457                         }
458                 }
459
460                 if (lowbound)
461                         *lowbound = stopLow;
462         }
463
464         /* Not found */
465         pfree(result);
466         return NULL;
467 }
468
469 /*
470  * Get i-th value of Jsonb array from superheader.
471  *
472  * Returns palloc()'d copy of value.
473  */
474 JsonbValue *
475 getIthJsonbValueFromSuperHeader(JsonbSuperHeader sheader, uint32 i)
476 {
477         uint32          superheader = *(uint32 *) sheader;
478         JsonbValue *result;
479         JEntry     *array,
480                            *e;
481         char       *data;
482
483         result = palloc(sizeof(JsonbValue));
484
485         if (i >= (superheader & JB_CMASK))
486                 return NULL;
487
488         array = (JEntry *) (sheader + sizeof(uint32));
489
490         if (superheader & JB_FARRAY)
491         {
492                 e = array + i;
493                 data = (char *) (array + (superheader & JB_CMASK));
494         }
495         else
496         {
497                 elog(ERROR, "not a jsonb array");
498         }
499
500         if (JBE_ISNULL(*e))
501         {
502                 result->type = jbvNull;
503                 result->estSize = sizeof(JEntry);
504         }
505         else if (JBE_ISSTRING(*e))
506         {
507                 result->type = jbvString;
508                 result->val.string.val = data + JBE_OFF(*e);
509                 result->val.string.len = JBE_LEN(*e);
510                 result->estSize = sizeof(JEntry) + result->val.string.len;
511         }
512         else if (JBE_ISNUMERIC(*e))
513         {
514                 result->type = jbvNumeric;
515                 result->val.numeric = (Numeric) (data + INTALIGN(JBE_OFF(*e)));
516
517                 result->estSize = 2 * sizeof(JEntry) + VARSIZE_ANY(result->val.numeric);
518         }
519         else if (JBE_ISBOOL(*e))
520         {
521                 result->type = jbvBool;
522                 result->val.boolean = JBE_ISBOOL_TRUE(*e) != 0;
523                 result->estSize = sizeof(JEntry);
524         }
525         else
526         {
527                 result->type = jbvBinary;
528                 result->val.binary.data = data + INTALIGN(JBE_OFF(*e));
529                 result->val.binary.len = JBE_LEN(*e) - (INTALIGN(JBE_OFF(*e)) - JBE_OFF(*e));
530                 result->estSize = result->val.binary.len + 2 * sizeof(JEntry);
531         }
532
533         return result;
534 }
535
536 /*
537  * Push JsonbValue into JsonbParseState.
538  *
539  * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
540  * JsonbValue to a Jsonb.
541  *
542  * Initial state of *JsonbParseState is NULL, since it'll be allocated here
543  * originally (caller will get JsonbParseState back by reference).
544  *
545  * Only sequential tokens pertaining to non-container types should pass a
546  * JsonbValue.  There is one exception -- WJB_BEGIN_ARRAY callers may pass a
547  * "raw scalar" pseudo array to append that.
548  */
549 JsonbValue *
550 pushJsonbValue(JsonbParseState **pstate, int seq, JsonbValue *scalarVal)
551 {
552         JsonbValue *result = NULL;
553
554         switch (seq)
555         {
556                 case WJB_BEGIN_ARRAY:
557                         Assert(!scalarVal || scalarVal->val.array.rawScalar);
558                         *pstate = pushState(pstate);
559                         result = &(*pstate)->contVal;
560                         (*pstate)->contVal.type = jbvArray;
561                         (*pstate)->contVal.estSize = 3 * sizeof(JEntry);
562                         (*pstate)->contVal.val.array.nElems = 0;
563                         (*pstate)->contVal.val.array.rawScalar = (scalarVal &&
564                                                                                          scalarVal->val.array.rawScalar);
565                         if (scalarVal && scalarVal->val.array.nElems > 0)
566                         {
567                                 /* Assume that this array is still really a scalar */
568                                 Assert(scalarVal->type == jbvArray);
569                                 (*pstate)->size = scalarVal->val.array.nElems;
570                         }
571                         else
572                         {
573                                 (*pstate)->size = 4;
574                         }
575                         (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
576                                                                                                                 (*pstate)->size);
577                         break;
578                 case WJB_BEGIN_OBJECT:
579                         Assert(!scalarVal);
580                         *pstate = pushState(pstate);
581                         result = &(*pstate)->contVal;
582                         (*pstate)->contVal.type = jbvObject;
583                         (*pstate)->contVal.estSize = 3 * sizeof(JEntry);
584                         (*pstate)->contVal.val.object.nPairs = 0;
585                         (*pstate)->size = 4;
586                         (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
587                                                                                                                  (*pstate)->size);
588                         break;
589                 case WJB_KEY:
590                         Assert(scalarVal->type == jbvString);
591                         appendKey(*pstate, scalarVal);
592                         break;
593                 case WJB_VALUE:
594                         Assert(IsAJsonbScalar(scalarVal) ||
595                                    scalarVal->type == jbvBinary);
596                         appendValue(*pstate, scalarVal);
597                         break;
598                 case WJB_ELEM:
599                         Assert(IsAJsonbScalar(scalarVal) ||
600                                    scalarVal->type == jbvBinary);
601                         appendElement(*pstate, scalarVal);
602                         break;
603                 case WJB_END_OBJECT:
604                         uniqueifyJsonbObject(&(*pstate)->contVal);
605                 case WJB_END_ARRAY:
606                         /* Steps here common to WJB_END_OBJECT case */
607                         Assert(!scalarVal);
608                         result = &(*pstate)->contVal;
609
610                         /*
611                          * Pop stack and push current array/object as value in parent
612                          * array/object
613                          */
614                         *pstate = (*pstate)->next;
615                         if (*pstate)
616                         {
617                                 switch ((*pstate)->contVal.type)
618                                 {
619                                         case jbvArray:
620                                                 appendElement(*pstate, result);
621                                                 break;
622                                         case jbvObject:
623                                                 appendValue(*pstate, result);
624                                                 break;
625                                         default:
626                                                 elog(ERROR, "invalid jsonb container type");
627                                 }
628                         }
629                         break;
630                 default:
631                         elog(ERROR, "unrecognized jsonb sequential processing token");
632         }
633
634         return result;
635 }
636
637 /*
638  * Given a Jsonb superheader, expand to JsonbIterator to iterate over items
639  * fully expanded to in-memory representation for manipulation.
640  *
641  * See JsonbIteratorNext() for notes on memory management.
642  */
643 JsonbIterator *
644 JsonbIteratorInit(JsonbSuperHeader sheader)
645 {
646         JsonbIterator *it = palloc(sizeof(JsonbIterator));
647
648         iteratorFromContainerBuf(it, sheader);
649         it->parent = NULL;
650
651         return it;
652 }
653
654 /*
655  * Get next JsonbValue while iterating
656  *
657  * Caller should initially pass their own, original iterator.  They may get
658  * back a child iterator palloc()'d here instead.  The function can be relied
659  * on to free those child iterators, lest the memory allocated for highly
660  * nested objects become unreasonable, but only if callers don't end iteration
661  * early (by breaking upon having found something in a search, for example).
662  *
663  * Callers in such a scenario, that are particularly sensitive to leaking
664  * memory in a long-lived context may walk the ancestral tree from the final
665  * iterator we left them with to its oldest ancestor, pfree()ing as they go.
666  * They do not have to free any other memory previously allocated for iterators
667  * but not accessible as direct ancestors of the iterator they're last passed
668  * back.
669  *
670  * Returns "Jsonb sequential processing" token value.  Iterator "state"
671  * reflects the current stage of the process in a less granular fashion, and is
672  * mostly used here to track things internally with respect to particular
673  * iterators.
674  *
675  * Clients of this function should not have to handle any jbvBinary values
676  * (since recursive calls will deal with this), provided skipNested is false.
677  * It is our job to expand the jbvBinary representation without bothering them
678  * with it.  However, clients should not take it upon themselves to touch array
679  * or Object element/pair buffers, since their element/pair pointers are
680  * garbage.
681  */
682 int
683 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
684 {
685         JsonbIterState state;
686
687         /* Guard against stack overflow due to overly complex Jsonb */
688         check_stack_depth();
689
690         /* Recursive caller may have original caller's iterator */
691         if (*it == NULL)
692                 return WJB_DONE;
693
694         state = (*it)->state;
695
696         if ((*it)->containerType == JB_FARRAY)
697         {
698                 if (state == jbi_start)
699                 {
700                         /* Set v to array on first array call */
701                         val->type = jbvArray;
702                         val->val.array.nElems = (*it)->nElems;
703
704                         /*
705                          * v->val.array.elems is not actually set, because we aren't doing
706                          * a full conversion
707                          */
708                         val->val.array.rawScalar = (*it)->isScalar;
709                         (*it)->i = 0;
710                         /* Set state for next call */
711                         (*it)->state = jbi_elem;
712                         return WJB_BEGIN_ARRAY;
713                 }
714                 else if (state == jbi_elem)
715                 {
716                         if ((*it)->i >= (*it)->nElems)
717                         {
718                                 /*
719                                  * All elements within array already processed.  Report this
720                                  * to caller, and give it back original parent iterator (which
721                                  * independently tracks iteration progress at its level of
722                                  * nesting).
723                                  */
724                                 *it = freeAndGetParent(*it);
725                                 return WJB_END_ARRAY;
726                         }
727                         else if (formIterIsContainer(it, val, &(*it)->meta[(*it)->i++],
728                                                                                  skipNested))
729                         {
730                                 /*
731                                  * New child iterator acquired within formIterIsContainer.
732                                  * Recurse into container.  Don't directly return jbvBinary
733                                  * value to top-level client.
734                                  */
735                                 return JsonbIteratorNext(it, val, skipNested);
736                         }
737                         else
738                         {
739                                 /* Scalar item in array */
740                                 return WJB_ELEM;
741                         }
742                 }
743         }
744         else if ((*it)->containerType == JB_FOBJECT)
745         {
746                 if (state == jbi_start)
747                 {
748                         /* Set v to object on first object call */
749                         val->type = jbvObject;
750                         val->val.object.nPairs = (*it)->nElems;
751
752                         /*
753                          * v->val.object.pairs is not actually set, because we aren't
754                          * doing a full conversion
755                          */
756                         (*it)->i = 0;
757                         /* Set state for next call */
758                         (*it)->state = jbi_key;
759                         return WJB_BEGIN_OBJECT;
760                 }
761                 else if (state == jbi_key)
762                 {
763                         if ((*it)->i >= (*it)->nElems)
764                         {
765                                 /*
766                                  * All pairs within object already processed.  Report this to
767                                  * caller, and give it back original containing iterator
768                                  * (which independently tracks iteration progress at its level
769                                  * of nesting).
770                                  */
771                                 *it = freeAndGetParent(*it);
772                                 return WJB_END_OBJECT;
773                         }
774                         else
775                         {
776                                 /*
777                                  * Return binary item key (ensured by setting skipNested to
778                                  * false directly).  No child iterator, no further recursion.
779                                  * When control reaches here, it's probably from a recursive
780                                  * call.
781                                  */
782                                 if (formIterIsContainer(it, val, &(*it)->meta[(*it)->i * 2], false))
783                                         elog(ERROR, "unexpected container as object key");
784
785                                 Assert(val->type == jbvString);
786                                 /* Set state for next call */
787                                 (*it)->state = jbi_value;
788                                 return WJB_KEY;
789                         }
790                 }
791                 else if (state == jbi_value)
792                 {
793                         /* Set state for next call */
794                         (*it)->state = jbi_key;
795
796                         /*
797                          * Value may be a container, in which case we recurse with new,
798                          * child iterator.  If it is, don't bother !skipNested callers
799                          * with dealing with the jbvBinary representation.
800                          */
801                         if (formIterIsContainer(it, val, &(*it)->meta[((*it)->i++) * 2 + 1],
802                                                                         skipNested))
803                                 return JsonbIteratorNext(it, val, skipNested);
804                         else
805                                 return WJB_VALUE;
806                 }
807         }
808
809         elog(ERROR, "invalid iterator state");
810         return -1;
811 }
812
813 /*
814  * Worker for "contains" operator's function
815  *
816  * Formally speaking, containment is top-down, unordered subtree isomorphism.
817  *
818  * Takes iterators that belong to some container type.  These iterators
819  * "belong" to those values in the sense that they've just been initialized in
820  * respect of them by the caller (perhaps in a nested fashion).
821  *
822  * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
823  * We determine if mContained is contained within val.
824  */
825 bool
826 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
827 {
828         uint32          rval,
829                                 rcont;
830         JsonbValue      vval,
831                                 vcontained;
832
833         /*
834          * Guard against stack overflow due to overly complex Jsonb.
835          *
836          * Functions called here independently take this precaution, but that
837          * might not be sufficient since this is also a recursive function.
838          */
839         check_stack_depth();
840
841         rval = JsonbIteratorNext(val, &vval, false);
842         rcont = JsonbIteratorNext(mContained, &vcontained, false);
843
844         if (rval != rcont)
845         {
846                 /*
847                  * The differing return values can immediately be taken as indicating
848                  * two differing container types at this nesting level, which is
849                  * sufficient reason to give up entirely (but it should be the case
850                  * that they're both some container type).
851                  */
852                 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
853                 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
854                 return false;
855         }
856         else if (rcont == WJB_BEGIN_OBJECT)
857         {
858                 JsonbValue *lhsVal;             /* lhsVal is from pair in lhs object */
859
860                 Assert(vcontained.type == jbvObject);
861
862                 /* Work through rhs "is it contained within?" object */
863                 for (;;)
864                 {
865                         rcont = JsonbIteratorNext(mContained, &vcontained, false);
866
867                         /*
868                          * When we get through caller's rhs "is it contained within?"
869                          * object without failing to find one of its values, it's
870                          * contained.
871                          */
872                         if (rcont == WJB_END_OBJECT)
873                                 return true;
874
875                         Assert(rcont == WJB_KEY);
876
877                         /* First, find value by key... */
878                         lhsVal = findJsonbValueFromSuperHeader((*val)->buffer,
879                                                                                                    JB_FOBJECT,
880                                                                                                    NULL,
881                                                                                                    &vcontained);
882
883                         if (!lhsVal)
884                                 return false;
885
886                         /*
887                          * ...at this stage it is apparent that there is at least a key
888                          * match for this rhs pair.
889                          */
890                         rcont = JsonbIteratorNext(mContained, &vcontained, true);
891
892                         Assert(rcont == WJB_VALUE);
893
894                         /*
895                          * Compare rhs pair's value with lhs pair's value just found using
896                          * key
897                          */
898                         if (lhsVal->type != vcontained.type)
899                         {
900                                 return false;
901                         }
902                         else if (IsAJsonbScalar(lhsVal))
903                         {
904                                 if (compareJsonbScalarValue(lhsVal, &vcontained) != 0)
905                                         return false;
906                         }
907                         else
908                         {
909                                 /* Nested container value (object or array) */
910                                 JsonbIterator *nestval,
911                                                    *nestContained;
912
913                                 Assert(lhsVal->type == jbvBinary);
914                                 Assert(vcontained.type == jbvBinary);
915
916                                 nestval = JsonbIteratorInit(lhsVal->val.binary.data);
917                                 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
918
919                                 /*
920                                  * Match "value" side of rhs datum object's pair recursively.
921                                  * It's a nested structure.
922                                  *
923                                  * Note that nesting still has to "match up" at the right
924                                  * nesting sub-levels.  However, there need only be zero or
925                                  * more matching pairs (or elements) at each nesting level
926                                  * (provided the *rhs* pairs/elements *all* match on each
927                                  * level), which enables searching nested structures for a
928                                  * single String or other primitive type sub-datum quite
929                                  * effectively (provided the user constructed the rhs nested
930                                  * structure such that we "know where to look").
931                                  *
932                                  * In other words, the mapping of container nodes in the rhs
933                                  * "vcontained" Jsonb to internal nodes on the lhs is
934                                  * injective, and parent-child edges on the rhs must be mapped
935                                  * to parent-child edges on the lhs to satisfy the condition
936                                  * of containment (plus of course the mapped nodes must be
937                                  * equal).
938                                  */
939                                 if (!JsonbDeepContains(&nestval, &nestContained))
940                                         return false;
941                         }
942                 }
943         }
944         else if (rcont == WJB_BEGIN_ARRAY)
945         {
946                 JsonbValue *lhsConts = NULL;
947                 uint32          nLhsElems = vval.val.array.nElems;
948
949                 Assert(vcontained.type == jbvArray);
950
951                 /*
952                  * Handle distinction between "raw scalar" pseudo arrays, and real
953                  * arrays.
954                  *
955                  * A raw scalar may contain another raw scalar, and an array may
956                  * contain a raw scalar, but a raw scalar may not contain an array. We
957                  * don't do something like this for the object case, since objects can
958                  * only contain pairs, never raw scalars (a pair is represented by an
959                  * rhs object argument with a single contained pair).
960                  */
961                 if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
962                         return false;
963
964                 /* Work through rhs "is it contained within?" array */
965                 for (;;)
966                 {
967                         rcont = JsonbIteratorNext(mContained, &vcontained, true);
968
969                         /*
970                          * When we get through caller's rhs "is it contained within?"
971                          * array without failing to find one of its values, it's
972                          * contained.
973                          */
974                         if (rcont == WJB_END_ARRAY)
975                                 return true;
976
977                         Assert(rcont == WJB_ELEM);
978
979                         if (IsAJsonbScalar(&vcontained))
980                         {
981                                 if (!findJsonbValueFromSuperHeader((*val)->buffer,
982                                                                                                    JB_FARRAY,
983                                                                                                    NULL,
984                                                                                                    &vcontained))
985                                         return false;
986                         }
987                         else
988                         {
989                                 uint32          i;
990
991                                 /*
992                                  * If this is first container found in rhs array (at this
993                                  * depth), initialize temp lhs array of containers
994                                  */
995                                 if (lhsConts == NULL)
996                                 {
997                                         uint32          j = 0;
998
999                                         /* Make room for all possible values */
1000                                         lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1001
1002                                         for (i = 0; i < nLhsElems; i++)
1003                                         {
1004                                                 /* Store all lhs elements in temp array */
1005                                                 rcont = JsonbIteratorNext(val, &vval, true);
1006                                                 Assert(rcont == WJB_ELEM);
1007
1008                                                 if (vval.type == jbvBinary)
1009                                                         lhsConts[j++] = vval;
1010                                         }
1011
1012                                         /* No container elements in temp array, so give up now */
1013                                         if (j == 0)
1014                                                 return false;
1015
1016                                         /* We may have only partially filled array */
1017                                         nLhsElems = j;
1018                                 }
1019
1020                                 /* XXX: Nested array containment is O(N^2) */
1021                                 for (i = 0; i < nLhsElems; i++)
1022                                 {
1023                                         /* Nested container value (object or array) */
1024                                         JsonbIterator *nestval,
1025                                                            *nestContained;
1026                                         bool            contains;
1027
1028                                         nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1029                                         nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1030
1031                                         contains = JsonbDeepContains(&nestval, &nestContained);
1032
1033                                         if (nestval)
1034                                                 pfree(nestval);
1035                                         if (nestContained)
1036                                                 pfree(nestContained);
1037                                         if (contains)
1038                                                 break;
1039                                 }
1040
1041                                 /*
1042                                  * Report rhs container value is not contained if couldn't
1043                                  * match rhs container to *some* lhs cont
1044                                  */
1045                                 if (i == nLhsElems)
1046                                         return false;
1047                         }
1048                 }
1049         }
1050         else
1051         {
1052                 elog(ERROR, "invalid jsonb container type");
1053         }
1054
1055         elog(ERROR, "unexpectedly fell off end of jsonb container");
1056         return false;
1057 }
1058
1059 /*
1060  * Convert a Postgres text array to a Jsonb array, sorted and with
1061  * de-duplicated key elements.  This is used for searching an object for items
1062  * in the array, so we enforce that the number of strings cannot exceed
1063  * JSONB_MAX_PAIRS.
1064  */
1065 JsonbValue *
1066 arrayToJsonbSortedArray(ArrayType *array)
1067 {
1068         Datum      *key_datums;
1069         bool       *key_nulls;
1070         int                     elem_count;
1071         JsonbValue *result;
1072         int                     i,
1073                                 j;
1074
1075         /* Extract data for sorting */
1076         deconstruct_array(array, TEXTOID, -1, false, 'i', &key_datums, &key_nulls,
1077                                           &elem_count);
1078
1079         if (elem_count == 0)
1080                 return NULL;
1081
1082         /*
1083          * A text array uses at least eight bytes per element, so any overflow in
1084          * "key_count * sizeof(JsonbPair)" is small enough for palloc() to catch.
1085          * However, credible improvements to the array format could invalidate
1086          * that assumption.  Therefore, use an explicit check rather than relying
1087          * on palloc() to complain.
1088          */
1089         if (elem_count > JSONB_MAX_PAIRS)
1090                 ereport(ERROR,
1091                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1092                                  errmsg("number of array elements (%d) exceeds maximum allowed Jsonb pairs (%zu)",
1093                                                 elem_count, JSONB_MAX_PAIRS)));
1094
1095         result = palloc(sizeof(JsonbValue));
1096         result->type = jbvArray;
1097         result->val.array.rawScalar = false;
1098         result->val.array.elems = palloc(sizeof(JsonbPair) * elem_count);
1099
1100         for (i = 0, j = 0; i < elem_count; i++)
1101         {
1102                 if (!key_nulls[i])
1103                 {
1104                         result->val.array.elems[j].type = jbvString;
1105                         result->val.array.elems[j].val.string.val = VARDATA(key_datums[i]);
1106                         result->val.array.elems[j].val.string.len = VARSIZE(key_datums[i]) - VARHDRSZ;
1107                         j++;
1108                 }
1109         }
1110         result->val.array.nElems = j;
1111
1112         uniqueifyJsonbArray(result);
1113         return result;
1114 }
1115
1116 /*
1117  * Hash a JsonbValue scalar value, mixing the hash value into an existing
1118  * hash provided by the caller.
1119  *
1120  * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1121  * flags.
1122  */
1123 void
1124 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1125 {
1126         uint32          tmp;
1127
1128         /* Compute hash value for scalarVal */
1129         switch (scalarVal->type)
1130         {
1131                 case jbvNull:
1132                         tmp = 0x01;
1133                         break;
1134                 case jbvString:
1135                         tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1136                                                                                   scalarVal->val.string.len));
1137                         break;
1138                 case jbvNumeric:
1139                         /* Must hash equal numerics to equal hash codes */
1140                         tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1141                                                                    NumericGetDatum(scalarVal->val.numeric)));
1142                         break;
1143                 case jbvBool:
1144                         tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1145                         break;
1146                 default:
1147                         elog(ERROR, "invalid jsonb scalar type");
1148                         tmp = 0;                        /* keep compiler quiet */
1149                         break;
1150         }
1151
1152         /*
1153          * Combine hash values of successive keys, values and elements by rotating
1154          * the previous value left 1 bit, then XOR'ing in the new
1155          * key/value/element's hash value.
1156          */
1157         *hash = (*hash << 1) | (*hash >> 31);
1158         *hash ^= tmp;
1159 }
1160
1161 /*
1162  * Are two scalar JsonbValues of the same type a and b equal?
1163  *
1164  * Does not use lexical comparisons.  Therefore, it is essentially that this
1165  * never be used against Strings for anything other than searching for values
1166  * within a single jsonb.
1167  */
1168 static int
1169 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1170 {
1171         if (aScalar->type == bScalar->type)
1172         {
1173                 switch (aScalar->type)
1174                 {
1175                         case jbvNull:
1176                                 return 0;
1177                         case jbvString:
1178                                 return lengthCompareJsonbStringValue(aScalar, bScalar, NULL);
1179                         case jbvNumeric:
1180                                 return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1181                                                                            PointerGetDatum(aScalar->val.numeric),
1182                                                                          PointerGetDatum(bScalar->val.numeric)));
1183                         case jbvBool:
1184                                 if (aScalar->val.boolean != bScalar->val.boolean)
1185                                         return (aScalar->val.boolean > bScalar->val.boolean) ? 1 : -1;
1186                                 else
1187                                         return 0;
1188                         default:
1189                                 elog(ERROR, "invalid jsonb scalar type");
1190                 }
1191         }
1192         elog(ERROR, "jsonb scalar type mismatch");
1193         return -1;
1194 }
1195
1196 /*
1197  * Standard lexical qsort() comparator of jsonb strings.
1198  *
1199  * Sorts strings lexically, using the default database collation.  Used by
1200  * B-Tree operators, where a lexical sort order is generally expected.
1201  */
1202 static int
1203 lexicalCompareJsonbStringValue(const void *a, const void *b)
1204 {
1205         const JsonbValue *va = (const JsonbValue *) a;
1206         const JsonbValue *vb = (const JsonbValue *) b;
1207
1208         Assert(va->type == jbvString);
1209         Assert(vb->type == jbvString);
1210
1211         return varstr_cmp(va->val.string.val, va->val.string.len, vb->val.string.val,
1212                                           vb->val.string.len, DEFAULT_COLLATION_OID);
1213 }
1214
1215 /*
1216  * Given a JsonbValue, convert to Jsonb and store in preallocated Jsonb buffer
1217  * sufficiently large to fit the value
1218  */
1219 static Size
1220 convertJsonb(JsonbValue *val, Jsonb *buffer)
1221 {
1222         convertState state;
1223         Size            len;
1224
1225         /* Should not already have binary representation */
1226         Assert(val->type != jbvBinary);
1227
1228         state.buffer = buffer;
1229         /* Start from superheader */
1230         state.ptr = VARDATA(state.buffer);
1231         state.levelSz = 8;
1232         state.allState = palloc(sizeof(convertLevel) * state.levelSz);
1233
1234         walkJsonbValueConversion(val, &state, 0);
1235
1236         len = state.ptr - VARDATA(state.buffer);
1237
1238         Assert(len <= val->estSize);
1239         return len;
1240 }
1241
1242 /*
1243  * Walk the tree representation of Jsonb, as part of the process of converting
1244  * a JsonbValue to a Jsonb.
1245  *
1246  * This high-level function takes care of recursion into sub-containers, but at
1247  * the top level calls putJsonbValueConversion once per sequential processing
1248  * token (in a manner similar to generic iteration).
1249  */
1250 static void
1251 walkJsonbValueConversion(JsonbValue *val, convertState *cstate,
1252                                                  uint32 nestlevel)
1253 {
1254         int                     i;
1255
1256         check_stack_depth();
1257
1258         if (!val)
1259                 return;
1260
1261         switch (val->type)
1262         {
1263                 case jbvArray:
1264
1265                         putJsonbValueConversion(cstate, val, WJB_BEGIN_ARRAY, nestlevel);
1266                         for (i = 0; i < val->val.array.nElems; i++)
1267                         {
1268                                 if (IsAJsonbScalar(&val->val.array.elems[i]) ||
1269                                         val->val.array.elems[i].type == jbvBinary)
1270                                         putJsonbValueConversion(cstate, val->val.array.elems + i,
1271                                                                                         WJB_ELEM, nestlevel);
1272                                 else
1273                                         walkJsonbValueConversion(val->val.array.elems + i, cstate,
1274                                                                                          nestlevel + 1);
1275                         }
1276                         putJsonbValueConversion(cstate, val, WJB_END_ARRAY, nestlevel);
1277
1278                         break;
1279                 case jbvObject:
1280
1281                         putJsonbValueConversion(cstate, val, WJB_BEGIN_OBJECT, nestlevel);
1282                         for (i = 0; i < val->val.object.nPairs; i++)
1283                         {
1284                                 putJsonbValueConversion(cstate, &val->val.object.pairs[i].key,
1285                                                                                 WJB_KEY, nestlevel);
1286
1287                                 if (IsAJsonbScalar(&val->val.object.pairs[i].value) ||
1288                                         val->val.object.pairs[i].value.type == jbvBinary)
1289                                         putJsonbValueConversion(cstate,
1290                                                                                         &val->val.object.pairs[i].value,
1291                                                                                         WJB_VALUE, nestlevel);
1292                                 else
1293                                         walkJsonbValueConversion(&val->val.object.pairs[i].value,
1294                                                                                          cstate, nestlevel + 1);
1295                         }
1296                         putJsonbValueConversion(cstate, val, WJB_END_OBJECT, nestlevel);
1297
1298                         break;
1299                 default:
1300                         elog(ERROR, "unknown type of jsonb container");
1301         }
1302 }
1303
1304 /*
1305  * walkJsonbValueConversion() worker.  Add padding sufficient to int-align our
1306  * access to conversion buffer.
1307  */
1308 static inline
1309 short
1310 addPaddingInt(convertState *cstate)
1311 {
1312         short           padlen,
1313                                 p;
1314
1315         padlen = INTALIGN(cstate->ptr - VARDATA(cstate->buffer)) -
1316                 (cstate->ptr - VARDATA(cstate->buffer));
1317
1318         for (p = padlen; p > 0; p--)
1319         {
1320                 *cstate->ptr = '\0';
1321                 cstate->ptr++;
1322         }
1323
1324         return padlen;
1325 }
1326
1327 /*
1328  * walkJsonbValueConversion() worker.
1329  *
1330  * As part of the process of converting an arbitrary JsonbValue to a Jsonb,
1331  * copy over an arbitrary individual JsonbValue.  This function may copy any
1332  * type of value, even containers (Objects/arrays).  However, it is not
1333  * responsible for recursive aspects of walking the tree (so only top-level
1334  * Object/array details are handled).
1335  *
1336  * No details about their keys/values/elements are handled recursively -
1337  * rather, the function is called as required for the start of an Object/Array,
1338  * and the end (i.e.  there is one call per sequential processing WJB_* token).
1339  */
1340 static void
1341 putJsonbValueConversion(convertState *cstate, JsonbValue *val, uint32 flags,
1342                                                 uint32 level)
1343 {
1344         if (level == cstate->levelSz)
1345         {
1346                 cstate->levelSz *= 2;
1347                 cstate->allState = repalloc(cstate->allState,
1348                                                                         sizeof(convertLevel) * cstate->levelSz);
1349         }
1350
1351         cstate->contPtr = cstate->allState + level;
1352
1353         if (flags & (WJB_BEGIN_ARRAY | WJB_BEGIN_OBJECT))
1354         {
1355                 Assert(((flags & WJB_BEGIN_ARRAY) && val->type == jbvArray) ||
1356                            ((flags & WJB_BEGIN_OBJECT) && val->type == jbvObject));
1357
1358                 /* Initialize pointer into conversion buffer at this level */
1359                 cstate->contPtr->begin = cstate->ptr;
1360
1361                 addPaddingInt(cstate);
1362
1363                 /* Initialize everything else at this level */
1364                 cstate->contPtr->header = (uint32 *) cstate->ptr;
1365                 /* Advance past header */
1366                 cstate->ptr += sizeof(uint32);
1367                 cstate->contPtr->meta = (JEntry *) cstate->ptr;
1368                 cstate->contPtr->i = 0;
1369
1370                 if (val->type == jbvArray)
1371                 {
1372                         *cstate->contPtr->header = val->val.array.nElems | JB_FARRAY;
1373                         cstate->ptr += sizeof(JEntry) * val->val.array.nElems;
1374
1375                         if (val->val.array.rawScalar)
1376                         {
1377                                 Assert(val->val.array.nElems == 1);
1378                                 Assert(level == 0);
1379                                 *cstate->contPtr->header |= JB_FSCALAR;
1380                         }
1381                 }
1382                 else
1383                 {
1384                         *cstate->contPtr->header = val->val.object.nPairs | JB_FOBJECT;
1385                         cstate->ptr += sizeof(JEntry) * val->val.object.nPairs * 2;
1386                 }
1387         }
1388         else if (flags & WJB_ELEM)
1389         {
1390                 putScalarConversion(cstate, val, level, cstate->contPtr->i);
1391                 cstate->contPtr->i++;
1392         }
1393         else if (flags & WJB_KEY)
1394         {
1395                 Assert(val->type == jbvString);
1396
1397                 putScalarConversion(cstate, val, level, cstate->contPtr->i * 2);
1398         }
1399         else if (flags & WJB_VALUE)
1400         {
1401                 putScalarConversion(cstate, val, level, cstate->contPtr->i * 2 + 1);
1402                 cstate->contPtr->i++;
1403         }
1404         else if (flags & (WJB_END_ARRAY | WJB_END_OBJECT))
1405         {
1406                 convertLevel *prevPtr;  /* Prev container pointer */
1407                 uint32          len,
1408                                         i;
1409
1410                 Assert(((flags & WJB_END_ARRAY) && val->type == jbvArray) ||
1411                            ((flags & WJB_END_OBJECT) && val->type == jbvObject));
1412
1413                 if (level == 0)
1414                         return;
1415
1416                 len = cstate->ptr - (char *) cstate->contPtr->begin;
1417
1418                 prevPtr = cstate->contPtr - 1;
1419
1420                 if (*prevPtr->header & JB_FARRAY)
1421                 {
1422                         i = prevPtr->i;
1423
1424                         prevPtr->meta[i].header = JENTRY_ISNEST;
1425
1426                         if (i == 0)
1427                                 prevPtr->meta[0].header |= JENTRY_ISFIRST | len;
1428                         else
1429                                 prevPtr->meta[i].header |=
1430                                         (prevPtr->meta[i - 1].header & JENTRY_POSMASK) + len;
1431                 }
1432                 else if (*prevPtr->header & JB_FOBJECT)
1433                 {
1434                         i = 2 * prevPtr->i + 1;         /* Value, not key */
1435
1436                         prevPtr->meta[i].header = JENTRY_ISNEST;
1437
1438                         prevPtr->meta[i].header |=
1439                                 (prevPtr->meta[i - 1].header & JENTRY_POSMASK) + len;
1440                 }
1441                 else
1442                 {
1443                         elog(ERROR, "invalid jsonb container type");
1444                 }
1445
1446                 Assert(cstate->ptr - cstate->contPtr->begin <= val->estSize);
1447                 prevPtr->i++;
1448         }
1449         else
1450         {
1451                 elog(ERROR, "unknown flag encountered during jsonb tree walk");
1452         }
1453 }
1454
1455 /*
1456  * As part of the process of converting an arbitrary JsonbValue to a Jsonb,
1457  * serialize and copy a scalar value into buffer.
1458  *
1459  * This is a worker function for putJsonbValueConversion() (itself a worker for
1460  * walkJsonbValueConversion()).  It handles the details with regard to Jentry
1461  * metadata peculiar to each scalar type.
1462  */
1463 static void
1464 putScalarConversion(convertState *cstate, JsonbValue *scalarVal, uint32 level,
1465                                         uint32 i)
1466 {
1467         int                     numlen;
1468         short           padlen;
1469
1470         cstate->contPtr = cstate->allState + level;
1471
1472         if (i == 0)
1473                 cstate->contPtr->meta[0].header = JENTRY_ISFIRST;
1474         else
1475                 cstate->contPtr->meta[i].header = 0;
1476
1477         switch (scalarVal->type)
1478         {
1479                 case jbvNull:
1480                         cstate->contPtr->meta[i].header |= JENTRY_ISNULL;
1481
1482                         if (i > 0)
1483                                 cstate->contPtr->meta[i].header |=
1484                                         cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK;
1485                         break;
1486                 case jbvString:
1487                         memcpy(cstate->ptr, scalarVal->val.string.val, scalarVal->val.string.len);
1488                         cstate->ptr += scalarVal->val.string.len;
1489
1490                         if (i == 0)
1491                                 cstate->contPtr->meta[0].header |= scalarVal->val.string.len;
1492                         else
1493                                 cstate->contPtr->meta[i].header |=
1494                                         (cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK) +
1495                                         scalarVal->val.string.len;
1496                         break;
1497                 case jbvNumeric:
1498                         numlen = VARSIZE_ANY(scalarVal->val.numeric);
1499                         padlen = addPaddingInt(cstate);
1500
1501                         memcpy(cstate->ptr, scalarVal->val.numeric, numlen);
1502                         cstate->ptr += numlen;
1503
1504                         cstate->contPtr->meta[i].header |= JENTRY_ISNUMERIC;
1505                         if (i == 0)
1506                                 cstate->contPtr->meta[0].header |= padlen + numlen;
1507                         else
1508                                 cstate->contPtr->meta[i].header |=
1509                                         (cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK)
1510                                         + padlen + numlen;
1511                         break;
1512                 case jbvBool:
1513                         cstate->contPtr->meta[i].header |= (scalarVal->val.boolean) ?
1514                                 JENTRY_ISTRUE : JENTRY_ISFALSE;
1515
1516                         if (i > 0)
1517                                 cstate->contPtr->meta[i].header |=
1518                                         cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK;
1519                         break;
1520                 default:
1521                         elog(ERROR, "invalid jsonb scalar type");
1522         }
1523 }
1524
1525 /*
1526  * Given superheader pointer into buffer, initialize iterator.  Must be a
1527  * container type.
1528  */
1529 static void
1530 iteratorFromContainerBuf(JsonbIterator *it, JsonbSuperHeader sheader)
1531 {
1532         uint32          superheader = *(uint32 *) sheader;
1533
1534         it->containerType = superheader & (JB_FARRAY | JB_FOBJECT);
1535         it->nElems = superheader & JB_CMASK;
1536         it->buffer = sheader;
1537
1538         /* Array starts just after header */
1539         it->meta = (JEntry *) (sheader + sizeof(uint32));
1540         it->state = jbi_start;
1541
1542         switch (it->containerType)
1543         {
1544                 case JB_FARRAY:
1545                         it->dataProper =
1546                                 (char *) it->meta + it->nElems * sizeof(JEntry);
1547                         it->isScalar = (superheader & JB_FSCALAR) != 0;
1548                         /* This is either a "raw scalar", or an array */
1549                         Assert(!it->isScalar || it->nElems == 1);
1550                         break;
1551                 case JB_FOBJECT:
1552
1553                         /*
1554                          * Offset reflects that nElems indicates JsonbPairs in an object.
1555                          * Each key and each value contain Jentry metadata just the same.
1556                          */
1557                         it->dataProper =
1558                                 (char *) it->meta + it->nElems * sizeof(JEntry) * 2;
1559                         break;
1560                 default:
1561                         elog(ERROR, "unknown type of jsonb container");
1562         }
1563 }
1564
1565 /*
1566  * JsonbIteratorNext() worker
1567  *
1568  * Returns bool indicating if v was a non-jbvBinary container, and thus if
1569  * further recursion is required by caller (according to its skipNested
1570  * preference).  If it is required, we set the caller's iterator for further
1571  * recursion into the nested value.  If we're going to skip nested items, just
1572  * set v to a jbvBinary value, but don't set caller's iterator.
1573  *
1574  * Unlike with containers (either in this function or in any
1575  * JsonbIteratorNext() infrastructure), we fully convert from what is
1576  * ultimately a Jsonb on-disk representation, to a JsonbValue in-memory
1577  * representation (for scalar values only).  JsonbIteratorNext() initializes
1578  * container Jsonbvalues, but without a sane private buffer.  For scalar values
1579  * it has to be done for real (even if we don't actually allocate more memory
1580  * to do this.  The point is that our JsonbValues scalars can be passed around
1581  * anywhere).
1582  */
1583 static bool
1584 formIterIsContainer(JsonbIterator **it, JsonbValue *val, JEntry *ent,
1585                                         bool skipNested)
1586 {
1587         if (JBE_ISNULL(*ent))
1588         {
1589                 val->type = jbvNull;
1590                 val->estSize = sizeof(JEntry);
1591
1592                 return false;
1593         }
1594         else if (JBE_ISSTRING(*ent))
1595         {
1596                 val->type = jbvString;
1597                 val->val.string.val = (*it)->dataProper + JBE_OFF(*ent);
1598                 val->val.string.len = JBE_LEN(*ent);
1599                 val->estSize = sizeof(JEntry) + val->val.string.len;
1600
1601                 return false;
1602         }
1603         else if (JBE_ISNUMERIC(*ent))
1604         {
1605                 val->type = jbvNumeric;
1606                 val->val.numeric = (Numeric) ((*it)->dataProper + INTALIGN(JBE_OFF(*ent)));
1607
1608                 val->estSize = 2 * sizeof(JEntry) + VARSIZE_ANY(val->val.numeric);
1609
1610                 return false;
1611         }
1612         else if (JBE_ISBOOL(*ent))
1613         {
1614                 val->type = jbvBool;
1615                 val->val.boolean = JBE_ISBOOL_TRUE(*ent) != 0;
1616                 val->estSize = sizeof(JEntry);
1617
1618                 return false;
1619         }
1620         else if (skipNested)
1621         {
1622                 val->type = jbvBinary;
1623                 val->val.binary.data = (*it)->dataProper + INTALIGN(JBE_OFF(*ent));
1624                 val->val.binary.len = JBE_LEN(*ent) - (INTALIGN(JBE_OFF(*ent)) - JBE_OFF(*ent));
1625                 val->estSize = val->val.binary.len + 2 * sizeof(JEntry);
1626
1627                 return false;
1628         }
1629         else
1630         {
1631                 /*
1632                  * Must be container type, so setup caller's iterator to point to
1633                  * that, and return indication of that.
1634                  *
1635                  * Get child iterator.
1636                  */
1637                 JsonbIterator *child = palloc(sizeof(JsonbIterator));
1638
1639                 iteratorFromContainerBuf(child,
1640                                                                  (*it)->dataProper + INTALIGN(JBE_OFF(*ent)));
1641
1642                 child->parent = *it;
1643                 *it = child;
1644
1645                 return true;
1646         }
1647 }
1648
1649 /*
1650  * JsonbIteratorNext() worker:  Return parent, while freeing memory for current
1651  * iterator
1652  */
1653 static JsonbIterator *
1654 freeAndGetParent(JsonbIterator *it)
1655 {
1656         JsonbIterator *v = it->parent;
1657
1658         pfree(it);
1659         return v;
1660 }
1661
1662 /*
1663  * pushJsonbValue() worker:  Iteration-like forming of Jsonb
1664  */
1665 static JsonbParseState *
1666 pushState(JsonbParseState **pstate)
1667 {
1668         JsonbParseState *ns = palloc(sizeof(JsonbParseState));
1669
1670         ns->next = *pstate;
1671         return ns;
1672 }
1673
1674 /*
1675  * pushJsonbValue() worker:  Append a pair key to state when generating a Jsonb
1676  */
1677 static void
1678 appendKey(JsonbParseState *pstate, JsonbValue *string)
1679 {
1680         JsonbValue *object = &pstate->contVal;
1681
1682         Assert(object->type == jbvObject);
1683         Assert(string->type == jbvString);
1684
1685         if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
1686                 ereport(ERROR,
1687                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1688                                  errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
1689                                                 JSONB_MAX_PAIRS)));
1690
1691         if (object->val.object.nPairs >= pstate->size)
1692         {
1693                 pstate->size *= 2;
1694                 object->val.object.pairs = repalloc(object->val.object.pairs,
1695                                                                                         sizeof(JsonbPair) * pstate->size);
1696         }
1697
1698         object->val.object.pairs[object->val.object.nPairs].key = *string;
1699         object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
1700
1701         object->estSize += string->estSize;
1702 }
1703
1704 /*
1705  * pushJsonbValue() worker:  Append a pair value to state when generating a
1706  * Jsonb
1707  */
1708 static void
1709 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
1710 {
1711         JsonbValue *object = &pstate->contVal;
1712
1713         Assert(object->type == jbvObject);
1714
1715         object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
1716         object->estSize += scalarVal->estSize;
1717 }
1718
1719 /*
1720  * pushJsonbValue() worker:  Append an element to state when generating a Jsonb
1721  */
1722 static void
1723 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
1724 {
1725         JsonbValue *array = &pstate->contVal;
1726
1727         Assert(array->type == jbvArray);
1728
1729         if (array->val.array.nElems >= JSONB_MAX_ELEMS)
1730                 ereport(ERROR,
1731                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1732                                  errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
1733                                                 JSONB_MAX_ELEMS)));
1734
1735         if (array->val.array.nElems >= pstate->size)
1736         {
1737                 pstate->size *= 2;
1738                 array->val.array.elems = repalloc(array->val.array.elems,
1739                                                                                   sizeof(JsonbValue) * pstate->size);
1740         }
1741
1742         array->val.array.elems[array->val.array.nElems++] = *scalarVal;
1743         array->estSize += scalarVal->estSize;
1744 }
1745
1746 /*
1747  * Compare two jbvString JsonbValue values, a and b.
1748  *
1749  * This is a special qsort_arg() comparator used to sort strings in certain
1750  * internal contexts where it is sufficient to have a well-defined sort order.
1751  * In particular, object pair keys are sorted according to this criteria to
1752  * facilitate cheap binary searches where we don't care about lexical sort
1753  * order.
1754  *
1755  * a and b are first sorted based on their length.  If a tie-breaker is
1756  * required, only then do we consider string binary equality.
1757  *
1758  * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1759  * to true iff a and b have full binary equality, since some callers have an
1760  * interest in whether the two values are equal or merely equivalent.
1761  */
1762 static int
1763 lengthCompareJsonbStringValue(const void *a, const void *b, void *binequal)
1764 {
1765         const JsonbValue *va = (const JsonbValue *) a;
1766         const JsonbValue *vb = (const JsonbValue *) b;
1767         int                     res;
1768
1769         Assert(va->type == jbvString);
1770         Assert(vb->type == jbvString);
1771
1772         if (va->val.string.len == vb->val.string.len)
1773         {
1774                 res = memcmp(va->val.string.val, vb->val.string.val, va->val.string.len);
1775                 if (res == 0 && binequal)
1776                         *((bool *) binequal) = true;
1777         }
1778         else
1779         {
1780                 res = (va->val.string.len > vb->val.string.len) ? 1 : -1;
1781         }
1782
1783         return res;
1784 }
1785
1786 /*
1787  * qsort_arg() comparator to compare JsonbPair values.
1788  *
1789  * Function implemented in terms of lengthCompareJsonbStringValue(), and thus the
1790  * same "arg setting" hack will be applied here in respect of the pair's key
1791  * values.
1792  *
1793  * N.B: String comparisons here are "length-wise"
1794  *
1795  * Pairs with equals keys are ordered such that the order field is respected.
1796  */
1797 static int
1798 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1799 {
1800         const JsonbPair *pa = (const JsonbPair *) a;
1801         const JsonbPair *pb = (const JsonbPair *) b;
1802         int                     res;
1803
1804         res = lengthCompareJsonbStringValue(&pa->key, &pb->key, binequal);
1805
1806         /*
1807          * Guarantee keeping order of equal pair.  Unique algorithm will prefer
1808          * first element as value.
1809          */
1810         if (res == 0)
1811                 res = (pa->order > pb->order) ? -1 : 1;
1812
1813         return res;
1814 }
1815
1816 /*
1817  * Sort and unique-ify pairs in JsonbValue object
1818  */
1819 static void
1820 uniqueifyJsonbObject(JsonbValue *object)
1821 {
1822         bool            hasNonUniq = false;
1823
1824         Assert(object->type == jbvObject);
1825
1826         if (object->val.object.nPairs > 1)
1827                 qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1828                                   lengthCompareJsonbPair, &hasNonUniq);
1829
1830         if (hasNonUniq)
1831         {
1832                 JsonbPair  *ptr = object->val.object.pairs + 1,
1833                                    *res = object->val.object.pairs;
1834
1835                 while (ptr - object->val.object.pairs < object->val.object.nPairs)
1836                 {
1837                         /* Avoid copying over duplicate */
1838                         if (lengthCompareJsonbStringValue(ptr, res, NULL) == 0)
1839                         {
1840                                 object->estSize -= ptr->key.estSize + ptr->value.estSize;
1841                         }
1842                         else
1843                         {
1844                                 res++;
1845                                 if (ptr != res)
1846                                         memcpy(res, ptr, sizeof(JsonbPair));
1847                         }
1848                         ptr++;
1849                 }
1850
1851                 object->val.object.nPairs = res + 1 - object->val.object.pairs;
1852         }
1853 }
1854
1855 /*
1856  * Sort and unique-ify JsonbArray.
1857  *
1858  * Sorting uses internal ordering.
1859  */
1860 static void
1861 uniqueifyJsonbArray(JsonbValue *array)
1862 {
1863         bool            hasNonUniq = false;
1864
1865         Assert(array->type == jbvArray);
1866
1867         /*
1868          * Actually sort values, determining if any were equal on the basis of
1869          * full binary equality (rather than just having the same string length).
1870          */
1871         if (array->val.array.nElems > 1)
1872                 qsort_arg(array->val.array.elems, array->val.array.nElems,
1873                                   sizeof(JsonbValue), lengthCompareJsonbStringValue,
1874                                   &hasNonUniq);
1875
1876         if (hasNonUniq)
1877         {
1878                 JsonbValue *ptr = array->val.array.elems + 1,
1879                                    *res = array->val.array.elems;
1880
1881                 while (ptr - array->val.array.elems < array->val.array.nElems)
1882                 {
1883                         /* Avoid copying over duplicate */
1884                         if (lengthCompareJsonbStringValue(ptr, res, NULL) != 0)
1885                         {
1886                                 res++;
1887                                 *res = *ptr;
1888                         }
1889
1890                         ptr++;
1891                 }
1892
1893                 array->val.array.nElems = res + 1 - array->val.array.elems;
1894         }
1895 }