]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/jsonb_util.c
Introduce jsonb, a structured format for storing json.
[postgresql] / src / backend / utils / adt / jsonb_util.c
1 /*-------------------------------------------------------------------------
2  *
3  * jsonb_util.c
4  *        Utilities for jsonb datatype
5  *
6  * Copyright (c) 2014, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/jsonb_util.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "access/hash.h"
17 #include "catalog/pg_collation.h"
18 #include "catalog/pg_type.h"
19 #include "miscadmin.h"
20 #include "utils/builtins.h"
21 #include "utils/jsonb.h"
22 #include "utils/memutils.h"
23
24 /*
25  * Twice as many values may be stored within pairs (for an Object) than within
26  * elements (for an Array), modulo the current MaxAllocSize limitation.  Note
27  * that JSONB_MAX_PAIRS is derived from the number of possible pairs, not
28  * values (as is the case for arrays and their elements), because we're
29  * concerned about limitations on the representation of the number of pairs.
30  * Over twice the memory is required to store n JsonbPairs as n JsonbValues.
31  * It only takes exactly twice as much disk space for storage, though.  The
32  * JsonbPair (not an actual pair of values) representation is used here because
33  * that is what is subject to the MaxAllocSize restriction when building an
34  * object.
35  */
36 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JENTRY_POSMASK))
37 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), \
38                                                          JENTRY_POSMASK))
39
40 /*
41  * State used while converting an arbitrary JsonbValue into a Jsonb value
42  * (4-byte varlena uncompressed representation of a Jsonb)
43  *
44  * ConvertLevel:  Bookkeeping around particular level when converting.
45  */
46 typedef struct convertLevel
47 {
48         uint32          i;              /* Iterates once per element, or once per pair */
49         uint32     *header; /* Pointer to current container header */
50         JEntry     *meta;       /* This level's metadata */
51         char       *begin;      /* Pointer into convertState.buffer */
52 } convertLevel;
53
54 /*
55  * convertState:  Overall bookkeeping state for conversion
56  */
57 typedef struct convertState
58 {
59         /* Preallocated buffer in which to form varlena/Jsonb value */
60         Jsonb                      *buffer;
61         /* Pointer into buffer */
62         char                       *ptr;
63
64         /* State for  */
65         convertLevel       *allState,   /* Overall state array */
66                                            *contPtr;    /* Cur container pointer (in allState) */
67
68         /* Current size of buffer containing allState array */
69         Size                            levelSz;
70
71 }       convertState;
72
73 static int compareJsonbScalarValue(JsonbValue * a, JsonbValue * b);
74 static int lexicalCompareJsonbStringValue(const void *a, const void *b);
75 static Size convertJsonb(JsonbValue * val, Jsonb* buffer);
76 static inline short addPaddingInt(convertState * cstate);
77 static void walkJsonbValueConversion(JsonbValue * val, convertState * cstate,
78                                                                          uint32 nestlevel);
79 static void putJsonbValueConversion(convertState * cstate, JsonbValue * val,
80                                                                         uint32 flags, uint32 level);
81 static void putScalarConversion(convertState * cstate, JsonbValue * scalarVal,
82                                                                 uint32 level, uint32 i);
83 static void iteratorFromContainerBuf(JsonbIterator * it, char *buffer);
84 static bool formIterIsContainer(JsonbIterator ** it, JsonbValue * val,
85                                                                 JEntry * ent, bool skipNested);
86 static JsonbIterator *freeAndGetParent(JsonbIterator * it);
87 static JsonbParseState *pushState(JsonbParseState ** pstate);
88 static void appendKey(JsonbParseState * pstate, JsonbValue * scalarVal);
89 static void appendValue(JsonbParseState * pstate, JsonbValue * scalarVal);
90 static void appendElement(JsonbParseState * pstate, JsonbValue * scalarVal);
91 static int lengthCompareJsonbStringValue(const void *a, const void *b, void *arg);
92 static int lengthCompareJsonbPair(const void *a, const void *b, void *arg);
93 static void uniqueifyJsonbObject(JsonbValue * object);
94 static void uniqueifyJsonbArray(JsonbValue * array);
95
96 /*
97  * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
98  *
99  * There isn't a JsonbToJsonbValue(), because generally we find it more
100  * convenient to directly iterate through the Jsonb representation and only
101  * really convert nested scalar values.  formIterIsContainer() does this, so
102  * that clients of the iteration code don't have to directly deal with the
103  * binary representation (JsonbDeepContains() is a notable exception, although
104  * all exceptions are internal to this module).  In general, functions that
105  * accept a JsonbValue argument are concerned with the manipulation of scalar
106  * values, or simple containers of scalar values, where it would be
107  * inconvenient to deal with a great amount of other state.
108  */
109 Jsonb *
110 JsonbValueToJsonb(JsonbValue * val)
111 {
112         Jsonb      *out;
113         Size            sz;
114
115         if (IsAJsonbScalar(val))
116         {
117                 /* Scalar value */
118                 JsonbParseState *pstate = NULL;
119                 JsonbValue *res;
120                 JsonbValue      scalarArray;
121
122                 scalarArray.type = jbvArray;
123                 scalarArray.array.rawScalar = true;
124                 scalarArray.array.nElems = 1;
125
126                 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
127                 pushJsonbValue(&pstate, WJB_ELEM, val);
128                 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
129
130                 out = palloc(VARHDRSZ + res->estSize);
131                 sz = convertJsonb(res, out);
132                 Assert(sz <= res->estSize);
133                 SET_VARSIZE(out, sz + VARHDRSZ);
134         }
135         else if (val->type == jbvObject || val->type == jbvArray)
136         {
137                 out = palloc(VARHDRSZ + val->estSize);
138                 sz = convertJsonb(val, out);
139                 Assert(sz <= val->estSize);
140                 SET_VARSIZE(out, VARHDRSZ + sz);
141         }
142         else
143         {
144                 Assert(val->type == jbvBinary);
145                 out = palloc(VARHDRSZ + val->binary.len);
146                 SET_VARSIZE(out, VARHDRSZ + val->binary.len);
147                 memcpy(VARDATA(out), val->binary.data, val->binary.len);
148         }
149
150         return out;
151 }
152
153 /*
154  * BT comparator worker function.  Returns an integer less than, equal to, or
155  * greater than zero, indicating whether a is less than, equal to, or greater
156  * than b.  Consistent with the requirements for a B-Tree operator class
157  *
158  * Strings are compared lexically, in contrast with other places where we use a
159  * much simpler comparator logic for searching through Strings.  Since this is
160  * called from B-Tree support function 1, we're careful about not leaking
161  * memory here.
162  */
163 int
164 compareJsonbSuperHeaderValue(JsonbSuperHeader a, JsonbSuperHeader b)
165 {
166         JsonbIterator *ita,
167                                   *itb;
168         int                     res = 0;
169
170         ita = JsonbIteratorInit(a);
171         itb = JsonbIteratorInit(b);
172
173         do
174         {
175                 JsonbValue      va,
176                                         vb;
177                 int                     ra,
178                                         rb;
179
180                 ra = JsonbIteratorNext(&ita, &va, false);
181                 rb = JsonbIteratorNext(&itb, &vb, false);
182
183                 /*
184                  * To a limited extent we'll redundantly iterate over an array/object
185                  * while re-performing the same test without any reasonable expectation
186                  * of the same container types having differing lengths (as when we
187                  * process a WJB_BEGIN_OBJECT, and later the corresponding
188                  * WJB_END_OBJECT), but no matter.
189                  */
190                 if (ra == rb)
191                 {
192                         if (ra == WJB_DONE)
193                         {
194                                 /* Decisively equal */
195                                 break;
196                         }
197
198                         if (va.type == vb.type)
199                         {
200                                 switch (va.type)
201                                 {
202                                         case jbvString:
203                                                 res = lexicalCompareJsonbStringValue(&va, &vb);
204                                                 break;
205                                         case jbvNull:
206                                         case jbvNumeric:
207                                         case jbvBool:
208                                                 res = compareJsonbScalarValue(&va, &vb);
209                                                 break;
210                                         case jbvArray:
211                                                 /*
212                                                  * This could be a "raw scalar" pseudo array.  That's a
213                                                  * special case here though, since we still want the
214                                                  * general type-based comparisons to apply, and as far
215                                                  * as we're concerned a pseudo array is just a scalar.
216                                                  */
217                                                 if (va.array.rawScalar != vb.array.rawScalar)
218                                                         res = (va.array.rawScalar) ? -1 : 1;
219                                                 if (va.array.nElems != vb.array.nElems)
220                                                         res = (va.array.nElems > vb.array.nElems) ? 1 : -1;
221                                                 break;
222                                         case jbvObject:
223                                                 if (va.object.nPairs != vb.object.nPairs)
224                                                         res = (va.object.nPairs > vb.object.nPairs) ? 1 : -1;
225                                                 break;
226                                         case jbvBinary:
227                                                 elog(ERROR, "unexpected jbvBinary value");
228                                 }
229                         }
230                         else
231                         {
232                                 /* Type-defined order */
233                                 res = (va.type > vb.type) ? 1 : -1;
234                         }
235                 }
236                 else
237                 {
238                         /*
239                          * It's safe to assume that the types differed.
240                          *
241                          * If the two values were the same container type, then there'd
242                          * have been a chance to observe the variation in the number of
243                          * elements/pairs (when processing WJB_BEGIN_OBJECT, say).  They
244                          * can't be scalar types either, because then they'd have to be
245                          * contained in containers already ruled unequal due to differing
246                          * numbers of pairs/elements, or already directly ruled unequal
247                          * with a call to the underlying type's comparator.
248                          */
249                         Assert(va.type != vb.type);
250                         Assert(va.type == jbvArray || va.type == jbvObject);
251                         Assert(vb.type == jbvArray || vb.type == jbvObject);
252                         /* Type-defined order */
253                         res = (va.type > vb.type) ? 1 : -1;
254                 }
255         }
256         while (res == 0);
257
258         while (ita != NULL)
259         {
260                 JsonbIterator *i = ita->parent;
261                 pfree(ita);
262                 ita = i;
263         }
264         while (itb != NULL)
265         {
266                 JsonbIterator *i = itb->parent;
267                 pfree(itb);
268                 itb = i;
269         }
270
271         return res;
272 }
273
274 /*
275  * Find value in object (i.e. the "value" part of some key/value pair in an
276  * object), or find a matching element if we're looking through an array.  Do
277  * so on the basis of equality of the object keys only, or alternatively
278  * element values only, with a caller-supplied value "key".  The "flags"
279  * argument allows the caller to specify which container types are of interest.
280  *
281  * This exported utility function exists to facilitate various cases concerned
282  * with "containment".  If asked to look through an object, the caller had
283  * better pass a Jsonb String, because their keys can only be strings.
284  * Otherwise, for an array, any type of JsonbValue will do.
285  *
286  * In order to proceed with the search, it is necessary for callers to have
287  * both specified an interest in exactly one particular container type with an
288  * appropriate flag, as well as having the pointed-to Jsonb superheader be of
289  * one of those same container types at the top level. (Actually, we just do
290  * whichever makes sense to save callers the trouble of figuring it out - at
291  * most one can make sense, because the super header either points to an array
292  * (possible a "raw scalar" pseudo array) or an object.)
293  *
294  * Note that we can return a jbvBinary JsonbValue if this is called on an
295  * object, but we never do so on an array.  If the caller asks to look through
296  * a container type that is not of the type pointed to by the superheader,
297  * immediately fall through and return NULL.  If we cannot find the value,
298  * return NULL.  Otherwise, return palloc()'d copy of value.
299  *
300  * lowbound can be NULL, but if not it's used to establish a point at which to
301  * start searching.  If the value searched for is found, then lowbound is then
302  * set to an offset into the array or object.  Typically, this is used to
303  * exploit the ordering of objects to avoid redundant work, by also sorting a
304  * list of items to be checked using the internal sort criteria for objects
305  * (object pair keys), and then, when searching for the second or subsequent
306  * item, picking it up where we left off knowing that the second or subsequent
307  * item can not be at a point below the low bound set when the first was found.
308  * This is only useful for objects, not arrays (which have a user-defined
309  * order), so array superheader Jsonbs should just pass NULL.  Moreover, it's
310  * only useful because we only match object pairs on the basis of their key, so
311  * presumably anyone exploiting this is only interested in matching Object keys
312  * with a String.  lowbound is given in units of pairs, not underlying values.
313  */
314 JsonbValue *
315 findJsonbValueFromSuperHeader(JsonbSuperHeader sheader, uint32 flags,
316                                                           uint32 *lowbound, JsonbValue * key)
317 {
318         uint32                  superheader = *(uint32 *) sheader;
319         JEntry             *array = (JEntry *) (sheader + sizeof(uint32));
320         int                             count = (superheader & JB_CMASK);
321         JsonbValue         *result = palloc(sizeof(JsonbValue));
322
323         Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
324
325         if (flags & JB_FARRAY & superheader)
326         {
327                 char       *data = (char *) (array + (superheader & JB_CMASK));
328                 int                     i;
329
330                 for (i = 0; i < count; i++)
331                 {
332                         JEntry     *e = array + i;
333
334                         if (JBE_ISNULL(*e) && key->type == jbvNull)
335                         {
336                                 result->type = jbvNull;
337                                 result->estSize = sizeof(JEntry);
338                         }
339                         else if (JBE_ISSTRING(*e) && key->type == jbvString)
340                         {
341                                 result->type = jbvString;
342                                 result->string.val = data + JBE_OFF(*e);
343                                 result->string.len = JBE_LEN(*e);
344                                 result->estSize = sizeof(JEntry) + result->string.len;
345                         }
346                         else if (JBE_ISNUMERIC(*e) && key->type == jbvNumeric)
347                         {
348                                 result->type = jbvNumeric;
349                                 result->numeric = (Numeric) (data + INTALIGN(JBE_OFF(*e)));
350                                 result->estSize = 2 * sizeof(JEntry) +
351                                         VARSIZE_ANY(result->numeric);
352                         }
353                         else if (JBE_ISBOOL(*e) && key->type == jbvBool)
354                         {
355                                 result->type = jbvBool;
356                                 result->boolean = JBE_ISBOOL_TRUE(*e) != 0;
357                                 result->estSize = sizeof(JEntry);
358                         }
359                         else
360                                 continue;
361
362                         if (compareJsonbScalarValue(key, result) == 0)
363                                 return result;
364                 }
365         }
366         else if (flags & JB_FOBJECT & superheader)
367         {
368                 /* Since this is an object, account for *Pairs* of Jentrys */
369                 char       *data = (char *) (array + (superheader & JB_CMASK) * 2);
370                 uint32          stopLow = lowbound ? *lowbound : 0,
371                                         stopMiddle;
372
373                 /* Object key past by caller must be a string */
374                 Assert(key->type == jbvString);
375
376                 /* Binary search on object/pair keys *only* */
377                 while (stopLow < count)
378                 {
379                         JEntry     *entry;
380                         int                     difference;
381                         JsonbValue      candidate;
382
383                         /*
384                          * Note how we compensate for the fact that we're iterating through
385                          * pairs (not entries) throughout.
386                          */
387                         stopMiddle = stopLow + (count - stopLow) / 2;
388
389                         entry = array + stopMiddle * 2;
390
391                         candidate.type = jbvString;
392                         candidate.string.val = data + JBE_OFF(*entry);
393                         candidate.string.len = JBE_LEN(*entry);
394                         candidate.estSize = sizeof(JEntry) + candidate.string.len;
395
396                         difference = lengthCompareJsonbStringValue(&candidate, key, NULL);
397
398                         if (difference == 0)
399                         {
400                                 /* Found our value (from key/value pair) */
401                                 JEntry     *v = entry + 1;
402
403                                 if (lowbound)
404                                         *lowbound = stopMiddle + 1;
405
406                                 if (JBE_ISNULL(*v))
407                                 {
408                                         result->type = jbvNull;
409                                         result->estSize = sizeof(JEntry);
410                                 }
411                                 else if (JBE_ISSTRING(*v))
412                                 {
413                                         result->type = jbvString;
414                                         result->string.val = data + JBE_OFF(*v);
415                                         result->string.len = JBE_LEN(*v);
416                                         result->estSize = sizeof(JEntry) + result->string.len;
417                                 }
418                                 else if (JBE_ISNUMERIC(*v))
419                                 {
420                                         result->type = jbvNumeric;
421                                         result->numeric = (Numeric) (data + INTALIGN(JBE_OFF(*v)));
422                                         result->estSize = 2 * sizeof(JEntry) +
423                                                 VARSIZE_ANY(result->numeric);
424                                 }
425                                 else if (JBE_ISBOOL(*v))
426                                 {
427                                         result->type = jbvBool;
428                                         result->boolean = JBE_ISBOOL_TRUE(*v) != 0;
429                                         result->estSize = sizeof(JEntry);
430                                 }
431                                 else
432                                 {
433                                         /*
434                                          * See header comments to understand why this never happens
435                                          * with arrays
436                                          */
437                                         result->type = jbvBinary;
438                                         result->binary.data = data + INTALIGN(JBE_OFF(*v));
439                                         result->binary.len = JBE_LEN(*v) -
440                                                 (INTALIGN(JBE_OFF(*v)) - JBE_OFF(*v));
441                                         result->estSize = 2 * sizeof(JEntry) + result->binary.len;
442                                 }
443
444                                 return result;
445                         }
446                         else
447                         {
448                                 if (difference < 0)
449                                         stopLow = stopMiddle + 1;
450                                 else
451                                         count = stopMiddle;
452                         }
453                 }
454
455                 if (lowbound)
456                         *lowbound = stopLow;
457         }
458
459         /* Not found */
460         pfree(result);
461         return NULL;
462 }
463
464 /*
465  * Get i-th value of Jsonb array from superheader.
466  *
467  * Returns palloc()'d copy of value.
468  */
469 JsonbValue *
470 getIthJsonbValueFromSuperHeader(JsonbSuperHeader sheader, uint32 i)
471 {
472         uint32          superheader = *(uint32 *) sheader;
473         JsonbValue *result;
474         JEntry     *array,
475                            *e;
476         char       *data;
477
478         result = palloc(sizeof(JsonbValue));
479
480         if (i >= (superheader & JB_CMASK))
481                 return NULL;
482
483         array = (JEntry *) (sheader + sizeof(uint32));
484
485         if (superheader & JB_FARRAY)
486         {
487                 e = array + i;
488                 data = (char *) (array + (superheader & JB_CMASK));
489         }
490         else
491         {
492                 elog(ERROR, "not a jsonb array");
493         }
494
495         if (JBE_ISNULL(*e))
496         {
497                 result->type = jbvNull;
498                 result->estSize = sizeof(JEntry);
499         }
500         else if (JBE_ISSTRING(*e))
501         {
502                 result->type = jbvString;
503                 result->string.val = data + JBE_OFF(*e);
504                 result->string.len = JBE_LEN(*e);
505                 result->estSize = sizeof(JEntry) + result->string.len;
506         }
507         else if (JBE_ISNUMERIC(*e))
508         {
509                 result->type = jbvNumeric;
510                 result->numeric = (Numeric) (data + INTALIGN(JBE_OFF(*e)));
511                 result->estSize = 2 * sizeof(JEntry) + VARSIZE_ANY(result->numeric);
512         }
513         else if (JBE_ISBOOL(*e))
514         {
515                 result->type = jbvBool;
516                 result->boolean = JBE_ISBOOL_TRUE(*e) != 0;
517                 result->estSize = sizeof(JEntry);
518         }
519         else
520         {
521                 result->type = jbvBinary;
522                 result->binary.data = data + INTALIGN(JBE_OFF(*e));
523                 result->binary.len = JBE_LEN(*e) - (INTALIGN(JBE_OFF(*e)) - JBE_OFF(*e));
524                 result->estSize = result->binary.len + 2 * sizeof(JEntry);
525         }
526
527         return result;
528 }
529
530 /*
531  * Push JsonbValue into JsonbParseState.
532  *
533  * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
534  * JsonbValue to a Jsonb.
535  *
536  * Initial state of *JsonbParseState is NULL, since it'll be allocated here
537  * originally (caller will get JsonbParseState back by reference).
538  *
539  * Only sequential tokens pertaining to non-container types should pass a
540  * JsonbValue.  There is one exception -- WJB_BEGIN_ARRAY callers may pass a
541  * "raw scalar" pseudo array to append that.
542  */
543 JsonbValue *
544 pushJsonbValue(JsonbParseState ** pstate, int seq, JsonbValue * scalarVal)
545 {
546         JsonbValue *result = NULL;
547
548         switch (seq)
549         {
550                 case WJB_BEGIN_ARRAY:
551                         Assert(!scalarVal || scalarVal->array.rawScalar);
552                         *pstate = pushState(pstate);
553                         result = &(*pstate)->contVal;
554                         (*pstate)->contVal.type = jbvArray;
555                         (*pstate)->contVal.estSize = 3 * sizeof(JEntry);
556                         (*pstate)->contVal.array.nElems = 0;
557                         (*pstate)->contVal.array.rawScalar = (scalarVal &&
558                                                                                                   scalarVal->array.rawScalar);
559                         if (scalarVal && scalarVal->array.nElems > 0)
560                         {
561                                 /* Assume that this array is still really a scalar */
562                                 Assert(scalarVal->type == jbvArray);
563                                 (*pstate)->size = scalarVal->array.nElems;
564                         }
565                         else
566                         {
567                                 (*pstate)->size = 4;
568                         }
569                         (*pstate)->contVal.array.elems = palloc(sizeof(JsonbValue) *
570                                                                                                         (*pstate)->size);
571                         break;
572                 case WJB_BEGIN_OBJECT:
573                         Assert(!scalarVal);
574                         *pstate = pushState(pstate);
575                         result = &(*pstate)->contVal;
576                         (*pstate)->contVal.type = jbvObject;
577                         (*pstate)->contVal.estSize = 3 * sizeof(JEntry);
578                         (*pstate)->contVal.object.nPairs = 0;
579                         (*pstate)->size = 4;
580                         (*pstate)->contVal.object.pairs = palloc(sizeof(JsonbPair) *
581                                                                                                          (*pstate)->size);
582                         break;
583                 case WJB_KEY:
584                         Assert(scalarVal->type == jbvString);
585                         appendKey(*pstate, scalarVal);
586                         break;
587                 case WJB_VALUE:
588                         Assert(IsAJsonbScalar(scalarVal) ||
589                                    scalarVal->type == jbvBinary);
590                         appendValue(*pstate, scalarVal);
591                         break;
592                 case WJB_ELEM:
593                         Assert(IsAJsonbScalar(scalarVal) ||
594                                    scalarVal->type == jbvBinary);
595                         appendElement(*pstate, scalarVal);
596                         break;
597                 case WJB_END_OBJECT:
598                         uniqueifyJsonbObject(&(*pstate)->contVal);
599                 case WJB_END_ARRAY:
600                         /* Steps here common to WJB_END_OBJECT case */
601                         Assert(!scalarVal);
602                         result = &(*pstate)->contVal;
603
604                         /*
605                          * Pop stack and push current array/object as value in parent
606                          * array/object
607                          */
608                         *pstate = (*pstate)->next;
609                         if (*pstate)
610                         {
611                                 switch ((*pstate)->contVal.type)
612                                 {
613                                         case jbvArray:
614                                                 appendElement(*pstate, result);
615                                                 break;
616                                         case jbvObject:
617                                                 appendValue(*pstate, result);
618                                                 break;
619                                         default:
620                                                 elog(ERROR, "invalid jsonb container type");
621                                 }
622                         }
623                         break;
624                 default:
625                         elog(ERROR, "unrecognized jsonb sequential processing token");
626         }
627
628         return result;
629 }
630
631 /*
632  * Given a Jsonb superheader, expand to JsonbIterator to iterate over items
633  * fully expanded to in-memory representation for manipulation.
634  *
635  * See JsonbIteratorNext() for notes on memory management.
636  */
637 JsonbIterator *
638 JsonbIteratorInit(JsonbSuperHeader sheader)
639 {
640         JsonbIterator *it = palloc(sizeof(JsonbIterator));
641
642         iteratorFromContainerBuf(it, sheader);
643         it->parent = NULL;
644
645         return it;
646 }
647
648 /*
649  * Get next JsonbValue while iterating
650  *
651  * Caller should initially pass their own, original iterator.  They may get
652  * back a child iterator palloc()'d here instead.  The function can be relied
653  * on to free those child iterators, lest the memory allocated for highly
654  * nested objects become unreasonable, but only if callers don't end iteration
655  * early (by breaking upon having found something in a search, for example).
656  *
657  * Callers in such a scenario, that are particularly sensitive to leaking
658  * memory in a long-lived context may walk the ancestral tree from the final
659  * iterator we left them with to its oldest ancestor, pfree()ing as they go.
660  * They do not have to free any other memory previously allocated for iterators
661  * but not accessible as direct ancestors of the iterator they're last passed
662  * back.
663  *
664  * Returns "Jsonb sequential processing" token value.  Iterator "state"
665  * reflects the current stage of the process in a less granular fashion, and is
666  * mostly used here to track things internally with respect to particular
667  * iterators.
668  *
669  * Clients of this function should not have to handle any jbvBinary values
670  * (since recursive calls will deal with this), provided skipNested is false.
671  * It is our job to expand the jbvBinary representation without bothering them
672  * with it.  However, clients should not take it upon themselves to touch array
673  * or Object element/pair buffers, since their element/pair pointers are
674  * garbage.
675  */
676 int
677 JsonbIteratorNext(JsonbIterator ** it, JsonbValue * val, bool skipNested)
678 {
679         JsonbIterState  state;
680
681         /* Guard against stack overflow due to overly complex Jsonb */
682         check_stack_depth();
683
684         /* Recursive caller may have original caller's iterator */
685         if (*it == NULL)
686                 return WJB_DONE;
687
688         state = (*it)->state;
689
690         if ((*it)->containerType == JB_FARRAY)
691         {
692                 if (state == jbi_start)
693                 {
694                         /* Set v to array on first array call */
695                         val->type = jbvArray;
696                         val->array.nElems = (*it)->nElems;
697                         /*
698                          * v->array.elems is not actually set, because we aren't doing a
699                          * full conversion
700                          */
701                         val->array.rawScalar = (*it)->isScalar;
702                         (*it)->i = 0;
703                         /* Set state for next call */
704                         (*it)->state = jbi_elem;
705                         return WJB_BEGIN_ARRAY;
706                 }
707                 else if (state == jbi_elem)
708                 {
709                         if ((*it)->i >= (*it)->nElems)
710                         {
711                                 /*
712                                  * All elements within array already processed.  Report this to
713                                  * caller, and give it back original parent iterator (which
714                                  * independently tracks iteration progress at its level of
715                                  * nesting).
716                                  */
717                                 *it = freeAndGetParent(*it);
718                                 return WJB_END_ARRAY;
719                         }
720                         else if (formIterIsContainer(it, val, &(*it)->meta[(*it)->i++],
721                                                                                  skipNested))
722                         {
723                                 /*
724                                  * New child iterator acquired within formIterIsContainer.
725                                  * Recurse into container.  Don't directly return jbvBinary
726                                  * value to top-level client.
727                                  */
728                                 return JsonbIteratorNext(it, val, skipNested);
729                         }
730                         else
731                         {
732                                 /* Scalar item in array */
733                                 return WJB_ELEM;
734                         }
735                 }
736         }
737         else if ((*it)->containerType == JB_FOBJECT)
738         {
739                 if (state == jbi_start)
740                 {
741                         /* Set v to object on first object call */
742                         val->type = jbvObject;
743                         val->object.nPairs = (*it)->nElems;
744                         /*
745                          * v->object.pairs is not actually set, because we aren't doing a
746                          * full conversion
747                          */
748                         (*it)->i = 0;
749                         /* Set state for next call */
750                         (*it)->state = jbi_key;
751                         return WJB_BEGIN_OBJECT;
752                 }
753                 else if (state == jbi_key)
754                 {
755                         if ((*it)->i >= (*it)->nElems)
756                         {
757                                 /*
758                                  * All pairs within object already processed.  Report this to
759                                  * caller, and give it back original containing iterator (which
760                                  * independently tracks iteration progress at its level of
761                                  * nesting).
762                                  */
763                                 *it = freeAndGetParent(*it);
764                                 return WJB_END_OBJECT;
765                         }
766                         else
767                         {
768                                 /*
769                                  * Return binary item key (ensured by setting skipNested to
770                                  * false directly).  No child iterator, no further recursion.
771                                  * When control reaches here, it's probably from a recursive
772                                  * call.
773                                  */
774                                 if (formIterIsContainer(it, val, &(*it)->meta[(*it)->i * 2], false))
775                                         elog(ERROR, "unexpected container as object key");
776
777                                 Assert(val->type == jbvString);
778                                 /* Set state for next call */
779                                 (*it)->state = jbi_value;
780                                 return WJB_KEY;
781                         }
782                 }
783                 else if (state == jbi_value)
784                 {
785                         /* Set state for next call */
786                         (*it)->state = jbi_key;
787
788                         /*
789                          * Value may be a container, in which case we recurse with new,
790                          * child iterator.  If it is, don't bother !skipNested callers with
791                          * dealing with the jbvBinary representation.
792                          */
793                         if (formIterIsContainer(it, val, &(*it)->meta[((*it)->i++) * 2 + 1],
794                                                                         skipNested))
795                                 return JsonbIteratorNext(it, val, skipNested);
796                         else
797                                 return WJB_VALUE;
798                 }
799         }
800
801         elog(ERROR, "invalid iterator state");
802 }
803
804 /*
805  * Worker for "contains" operator's function
806  *
807  * Formally speaking, containment is top-down, unordered subtree isomorphism.
808  *
809  * Takes iterators that belong to some container type.  These iterators
810  * "belong" to those values in the sense that they've just been initialized in
811  * respect of them by the caller (perhaps in a nested fashion).
812  *
813  * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
814  * We determine if mContained is contained within val.
815  */
816 bool
817 JsonbDeepContains(JsonbIterator ** val, JsonbIterator ** mContained)
818 {
819         uint32          rval,
820                                 rcont;
821         JsonbValue      vval,
822                                 vcontained;
823         /*
824          * Guard against stack overflow due to overly complex Jsonb.
825          *
826          * Functions called here independently take this precaution, but that might
827          * not be sufficient since this is also a recursive function.
828          */
829         check_stack_depth();
830
831         rval = JsonbIteratorNext(val, &vval, false);
832         rcont = JsonbIteratorNext(mContained, &vcontained, false);
833
834         if (rval != rcont)
835         {
836                 /*
837                  * The differing return values can immediately be taken as indicating
838                  * two differing container types at this nesting level, which is
839                  * sufficient reason to give up entirely (but it should be the case
840                  * that they're both some container type).
841                  */
842                 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
843                 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
844                 return false;
845         }
846         else if (rcont == WJB_BEGIN_OBJECT)
847         {
848                 JsonbValue *lhsVal;             /* lhsVal is from pair in lhs object */
849
850                 Assert(vcontained.type == jbvObject);
851
852                 /* Work through rhs "is it contained within?" object */
853                 for (;;)
854                 {
855                         rcont = JsonbIteratorNext(mContained, &vcontained, false);
856
857                         /*
858                          * When we get through caller's rhs "is it contained within?"
859                          * object without failing to find one of its values, it's
860                          * contained.
861                          */
862                         if (rcont == WJB_END_OBJECT)
863                                 return true;
864
865                         Assert(rcont == WJB_KEY);
866
867                         /* First, find value by key... */
868                         lhsVal = findJsonbValueFromSuperHeader((*val)->buffer,
869                                                                                                    JB_FOBJECT,
870                                                                                                    NULL,
871                                                                                                    &vcontained);
872
873                         if (!lhsVal)
874                                 return false;
875
876                         /*
877                          * ...at this stage it is apparent that there is at least a key
878                          * match for this rhs pair.
879                          */
880                         rcont = JsonbIteratorNext(mContained, &vcontained, true);
881
882                         Assert(rcont == WJB_VALUE);
883
884                         /*
885                          * Compare rhs pair's value with lhs pair's value just found using
886                          * key
887                          */
888                         if (lhsVal->type != vcontained.type)
889                         {
890                                 return false;
891                         }
892                         else if (IsAJsonbScalar(lhsVal))
893                         {
894                                 if (compareJsonbScalarValue(lhsVal, &vcontained) != 0)
895                                         return false;
896                         }
897                         else
898                         {
899                                 /* Nested container value (object or array) */
900                                 JsonbIterator *nestval, *nestContained;
901
902                                 Assert(lhsVal->type == jbvBinary);
903                                 Assert(vcontained.type == jbvBinary);
904
905                                 nestval = JsonbIteratorInit(lhsVal->binary.data);
906                                 nestContained = JsonbIteratorInit(vcontained.binary.data);
907
908                                 /*
909                                  * Match "value" side of rhs datum object's pair recursively.
910                                  * It's a nested structure.
911                                  *
912                                  * Note that nesting still has to "match up" at the right
913                                  * nesting sub-levels.  However, there need only be zero or
914                                  * more matching pairs (or elements) at each nesting level
915                                  * (provided the *rhs* pairs/elements *all* match on each
916                                  * level), which enables searching nested structures for a
917                                  * single String or other primitive type sub-datum quite
918                                  * effectively (provided the user constructed the rhs nested
919                                  * structure such that we "know where to look").
920                                  *
921                                  * In other words, the mapping of container nodes in the rhs
922                                  * "vcontained" Jsonb to internal nodes on the lhs is
923                                  * injective, and parent-child edges on the rhs must be mapped
924                                  * to parent-child edges on the lhs to satisfy the condition of
925                                  * containment (plus of course the mapped nodes must be equal).
926                                  */
927                                 if (!JsonbDeepContains(&nestval, &nestContained))
928                                         return false;
929                         }
930                 }
931         }
932         else if (rcont == WJB_BEGIN_ARRAY)
933         {
934                 JsonbValue *lhsConts = NULL;
935                 uint32          nLhsElems = vval.array.nElems;
936
937                 Assert(vcontained.type == jbvArray);
938
939                 /*
940                  * Handle distinction between "raw scalar" pseudo arrays, and real
941                  * arrays.
942                  *
943                  * A raw scalar may contain another raw scalar, and an array may
944                  * contain a raw scalar, but a raw scalar may not contain an array.  We
945                  * don't do something like this for the object case, since objects can
946                  * only contain pairs, never raw scalars (a pair is represented by an
947                  * rhs object argument with a single contained pair).
948                  */
949                 if (vval.array.rawScalar && !vcontained.array.rawScalar)
950                         return false;
951
952                 /* Work through rhs "is it contained within?" array */
953                 for (;;)
954                 {
955                         rcont = JsonbIteratorNext(mContained, &vcontained, true);
956
957                         /*
958                          * When we get through caller's rhs "is it contained within?" array
959                          * without failing to find one of its values, it's contained.
960                          */
961                         if (rcont == WJB_END_ARRAY)
962                                 return true;
963
964                         Assert(rcont == WJB_ELEM);
965
966                         if (IsAJsonbScalar(&vcontained))
967                         {
968                                 if (!findJsonbValueFromSuperHeader((*val)->buffer,
969                                                                                                    JB_FARRAY,
970                                                                                                    NULL,
971                                                                                                    &vcontained))
972                                         return false;
973                         }
974                         else
975                         {
976                                 uint32          i;
977
978                                 /*
979                                  * If this is first container found in rhs array (at this
980                                  * depth), initialize temp lhs array of containers
981                                  */
982                                 if (lhsConts == NULL)
983                                 {
984                                         uint32          j = 0;
985
986                                         /* Make room for all possible values */
987                                         lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
988
989                                         for (i = 0; i < nLhsElems; i++)
990                                         {
991                                                 /* Store all lhs elements in temp array*/
992                                                 rcont = JsonbIteratorNext(val, &vval, true);
993                                                 Assert(rcont == WJB_ELEM);
994
995                                                 if (vval.type == jbvBinary)
996                                                         lhsConts[j++] = vval;
997                                         }
998
999                                         /* No container elements in temp array, so give up now */
1000                                         if (j == 0)
1001                                                 return false;
1002
1003                                         /* We may have only partially filled array */
1004                                         nLhsElems = j;
1005                                 }
1006
1007                                 /* XXX: Nested array containment is O(N^2) */
1008                                 for (i = 0; i < nLhsElems; i++)
1009                                 {
1010                                         /* Nested container value (object or array) */
1011                                         JsonbIterator  *nestval, *nestContained;
1012                                         bool                    contains;
1013
1014                                         nestval = JsonbIteratorInit(lhsConts[i].binary.data);
1015                                         nestContained = JsonbIteratorInit(vcontained.binary.data);
1016
1017                                         contains = JsonbDeepContains(&nestval, &nestContained);
1018
1019                                         if (nestval)
1020                                                 pfree(nestval);
1021                                         if (nestContained)
1022                                                 pfree(nestContained);
1023                                         if (contains)
1024                                                 break;
1025                                 }
1026
1027                                 /*
1028                                  * Report rhs container value is not contained if couldn't
1029                                  * match rhs container to *some* lhs cont
1030                                  */
1031                                 if (i == nLhsElems)
1032                                         return false;
1033                         }
1034                 }
1035         }
1036         else
1037         {
1038                 elog(ERROR, "invalid jsonb container type");
1039         }
1040
1041         elog(ERROR, "unexpectedly fell off end of jsonb container");
1042 }
1043
1044 /*
1045  * Convert a Postgres text array to a Jsonb array, sorted and with
1046  * de-duplicated key elements.  This is used for searching an object for items
1047  * in the array, so we enforce that the number of strings cannot exceed
1048  * JSONB_MAX_PAIRS.
1049  */
1050 JsonbValue *
1051 arrayToJsonbSortedArray(ArrayType *array)
1052 {
1053         Datum      *key_datums;
1054         bool       *key_nulls;
1055         int                     elem_count;
1056         JsonbValue *result;
1057         int                     i,
1058                                 j;
1059
1060         /* Extract data for sorting */
1061         deconstruct_array(array, TEXTOID, -1, false, 'i', &key_datums, &key_nulls,
1062                                           &elem_count);
1063
1064         if (elem_count == 0)
1065                 return NULL;
1066
1067         /*
1068          * A text array uses at least eight bytes per element, so any overflow in
1069          * "key_count * sizeof(JsonbPair)" is small enough for palloc() to catch.
1070          * However, credible improvements to the array format could invalidate that
1071          * assumption.  Therefore, use an explicit check rather than relying on
1072          * palloc() to complain.
1073          */
1074         if (elem_count > JSONB_MAX_PAIRS)
1075                 ereport(ERROR,
1076                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1077                                  errmsg("number of array elements (%d) exceeds maximum allowed Jsonb pairs (%zu)",
1078                                                 elem_count, JSONB_MAX_PAIRS)));
1079
1080         result = palloc(sizeof(JsonbValue));
1081         result->type = jbvArray;
1082         result->array.rawScalar = false;
1083         result->array.elems = palloc(sizeof(JsonbPair) * elem_count);
1084
1085         for (i = 0, j = 0; i < elem_count; i++)
1086         {
1087                 if (!key_nulls[i])
1088                 {
1089                         result->array.elems[j].type = jbvString;
1090                         result->array.elems[j].string.val = VARDATA(key_datums[i]);
1091                         result->array.elems[j].string.len = VARSIZE(key_datums[i]) - VARHDRSZ;
1092                         j++;
1093                 }
1094         }
1095         result->array.nElems = j;
1096
1097         uniqueifyJsonbArray(result);
1098         return result;
1099 }
1100
1101 /*
1102  * Hash a JsonbValue scalar value, mixing in the hash value with an existing
1103  * hash provided by the caller.
1104  *
1105  * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1106  * flags.
1107  */
1108 void
1109 JsonbHashScalarValue(const JsonbValue * scalarVal, uint32 * hash)
1110 {
1111         int tmp;
1112
1113         /*
1114          * Combine hash values of successive keys, values and elements by rotating
1115          * the previous value left 1 bit, then XOR'ing in the new
1116          * key/value/element's hash value.
1117          */
1118         *hash = (*hash << 1) | (*hash >> 31);
1119         switch (scalarVal->type)
1120         {
1121                 case jbvNull:
1122                         *hash ^= 0x01;
1123                         return;
1124                 case jbvString:
1125                         tmp = hash_any((unsigned char *) scalarVal->string.val,
1126                                                    scalarVal->string.len);
1127                         *hash ^= tmp;
1128                         return;
1129                 case jbvNumeric:
1130                         /* Must be unaffected by trailing zeroes */
1131                         tmp = DatumGetInt32(DirectFunctionCall1(hash_numeric,
1132                                                                                                         NumericGetDatum(scalarVal->numeric)));
1133                         *hash ^= tmp;
1134                         return;
1135                 case jbvBool:
1136                         *hash ^= scalarVal->boolean? 0x02:0x04;
1137                         return;
1138                 default:
1139                         elog(ERROR, "invalid jsonb scalar type");
1140         }
1141 }
1142
1143 /*
1144  * Are two scalar JsonbValues of the same type a and b equal?
1145  *
1146  * Does not use lexical comparisons.  Therefore, it is essentially that this
1147  * never be used against Strings for anything other than searching for values
1148  * within a single jsonb.
1149  */
1150 static int
1151 compareJsonbScalarValue(JsonbValue * aScalar, JsonbValue * bScalar)
1152 {
1153         if (aScalar->type == bScalar->type)
1154         {
1155                 switch (aScalar->type)
1156                 {
1157                         case jbvNull:
1158                                 return 0;
1159                         case jbvString:
1160                                 return lengthCompareJsonbStringValue(aScalar, bScalar, NULL);
1161                         case jbvNumeric:
1162                                 return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1163                                                                                                                  PointerGetDatum(aScalar->numeric),
1164                                                                                                                  PointerGetDatum(bScalar->numeric)));
1165                         case jbvBool:
1166                                 if (aScalar->boolean != bScalar->boolean)
1167                                         return (aScalar->boolean > bScalar->boolean) ? 1 : -1;
1168                                 else
1169                                         return 0;
1170                         default:
1171                                 elog(ERROR, "invalid jsonb scalar type");
1172                 }
1173         }
1174         elog(ERROR, "jsonb scalar type mismatch");
1175 }
1176
1177 /*
1178  * Standard lexical qsort() comparator of jsonb strings.
1179  *
1180  * Sorts strings lexically, using the default database collation.  Used by
1181  * B-Tree operators, where a lexical sort order is generally expected.
1182  */
1183 static int
1184 lexicalCompareJsonbStringValue(const void *a, const void *b)
1185 {
1186         const JsonbValue *va = (const JsonbValue *) a;
1187         const JsonbValue *vb = (const JsonbValue *) b;
1188
1189         Assert(va->type == jbvString);
1190         Assert(vb->type == jbvString);
1191
1192         return varstr_cmp(va->string.val, va->string.len, vb->string.val,
1193                                           vb->string.len, DEFAULT_COLLATION_OID);
1194 }
1195
1196 /*
1197  * Given a JsonbValue, convert to Jsonb and store in preallocated Jsonb buffer
1198  * sufficiently large to fit the value
1199  */
1200 static Size
1201 convertJsonb(JsonbValue * val, Jsonb *buffer)
1202 {
1203         convertState    state;
1204         Size                    len;
1205
1206         /* Should not already have binary representation */
1207         Assert(val->type != jbvBinary);
1208
1209         state.buffer = buffer;
1210         /* Start from superheader */
1211         state.ptr = VARDATA(state.buffer);
1212         state.levelSz = 8;
1213         state.allState = palloc(sizeof(convertLevel) * state.levelSz);
1214
1215         walkJsonbValueConversion(val, &state, 0);
1216
1217         len = state.ptr - VARDATA(state.buffer);
1218
1219         Assert(len <= val->estSize);
1220         return len;
1221 }
1222
1223 /*
1224  * Walk the tree representation of Jsonb, as part of the process of converting
1225  * a JsonbValue to a Jsonb.
1226  *
1227  * This high-level function takes care of recursion into sub-containers, but at
1228  * the top level calls putJsonbValueConversion once per sequential processing
1229  * token (in a manner similar to generic iteration).
1230  */
1231 static void
1232 walkJsonbValueConversion(JsonbValue * val, convertState * cstate,
1233                                                  uint32 nestlevel)
1234 {
1235         int                     i;
1236
1237         check_stack_depth();
1238
1239         if (!val)
1240                 return;
1241
1242         switch (val->type)
1243         {
1244                 case jbvArray:
1245
1246                         putJsonbValueConversion(cstate, val, WJB_BEGIN_ARRAY, nestlevel);
1247                         for (i = 0; i < val->array.nElems; i++)
1248                         {
1249                                 if (IsAJsonbScalar(&val->array.elems[i]) ||
1250                                         val->array.elems[i].type == jbvBinary)
1251                                         putJsonbValueConversion(cstate, val->array.elems + i,
1252                                                                                         WJB_ELEM, nestlevel);
1253                                 else
1254                                         walkJsonbValueConversion(val->array.elems + i, cstate,
1255                                                                                          nestlevel + 1);
1256                         }
1257                         putJsonbValueConversion(cstate, val, WJB_END_ARRAY, nestlevel);
1258
1259                         break;
1260                 case jbvObject:
1261
1262                         putJsonbValueConversion(cstate, val, WJB_BEGIN_OBJECT, nestlevel);
1263                         for (i = 0; i < val->object.nPairs; i++)
1264                         {
1265                                 putJsonbValueConversion(cstate, &val->object.pairs[i].key,
1266                                                                                 WJB_KEY, nestlevel);
1267
1268                                 if (IsAJsonbScalar(&val->object.pairs[i].value) ||
1269                                         val->object.pairs[i].value.type == jbvBinary)
1270                                         putJsonbValueConversion(cstate,
1271                                                                                         &val->object.pairs[i].value,
1272                                                                                         WJB_VALUE, nestlevel);
1273                                 else
1274                                         walkJsonbValueConversion(&val->object.pairs[i].value,
1275                                                                                          cstate, nestlevel + 1);
1276                         }
1277                         putJsonbValueConversion(cstate, val, WJB_END_OBJECT, nestlevel);
1278
1279                         break;
1280                 default:
1281                         elog(ERROR, "unknown type of jsonb container");
1282         }
1283 }
1284
1285 /*
1286  * walkJsonbValueConversion() worker.  Add padding sufficient to int-align our
1287  * access to conversion buffer.
1288  */
1289 static inline
1290 short addPaddingInt(convertState * cstate)
1291 {
1292         short           padlen, p;
1293
1294         padlen = INTALIGN(cstate->ptr - VARDATA(cstate->buffer)) -
1295                 (cstate->ptr - VARDATA(cstate->buffer));
1296
1297         for (p = padlen; p > 0; p--)
1298         {
1299                 *cstate->ptr = '\0';
1300                 cstate->ptr++;
1301         }
1302
1303         return padlen;
1304 }
1305
1306 /*
1307  * walkJsonbValueConversion() worker.
1308  *
1309  * As part of the process of converting an arbitrary JsonbValue to a Jsonb,
1310  * copy over an arbitrary individual JsonbValue.  This function may copy any
1311  * type of value, even containers (Objects/arrays).  However, it is not
1312  * responsible for recursive aspects of walking the tree (so only top-level
1313  * Object/array details are handled).
1314  *
1315  * No details about their keys/values/elements are handled recursively -
1316  * rather, the function is called as required for the start of an Object/Array,
1317  * and the end (i.e.  there is one call per sequential processing WJB_* token).
1318  */
1319 static void
1320 putJsonbValueConversion(convertState * cstate, JsonbValue * val, uint32 flags,
1321                                                 uint32 level)
1322 {
1323         if (level == cstate->levelSz)
1324         {
1325                 cstate->levelSz *= 2;
1326                 cstate->allState = repalloc(cstate->allState,
1327                                                                          sizeof(convertLevel) * cstate->levelSz);
1328         }
1329
1330         cstate->contPtr = cstate->allState + level;
1331
1332         if (flags & (WJB_BEGIN_ARRAY | WJB_BEGIN_OBJECT))
1333         {
1334                 Assert(((flags & WJB_BEGIN_ARRAY) && val->type == jbvArray) ||
1335                            ((flags & WJB_BEGIN_OBJECT) && val->type == jbvObject));
1336
1337                 /* Initialize pointer into conversion buffer at this level */
1338                 cstate->contPtr->begin = cstate->ptr;
1339
1340                 addPaddingInt(cstate);
1341
1342                 /* Initialize everything else at this level */
1343                 cstate->contPtr->header = (uint32 *) cstate->ptr;
1344                 /* Advance past header */
1345                 cstate->ptr += sizeof(uint32);
1346                 cstate->contPtr->meta = (JEntry *) cstate->ptr;
1347                 cstate->contPtr->i = 0;
1348
1349                 if (val->type == jbvArray)
1350                 {
1351                         *cstate->contPtr->header = val->array.nElems | JB_FARRAY;
1352                         cstate->ptr += sizeof(JEntry) * val->array.nElems;
1353
1354                         if (val->array.rawScalar)
1355                         {
1356                                 Assert(val->array.nElems == 1);
1357                                 Assert(level == 0);
1358                                 *cstate->contPtr->header |= JB_FSCALAR;
1359                         }
1360                 }
1361                 else
1362                 {
1363                         *cstate->contPtr->header = val->object.nPairs | JB_FOBJECT;
1364                         cstate->ptr += sizeof(JEntry) * val->object.nPairs * 2;
1365                 }
1366         }
1367         else if (flags & WJB_ELEM)
1368         {
1369                 putScalarConversion(cstate, val, level, cstate->contPtr->i);
1370                 cstate->contPtr->i++;
1371         }
1372         else if (flags & WJB_KEY)
1373         {
1374                 Assert(val->type == jbvString);
1375
1376                 putScalarConversion(cstate, val, level, cstate->contPtr->i * 2);
1377         }
1378         else if (flags & WJB_VALUE)
1379         {
1380                 putScalarConversion(cstate, val, level, cstate->contPtr->i * 2 + 1);
1381                 cstate->contPtr->i++;
1382         }
1383         else if (flags & (WJB_END_ARRAY | WJB_END_OBJECT))
1384         {
1385                 convertLevel   *prevPtr;        /* Prev container pointer */
1386                 uint32                  len,
1387                                                 i;
1388
1389                 Assert(((flags & WJB_END_ARRAY) && val->type == jbvArray) ||
1390                            ((flags & WJB_END_OBJECT) && val->type == jbvObject));
1391
1392                 if (level == 0)
1393                         return;
1394
1395                 len = cstate->ptr - (char *) cstate->contPtr->begin;
1396
1397                 prevPtr = cstate->contPtr - 1;
1398
1399                 if (*prevPtr->header & JB_FARRAY)
1400                 {
1401                         i = prevPtr->i;
1402
1403                         prevPtr->meta[i].header = JENTRY_ISNEST;
1404
1405                         if (i == 0)
1406                                 prevPtr->meta[0].header |= JENTRY_ISFIRST | len;
1407                         else
1408                                 prevPtr->meta[i].header |=
1409                                         (prevPtr->meta[i - 1].header & JENTRY_POSMASK) + len;
1410                 }
1411                 else if (*prevPtr->header & JB_FOBJECT)
1412                 {
1413                         i = 2 * prevPtr->i + 1;         /* Value, not key */
1414
1415                         prevPtr->meta[i].header = JENTRY_ISNEST;
1416
1417                         prevPtr->meta[i].header |=
1418                                 (prevPtr->meta[i - 1].header & JENTRY_POSMASK) + len;
1419                 }
1420                 else
1421                 {
1422                         elog(ERROR, "invalid jsonb container type");
1423                 }
1424
1425                 Assert(cstate->ptr - cstate->contPtr->begin <= val->estSize);
1426                 prevPtr->i++;
1427         }
1428         else
1429         {
1430                 elog(ERROR, "unknown flag encountered during jsonb tree walk");
1431         }
1432 }
1433
1434 /*
1435  * As part of the process of converting an arbitrary JsonbValue to a Jsonb,
1436  * serialize and copy a scalar value into buffer.
1437  *
1438  * This is a worker function for putJsonbValueConversion() (itself a worker for
1439  * walkJsonbValueConversion()).  It handles the details with regard to Jentry
1440  * metadata peculiar to each scalar type.
1441  */
1442 static void
1443 putScalarConversion(convertState * cstate, JsonbValue * scalarVal, uint32 level,
1444                                         uint32 i)
1445 {
1446         int             numlen;
1447         short           padlen;
1448
1449         cstate->contPtr = cstate->allState + level;
1450
1451         if (i == 0)
1452                 cstate->contPtr->meta[0].header = JENTRY_ISFIRST;
1453         else
1454                 cstate->contPtr->meta[i].header = 0;
1455
1456         switch (scalarVal->type)
1457         {
1458                 case jbvNull:
1459                         cstate->contPtr->meta[i].header |= JENTRY_ISNULL;
1460
1461                         if (i > 0)
1462                                 cstate->contPtr->meta[i].header |=
1463                                         cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK;
1464                         break;
1465                 case jbvString:
1466                         memcpy(cstate->ptr, scalarVal->string.val, scalarVal->string.len);
1467                         cstate->ptr += scalarVal->string.len;
1468
1469                         if (i == 0)
1470                                 cstate->contPtr->meta[0].header |= scalarVal->string.len;
1471                         else
1472                                 cstate->contPtr->meta[i].header |=
1473                                         (cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK) +
1474                                         scalarVal->string.len;
1475                         break;
1476                 case jbvNumeric:
1477                         numlen = VARSIZE_ANY(scalarVal->numeric);
1478                         padlen = addPaddingInt(cstate);
1479
1480                         memcpy(cstate->ptr, scalarVal->numeric, numlen);
1481                         cstate->ptr += numlen;
1482
1483                         cstate->contPtr->meta[i].header |= JENTRY_ISNUMERIC;
1484                         if (i == 0)
1485                                 cstate->contPtr->meta[0].header |= padlen + numlen;
1486                         else
1487                                 cstate->contPtr->meta[i].header |=
1488                                         (cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK)
1489                                         + padlen + numlen;
1490                         break;
1491                 case jbvBool:
1492                         cstate->contPtr->meta[i].header |= (scalarVal->boolean) ?
1493                                 JENTRY_ISTRUE : JENTRY_ISFALSE;
1494
1495                         if (i > 0)
1496                                 cstate->contPtr->meta[i].header |=
1497                                         cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK;
1498                         break;
1499                 default:
1500                         elog(ERROR, "invalid jsonb scalar type");
1501         }
1502 }
1503
1504 /*
1505  * Given superheader pointer into buffer, initialize iterator.  Must be a
1506  * container type.
1507  */
1508 static void
1509 iteratorFromContainerBuf(JsonbIterator * it, JsonbSuperHeader sheader)
1510 {
1511         uint32          superheader = *(uint32 *) sheader;
1512
1513         it->containerType = superheader & (JB_FARRAY | JB_FOBJECT);
1514         it->nElems = superheader & JB_CMASK;
1515         it->buffer = sheader;
1516
1517         /* Array starts just after header */
1518         it->meta = (JEntry *) (sheader + sizeof(uint32));
1519         it->state = jbi_start;
1520
1521         switch (it->containerType)
1522         {
1523                 case JB_FARRAY:
1524                         it->dataProper =
1525                                 (char *) it->meta + it->nElems * sizeof(JEntry);
1526                         it->isScalar = (superheader & JB_FSCALAR) != 0;
1527                         /* This is either a "raw scalar", or an array */
1528                         Assert(!it->isScalar || it->nElems == 1);
1529                         break;
1530                 case JB_FOBJECT:
1531                         /*
1532                          * Offset reflects that nElems indicates JsonbPairs in an object.
1533                          * Each key and each value contain Jentry metadata just the same.
1534                          */
1535                         it->dataProper =
1536                                 (char *) it->meta + it->nElems * sizeof(JEntry) * 2;
1537                         break;
1538                 default:
1539                         elog(ERROR, "unknown type of jsonb container");
1540         }
1541 }
1542
1543 /*
1544  * JsonbIteratorNext() worker
1545  *
1546  * Returns bool indicating if v was a non-jbvBinary container, and thus if
1547  * further recursion is required by caller (according to its skipNested
1548  * preference).  If it is required, we set the caller's iterator for further
1549  * recursion into the nested value.  If we're going to skip nested items, just
1550  * set v to a jbvBinary value, but don't set caller's iterator.
1551  *
1552  * Unlike with containers (either in this function or in any
1553  * JsonbIteratorNext() infrastructure), we fully convert from what is
1554  * ultimately a Jsonb on-disk representation, to a JsonbValue in-memory
1555  * representation (for scalar values only).  JsonbIteratorNext() initializes
1556  * container Jsonbvalues, but without a sane private buffer.  For scalar values
1557  * it has to be done for real (even if we don't actually allocate more memory
1558  * to do this.  The point is that our JsonbValues scalars can be passed around
1559  * anywhere).
1560  */
1561 static bool
1562 formIterIsContainer(JsonbIterator ** it, JsonbValue * val, JEntry * ent,
1563                                         bool skipNested)
1564 {
1565         if (JBE_ISNULL(*ent))
1566         {
1567                 val->type = jbvNull;
1568                 val->estSize = sizeof(JEntry);
1569
1570                 return false;
1571         }
1572         else if (JBE_ISSTRING(*ent))
1573         {
1574                 val->type = jbvString;
1575                 val->string.val = (*it)->dataProper + JBE_OFF(*ent);
1576                 val->string.len = JBE_LEN(*ent);
1577                 val->estSize = sizeof(JEntry) + val->string.len;
1578
1579                 return false;
1580         }
1581         else if (JBE_ISNUMERIC(*ent))
1582         {
1583                 val->type = jbvNumeric;
1584                 val->numeric = (Numeric) ((*it)->dataProper + INTALIGN(JBE_OFF(*ent)));
1585                 val->estSize = 2 * sizeof(JEntry) + VARSIZE_ANY(val->numeric);
1586
1587                 return false;
1588         }
1589         else if (JBE_ISBOOL(*ent))
1590         {
1591                 val->type = jbvBool;
1592                 val->boolean = JBE_ISBOOL_TRUE(*ent) != 0;
1593                 val->estSize = sizeof(JEntry);
1594
1595                 return false;
1596         }
1597         else if (skipNested)
1598         {
1599                 val->type = jbvBinary;
1600                 val->binary.data = (*it)->dataProper + INTALIGN(JBE_OFF(*ent));
1601                 val->binary.len = JBE_LEN(*ent) - (INTALIGN(JBE_OFF(*ent)) - JBE_OFF(*ent));
1602                 val->estSize = val->binary.len + 2 * sizeof(JEntry);
1603
1604                 return false;
1605         }
1606         else
1607         {
1608                 /*
1609                  * Must be container type, so setup caller's iterator to point to that,
1610                  * and return indication of that.
1611                  *
1612                  * Get child iterator.
1613                  */
1614                 JsonbIterator *child = palloc(sizeof(JsonbIterator));
1615
1616                 iteratorFromContainerBuf(child,
1617                                                                  (*it)->dataProper + INTALIGN(JBE_OFF(*ent)));
1618
1619                 child->parent = *it;
1620                 *it = child;
1621
1622                 return true;
1623         }
1624 }
1625
1626 /*
1627  * JsonbIteratorNext() worker:  Return parent, while freeing memory for current
1628  * iterator
1629  */
1630 static JsonbIterator *
1631 freeAndGetParent(JsonbIterator * it)
1632 {
1633         JsonbIterator *v = it->parent;
1634
1635         pfree(it);
1636         return v;
1637 }
1638
1639 /*
1640  * pushJsonbValue() worker:  Iteration-like forming of Jsonb
1641  */
1642 static JsonbParseState *
1643 pushState(JsonbParseState ** pstate)
1644 {
1645         JsonbParseState *ns = palloc(sizeof(JsonbParseState));
1646
1647         ns->next = *pstate;
1648         return ns;
1649 }
1650
1651 /*
1652  * pushJsonbValue() worker:  Append a pair key to state when generating a Jsonb
1653  */
1654 static void
1655 appendKey(JsonbParseState * pstate, JsonbValue * string)
1656 {
1657         JsonbValue *object = &pstate->contVal;
1658
1659         Assert(object->type == jbvObject);
1660         Assert(string->type == jbvString);
1661
1662         if (object->object.nPairs >= JSONB_MAX_PAIRS)
1663                 ereport(ERROR,
1664                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1665                                  errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
1666                                                 JSONB_MAX_PAIRS)));
1667
1668         if (object->object.nPairs >= pstate->size)
1669         {
1670                 pstate->size *= 2;
1671                 object->object.pairs = repalloc(object->object.pairs,
1672                                                                                 sizeof(JsonbPair) * pstate->size);
1673         }
1674
1675         object->object.pairs[object->object.nPairs].key = *string;
1676         object->object.pairs[object->object.nPairs].order = object->object.nPairs;
1677
1678         object->estSize += string->estSize;
1679 }
1680
1681 /*
1682  * pushJsonbValue() worker:  Append a pair value to state when generating a
1683  * Jsonb
1684  */
1685 static void
1686 appendValue(JsonbParseState * pstate, JsonbValue * scalarVal)
1687 {
1688         JsonbValue *object = &pstate->contVal;
1689
1690         Assert(object->type == jbvObject);
1691
1692         object->object.pairs[object->object.nPairs++].value = *scalarVal;
1693         object->estSize += scalarVal->estSize;
1694 }
1695
1696 /*
1697  * pushJsonbValue() worker:  Append an element to state when generating a Jsonb
1698  */
1699 static void
1700 appendElement(JsonbParseState * pstate, JsonbValue * scalarVal)
1701 {
1702         JsonbValue *array = &pstate->contVal;
1703
1704         Assert(array->type == jbvArray);
1705
1706         if (array->array.nElems >= JSONB_MAX_ELEMS)
1707                 ereport(ERROR,
1708                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1709                                  errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
1710                                                 JSONB_MAX_ELEMS)));
1711
1712         if (array->array.nElems >= pstate->size)
1713         {
1714                 pstate->size *= 2;
1715                 array->array.elems = repalloc(array->array.elems,
1716                                                                           sizeof(JsonbValue) * pstate->size);
1717         }
1718
1719         array->array.elems[array->array.nElems++] = *scalarVal;
1720         array->estSize += scalarVal->estSize;
1721 }
1722
1723 /*
1724  * Compare two jbvString JsonbValue values, a and b.
1725  *
1726  * This is a special qsort_arg() comparator used to sort strings in certain
1727  * internal contexts where it is sufficient to have a well-defined sort order.
1728  * In particular, object pair keys are sorted according to this criteria to
1729  * facilitate cheap binary searches where we don't care about lexical sort
1730  * order.
1731  *
1732  * a and b are first sorted based on their length.  If a tie-breaker is
1733  * required, only then do we consider string binary equality.
1734  *
1735  * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1736  * to true iff a and b have full binary equality, since some callers have an
1737  * interest in whether the two values are equal or merely equivalent.
1738  */
1739 static int
1740 lengthCompareJsonbStringValue(const void *a, const void *b, void *binequal)
1741 {
1742         const JsonbValue *va = (const JsonbValue *) a;
1743         const JsonbValue *vb = (const JsonbValue *) b;
1744         int                     res;
1745
1746         Assert(va->type == jbvString);
1747         Assert(vb->type == jbvString);
1748
1749         if (va->string.len == vb->string.len)
1750         {
1751                 res = memcmp(va->string.val, vb->string.val, va->string.len);
1752                 if (res == 0 && binequal)
1753                         *((bool *) binequal) = true;
1754         }
1755         else
1756         {
1757                 res = (va->string.len > vb->string.len) ? 1 : -1;
1758         }
1759
1760         return res;
1761 }
1762
1763 /*
1764  * qsort_arg() comparator to compare JsonbPair values.
1765  *
1766  * Function implemented in terms of lengthCompareJsonbStringValue(), and thus the
1767  * same "arg setting" hack will be applied here in respect of the pair's key
1768  * values.
1769  *
1770  * N.B: String comparisons here are "length-wise"
1771  *
1772  * Pairs with equals keys are ordered such that the order field is respected.
1773  */
1774 static int
1775 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1776 {
1777         const JsonbPair *pa = (const JsonbPair *) a;
1778         const JsonbPair *pb = (const JsonbPair *) b;
1779         int                     res;
1780
1781         res = lengthCompareJsonbStringValue(&pa->key, &pb->key, binequal);
1782
1783         /*
1784          * Guarantee keeping order of equal pair.  Unique algorithm will prefer
1785          * first element as value.
1786          */
1787         if (res == 0)
1788                 res = (pa->order > pb->order) ? -1 : 1;
1789
1790         return res;
1791 }
1792
1793 /*
1794  * Sort and unique-ify pairs in JsonbValue object
1795  */
1796 static void
1797 uniqueifyJsonbObject(JsonbValue * object)
1798 {
1799         bool            hasNonUniq = false;
1800
1801         Assert(object->type == jbvObject);
1802
1803         if (object->object.nPairs > 1)
1804                 qsort_arg(object->object.pairs, object->object.nPairs, sizeof(JsonbPair),
1805                                   lengthCompareJsonbPair, &hasNonUniq);
1806
1807         if (hasNonUniq)
1808         {
1809                 JsonbPair  *ptr = object->object.pairs + 1,
1810                                    *res = object->object.pairs;
1811
1812                 while (ptr - object->object.pairs < object->object.nPairs)
1813                 {
1814                         /* Avoid copying over duplicate */
1815                         if (lengthCompareJsonbStringValue(ptr, res, NULL) == 0)
1816                         {
1817                                 object->estSize -= ptr->key.estSize + ptr->value.estSize;
1818                         }
1819                         else
1820                         {
1821                                 res++;
1822                                 if (ptr != res)
1823                                         memcpy(res, ptr, sizeof(JsonbPair));
1824                         }
1825                         ptr++;
1826                 }
1827
1828                 object->object.nPairs = res + 1 - object->object.pairs;
1829         }
1830 }
1831
1832 /*
1833  * Sort and unique-ify JsonbArray.
1834  *
1835  * Sorting uses internal ordering.
1836  */
1837 static void
1838 uniqueifyJsonbArray(JsonbValue * array)
1839 {
1840         bool hasNonUniq = false;
1841
1842         Assert(array->type == jbvArray);
1843
1844         /*
1845          * Actually sort values, determining if any were equal on the basis of full
1846          * binary equality (rather than just having the same string length).
1847          */
1848         if (array->array.nElems > 1)
1849                 qsort_arg(array->array.elems, array->array.nElems,
1850                                   sizeof(JsonbValue), lengthCompareJsonbStringValue,
1851                                   &hasNonUniq);
1852
1853         if (hasNonUniq)
1854         {
1855                 JsonbValue *ptr = array->array.elems + 1,
1856                                    *res = array->array.elems;
1857
1858                 while (ptr - array->array.elems < array->array.nElems)
1859                 {
1860                         /* Avoid copying over duplicate */
1861                         if (lengthCompareJsonbStringValue(ptr, res, NULL) != 0)
1862                         {
1863                                 res++;
1864                                 *res = *ptr;
1865                         }
1866
1867                         ptr++;
1868                 }
1869
1870                 array->array.nElems = res + 1 - array->array.elems;
1871         }
1872 }