]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/jsonb_util.c
Add several generator functions for jsonb that exist for json.
[postgresql] / src / backend / utils / adt / jsonb_util.c
1 /*-------------------------------------------------------------------------
2  *
3  * jsonb_util.c
4  *        converting between Jsonb and JsonbValues, and iterating.
5  *
6  * Copyright (c) 2014, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/jsonb_util.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "access/hash.h"
17 #include "catalog/pg_collation.h"
18 #include "miscadmin.h"
19 #include "utils/builtins.h"
20 #include "utils/jsonb.h"
21 #include "utils/memutils.h"
22
23 /*
24  * Maximum number of elements in an array (or key/value pairs in an object).
25  * This is limited by two things: the size of the JEntry array must fit
26  * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
27  * reserved for that in the JsonbContainer.header field.
28  *
29  * (The total size of an array's or object's elements is also limited by
30  * JENTRY_OFFLENMASK, but we're not concerned about that here.)
31  */
32 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
33 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
34
35 static void fillJsonbValue(JsonbContainer *container, int index,
36                            char *base_addr, uint32 offset,
37                            JsonbValue *result);
38 static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
39 static int      compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
40 static Jsonb *convertToJsonb(JsonbValue *val);
41 static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
42 static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
43 static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
44 static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
45
46 static int      reserveFromBuffer(StringInfo buffer, int len);
47 static void appendToBuffer(StringInfo buffer, const char *data, int len);
48 static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
49 static short padBufferToInt(StringInfo buffer);
50
51 static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
52 static JsonbIterator *freeAndGetParent(JsonbIterator *it);
53 static JsonbParseState *pushState(JsonbParseState **pstate);
54 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
55 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
56 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
57 static int      lengthCompareJsonbStringValue(const void *a, const void *b);
58 static int      lengthCompareJsonbPair(const void *a, const void *b, void *arg);
59 static void uniqueifyJsonbObject(JsonbValue *object);
60
61 /*
62  * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
63  *
64  * There isn't a JsonbToJsonbValue(), because generally we find it more
65  * convenient to directly iterate through the Jsonb representation and only
66  * really convert nested scalar values.  JsonbIteratorNext() does this, so that
67  * clients of the iteration code don't have to directly deal with the binary
68  * representation (JsonbDeepContains() is a notable exception, although all
69  * exceptions are internal to this module).  In general, functions that accept
70  * a JsonbValue argument are concerned with the manipulation of scalar values,
71  * or simple containers of scalar values, where it would be inconvenient to
72  * deal with a great amount of other state.
73  */
74 Jsonb *
75 JsonbValueToJsonb(JsonbValue *val)
76 {
77         Jsonb      *out;
78
79         if (IsAJsonbScalar(val))
80         {
81                 /* Scalar value */
82                 JsonbParseState *pstate = NULL;
83                 JsonbValue *res;
84                 JsonbValue      scalarArray;
85
86                 scalarArray.type = jbvArray;
87                 scalarArray.val.array.rawScalar = true;
88                 scalarArray.val.array.nElems = 1;
89
90                 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
91                 pushJsonbValue(&pstate, WJB_ELEM, val);
92                 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
93
94                 out = convertToJsonb(res);
95         }
96         else if (val->type == jbvObject || val->type == jbvArray)
97         {
98                 out = convertToJsonb(val);
99         }
100         else
101         {
102                 Assert(val->type == jbvBinary);
103                 out = palloc(VARHDRSZ + val->val.binary.len);
104                 SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
105                 memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
106         }
107
108         return out;
109 }
110
111 /*
112  * Get the offset of the variable-length portion of a Jsonb node within
113  * the variable-length-data part of its container.  The node is identified
114  * by index within the container's JEntry array.
115  */
116 uint32
117 getJsonbOffset(const JsonbContainer *jc, int index)
118 {
119         uint32          offset = 0;
120         int                     i;
121
122         /*
123          * Start offset of this entry is equal to the end offset of the previous
124          * entry.  Walk backwards to the most recent entry stored as an end
125          * offset, returning that offset plus any lengths in between.
126          */
127         for (i = index - 1; i >= 0; i--)
128         {
129                 offset += JBE_OFFLENFLD(jc->children[i]);
130                 if (JBE_HAS_OFF(jc->children[i]))
131                         break;
132         }
133
134         return offset;
135 }
136
137 /*
138  * Get the length of the variable-length portion of a Jsonb node.
139  * The node is identified by index within the container's JEntry array.
140  */
141 uint32
142 getJsonbLength(const JsonbContainer *jc, int index)
143 {
144         uint32          off;
145         uint32          len;
146
147         /*
148          * If the length is stored directly in the JEntry, just return it.
149          * Otherwise, get the begin offset of the entry, and subtract that from
150          * the stored end+1 offset.
151          */
152         if (JBE_HAS_OFF(jc->children[index]))
153         {
154                 off = getJsonbOffset(jc, index);
155                 len = JBE_OFFLENFLD(jc->children[index]) - off;
156         }
157         else
158                 len = JBE_OFFLENFLD(jc->children[index]);
159
160         return len;
161 }
162
163 /*
164  * BT comparator worker function.  Returns an integer less than, equal to, or
165  * greater than zero, indicating whether a is less than, equal to, or greater
166  * than b.  Consistent with the requirements for a B-Tree operator class
167  *
168  * Strings are compared lexically, in contrast with other places where we use a
169  * much simpler comparator logic for searching through Strings.  Since this is
170  * called from B-Tree support function 1, we're careful about not leaking
171  * memory here.
172  */
173 int
174 compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
175 {
176         JsonbIterator *ita,
177                            *itb;
178         int                     res = 0;
179
180         ita = JsonbIteratorInit(a);
181         itb = JsonbIteratorInit(b);
182
183         do
184         {
185                 JsonbValue      va,
186                                         vb;
187                 int                     ra,
188                                         rb;
189
190                 ra = JsonbIteratorNext(&ita, &va, false);
191                 rb = JsonbIteratorNext(&itb, &vb, false);
192
193                 if (ra == rb)
194                 {
195                         if (ra == WJB_DONE)
196                         {
197                                 /* Decisively equal */
198                                 break;
199                         }
200
201                         if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
202                         {
203                                 /*
204                                  * There is no array or object to compare at this stage of
205                                  * processing.  jbvArray/jbvObject values are compared
206                                  * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
207                                  * tokens.
208                                  */
209                                 continue;
210                         }
211
212                         if (va.type == vb.type)
213                         {
214                                 switch (va.type)
215                                 {
216                                         case jbvString:
217                                         case jbvNull:
218                                         case jbvNumeric:
219                                         case jbvBool:
220                                                 res = compareJsonbScalarValue(&va, &vb);
221                                                 break;
222                                         case jbvArray:
223
224                                                 /*
225                                                  * This could be a "raw scalar" pseudo array.  That's
226                                                  * a special case here though, since we still want the
227                                                  * general type-based comparisons to apply, and as far
228                                                  * as we're concerned a pseudo array is just a scalar.
229                                                  */
230                                                 if (va.val.array.rawScalar != vb.val.array.rawScalar)
231                                                         res = (va.val.array.rawScalar) ? -1 : 1;
232                                                 if (va.val.array.nElems != vb.val.array.nElems)
233                                                         res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
234                                                 break;
235                                         case jbvObject:
236                                                 if (va.val.object.nPairs != vb.val.object.nPairs)
237                                                         res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
238                                                 break;
239                                         case jbvBinary:
240                                                 elog(ERROR, "unexpected jbvBinary value");
241                                 }
242                         }
243                         else
244                         {
245                                 /* Type-defined order */
246                                 res = (va.type > vb.type) ? 1 : -1;
247                         }
248                 }
249                 else
250                 {
251                         /*
252                          * It's safe to assume that the types differed, and that the va
253                          * and vb values passed were set.
254                          *
255                          * If the two values were of the same container type, then there'd
256                          * have been a chance to observe the variation in the number of
257                          * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
258                          * either two heterogeneously-typed containers, or a container and
259                          * some scalar type.
260                          *
261                          * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
262                          * cases here, because we would have seen the corresponding
263                          * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
264                          * concluded that they don't match.
265                          */
266                         Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
267                         Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
268
269                         Assert(va.type != vb.type);
270                         Assert(va.type != jbvBinary);
271                         Assert(vb.type != jbvBinary);
272                         /* Type-defined order */
273                         res = (va.type > vb.type) ? 1 : -1;
274                 }
275         }
276         while (res == 0);
277
278         while (ita != NULL)
279         {
280                 JsonbIterator *i = ita->parent;
281
282                 pfree(ita);
283                 ita = i;
284         }
285         while (itb != NULL)
286         {
287                 JsonbIterator *i = itb->parent;
288
289                 pfree(itb);
290                 itb = i;
291         }
292
293         return res;
294 }
295
296 /*
297  * Find value in object (i.e. the "value" part of some key/value pair in an
298  * object), or find a matching element if we're looking through an array.  Do
299  * so on the basis of equality of the object keys only, or alternatively
300  * element values only, with a caller-supplied value "key".  The "flags"
301  * argument allows the caller to specify which container types are of interest.
302  *
303  * This exported utility function exists to facilitate various cases concerned
304  * with "containment".  If asked to look through an object, the caller had
305  * better pass a Jsonb String, because their keys can only be strings.
306  * Otherwise, for an array, any type of JsonbValue will do.
307  *
308  * In order to proceed with the search, it is necessary for callers to have
309  * both specified an interest in exactly one particular container type with an
310  * appropriate flag, as well as having the pointed-to Jsonb container be of
311  * one of those same container types at the top level. (Actually, we just do
312  * whichever makes sense to save callers the trouble of figuring it out - at
313  * most one can make sense, because the container either points to an array
314  * (possibly a "raw scalar" pseudo array) or an object.)
315  *
316  * Note that we can return a jbvBinary JsonbValue if this is called on an
317  * object, but we never do so on an array.  If the caller asks to look through
318  * a container type that is not of the type pointed to by the container,
319  * immediately fall through and return NULL.  If we cannot find the value,
320  * return NULL.  Otherwise, return palloc()'d copy of value.
321  */
322 JsonbValue *
323 findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
324                                                         JsonbValue *key)
325 {
326         JEntry     *children = container->children;
327         int                     count = (container->header & JB_CMASK);
328         JsonbValue *result;
329
330         Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
331
332         /* Quick out without a palloc cycle if object/array is empty */
333         if (count <= 0)
334                 return NULL;
335
336         result = palloc(sizeof(JsonbValue));
337
338         if (flags & JB_FARRAY & container->header)
339         {
340                 char       *base_addr = (char *) (children + count);
341                 uint32          offset = 0;
342                 int                     i;
343
344                 for (i = 0; i < count; i++)
345                 {
346                         fillJsonbValue(container, i, base_addr, offset, result);
347
348                         if (key->type == result->type)
349                         {
350                                 if (equalsJsonbScalarValue(key, result))
351                                         return result;
352                         }
353
354                         JBE_ADVANCE_OFFSET(offset, children[i]);
355                 }
356         }
357         else if (flags & JB_FOBJECT & container->header)
358         {
359                 /* Since this is an object, account for *Pairs* of Jentrys */
360                 char       *base_addr = (char *) (children + count * 2);
361                 uint32          stopLow = 0,
362                                         stopHigh = count;
363
364                 /* Object key passed by caller must be a string */
365                 Assert(key->type == jbvString);
366
367                 /* Binary search on object/pair keys *only* */
368                 while (stopLow < stopHigh)
369                 {
370                         uint32          stopMiddle;
371                         int                     difference;
372                         JsonbValue      candidate;
373
374                         stopMiddle = stopLow + (stopHigh - stopLow) / 2;
375
376                         candidate.type = jbvString;
377                         candidate.val.string.val =
378                                 base_addr + getJsonbOffset(container, stopMiddle);
379                         candidate.val.string.len = getJsonbLength(container, stopMiddle);
380
381                         difference = lengthCompareJsonbStringValue(&candidate, key);
382
383                         if (difference == 0)
384                         {
385                                 /* Found our key, return corresponding value */
386                                 int                     index = stopMiddle + count;
387
388                                 fillJsonbValue(container, index, base_addr,
389                                                            getJsonbOffset(container, index),
390                                                            result);
391
392                                 return result;
393                         }
394                         else
395                         {
396                                 if (difference < 0)
397                                         stopLow = stopMiddle + 1;
398                                 else
399                                         stopHigh = stopMiddle;
400                         }
401                 }
402         }
403
404         /* Not found */
405         pfree(result);
406         return NULL;
407 }
408
409 /*
410  * Get i-th value of a Jsonb array.
411  *
412  * Returns palloc()'d copy of the value, or NULL if it does not exist.
413  */
414 JsonbValue *
415 getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
416 {
417         JsonbValue *result;
418         char       *base_addr;
419         uint32          nelements;
420
421         if ((container->header & JB_FARRAY) == 0)
422                 elog(ERROR, "not a jsonb array");
423
424         nelements = container->header & JB_CMASK;
425         base_addr = (char *) &container->children[nelements];
426
427         if (i >= nelements)
428                 return NULL;
429
430         result = palloc(sizeof(JsonbValue));
431
432         fillJsonbValue(container, i, base_addr,
433                                    getJsonbOffset(container, i),
434                                    result);
435
436         return result;
437 }
438
439 /*
440  * A helper function to fill in a JsonbValue to represent an element of an
441  * array, or a key or value of an object.
442  *
443  * The node's JEntry is at container->children[index], and its variable-length
444  * data is at base_addr + offset.  We make the caller determine the offset
445  * since in many cases the caller can amortize that work across multiple
446  * children.  When it can't, it can just call getJsonbOffset().
447  *
448  * A nested array or object will be returned as jbvBinary, ie. it won't be
449  * expanded.
450  */
451 static void
452 fillJsonbValue(JsonbContainer *container, int index,
453                            char *base_addr, uint32 offset,
454                            JsonbValue *result)
455 {
456         JEntry          entry = container->children[index];
457
458         if (JBE_ISNULL(entry))
459         {
460                 result->type = jbvNull;
461         }
462         else if (JBE_ISSTRING(entry))
463         {
464                 result->type = jbvString;
465                 result->val.string.val = base_addr + offset;
466                 result->val.string.len = getJsonbLength(container, index);
467                 Assert(result->val.string.len >= 0);
468         }
469         else if (JBE_ISNUMERIC(entry))
470         {
471                 result->type = jbvNumeric;
472                 result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
473         }
474         else if (JBE_ISBOOL_TRUE(entry))
475         {
476                 result->type = jbvBool;
477                 result->val.boolean = true;
478         }
479         else if (JBE_ISBOOL_FALSE(entry))
480         {
481                 result->type = jbvBool;
482                 result->val.boolean = false;
483         }
484         else
485         {
486                 Assert(JBE_ISCONTAINER(entry));
487                 result->type = jbvBinary;
488                 /* Remove alignment padding from data pointer and length */
489                 result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
490                 result->val.binary.len = getJsonbLength(container, index) -
491                         (INTALIGN(offset) - offset);
492         }
493 }
494
495 /*
496  * Push JsonbValue into JsonbParseState.
497  *
498  * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
499  * JsonbValue to a Jsonb.
500  *
501  * Initial state of *JsonbParseState is NULL, since it'll be allocated here
502  * originally (caller will get JsonbParseState back by reference).
503  *
504  * Only sequential tokens pertaining to non-container types should pass a
505  * JsonbValue.  There is one exception -- WJB_BEGIN_ARRAY callers may pass a
506  * "raw scalar" pseudo array to append that.
507  */
508 JsonbValue *
509 pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
510                            JsonbValue *scalarVal)
511 {
512         JsonbValue *result = NULL;
513
514         switch (seq)
515         {
516                 case WJB_BEGIN_ARRAY:
517                         Assert(!scalarVal || scalarVal->val.array.rawScalar);
518                         *pstate = pushState(pstate);
519                         result = &(*pstate)->contVal;
520                         (*pstate)->contVal.type = jbvArray;
521                         (*pstate)->contVal.val.array.nElems = 0;
522                         (*pstate)->contVal.val.array.rawScalar = (scalarVal &&
523                                                                                          scalarVal->val.array.rawScalar);
524                         if (scalarVal && scalarVal->val.array.nElems > 0)
525                         {
526                                 /* Assume that this array is still really a scalar */
527                                 Assert(scalarVal->type == jbvArray);
528                                 (*pstate)->size = scalarVal->val.array.nElems;
529                         }
530                         else
531                         {
532                                 (*pstate)->size = 4;
533                         }
534                         (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
535                                                                                                                 (*pstate)->size);
536                         break;
537                 case WJB_BEGIN_OBJECT:
538                         Assert(!scalarVal);
539                         *pstate = pushState(pstate);
540                         result = &(*pstate)->contVal;
541                         (*pstate)->contVal.type = jbvObject;
542                         (*pstate)->contVal.val.object.nPairs = 0;
543                         (*pstate)->size = 4;
544                         (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
545                                                                                                                  (*pstate)->size);
546                         break;
547                 case WJB_KEY:
548                         Assert(scalarVal->type == jbvString);
549                         appendKey(*pstate, scalarVal);
550                         break;
551                 case WJB_VALUE:
552                         Assert(IsAJsonbScalar(scalarVal) ||
553                                    scalarVal->type == jbvBinary);
554                         appendValue(*pstate, scalarVal);
555                         break;
556                 case WJB_ELEM:
557                         Assert(IsAJsonbScalar(scalarVal) ||
558                                    scalarVal->type == jbvBinary);
559                         appendElement(*pstate, scalarVal);
560                         break;
561                 case WJB_END_OBJECT:
562                         uniqueifyJsonbObject(&(*pstate)->contVal);
563                         /* fall through! */
564                 case WJB_END_ARRAY:
565                         /* Steps here common to WJB_END_OBJECT case */
566                         Assert(!scalarVal);
567                         result = &(*pstate)->contVal;
568
569                         /*
570                          * Pop stack and push current array/object as value in parent
571                          * array/object
572                          */
573                         *pstate = (*pstate)->next;
574                         if (*pstate)
575                         {
576                                 switch ((*pstate)->contVal.type)
577                                 {
578                                         case jbvArray:
579                                                 appendElement(*pstate, result);
580                                                 break;
581                                         case jbvObject:
582                                                 appendValue(*pstate, result);
583                                                 break;
584                                         default:
585                                                 elog(ERROR, "invalid jsonb container type");
586                                 }
587                         }
588                         break;
589                 default:
590                         elog(ERROR, "unrecognized jsonb sequential processing token");
591         }
592
593         return result;
594 }
595
596 /*
597  * pushJsonbValue() worker:  Iteration-like forming of Jsonb
598  */
599 static JsonbParseState *
600 pushState(JsonbParseState **pstate)
601 {
602         JsonbParseState *ns = palloc(sizeof(JsonbParseState));
603
604         ns->next = *pstate;
605         return ns;
606 }
607
608 /*
609  * pushJsonbValue() worker:  Append a pair key to state when generating a Jsonb
610  */
611 static void
612 appendKey(JsonbParseState *pstate, JsonbValue *string)
613 {
614         JsonbValue *object = &pstate->contVal;
615
616         Assert(object->type == jbvObject);
617         Assert(string->type == jbvString);
618
619         if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
620                 ereport(ERROR,
621                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
622                                  errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
623                                                 JSONB_MAX_PAIRS)));
624
625         if (object->val.object.nPairs >= pstate->size)
626         {
627                 pstate->size *= 2;
628                 object->val.object.pairs = repalloc(object->val.object.pairs,
629                                                                                         sizeof(JsonbPair) * pstate->size);
630         }
631
632         object->val.object.pairs[object->val.object.nPairs].key = *string;
633         object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
634 }
635
636 /*
637  * pushJsonbValue() worker:  Append a pair value to state when generating a
638  * Jsonb
639  */
640 static void
641 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
642 {
643         JsonbValue *object = &pstate->contVal;
644
645         Assert(object->type == jbvObject);
646
647         object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
648 }
649
650 /*
651  * pushJsonbValue() worker:  Append an element to state when generating a Jsonb
652  */
653 static void
654 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
655 {
656         JsonbValue *array = &pstate->contVal;
657
658         Assert(array->type == jbvArray);
659
660         if (array->val.array.nElems >= JSONB_MAX_ELEMS)
661                 ereport(ERROR,
662                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
663                                  errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
664                                                 JSONB_MAX_ELEMS)));
665
666         if (array->val.array.nElems >= pstate->size)
667         {
668                 pstate->size *= 2;
669                 array->val.array.elems = repalloc(array->val.array.elems,
670                                                                                   sizeof(JsonbValue) * pstate->size);
671         }
672
673         array->val.array.elems[array->val.array.nElems++] = *scalarVal;
674 }
675
676 /*
677  * Given a JsonbContainer, expand to JsonbIterator to iterate over items
678  * fully expanded to in-memory representation for manipulation.
679  *
680  * See JsonbIteratorNext() for notes on memory management.
681  */
682 JsonbIterator *
683 JsonbIteratorInit(JsonbContainer *container)
684 {
685         return iteratorFromContainer(container, NULL);
686 }
687
688 /*
689  * Get next JsonbValue while iterating
690  *
691  * Caller should initially pass their own, original iterator.  They may get
692  * back a child iterator palloc()'d here instead.  The function can be relied
693  * on to free those child iterators, lest the memory allocated for highly
694  * nested objects become unreasonable, but only if callers don't end iteration
695  * early (by breaking upon having found something in a search, for example).
696  *
697  * Callers in such a scenario, that are particularly sensitive to leaking
698  * memory in a long-lived context may walk the ancestral tree from the final
699  * iterator we left them with to its oldest ancestor, pfree()ing as they go.
700  * They do not have to free any other memory previously allocated for iterators
701  * but not accessible as direct ancestors of the iterator they're last passed
702  * back.
703  *
704  * Returns "Jsonb sequential processing" token value.  Iterator "state"
705  * reflects the current stage of the process in a less granular fashion, and is
706  * mostly used here to track things internally with respect to particular
707  * iterators.
708  *
709  * Clients of this function should not have to handle any jbvBinary values
710  * (since recursive calls will deal with this), provided skipNested is false.
711  * It is our job to expand the jbvBinary representation without bothering them
712  * with it.  However, clients should not take it upon themselves to touch array
713  * or Object element/pair buffers, since their element/pair pointers are
714  * garbage.  Also, *val will not be set when returning WJB_END_ARRAY or
715  * WJB_END_OBJECT, on the assumption that it's only useful to access values
716  * when recursing in.
717  */
718 JsonbIteratorToken
719 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
720 {
721         if (*it == NULL)
722                 return WJB_DONE;
723
724         /*
725          * When stepping into a nested container, we jump back here to start
726          * processing the child. We will not recurse further in one call, because
727          * processing the child will always begin in JBI_ARRAY_START or
728          * JBI_OBJECT_START state.
729          */
730 recurse:
731         switch ((*it)->state)
732         {
733                 case JBI_ARRAY_START:
734                         /* Set v to array on first array call */
735                         val->type = jbvArray;
736                         val->val.array.nElems = (*it)->nElems;
737
738                         /*
739                          * v->val.array.elems is not actually set, because we aren't doing
740                          * a full conversion
741                          */
742                         val->val.array.rawScalar = (*it)->isScalar;
743                         (*it)->curIndex = 0;
744                         (*it)->curDataOffset = 0;
745                         (*it)->curValueOffset = 0;      /* not actually used */
746                         /* Set state for next call */
747                         (*it)->state = JBI_ARRAY_ELEM;
748                         return WJB_BEGIN_ARRAY;
749
750                 case JBI_ARRAY_ELEM:
751                         if ((*it)->curIndex >= (*it)->nElems)
752                         {
753                                 /*
754                                  * All elements within array already processed.  Report this
755                                  * to caller, and give it back original parent iterator (which
756                                  * independently tracks iteration progress at its level of
757                                  * nesting).
758                                  */
759                                 *it = freeAndGetParent(*it);
760                                 return WJB_END_ARRAY;
761                         }
762
763                         fillJsonbValue((*it)->container, (*it)->curIndex,
764                                                    (*it)->dataProper, (*it)->curDataOffset,
765                                                    val);
766
767                         JBE_ADVANCE_OFFSET((*it)->curDataOffset,
768                                                            (*it)->children[(*it)->curIndex]);
769                         (*it)->curIndex++;
770
771                         if (!IsAJsonbScalar(val) && !skipNested)
772                         {
773                                 /* Recurse into container. */
774                                 *it = iteratorFromContainer(val->val.binary.data, *it);
775                                 goto recurse;
776                         }
777                         else
778                         {
779                                 /*
780                                  * Scalar item in array, or a container and caller didn't want
781                                  * us to recurse into it.
782                                  */
783                                 return WJB_ELEM;
784                         }
785
786                 case JBI_OBJECT_START:
787                         /* Set v to object on first object call */
788                         val->type = jbvObject;
789                         val->val.object.nPairs = (*it)->nElems;
790
791                         /*
792                          * v->val.object.pairs is not actually set, because we aren't
793                          * doing a full conversion
794                          */
795                         (*it)->curIndex = 0;
796                         (*it)->curDataOffset = 0;
797                         (*it)->curValueOffset = getJsonbOffset((*it)->container,
798                                                                                                    (*it)->nElems);
799                         /* Set state for next call */
800                         (*it)->state = JBI_OBJECT_KEY;
801                         return WJB_BEGIN_OBJECT;
802
803                 case JBI_OBJECT_KEY:
804                         if ((*it)->curIndex >= (*it)->nElems)
805                         {
806                                 /*
807                                  * All pairs within object already processed.  Report this to
808                                  * caller, and give it back original containing iterator
809                                  * (which independently tracks iteration progress at its level
810                                  * of nesting).
811                                  */
812                                 *it = freeAndGetParent(*it);
813                                 return WJB_END_OBJECT;
814                         }
815                         else
816                         {
817                                 /* Return key of a key/value pair.  */
818                                 fillJsonbValue((*it)->container, (*it)->curIndex,
819                                                            (*it)->dataProper, (*it)->curDataOffset,
820                                                            val);
821                                 if (val->type != jbvString)
822                                         elog(ERROR, "unexpected jsonb type as object key");
823
824                                 /* Set state for next call */
825                                 (*it)->state = JBI_OBJECT_VALUE;
826                                 return WJB_KEY;
827                         }
828
829                 case JBI_OBJECT_VALUE:
830                         /* Set state for next call */
831                         (*it)->state = JBI_OBJECT_KEY;
832
833                         fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
834                                                    (*it)->dataProper, (*it)->curValueOffset,
835                                                    val);
836
837                         JBE_ADVANCE_OFFSET((*it)->curDataOffset,
838                                                            (*it)->children[(*it)->curIndex]);
839                         JBE_ADVANCE_OFFSET((*it)->curValueOffset,
840                                                    (*it)->children[(*it)->curIndex + (*it)->nElems]);
841                         (*it)->curIndex++;
842
843                         /*
844                          * Value may be a container, in which case we recurse with new,
845                          * child iterator (unless the caller asked not to, by passing
846                          * skipNested).
847                          */
848                         if (!IsAJsonbScalar(val) && !skipNested)
849                         {
850                                 *it = iteratorFromContainer(val->val.binary.data, *it);
851                                 goto recurse;
852                         }
853                         else
854                                 return WJB_VALUE;
855         }
856
857         elog(ERROR, "invalid iterator state");
858         return -1;
859 }
860
861 /*
862  * Initialize an iterator for iterating all elements in a container.
863  */
864 static JsonbIterator *
865 iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
866 {
867         JsonbIterator *it;
868
869         it = palloc(sizeof(JsonbIterator));
870         it->container = container;
871         it->parent = parent;
872         it->nElems = container->header & JB_CMASK;
873
874         /* Array starts just after header */
875         it->children = container->children;
876
877         switch (container->header & (JB_FARRAY | JB_FOBJECT))
878         {
879                 case JB_FARRAY:
880                         it->dataProper =
881                                 (char *) it->children + it->nElems * sizeof(JEntry);
882                         it->isScalar = (container->header & JB_FSCALAR) != 0;
883                         /* This is either a "raw scalar", or an array */
884                         Assert(!it->isScalar || it->nElems == 1);
885
886                         it->state = JBI_ARRAY_START;
887                         break;
888
889                 case JB_FOBJECT:
890                         it->dataProper =
891                                 (char *) it->children + it->nElems * sizeof(JEntry) * 2;
892                         it->state = JBI_OBJECT_START;
893                         break;
894
895                 default:
896                         elog(ERROR, "unknown type of jsonb container");
897         }
898
899         return it;
900 }
901
902 /*
903  * JsonbIteratorNext() worker:  Return parent, while freeing memory for current
904  * iterator
905  */
906 static JsonbIterator *
907 freeAndGetParent(JsonbIterator *it)
908 {
909         JsonbIterator *v = it->parent;
910
911         pfree(it);
912         return v;
913 }
914
915 /*
916  * Worker for "contains" operator's function
917  *
918  * Formally speaking, containment is top-down, unordered subtree isomorphism.
919  *
920  * Takes iterators that belong to some container type.  These iterators
921  * "belong" to those values in the sense that they've just been initialized in
922  * respect of them by the caller (perhaps in a nested fashion).
923  *
924  * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
925  * We determine if mContained is contained within val.
926  */
927 bool
928 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
929 {
930         uint32          rval,
931                                 rcont;
932         JsonbValue      vval,
933                                 vcontained;
934
935         /*
936          * Guard against stack overflow due to overly complex Jsonb.
937          *
938          * Functions called here independently take this precaution, but that
939          * might not be sufficient since this is also a recursive function.
940          */
941         check_stack_depth();
942
943         rval = JsonbIteratorNext(val, &vval, false);
944         rcont = JsonbIteratorNext(mContained, &vcontained, false);
945
946         if (rval != rcont)
947         {
948                 /*
949                  * The differing return values can immediately be taken as indicating
950                  * two differing container types at this nesting level, which is
951                  * sufficient reason to give up entirely (but it should be the case
952                  * that they're both some container type).
953                  */
954                 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
955                 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
956                 return false;
957         }
958         else if (rcont == WJB_BEGIN_OBJECT)
959         {
960                 Assert(vval.type == jbvObject);
961                 Assert(vcontained.type == jbvObject);
962
963                 /*
964                  * If the lhs has fewer pairs than the rhs, it can't possibly contain
965                  * the rhs.  (This conclusion is safe only because we de-duplicate
966                  * keys in all Jsonb objects; thus there can be no corresponding
967                  * optimization in the array case.)  The case probably won't arise
968                  * often, but since it's such a cheap check we may as well make it.
969                  */
970                 if (vval.val.object.nPairs < vcontained.val.object.nPairs)
971                         return false;
972
973                 /* Work through rhs "is it contained within?" object */
974                 for (;;)
975                 {
976                         JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
977
978                         rcont = JsonbIteratorNext(mContained, &vcontained, false);
979
980                         /*
981                          * When we get through caller's rhs "is it contained within?"
982                          * object without failing to find one of its values, it's
983                          * contained.
984                          */
985                         if (rcont == WJB_END_OBJECT)
986                                 return true;
987
988                         Assert(rcont == WJB_KEY);
989
990                         /* First, find value by key... */
991                         lhsVal = findJsonbValueFromContainer((*val)->container,
992                                                                                                  JB_FOBJECT,
993                                                                                                  &vcontained);
994
995                         if (!lhsVal)
996                                 return false;
997
998                         /*
999                          * ...at this stage it is apparent that there is at least a key
1000                          * match for this rhs pair.
1001                          */
1002                         rcont = JsonbIteratorNext(mContained, &vcontained, true);
1003
1004                         Assert(rcont == WJB_VALUE);
1005
1006                         /*
1007                          * Compare rhs pair's value with lhs pair's value just found using
1008                          * key
1009                          */
1010                         if (lhsVal->type != vcontained.type)
1011                         {
1012                                 return false;
1013                         }
1014                         else if (IsAJsonbScalar(lhsVal))
1015                         {
1016                                 if (!equalsJsonbScalarValue(lhsVal, &vcontained))
1017                                         return false;
1018                         }
1019                         else
1020                         {
1021                                 /* Nested container value (object or array) */
1022                                 JsonbIterator *nestval,
1023                                                    *nestContained;
1024
1025                                 Assert(lhsVal->type == jbvBinary);
1026                                 Assert(vcontained.type == jbvBinary);
1027
1028                                 nestval = JsonbIteratorInit(lhsVal->val.binary.data);
1029                                 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1030
1031                                 /*
1032                                  * Match "value" side of rhs datum object's pair recursively.
1033                                  * It's a nested structure.
1034                                  *
1035                                  * Note that nesting still has to "match up" at the right
1036                                  * nesting sub-levels.  However, there need only be zero or
1037                                  * more matching pairs (or elements) at each nesting level
1038                                  * (provided the *rhs* pairs/elements *all* match on each
1039                                  * level), which enables searching nested structures for a
1040                                  * single String or other primitive type sub-datum quite
1041                                  * effectively (provided the user constructed the rhs nested
1042                                  * structure such that we "know where to look").
1043                                  *
1044                                  * In other words, the mapping of container nodes in the rhs
1045                                  * "vcontained" Jsonb to internal nodes on the lhs is
1046                                  * injective, and parent-child edges on the rhs must be mapped
1047                                  * to parent-child edges on the lhs to satisfy the condition
1048                                  * of containment (plus of course the mapped nodes must be
1049                                  * equal).
1050                                  */
1051                                 if (!JsonbDeepContains(&nestval, &nestContained))
1052                                         return false;
1053                         }
1054                 }
1055         }
1056         else if (rcont == WJB_BEGIN_ARRAY)
1057         {
1058                 JsonbValue *lhsConts = NULL;
1059                 uint32          nLhsElems = vval.val.array.nElems;
1060
1061                 Assert(vval.type == jbvArray);
1062                 Assert(vcontained.type == jbvArray);
1063
1064                 /*
1065                  * Handle distinction between "raw scalar" pseudo arrays, and real
1066                  * arrays.
1067                  *
1068                  * A raw scalar may contain another raw scalar, and an array may
1069                  * contain a raw scalar, but a raw scalar may not contain an array. We
1070                  * don't do something like this for the object case, since objects can
1071                  * only contain pairs, never raw scalars (a pair is represented by an
1072                  * rhs object argument with a single contained pair).
1073                  */
1074                 if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
1075                         return false;
1076
1077                 /* Work through rhs "is it contained within?" array */
1078                 for (;;)
1079                 {
1080                         rcont = JsonbIteratorNext(mContained, &vcontained, true);
1081
1082                         /*
1083                          * When we get through caller's rhs "is it contained within?"
1084                          * array without failing to find one of its values, it's
1085                          * contained.
1086                          */
1087                         if (rcont == WJB_END_ARRAY)
1088                                 return true;
1089
1090                         Assert(rcont == WJB_ELEM);
1091
1092                         if (IsAJsonbScalar(&vcontained))
1093                         {
1094                                 if (!findJsonbValueFromContainer((*val)->container,
1095                                                                                                  JB_FARRAY,
1096                                                                                                  &vcontained))
1097                                         return false;
1098                         }
1099                         else
1100                         {
1101                                 uint32          i;
1102
1103                                 /*
1104                                  * If this is first container found in rhs array (at this
1105                                  * depth), initialize temp lhs array of containers
1106                                  */
1107                                 if (lhsConts == NULL)
1108                                 {
1109                                         uint32          j = 0;
1110
1111                                         /* Make room for all possible values */
1112                                         lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1113
1114                                         for (i = 0; i < nLhsElems; i++)
1115                                         {
1116                                                 /* Store all lhs elements in temp array */
1117                                                 rcont = JsonbIteratorNext(val, &vval, true);
1118                                                 Assert(rcont == WJB_ELEM);
1119
1120                                                 if (vval.type == jbvBinary)
1121                                                         lhsConts[j++] = vval;
1122                                         }
1123
1124                                         /* No container elements in temp array, so give up now */
1125                                         if (j == 0)
1126                                                 return false;
1127
1128                                         /* We may have only partially filled array */
1129                                         nLhsElems = j;
1130                                 }
1131
1132                                 /* XXX: Nested array containment is O(N^2) */
1133                                 for (i = 0; i < nLhsElems; i++)
1134                                 {
1135                                         /* Nested container value (object or array) */
1136                                         JsonbIterator *nestval,
1137                                                            *nestContained;
1138                                         bool            contains;
1139
1140                                         nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1141                                         nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1142
1143                                         contains = JsonbDeepContains(&nestval, &nestContained);
1144
1145                                         if (nestval)
1146                                                 pfree(nestval);
1147                                         if (nestContained)
1148                                                 pfree(nestContained);
1149                                         if (contains)
1150                                                 break;
1151                                 }
1152
1153                                 /*
1154                                  * Report rhs container value is not contained if couldn't
1155                                  * match rhs container to *some* lhs cont
1156                                  */
1157                                 if (i == nLhsElems)
1158                                         return false;
1159                         }
1160                 }
1161         }
1162         else
1163         {
1164                 elog(ERROR, "invalid jsonb container type");
1165         }
1166
1167         elog(ERROR, "unexpectedly fell off end of jsonb container");
1168         return false;
1169 }
1170
1171 /*
1172  * Hash a JsonbValue scalar value, mixing the hash value into an existing
1173  * hash provided by the caller.
1174  *
1175  * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1176  * flags.
1177  */
1178 void
1179 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1180 {
1181         uint32          tmp;
1182
1183         /* Compute hash value for scalarVal */
1184         switch (scalarVal->type)
1185         {
1186                 case jbvNull:
1187                         tmp = 0x01;
1188                         break;
1189                 case jbvString:
1190                         tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1191                                                                                   scalarVal->val.string.len));
1192                         break;
1193                 case jbvNumeric:
1194                         /* Must hash equal numerics to equal hash codes */
1195                         tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1196                                                                    NumericGetDatum(scalarVal->val.numeric)));
1197                         break;
1198                 case jbvBool:
1199                         tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1200                         break;
1201                 default:
1202                         elog(ERROR, "invalid jsonb scalar type");
1203                         tmp = 0;                        /* keep compiler quiet */
1204                         break;
1205         }
1206
1207         /*
1208          * Combine hash values of successive keys, values and elements by rotating
1209          * the previous value left 1 bit, then XOR'ing in the new
1210          * key/value/element's hash value.
1211          */
1212         *hash = (*hash << 1) | (*hash >> 31);
1213         *hash ^= tmp;
1214 }
1215
1216 /*
1217  * Are two scalar JsonbValues of the same type a and b equal?
1218  */
1219 static bool
1220 equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1221 {
1222         if (aScalar->type == bScalar->type)
1223         {
1224                 switch (aScalar->type)
1225                 {
1226                         case jbvNull:
1227                                 return true;
1228                         case jbvString:
1229                                 return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
1230                         case jbvNumeric:
1231                                 return DatumGetBool(DirectFunctionCall2(numeric_eq,
1232                                                                            PointerGetDatum(aScalar->val.numeric),
1233                                                                          PointerGetDatum(bScalar->val.numeric)));
1234                         case jbvBool:
1235                                 return aScalar->val.boolean == bScalar->val.boolean;
1236
1237                         default:
1238                                 elog(ERROR, "invalid jsonb scalar type");
1239                 }
1240         }
1241         elog(ERROR, "jsonb scalar type mismatch");
1242         return -1;
1243 }
1244
1245 /*
1246  * Compare two scalar JsonbValues, returning -1, 0, or 1.
1247  *
1248  * Strings are compared using the default collation.  Used by B-tree
1249  * operators, where a lexical sort order is generally expected.
1250  */
1251 static int
1252 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1253 {
1254         if (aScalar->type == bScalar->type)
1255         {
1256                 switch (aScalar->type)
1257                 {
1258                         case jbvNull:
1259                                 return 0;
1260                         case jbvString:
1261                                 return varstr_cmp(aScalar->val.string.val,
1262                                                                   aScalar->val.string.len,
1263                                                                   bScalar->val.string.val,
1264                                                                   bScalar->val.string.len,
1265                                                                   DEFAULT_COLLATION_OID);
1266                         case jbvNumeric:
1267                                 return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1268                                                                            PointerGetDatum(aScalar->val.numeric),
1269                                                                          PointerGetDatum(bScalar->val.numeric)));
1270                         case jbvBool:
1271                                 if (aScalar->val.boolean == bScalar->val.boolean)
1272                                         return 0;
1273                                 else if (aScalar->val.boolean > bScalar->val.boolean)
1274                                         return 1;
1275                                 else
1276                                         return -1;
1277                         default:
1278                                 elog(ERROR, "invalid jsonb scalar type");
1279                 }
1280         }
1281         elog(ERROR, "jsonb scalar type mismatch");
1282         return -1;
1283 }
1284
1285
1286 /*
1287  * Functions for manipulating the resizeable buffer used by convertJsonb and
1288  * its subroutines.
1289  */
1290
1291 /*
1292  * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
1293  * Returns the offset to the reserved area. The caller is expected to fill
1294  * the reserved area later with copyToBuffer().
1295  */
1296 static int
1297 reserveFromBuffer(StringInfo buffer, int len)
1298 {
1299         int                     offset;
1300
1301         /* Make more room if needed */
1302         enlargeStringInfo(buffer, len);
1303
1304         /* remember current offset */
1305         offset = buffer->len;
1306
1307         /* reserve the space */
1308         buffer->len += len;
1309
1310         /*
1311          * Keep a trailing null in place, even though it's not useful for us; it
1312          * seems best to preserve the invariants of StringInfos.
1313          */
1314         buffer->data[buffer->len] = '\0';
1315
1316         return offset;
1317 }
1318
1319 /*
1320  * Copy 'len' bytes to a previously reserved area in buffer.
1321  */
1322 static void
1323 copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
1324 {
1325         memcpy(buffer->data + offset, data, len);
1326 }
1327
1328 /*
1329  * A shorthand for reserveFromBuffer + copyToBuffer.
1330  */
1331 static void
1332 appendToBuffer(StringInfo buffer, const char *data, int len)
1333 {
1334         int                     offset;
1335
1336         offset = reserveFromBuffer(buffer, len);
1337         copyToBuffer(buffer, offset, data, len);
1338 }
1339
1340
1341 /*
1342  * Append padding, so that the length of the StringInfo is int-aligned.
1343  * Returns the number of padding bytes appended.
1344  */
1345 static short
1346 padBufferToInt(StringInfo buffer)
1347 {
1348         int                     padlen,
1349                                 p,
1350                                 offset;
1351
1352         padlen = INTALIGN(buffer->len) - buffer->len;
1353
1354         offset = reserveFromBuffer(buffer, padlen);
1355
1356         /* padlen must be small, so this is probably faster than a memset */
1357         for (p = 0; p < padlen; p++)
1358                 buffer->data[offset + p] = '\0';
1359
1360         return padlen;
1361 }
1362
1363 /*
1364  * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
1365  */
1366 static Jsonb *
1367 convertToJsonb(JsonbValue *val)
1368 {
1369         StringInfoData buffer;
1370         JEntry          jentry;
1371         Jsonb      *res;
1372
1373         /* Should not already have binary representation */
1374         Assert(val->type != jbvBinary);
1375
1376         /* Allocate an output buffer. It will be enlarged as needed */
1377         initStringInfo(&buffer);
1378
1379         /* Make room for the varlena header */
1380         reserveFromBuffer(&buffer, VARHDRSZ);
1381
1382         convertJsonbValue(&buffer, &jentry, val, 0);
1383
1384         /*
1385          * Note: the JEntry of the root is discarded. Therefore the root
1386          * JsonbContainer struct must contain enough information to tell what kind
1387          * of value it is.
1388          */
1389
1390         res = (Jsonb *) buffer.data;
1391
1392         SET_VARSIZE(res, buffer.len);
1393
1394         return res;
1395 }
1396
1397 /*
1398  * Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
1399  *
1400  * The JEntry header for this node is returned in *header.  It is filled in
1401  * with the length of this value and appropriate type bits.  If we wish to
1402  * store an end offset rather than a length, it is the caller's responsibility
1403  * to adjust for that.
1404  *
1405  * If the value is an array or an object, this recurses. 'level' is only used
1406  * for debugging purposes.
1407  */
1408 static void
1409 convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
1410 {
1411         check_stack_depth();
1412
1413         if (!val)
1414                 return;
1415
1416         /*
1417          * A JsonbValue passed as val should never have a type of jbvBinary, and
1418          * neither should any of its sub-components. Those values will be produced
1419          * by convertJsonbArray and convertJsonbObject, the results of which will
1420          * not be passed back to this function as an argument.
1421          */
1422
1423         if (IsAJsonbScalar(val))
1424                 convertJsonbScalar(buffer, header, val);
1425         else if (val->type == jbvArray)
1426                 convertJsonbArray(buffer, header, val, level);
1427         else if (val->type == jbvObject)
1428                 convertJsonbObject(buffer, header, val, level);
1429         else
1430                 elog(ERROR, "unknown type of jsonb container to convert");
1431 }
1432
1433 static void
1434 convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1435 {
1436         int                     base_offset;
1437         int                     jentry_offset;
1438         int                     i;
1439         int                     totallen;
1440         uint32          header;
1441         int                     nElems = val->val.array.nElems;
1442
1443         /* Remember where in the buffer this array starts. */
1444         base_offset = buffer->len;
1445
1446         /* Align to 4-byte boundary (any padding counts as part of my data) */
1447         padBufferToInt(buffer);
1448
1449         /*
1450          * Construct the header Jentry and store it in the beginning of the
1451          * variable-length payload.
1452          */
1453         header = nElems | JB_FARRAY;
1454         if (val->val.array.rawScalar)
1455         {
1456                 Assert(nElems == 1);
1457                 Assert(level == 0);
1458                 header |= JB_FSCALAR;
1459         }
1460
1461         appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1462
1463         /* Reserve space for the JEntries of the elements. */
1464         jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
1465
1466         totallen = 0;
1467         for (i = 0; i < nElems; i++)
1468         {
1469                 JsonbValue *elem = &val->val.array.elems[i];
1470                 int                     len;
1471                 JEntry          meta;
1472
1473                 /*
1474                  * Convert element, producing a JEntry and appending its
1475                  * variable-length data to buffer
1476                  */
1477                 convertJsonbValue(buffer, &meta, elem, level + 1);
1478
1479                 len = JBE_OFFLENFLD(meta);
1480                 totallen += len;
1481
1482                 /*
1483                  * Bail out if total variable-length data exceeds what will fit in a
1484                  * JEntry length field.  We check this in each iteration, not just
1485                  * once at the end, to forestall possible integer overflow.
1486                  */
1487                 if (totallen > JENTRY_OFFLENMASK)
1488                         ereport(ERROR,
1489                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1490                                          errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1491                                                         JENTRY_OFFLENMASK)));
1492
1493                 /*
1494                  * Convert each JB_OFFSET_STRIDE'th length to an offset.
1495                  */
1496                 if ((i % JB_OFFSET_STRIDE) == 0)
1497                         meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1498
1499                 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1500                 jentry_offset += sizeof(JEntry);
1501         }
1502
1503         /* Total data size is everything we've appended to buffer */
1504         totallen = buffer->len - base_offset;
1505
1506         /* Check length again, since we didn't include the metadata above */
1507         if (totallen > JENTRY_OFFLENMASK)
1508                 ereport(ERROR,
1509                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1510                                  errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1511                                                 JENTRY_OFFLENMASK)));
1512
1513         /* Initialize the header of this node in the container's JEntry array */
1514         *pheader = JENTRY_ISCONTAINER | totallen;
1515 }
1516
1517 static void
1518 convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1519 {
1520         int                     base_offset;
1521         int                     jentry_offset;
1522         int                     i;
1523         int                     totallen;
1524         uint32          header;
1525         int                     nPairs = val->val.object.nPairs;
1526
1527         /* Remember where in the buffer this object starts. */
1528         base_offset = buffer->len;
1529
1530         /* Align to 4-byte boundary (any padding counts as part of my data) */
1531         padBufferToInt(buffer);
1532
1533         /*
1534          * Construct the header Jentry and store it in the beginning of the
1535          * variable-length payload.
1536          */
1537         header = nPairs | JB_FOBJECT;
1538         appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1539
1540         /* Reserve space for the JEntries of the keys and values. */
1541         jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
1542
1543         /*
1544          * Iterate over the keys, then over the values, since that is the ordering
1545          * we want in the on-disk representation.
1546          */
1547         totallen = 0;
1548         for (i = 0; i < nPairs; i++)
1549         {
1550                 JsonbPair  *pair = &val->val.object.pairs[i];
1551                 int                     len;
1552                 JEntry          meta;
1553
1554                 /*
1555                  * Convert key, producing a JEntry and appending its variable-length
1556                  * data to buffer
1557                  */
1558                 convertJsonbScalar(buffer, &meta, &pair->key);
1559
1560                 len = JBE_OFFLENFLD(meta);
1561                 totallen += len;
1562
1563                 /*
1564                  * Bail out if total variable-length data exceeds what will fit in a
1565                  * JEntry length field.  We check this in each iteration, not just
1566                  * once at the end, to forestall possible integer overflow.
1567                  */
1568                 if (totallen > JENTRY_OFFLENMASK)
1569                         ereport(ERROR,
1570                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1571                                          errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1572                                                         JENTRY_OFFLENMASK)));
1573
1574                 /*
1575                  * Convert each JB_OFFSET_STRIDE'th length to an offset.
1576                  */
1577                 if ((i % JB_OFFSET_STRIDE) == 0)
1578                         meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1579
1580                 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1581                 jentry_offset += sizeof(JEntry);
1582         }
1583         for (i = 0; i < nPairs; i++)
1584         {
1585                 JsonbPair  *pair = &val->val.object.pairs[i];
1586                 int                     len;
1587                 JEntry          meta;
1588
1589                 /*
1590                  * Convert value, producing a JEntry and appending its variable-length
1591                  * data to buffer
1592                  */
1593                 convertJsonbValue(buffer, &meta, &pair->value, level + 1);
1594
1595                 len = JBE_OFFLENFLD(meta);
1596                 totallen += len;
1597
1598                 /*
1599                  * Bail out if total variable-length data exceeds what will fit in a
1600                  * JEntry length field.  We check this in each iteration, not just
1601                  * once at the end, to forestall possible integer overflow.
1602                  */
1603                 if (totallen > JENTRY_OFFLENMASK)
1604                         ereport(ERROR,
1605                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1606                                          errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1607                                                         JENTRY_OFFLENMASK)));
1608
1609                 /*
1610                  * Convert each JB_OFFSET_STRIDE'th length to an offset.
1611                  */
1612                 if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
1613                         meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1614
1615                 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1616                 jentry_offset += sizeof(JEntry);
1617         }
1618
1619         /* Total data size is everything we've appended to buffer */
1620         totallen = buffer->len - base_offset;
1621
1622         /* Check length again, since we didn't include the metadata above */
1623         if (totallen > JENTRY_OFFLENMASK)
1624                 ereport(ERROR,
1625                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1626                                  errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1627                                                 JENTRY_OFFLENMASK)));
1628
1629         /* Initialize the header of this node in the container's JEntry array */
1630         *pheader = JENTRY_ISCONTAINER | totallen;
1631 }
1632
1633 static void
1634 convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
1635 {
1636         int                     numlen;
1637         short           padlen;
1638
1639         switch (scalarVal->type)
1640         {
1641                 case jbvNull:
1642                         *jentry = JENTRY_ISNULL;
1643                         break;
1644
1645                 case jbvString:
1646                         appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
1647
1648                         *jentry = scalarVal->val.string.len;
1649                         break;
1650
1651                 case jbvNumeric:
1652                         numlen = VARSIZE_ANY(scalarVal->val.numeric);
1653                         padlen = padBufferToInt(buffer);
1654
1655                         appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
1656
1657                         *jentry = JENTRY_ISNUMERIC | (padlen + numlen);
1658                         break;
1659
1660                 case jbvBool:
1661                         *jentry = (scalarVal->val.boolean) ?
1662                                 JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
1663                         break;
1664
1665                 default:
1666                         elog(ERROR, "invalid jsonb scalar type");
1667         }
1668 }
1669
1670 /*
1671  * Compare two jbvString JsonbValue values, a and b.
1672  *
1673  * This is a special qsort() comparator used to sort strings in certain
1674  * internal contexts where it is sufficient to have a well-defined sort order.
1675  * In particular, object pair keys are sorted according to this criteria to
1676  * facilitate cheap binary searches where we don't care about lexical sort
1677  * order.
1678  *
1679  * a and b are first sorted based on their length.  If a tie-breaker is
1680  * required, only then do we consider string binary equality.
1681  */
1682 static int
1683 lengthCompareJsonbStringValue(const void *a, const void *b)
1684 {
1685         const JsonbValue *va = (const JsonbValue *) a;
1686         const JsonbValue *vb = (const JsonbValue *) b;
1687         int                     res;
1688
1689         Assert(va->type == jbvString);
1690         Assert(vb->type == jbvString);
1691
1692         if (va->val.string.len == vb->val.string.len)
1693         {
1694                 res = memcmp(va->val.string.val, vb->val.string.val, va->val.string.len);
1695         }
1696         else
1697         {
1698                 res = (va->val.string.len > vb->val.string.len) ? 1 : -1;
1699         }
1700
1701         return res;
1702 }
1703
1704 /*
1705  * qsort_arg() comparator to compare JsonbPair values.
1706  *
1707  * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1708  * to true iff a and b have full binary equality, since some callers have an
1709  * interest in whether the two values are equal or merely equivalent.
1710  *
1711  * N.B: String comparisons here are "length-wise"
1712  *
1713  * Pairs with equals keys are ordered such that the order field is respected.
1714  */
1715 static int
1716 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1717 {
1718         const JsonbPair *pa = (const JsonbPair *) a;
1719         const JsonbPair *pb = (const JsonbPair *) b;
1720         int                     res;
1721
1722         res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
1723         if (res == 0 && binequal)
1724                 *((bool *) binequal) = true;
1725
1726         /*
1727          * Guarantee keeping order of equal pair.  Unique algorithm will prefer
1728          * first element as value.
1729          */
1730         if (res == 0)
1731                 res = (pa->order > pb->order) ? -1 : 1;
1732
1733         return res;
1734 }
1735
1736 /*
1737  * Sort and unique-ify pairs in JsonbValue object
1738  */
1739 static void
1740 uniqueifyJsonbObject(JsonbValue *object)
1741 {
1742         bool            hasNonUniq = false;
1743
1744         Assert(object->type == jbvObject);
1745
1746         if (object->val.object.nPairs > 1)
1747                 qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1748                                   lengthCompareJsonbPair, &hasNonUniq);
1749
1750         if (hasNonUniq)
1751         {
1752                 JsonbPair  *ptr = object->val.object.pairs + 1,
1753                                    *res = object->val.object.pairs;
1754
1755                 while (ptr - object->val.object.pairs < object->val.object.nPairs)
1756                 {
1757                         /* Avoid copying over duplicate */
1758                         if (lengthCompareJsonbStringValue(ptr, res) != 0)
1759                         {
1760                                 res++;
1761                                 if (ptr != res)
1762                                         memcpy(res, ptr, sizeof(JsonbPair));
1763                         }
1764                         ptr++;
1765                 }
1766
1767                 object->val.object.nPairs = res + 1 - object->val.object.pairs;
1768         }
1769 }