]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/jsonb_util.c
Introduce 64-bit hash functions with a 64-bit seed.
[postgresql] / src / backend / utils / adt / jsonb_util.c
1 /*-------------------------------------------------------------------------
2  *
3  * jsonb_util.c
4  *        converting between Jsonb and JsonbValues, and iterating.
5  *
6  * Copyright (c) 2014-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/jsonb_util.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "access/hash.h"
17 #include "catalog/pg_collation.h"
18 #include "miscadmin.h"
19 #include "utils/builtins.h"
20 #include "utils/jsonb.h"
21 #include "utils/memutils.h"
22 #include "utils/varlena.h"
23
24 /*
25  * Maximum number of elements in an array (or key/value pairs in an object).
26  * This is limited by two things: the size of the JEntry array must fit
27  * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
28  * reserved for that in the JsonbContainer.header field.
29  *
30  * (The total size of an array's or object's elements is also limited by
31  * JENTRY_OFFLENMASK, but we're not concerned about that here.)
32  */
33 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
34 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
35
36 static void fillJsonbValue(JsonbContainer *container, int index,
37                            char *base_addr, uint32 offset,
38                            JsonbValue *result);
39 static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
40 static int      compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
41 static Jsonb *convertToJsonb(JsonbValue *val);
42 static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
43 static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
44 static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
45 static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
46
47 static int      reserveFromBuffer(StringInfo buffer, int len);
48 static void appendToBuffer(StringInfo buffer, const char *data, int len);
49 static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
50 static short padBufferToInt(StringInfo buffer);
51
52 static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
53 static JsonbIterator *freeAndGetParent(JsonbIterator *it);
54 static JsonbParseState *pushState(JsonbParseState **pstate);
55 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
56 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
57 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
58 static int      lengthCompareJsonbStringValue(const void *a, const void *b);
59 static int      lengthCompareJsonbPair(const void *a, const void *b, void *arg);
60 static void uniqueifyJsonbObject(JsonbValue *object);
61 static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
62                                          JsonbIteratorToken seq,
63                                          JsonbValue *scalarVal);
64
65 /*
66  * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
67  *
68  * There isn't a JsonbToJsonbValue(), because generally we find it more
69  * convenient to directly iterate through the Jsonb representation and only
70  * really convert nested scalar values.  JsonbIteratorNext() does this, so that
71  * clients of the iteration code don't have to directly deal with the binary
72  * representation (JsonbDeepContains() is a notable exception, although all
73  * exceptions are internal to this module).  In general, functions that accept
74  * a JsonbValue argument are concerned with the manipulation of scalar values,
75  * or simple containers of scalar values, where it would be inconvenient to
76  * deal with a great amount of other state.
77  */
78 Jsonb *
79 JsonbValueToJsonb(JsonbValue *val)
80 {
81         Jsonb      *out;
82
83         if (IsAJsonbScalar(val))
84         {
85                 /* Scalar value */
86                 JsonbParseState *pstate = NULL;
87                 JsonbValue *res;
88                 JsonbValue      scalarArray;
89
90                 scalarArray.type = jbvArray;
91                 scalarArray.val.array.rawScalar = true;
92                 scalarArray.val.array.nElems = 1;
93
94                 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
95                 pushJsonbValue(&pstate, WJB_ELEM, val);
96                 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
97
98                 out = convertToJsonb(res);
99         }
100         else if (val->type == jbvObject || val->type == jbvArray)
101         {
102                 out = convertToJsonb(val);
103         }
104         else
105         {
106                 Assert(val->type == jbvBinary);
107                 out = palloc(VARHDRSZ + val->val.binary.len);
108                 SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
109                 memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
110         }
111
112         return out;
113 }
114
115 /*
116  * Get the offset of the variable-length portion of a Jsonb node within
117  * the variable-length-data part of its container.  The node is identified
118  * by index within the container's JEntry array.
119  */
120 uint32
121 getJsonbOffset(const JsonbContainer *jc, int index)
122 {
123         uint32          offset = 0;
124         int                     i;
125
126         /*
127          * Start offset of this entry is equal to the end offset of the previous
128          * entry.  Walk backwards to the most recent entry stored as an end
129          * offset, returning that offset plus any lengths in between.
130          */
131         for (i = index - 1; i >= 0; i--)
132         {
133                 offset += JBE_OFFLENFLD(jc->children[i]);
134                 if (JBE_HAS_OFF(jc->children[i]))
135                         break;
136         }
137
138         return offset;
139 }
140
141 /*
142  * Get the length of the variable-length portion of a Jsonb node.
143  * The node is identified by index within the container's JEntry array.
144  */
145 uint32
146 getJsonbLength(const JsonbContainer *jc, int index)
147 {
148         uint32          off;
149         uint32          len;
150
151         /*
152          * If the length is stored directly in the JEntry, just return it.
153          * Otherwise, get the begin offset of the entry, and subtract that from
154          * the stored end+1 offset.
155          */
156         if (JBE_HAS_OFF(jc->children[index]))
157         {
158                 off = getJsonbOffset(jc, index);
159                 len = JBE_OFFLENFLD(jc->children[index]) - off;
160         }
161         else
162                 len = JBE_OFFLENFLD(jc->children[index]);
163
164         return len;
165 }
166
167 /*
168  * BT comparator worker function.  Returns an integer less than, equal to, or
169  * greater than zero, indicating whether a is less than, equal to, or greater
170  * than b.  Consistent with the requirements for a B-Tree operator class
171  *
172  * Strings are compared lexically, in contrast with other places where we use a
173  * much simpler comparator logic for searching through Strings.  Since this is
174  * called from B-Tree support function 1, we're careful about not leaking
175  * memory here.
176  */
177 int
178 compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
179 {
180         JsonbIterator *ita,
181                            *itb;
182         int                     res = 0;
183
184         ita = JsonbIteratorInit(a);
185         itb = JsonbIteratorInit(b);
186
187         do
188         {
189                 JsonbValue      va,
190                                         vb;
191                 JsonbIteratorToken ra,
192                                         rb;
193
194                 ra = JsonbIteratorNext(&ita, &va, false);
195                 rb = JsonbIteratorNext(&itb, &vb, false);
196
197                 if (ra == rb)
198                 {
199                         if (ra == WJB_DONE)
200                         {
201                                 /* Decisively equal */
202                                 break;
203                         }
204
205                         if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
206                         {
207                                 /*
208                                  * There is no array or object to compare at this stage of
209                                  * processing.  jbvArray/jbvObject values are compared
210                                  * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
211                                  * tokens.
212                                  */
213                                 continue;
214                         }
215
216                         if (va.type == vb.type)
217                         {
218                                 switch (va.type)
219                                 {
220                                         case jbvString:
221                                         case jbvNull:
222                                         case jbvNumeric:
223                                         case jbvBool:
224                                                 res = compareJsonbScalarValue(&va, &vb);
225                                                 break;
226                                         case jbvArray:
227
228                                                 /*
229                                                  * This could be a "raw scalar" pseudo array.  That's
230                                                  * a special case here though, since we still want the
231                                                  * general type-based comparisons to apply, and as far
232                                                  * as we're concerned a pseudo array is just a scalar.
233                                                  */
234                                                 if (va.val.array.rawScalar != vb.val.array.rawScalar)
235                                                         res = (va.val.array.rawScalar) ? -1 : 1;
236                                                 if (va.val.array.nElems != vb.val.array.nElems)
237                                                         res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
238                                                 break;
239                                         case jbvObject:
240                                                 if (va.val.object.nPairs != vb.val.object.nPairs)
241                                                         res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
242                                                 break;
243                                         case jbvBinary:
244                                                 elog(ERROR, "unexpected jbvBinary value");
245                                 }
246                         }
247                         else
248                         {
249                                 /* Type-defined order */
250                                 res = (va.type > vb.type) ? 1 : -1;
251                         }
252                 }
253                 else
254                 {
255                         /*
256                          * It's safe to assume that the types differed, and that the va
257                          * and vb values passed were set.
258                          *
259                          * If the two values were of the same container type, then there'd
260                          * have been a chance to observe the variation in the number of
261                          * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
262                          * either two heterogeneously-typed containers, or a container and
263                          * some scalar type.
264                          *
265                          * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
266                          * cases here, because we would have seen the corresponding
267                          * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
268                          * concluded that they don't match.
269                          */
270                         Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
271                         Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
272
273                         Assert(va.type != vb.type);
274                         Assert(va.type != jbvBinary);
275                         Assert(vb.type != jbvBinary);
276                         /* Type-defined order */
277                         res = (va.type > vb.type) ? 1 : -1;
278                 }
279         }
280         while (res == 0);
281
282         while (ita != NULL)
283         {
284                 JsonbIterator *i = ita->parent;
285
286                 pfree(ita);
287                 ita = i;
288         }
289         while (itb != NULL)
290         {
291                 JsonbIterator *i = itb->parent;
292
293                 pfree(itb);
294                 itb = i;
295         }
296
297         return res;
298 }
299
300 /*
301  * Find value in object (i.e. the "value" part of some key/value pair in an
302  * object), or find a matching element if we're looking through an array.  Do
303  * so on the basis of equality of the object keys only, or alternatively
304  * element values only, with a caller-supplied value "key".  The "flags"
305  * argument allows the caller to specify which container types are of interest.
306  *
307  * This exported utility function exists to facilitate various cases concerned
308  * with "containment".  If asked to look through an object, the caller had
309  * better pass a Jsonb String, because their keys can only be strings.
310  * Otherwise, for an array, any type of JsonbValue will do.
311  *
312  * In order to proceed with the search, it is necessary for callers to have
313  * both specified an interest in exactly one particular container type with an
314  * appropriate flag, as well as having the pointed-to Jsonb container be of
315  * one of those same container types at the top level. (Actually, we just do
316  * whichever makes sense to save callers the trouble of figuring it out - at
317  * most one can make sense, because the container either points to an array
318  * (possibly a "raw scalar" pseudo array) or an object.)
319  *
320  * Note that we can return a jbvBinary JsonbValue if this is called on an
321  * object, but we never do so on an array.  If the caller asks to look through
322  * a container type that is not of the type pointed to by the container,
323  * immediately fall through and return NULL.  If we cannot find the value,
324  * return NULL.  Otherwise, return palloc()'d copy of value.
325  */
326 JsonbValue *
327 findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
328                                                         JsonbValue *key)
329 {
330         JEntry     *children = container->children;
331         int                     count = JsonContainerSize(container);
332         JsonbValue *result;
333
334         Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
335
336         /* Quick out without a palloc cycle if object/array is empty */
337         if (count <= 0)
338                 return NULL;
339
340         result = palloc(sizeof(JsonbValue));
341
342         if ((flags & JB_FARRAY) && JsonContainerIsArray(container))
343         {
344                 char       *base_addr = (char *) (children + count);
345                 uint32          offset = 0;
346                 int                     i;
347
348                 for (i = 0; i < count; i++)
349                 {
350                         fillJsonbValue(container, i, base_addr, offset, result);
351
352                         if (key->type == result->type)
353                         {
354                                 if (equalsJsonbScalarValue(key, result))
355                                         return result;
356                         }
357
358                         JBE_ADVANCE_OFFSET(offset, children[i]);
359                 }
360         }
361         else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container))
362         {
363                 /* Since this is an object, account for *Pairs* of Jentrys */
364                 char       *base_addr = (char *) (children + count * 2);
365                 uint32          stopLow = 0,
366                                         stopHigh = count;
367
368                 /* Object key passed by caller must be a string */
369                 Assert(key->type == jbvString);
370
371                 /* Binary search on object/pair keys *only* */
372                 while (stopLow < stopHigh)
373                 {
374                         uint32          stopMiddle;
375                         int                     difference;
376                         JsonbValue      candidate;
377
378                         stopMiddle = stopLow + (stopHigh - stopLow) / 2;
379
380                         candidate.type = jbvString;
381                         candidate.val.string.val =
382                                 base_addr + getJsonbOffset(container, stopMiddle);
383                         candidate.val.string.len = getJsonbLength(container, stopMiddle);
384
385                         difference = lengthCompareJsonbStringValue(&candidate, key);
386
387                         if (difference == 0)
388                         {
389                                 /* Found our key, return corresponding value */
390                                 int                     index = stopMiddle + count;
391
392                                 fillJsonbValue(container, index, base_addr,
393                                                            getJsonbOffset(container, index),
394                                                            result);
395
396                                 return result;
397                         }
398                         else
399                         {
400                                 if (difference < 0)
401                                         stopLow = stopMiddle + 1;
402                                 else
403                                         stopHigh = stopMiddle;
404                         }
405                 }
406         }
407
408         /* Not found */
409         pfree(result);
410         return NULL;
411 }
412
413 /*
414  * Get i-th value of a Jsonb array.
415  *
416  * Returns palloc()'d copy of the value, or NULL if it does not exist.
417  */
418 JsonbValue *
419 getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
420 {
421         JsonbValue *result;
422         char       *base_addr;
423         uint32          nelements;
424
425         if (!JsonContainerIsArray(container))
426                 elog(ERROR, "not a jsonb array");
427
428         nelements = JsonContainerSize(container);
429         base_addr = (char *) &container->children[nelements];
430
431         if (i >= nelements)
432                 return NULL;
433
434         result = palloc(sizeof(JsonbValue));
435
436         fillJsonbValue(container, i, base_addr,
437                                    getJsonbOffset(container, i),
438                                    result);
439
440         return result;
441 }
442
443 /*
444  * A helper function to fill in a JsonbValue to represent an element of an
445  * array, or a key or value of an object.
446  *
447  * The node's JEntry is at container->children[index], and its variable-length
448  * data is at base_addr + offset.  We make the caller determine the offset
449  * since in many cases the caller can amortize that work across multiple
450  * children.  When it can't, it can just call getJsonbOffset().
451  *
452  * A nested array or object will be returned as jbvBinary, ie. it won't be
453  * expanded.
454  */
455 static void
456 fillJsonbValue(JsonbContainer *container, int index,
457                            char *base_addr, uint32 offset,
458                            JsonbValue *result)
459 {
460         JEntry          entry = container->children[index];
461
462         if (JBE_ISNULL(entry))
463         {
464                 result->type = jbvNull;
465         }
466         else if (JBE_ISSTRING(entry))
467         {
468                 result->type = jbvString;
469                 result->val.string.val = base_addr + offset;
470                 result->val.string.len = getJsonbLength(container, index);
471                 Assert(result->val.string.len >= 0);
472         }
473         else if (JBE_ISNUMERIC(entry))
474         {
475                 result->type = jbvNumeric;
476                 result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
477         }
478         else if (JBE_ISBOOL_TRUE(entry))
479         {
480                 result->type = jbvBool;
481                 result->val.boolean = true;
482         }
483         else if (JBE_ISBOOL_FALSE(entry))
484         {
485                 result->type = jbvBool;
486                 result->val.boolean = false;
487         }
488         else
489         {
490                 Assert(JBE_ISCONTAINER(entry));
491                 result->type = jbvBinary;
492                 /* Remove alignment padding from data pointer and length */
493                 result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
494                 result->val.binary.len = getJsonbLength(container, index) -
495                         (INTALIGN(offset) - offset);
496         }
497 }
498
499 /*
500  * Push JsonbValue into JsonbParseState.
501  *
502  * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
503  * JsonbValue to a Jsonb.
504  *
505  * Initial state of *JsonbParseState is NULL, since it'll be allocated here
506  * originally (caller will get JsonbParseState back by reference).
507  *
508  * Only sequential tokens pertaining to non-container types should pass a
509  * JsonbValue.  There is one exception -- WJB_BEGIN_ARRAY callers may pass a
510  * "raw scalar" pseudo array to append it - the actual scalar should be passed
511  * next and it will be added as the only member of the array.
512  *
513  * Values of type jvbBinary, which are rolled up arrays and objects,
514  * are unpacked before being added to the result.
515  */
516 JsonbValue *
517 pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
518                            JsonbValue *jbval)
519 {
520         JsonbIterator *it;
521         JsonbValue *res = NULL;
522         JsonbValue      v;
523         JsonbIteratorToken tok;
524
525         if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
526                 jbval->type != jbvBinary)
527         {
528                 /* drop through */
529                 return pushJsonbValueScalar(pstate, seq, jbval);
530         }
531
532         /* unpack the binary and add each piece to the pstate */
533         it = JsonbIteratorInit(jbval->val.binary.data);
534         while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
535                 res = pushJsonbValueScalar(pstate, tok,
536                                                                    tok < WJB_BEGIN_ARRAY ? &v : NULL);
537
538         return res;
539 }
540
541 /*
542  * Do the actual pushing, with only scalar or pseudo-scalar-array values
543  * accepted.
544  */
545 static JsonbValue *
546 pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
547                                          JsonbValue *scalarVal)
548 {
549         JsonbValue *result = NULL;
550
551         switch (seq)
552         {
553                 case WJB_BEGIN_ARRAY:
554                         Assert(!scalarVal || scalarVal->val.array.rawScalar);
555                         *pstate = pushState(pstate);
556                         result = &(*pstate)->contVal;
557                         (*pstate)->contVal.type = jbvArray;
558                         (*pstate)->contVal.val.array.nElems = 0;
559                         (*pstate)->contVal.val.array.rawScalar = (scalarVal &&
560                                                                                                           scalarVal->val.array.rawScalar);
561                         if (scalarVal && scalarVal->val.array.nElems > 0)
562                         {
563                                 /* Assume that this array is still really a scalar */
564                                 Assert(scalarVal->type == jbvArray);
565                                 (*pstate)->size = scalarVal->val.array.nElems;
566                         }
567                         else
568                         {
569                                 (*pstate)->size = 4;
570                         }
571                         (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
572                                                                                                                 (*pstate)->size);
573                         break;
574                 case WJB_BEGIN_OBJECT:
575                         Assert(!scalarVal);
576                         *pstate = pushState(pstate);
577                         result = &(*pstate)->contVal;
578                         (*pstate)->contVal.type = jbvObject;
579                         (*pstate)->contVal.val.object.nPairs = 0;
580                         (*pstate)->size = 4;
581                         (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
582                                                                                                                  (*pstate)->size);
583                         break;
584                 case WJB_KEY:
585                         Assert(scalarVal->type == jbvString);
586                         appendKey(*pstate, scalarVal);
587                         break;
588                 case WJB_VALUE:
589                         Assert(IsAJsonbScalar(scalarVal));
590                         appendValue(*pstate, scalarVal);
591                         break;
592                 case WJB_ELEM:
593                         Assert(IsAJsonbScalar(scalarVal));
594                         appendElement(*pstate, scalarVal);
595                         break;
596                 case WJB_END_OBJECT:
597                         uniqueifyJsonbObject(&(*pstate)->contVal);
598                         /* fall through! */
599                 case WJB_END_ARRAY:
600                         /* Steps here common to WJB_END_OBJECT case */
601                         Assert(!scalarVal);
602                         result = &(*pstate)->contVal;
603
604                         /*
605                          * Pop stack and push current array/object as value in parent
606                          * array/object
607                          */
608                         *pstate = (*pstate)->next;
609                         if (*pstate)
610                         {
611                                 switch ((*pstate)->contVal.type)
612                                 {
613                                         case jbvArray:
614                                                 appendElement(*pstate, result);
615                                                 break;
616                                         case jbvObject:
617                                                 appendValue(*pstate, result);
618                                                 break;
619                                         default:
620                                                 elog(ERROR, "invalid jsonb container type");
621                                 }
622                         }
623                         break;
624                 default:
625                         elog(ERROR, "unrecognized jsonb sequential processing token");
626         }
627
628         return result;
629 }
630
631 /*
632  * pushJsonbValue() worker:  Iteration-like forming of Jsonb
633  */
634 static JsonbParseState *
635 pushState(JsonbParseState **pstate)
636 {
637         JsonbParseState *ns = palloc(sizeof(JsonbParseState));
638
639         ns->next = *pstate;
640         return ns;
641 }
642
643 /*
644  * pushJsonbValue() worker:  Append a pair key to state when generating a Jsonb
645  */
646 static void
647 appendKey(JsonbParseState *pstate, JsonbValue *string)
648 {
649         JsonbValue *object = &pstate->contVal;
650
651         Assert(object->type == jbvObject);
652         Assert(string->type == jbvString);
653
654         if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
655                 ereport(ERROR,
656                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
657                                  errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
658                                                 JSONB_MAX_PAIRS)));
659
660         if (object->val.object.nPairs >= pstate->size)
661         {
662                 pstate->size *= 2;
663                 object->val.object.pairs = repalloc(object->val.object.pairs,
664                                                                                         sizeof(JsonbPair) * pstate->size);
665         }
666
667         object->val.object.pairs[object->val.object.nPairs].key = *string;
668         object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
669 }
670
671 /*
672  * pushJsonbValue() worker:  Append a pair value to state when generating a
673  * Jsonb
674  */
675 static void
676 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
677 {
678         JsonbValue *object = &pstate->contVal;
679
680         Assert(object->type == jbvObject);
681
682         object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
683 }
684
685 /*
686  * pushJsonbValue() worker:  Append an element to state when generating a Jsonb
687  */
688 static void
689 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
690 {
691         JsonbValue *array = &pstate->contVal;
692
693         Assert(array->type == jbvArray);
694
695         if (array->val.array.nElems >= JSONB_MAX_ELEMS)
696                 ereport(ERROR,
697                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
698                                  errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
699                                                 JSONB_MAX_ELEMS)));
700
701         if (array->val.array.nElems >= pstate->size)
702         {
703                 pstate->size *= 2;
704                 array->val.array.elems = repalloc(array->val.array.elems,
705                                                                                   sizeof(JsonbValue) * pstate->size);
706         }
707
708         array->val.array.elems[array->val.array.nElems++] = *scalarVal;
709 }
710
711 /*
712  * Given a JsonbContainer, expand to JsonbIterator to iterate over items
713  * fully expanded to in-memory representation for manipulation.
714  *
715  * See JsonbIteratorNext() for notes on memory management.
716  */
717 JsonbIterator *
718 JsonbIteratorInit(JsonbContainer *container)
719 {
720         return iteratorFromContainer(container, NULL);
721 }
722
723 /*
724  * Get next JsonbValue while iterating
725  *
726  * Caller should initially pass their own, original iterator.  They may get
727  * back a child iterator palloc()'d here instead.  The function can be relied
728  * on to free those child iterators, lest the memory allocated for highly
729  * nested objects become unreasonable, but only if callers don't end iteration
730  * early (by breaking upon having found something in a search, for example).
731  *
732  * Callers in such a scenario, that are particularly sensitive to leaking
733  * memory in a long-lived context may walk the ancestral tree from the final
734  * iterator we left them with to its oldest ancestor, pfree()ing as they go.
735  * They do not have to free any other memory previously allocated for iterators
736  * but not accessible as direct ancestors of the iterator they're last passed
737  * back.
738  *
739  * Returns "Jsonb sequential processing" token value.  Iterator "state"
740  * reflects the current stage of the process in a less granular fashion, and is
741  * mostly used here to track things internally with respect to particular
742  * iterators.
743  *
744  * Clients of this function should not have to handle any jbvBinary values
745  * (since recursive calls will deal with this), provided skipNested is false.
746  * It is our job to expand the jbvBinary representation without bothering them
747  * with it.  However, clients should not take it upon themselves to touch array
748  * or Object element/pair buffers, since their element/pair pointers are
749  * garbage.  Also, *val will not be set when returning WJB_END_ARRAY or
750  * WJB_END_OBJECT, on the assumption that it's only useful to access values
751  * when recursing in.
752  */
753 JsonbIteratorToken
754 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
755 {
756         if (*it == NULL)
757                 return WJB_DONE;
758
759         /*
760          * When stepping into a nested container, we jump back here to start
761          * processing the child. We will not recurse further in one call, because
762          * processing the child will always begin in JBI_ARRAY_START or
763          * JBI_OBJECT_START state.
764          */
765 recurse:
766         switch ((*it)->state)
767         {
768                 case JBI_ARRAY_START:
769                         /* Set v to array on first array call */
770                         val->type = jbvArray;
771                         val->val.array.nElems = (*it)->nElems;
772
773                         /*
774                          * v->val.array.elems is not actually set, because we aren't doing
775                          * a full conversion
776                          */
777                         val->val.array.rawScalar = (*it)->isScalar;
778                         (*it)->curIndex = 0;
779                         (*it)->curDataOffset = 0;
780                         (*it)->curValueOffset = 0;      /* not actually used */
781                         /* Set state for next call */
782                         (*it)->state = JBI_ARRAY_ELEM;
783                         return WJB_BEGIN_ARRAY;
784
785                 case JBI_ARRAY_ELEM:
786                         if ((*it)->curIndex >= (*it)->nElems)
787                         {
788                                 /*
789                                  * All elements within array already processed.  Report this
790                                  * to caller, and give it back original parent iterator (which
791                                  * independently tracks iteration progress at its level of
792                                  * nesting).
793                                  */
794                                 *it = freeAndGetParent(*it);
795                                 return WJB_END_ARRAY;
796                         }
797
798                         fillJsonbValue((*it)->container, (*it)->curIndex,
799                                                    (*it)->dataProper, (*it)->curDataOffset,
800                                                    val);
801
802                         JBE_ADVANCE_OFFSET((*it)->curDataOffset,
803                                                            (*it)->children[(*it)->curIndex]);
804                         (*it)->curIndex++;
805
806                         if (!IsAJsonbScalar(val) && !skipNested)
807                         {
808                                 /* Recurse into container. */
809                                 *it = iteratorFromContainer(val->val.binary.data, *it);
810                                 goto recurse;
811                         }
812                         else
813                         {
814                                 /*
815                                  * Scalar item in array, or a container and caller didn't want
816                                  * us to recurse into it.
817                                  */
818                                 return WJB_ELEM;
819                         }
820
821                 case JBI_OBJECT_START:
822                         /* Set v to object on first object call */
823                         val->type = jbvObject;
824                         val->val.object.nPairs = (*it)->nElems;
825
826                         /*
827                          * v->val.object.pairs is not actually set, because we aren't
828                          * doing a full conversion
829                          */
830                         (*it)->curIndex = 0;
831                         (*it)->curDataOffset = 0;
832                         (*it)->curValueOffset = getJsonbOffset((*it)->container,
833                                                                                                    (*it)->nElems);
834                         /* Set state for next call */
835                         (*it)->state = JBI_OBJECT_KEY;
836                         return WJB_BEGIN_OBJECT;
837
838                 case JBI_OBJECT_KEY:
839                         if ((*it)->curIndex >= (*it)->nElems)
840                         {
841                                 /*
842                                  * All pairs within object already processed.  Report this to
843                                  * caller, and give it back original containing iterator
844                                  * (which independently tracks iteration progress at its level
845                                  * of nesting).
846                                  */
847                                 *it = freeAndGetParent(*it);
848                                 return WJB_END_OBJECT;
849                         }
850                         else
851                         {
852                                 /* Return key of a key/value pair.  */
853                                 fillJsonbValue((*it)->container, (*it)->curIndex,
854                                                            (*it)->dataProper, (*it)->curDataOffset,
855                                                            val);
856                                 if (val->type != jbvString)
857                                         elog(ERROR, "unexpected jsonb type as object key");
858
859                                 /* Set state for next call */
860                                 (*it)->state = JBI_OBJECT_VALUE;
861                                 return WJB_KEY;
862                         }
863
864                 case JBI_OBJECT_VALUE:
865                         /* Set state for next call */
866                         (*it)->state = JBI_OBJECT_KEY;
867
868                         fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
869                                                    (*it)->dataProper, (*it)->curValueOffset,
870                                                    val);
871
872                         JBE_ADVANCE_OFFSET((*it)->curDataOffset,
873                                                            (*it)->children[(*it)->curIndex]);
874                         JBE_ADVANCE_OFFSET((*it)->curValueOffset,
875                                                            (*it)->children[(*it)->curIndex + (*it)->nElems]);
876                         (*it)->curIndex++;
877
878                         /*
879                          * Value may be a container, in which case we recurse with new,
880                          * child iterator (unless the caller asked not to, by passing
881                          * skipNested).
882                          */
883                         if (!IsAJsonbScalar(val) && !skipNested)
884                         {
885                                 *it = iteratorFromContainer(val->val.binary.data, *it);
886                                 goto recurse;
887                         }
888                         else
889                                 return WJB_VALUE;
890         }
891
892         elog(ERROR, "invalid iterator state");
893         return -1;
894 }
895
896 /*
897  * Initialize an iterator for iterating all elements in a container.
898  */
899 static JsonbIterator *
900 iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
901 {
902         JsonbIterator *it;
903
904         it = palloc(sizeof(JsonbIterator));
905         it->container = container;
906         it->parent = parent;
907         it->nElems = JsonContainerSize(container);
908
909         /* Array starts just after header */
910         it->children = container->children;
911
912         switch (container->header & (JB_FARRAY | JB_FOBJECT))
913         {
914                 case JB_FARRAY:
915                         it->dataProper =
916                                 (char *) it->children + it->nElems * sizeof(JEntry);
917                         it->isScalar = JsonContainerIsScalar(container);
918                         /* This is either a "raw scalar", or an array */
919                         Assert(!it->isScalar || it->nElems == 1);
920
921                         it->state = JBI_ARRAY_START;
922                         break;
923
924                 case JB_FOBJECT:
925                         it->dataProper =
926                                 (char *) it->children + it->nElems * sizeof(JEntry) * 2;
927                         it->state = JBI_OBJECT_START;
928                         break;
929
930                 default:
931                         elog(ERROR, "unknown type of jsonb container");
932         }
933
934         return it;
935 }
936
937 /*
938  * JsonbIteratorNext() worker:  Return parent, while freeing memory for current
939  * iterator
940  */
941 static JsonbIterator *
942 freeAndGetParent(JsonbIterator *it)
943 {
944         JsonbIterator *v = it->parent;
945
946         pfree(it);
947         return v;
948 }
949
950 /*
951  * Worker for "contains" operator's function
952  *
953  * Formally speaking, containment is top-down, unordered subtree isomorphism.
954  *
955  * Takes iterators that belong to some container type.  These iterators
956  * "belong" to those values in the sense that they've just been initialized in
957  * respect of them by the caller (perhaps in a nested fashion).
958  *
959  * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
960  * We determine if mContained is contained within val.
961  */
962 bool
963 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
964 {
965         JsonbValue      vval,
966                                 vcontained;
967         JsonbIteratorToken rval,
968                                 rcont;
969
970         /*
971          * Guard against stack overflow due to overly complex Jsonb.
972          *
973          * Functions called here independently take this precaution, but that
974          * might not be sufficient since this is also a recursive function.
975          */
976         check_stack_depth();
977
978         rval = JsonbIteratorNext(val, &vval, false);
979         rcont = JsonbIteratorNext(mContained, &vcontained, false);
980
981         if (rval != rcont)
982         {
983                 /*
984                  * The differing return values can immediately be taken as indicating
985                  * two differing container types at this nesting level, which is
986                  * sufficient reason to give up entirely (but it should be the case
987                  * that they're both some container type).
988                  */
989                 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
990                 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
991                 return false;
992         }
993         else if (rcont == WJB_BEGIN_OBJECT)
994         {
995                 Assert(vval.type == jbvObject);
996                 Assert(vcontained.type == jbvObject);
997
998                 /*
999                  * If the lhs has fewer pairs than the rhs, it can't possibly contain
1000                  * the rhs.  (This conclusion is safe only because we de-duplicate
1001                  * keys in all Jsonb objects; thus there can be no corresponding
1002                  * optimization in the array case.)  The case probably won't arise
1003                  * often, but since it's such a cheap check we may as well make it.
1004                  */
1005                 if (vval.val.object.nPairs < vcontained.val.object.nPairs)
1006                         return false;
1007
1008                 /* Work through rhs "is it contained within?" object */
1009                 for (;;)
1010                 {
1011                         JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
1012
1013                         rcont = JsonbIteratorNext(mContained, &vcontained, false);
1014
1015                         /*
1016                          * When we get through caller's rhs "is it contained within?"
1017                          * object without failing to find one of its values, it's
1018                          * contained.
1019                          */
1020                         if (rcont == WJB_END_OBJECT)
1021                                 return true;
1022
1023                         Assert(rcont == WJB_KEY);
1024
1025                         /* First, find value by key... */
1026                         lhsVal = findJsonbValueFromContainer((*val)->container,
1027                                                                                                  JB_FOBJECT,
1028                                                                                                  &vcontained);
1029
1030                         if (!lhsVal)
1031                                 return false;
1032
1033                         /*
1034                          * ...at this stage it is apparent that there is at least a key
1035                          * match for this rhs pair.
1036                          */
1037                         rcont = JsonbIteratorNext(mContained, &vcontained, true);
1038
1039                         Assert(rcont == WJB_VALUE);
1040
1041                         /*
1042                          * Compare rhs pair's value with lhs pair's value just found using
1043                          * key
1044                          */
1045                         if (lhsVal->type != vcontained.type)
1046                         {
1047                                 return false;
1048                         }
1049                         else if (IsAJsonbScalar(lhsVal))
1050                         {
1051                                 if (!equalsJsonbScalarValue(lhsVal, &vcontained))
1052                                         return false;
1053                         }
1054                         else
1055                         {
1056                                 /* Nested container value (object or array) */
1057                                 JsonbIterator *nestval,
1058                                                    *nestContained;
1059
1060                                 Assert(lhsVal->type == jbvBinary);
1061                                 Assert(vcontained.type == jbvBinary);
1062
1063                                 nestval = JsonbIteratorInit(lhsVal->val.binary.data);
1064                                 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1065
1066                                 /*
1067                                  * Match "value" side of rhs datum object's pair recursively.
1068                                  * It's a nested structure.
1069                                  *
1070                                  * Note that nesting still has to "match up" at the right
1071                                  * nesting sub-levels.  However, there need only be zero or
1072                                  * more matching pairs (or elements) at each nesting level
1073                                  * (provided the *rhs* pairs/elements *all* match on each
1074                                  * level), which enables searching nested structures for a
1075                                  * single String or other primitive type sub-datum quite
1076                                  * effectively (provided the user constructed the rhs nested
1077                                  * structure such that we "know where to look").
1078                                  *
1079                                  * In other words, the mapping of container nodes in the rhs
1080                                  * "vcontained" Jsonb to internal nodes on the lhs is
1081                                  * injective, and parent-child edges on the rhs must be mapped
1082                                  * to parent-child edges on the lhs to satisfy the condition
1083                                  * of containment (plus of course the mapped nodes must be
1084                                  * equal).
1085                                  */
1086                                 if (!JsonbDeepContains(&nestval, &nestContained))
1087                                         return false;
1088                         }
1089                 }
1090         }
1091         else if (rcont == WJB_BEGIN_ARRAY)
1092         {
1093                 JsonbValue *lhsConts = NULL;
1094                 uint32          nLhsElems = vval.val.array.nElems;
1095
1096                 Assert(vval.type == jbvArray);
1097                 Assert(vcontained.type == jbvArray);
1098
1099                 /*
1100                  * Handle distinction between "raw scalar" pseudo arrays, and real
1101                  * arrays.
1102                  *
1103                  * A raw scalar may contain another raw scalar, and an array may
1104                  * contain a raw scalar, but a raw scalar may not contain an array. We
1105                  * don't do something like this for the object case, since objects can
1106                  * only contain pairs, never raw scalars (a pair is represented by an
1107                  * rhs object argument with a single contained pair).
1108                  */
1109                 if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
1110                         return false;
1111
1112                 /* Work through rhs "is it contained within?" array */
1113                 for (;;)
1114                 {
1115                         rcont = JsonbIteratorNext(mContained, &vcontained, true);
1116
1117                         /*
1118                          * When we get through caller's rhs "is it contained within?"
1119                          * array without failing to find one of its values, it's
1120                          * contained.
1121                          */
1122                         if (rcont == WJB_END_ARRAY)
1123                                 return true;
1124
1125                         Assert(rcont == WJB_ELEM);
1126
1127                         if (IsAJsonbScalar(&vcontained))
1128                         {
1129                                 if (!findJsonbValueFromContainer((*val)->container,
1130                                                                                                  JB_FARRAY,
1131                                                                                                  &vcontained))
1132                                         return false;
1133                         }
1134                         else
1135                         {
1136                                 uint32          i;
1137
1138                                 /*
1139                                  * If this is first container found in rhs array (at this
1140                                  * depth), initialize temp lhs array of containers
1141                                  */
1142                                 if (lhsConts == NULL)
1143                                 {
1144                                         uint32          j = 0;
1145
1146                                         /* Make room for all possible values */
1147                                         lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1148
1149                                         for (i = 0; i < nLhsElems; i++)
1150                                         {
1151                                                 /* Store all lhs elements in temp array */
1152                                                 rcont = JsonbIteratorNext(val, &vval, true);
1153                                                 Assert(rcont == WJB_ELEM);
1154
1155                                                 if (vval.type == jbvBinary)
1156                                                         lhsConts[j++] = vval;
1157                                         }
1158
1159                                         /* No container elements in temp array, so give up now */
1160                                         if (j == 0)
1161                                                 return false;
1162
1163                                         /* We may have only partially filled array */
1164                                         nLhsElems = j;
1165                                 }
1166
1167                                 /* XXX: Nested array containment is O(N^2) */
1168                                 for (i = 0; i < nLhsElems; i++)
1169                                 {
1170                                         /* Nested container value (object or array) */
1171                                         JsonbIterator *nestval,
1172                                                            *nestContained;
1173                                         bool            contains;
1174
1175                                         nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1176                                         nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1177
1178                                         contains = JsonbDeepContains(&nestval, &nestContained);
1179
1180                                         if (nestval)
1181                                                 pfree(nestval);
1182                                         if (nestContained)
1183                                                 pfree(nestContained);
1184                                         if (contains)
1185                                                 break;
1186                                 }
1187
1188                                 /*
1189                                  * Report rhs container value is not contained if couldn't
1190                                  * match rhs container to *some* lhs cont
1191                                  */
1192                                 if (i == nLhsElems)
1193                                         return false;
1194                         }
1195                 }
1196         }
1197         else
1198         {
1199                 elog(ERROR, "invalid jsonb container type");
1200         }
1201
1202         elog(ERROR, "unexpectedly fell off end of jsonb container");
1203         return false;
1204 }
1205
1206 /*
1207  * Hash a JsonbValue scalar value, mixing the hash value into an existing
1208  * hash provided by the caller.
1209  *
1210  * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1211  * flags.
1212  */
1213 void
1214 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1215 {
1216         uint32          tmp;
1217
1218         /* Compute hash value for scalarVal */
1219         switch (scalarVal->type)
1220         {
1221                 case jbvNull:
1222                         tmp = 0x01;
1223                         break;
1224                 case jbvString:
1225                         tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1226                                                                                   scalarVal->val.string.len));
1227                         break;
1228                 case jbvNumeric:
1229                         /* Must hash equal numerics to equal hash codes */
1230                         tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1231                                                                                                          NumericGetDatum(scalarVal->val.numeric)));
1232                         break;
1233                 case jbvBool:
1234                         tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1235
1236                         break;
1237                 default:
1238                         elog(ERROR, "invalid jsonb scalar type");
1239                         tmp = 0;                        /* keep compiler quiet */
1240                         break;
1241         }
1242
1243         /*
1244          * Combine hash values of successive keys, values and elements by rotating
1245          * the previous value left 1 bit, then XOR'ing in the new
1246          * key/value/element's hash value.
1247          */
1248         *hash = (*hash << 1) | (*hash >> 31);
1249         *hash ^= tmp;
1250 }
1251
1252 /*
1253  * Hash a value to a 64-bit value, with a seed. Otherwise, similar to
1254  * JsonbHashScalarValue.
1255  */
1256 void
1257 JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
1258                                                          uint64 seed)
1259 {
1260         uint64          tmp;
1261
1262         switch (scalarVal->type)
1263         {
1264                 case jbvNull:
1265                         tmp = seed + 0x01;
1266                         break;
1267                 case jbvString:
1268                         tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
1269                                                                                                    scalarVal->val.string.len,
1270                                                                                                    seed));
1271                         break;
1272                 case jbvNumeric:
1273                         tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
1274                                                                                                          NumericGetDatum(scalarVal->val.numeric),
1275                                                                                                          UInt64GetDatum(seed)));
1276                         break;
1277                 case jbvBool:
1278                         if (seed)
1279                                 tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
1280                                                                                                                  BoolGetDatum(scalarVal->val.boolean),
1281                                                                                                                  UInt64GetDatum(seed)));
1282                         else
1283                                 tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1284
1285                         break;
1286                 default:
1287                         elog(ERROR, "invalid jsonb scalar type");
1288                         break;
1289         }
1290
1291         *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
1292         *hash ^= tmp;
1293 }
1294
1295 /*
1296  * Are two scalar JsonbValues of the same type a and b equal?
1297  */
1298 static bool
1299 equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1300 {
1301         if (aScalar->type == bScalar->type)
1302         {
1303                 switch (aScalar->type)
1304                 {
1305                         case jbvNull:
1306                                 return true;
1307                         case jbvString:
1308                                 return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
1309                         case jbvNumeric:
1310                                 return DatumGetBool(DirectFunctionCall2(numeric_eq,
1311                                                                                                                 PointerGetDatum(aScalar->val.numeric),
1312                                                                                                                 PointerGetDatum(bScalar->val.numeric)));
1313                         case jbvBool:
1314                                 return aScalar->val.boolean == bScalar->val.boolean;
1315
1316                         default:
1317                                 elog(ERROR, "invalid jsonb scalar type");
1318                 }
1319         }
1320         elog(ERROR, "jsonb scalar type mismatch");
1321         return -1;
1322 }
1323
1324 /*
1325  * Compare two scalar JsonbValues, returning -1, 0, or 1.
1326  *
1327  * Strings are compared using the default collation.  Used by B-tree
1328  * operators, where a lexical sort order is generally expected.
1329  */
1330 static int
1331 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1332 {
1333         if (aScalar->type == bScalar->type)
1334         {
1335                 switch (aScalar->type)
1336                 {
1337                         case jbvNull:
1338                                 return 0;
1339                         case jbvString:
1340                                 return varstr_cmp(aScalar->val.string.val,
1341                                                                   aScalar->val.string.len,
1342                                                                   bScalar->val.string.val,
1343                                                                   bScalar->val.string.len,
1344                                                                   DEFAULT_COLLATION_OID);
1345                         case jbvNumeric:
1346                                 return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1347                                                                                                                  PointerGetDatum(aScalar->val.numeric),
1348                                                                                                                  PointerGetDatum(bScalar->val.numeric)));
1349                         case jbvBool:
1350                                 if (aScalar->val.boolean == bScalar->val.boolean)
1351                                         return 0;
1352                                 else if (aScalar->val.boolean > bScalar->val.boolean)
1353                                         return 1;
1354                                 else
1355                                         return -1;
1356                         default:
1357                                 elog(ERROR, "invalid jsonb scalar type");
1358                 }
1359         }
1360         elog(ERROR, "jsonb scalar type mismatch");
1361         return -1;
1362 }
1363
1364
1365 /*
1366  * Functions for manipulating the resizeable buffer used by convertJsonb and
1367  * its subroutines.
1368  */
1369
1370 /*
1371  * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
1372  * Returns the offset to the reserved area. The caller is expected to fill
1373  * the reserved area later with copyToBuffer().
1374  */
1375 static int
1376 reserveFromBuffer(StringInfo buffer, int len)
1377 {
1378         int                     offset;
1379
1380         /* Make more room if needed */
1381         enlargeStringInfo(buffer, len);
1382
1383         /* remember current offset */
1384         offset = buffer->len;
1385
1386         /* reserve the space */
1387         buffer->len += len;
1388
1389         /*
1390          * Keep a trailing null in place, even though it's not useful for us; it
1391          * seems best to preserve the invariants of StringInfos.
1392          */
1393         buffer->data[buffer->len] = '\0';
1394
1395         return offset;
1396 }
1397
1398 /*
1399  * Copy 'len' bytes to a previously reserved area in buffer.
1400  */
1401 static void
1402 copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
1403 {
1404         memcpy(buffer->data + offset, data, len);
1405 }
1406
1407 /*
1408  * A shorthand for reserveFromBuffer + copyToBuffer.
1409  */
1410 static void
1411 appendToBuffer(StringInfo buffer, const char *data, int len)
1412 {
1413         int                     offset;
1414
1415         offset = reserveFromBuffer(buffer, len);
1416         copyToBuffer(buffer, offset, data, len);
1417 }
1418
1419
1420 /*
1421  * Append padding, so that the length of the StringInfo is int-aligned.
1422  * Returns the number of padding bytes appended.
1423  */
1424 static short
1425 padBufferToInt(StringInfo buffer)
1426 {
1427         int                     padlen,
1428                                 p,
1429                                 offset;
1430
1431         padlen = INTALIGN(buffer->len) - buffer->len;
1432
1433         offset = reserveFromBuffer(buffer, padlen);
1434
1435         /* padlen must be small, so this is probably faster than a memset */
1436         for (p = 0; p < padlen; p++)
1437                 buffer->data[offset + p] = '\0';
1438
1439         return padlen;
1440 }
1441
1442 /*
1443  * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
1444  */
1445 static Jsonb *
1446 convertToJsonb(JsonbValue *val)
1447 {
1448         StringInfoData buffer;
1449         JEntry          jentry;
1450         Jsonb      *res;
1451
1452         /* Should not already have binary representation */
1453         Assert(val->type != jbvBinary);
1454
1455         /* Allocate an output buffer. It will be enlarged as needed */
1456         initStringInfo(&buffer);
1457
1458         /* Make room for the varlena header */
1459         reserveFromBuffer(&buffer, VARHDRSZ);
1460
1461         convertJsonbValue(&buffer, &jentry, val, 0);
1462
1463         /*
1464          * Note: the JEntry of the root is discarded. Therefore the root
1465          * JsonbContainer struct must contain enough information to tell what kind
1466          * of value it is.
1467          */
1468
1469         res = (Jsonb *) buffer.data;
1470
1471         SET_VARSIZE(res, buffer.len);
1472
1473         return res;
1474 }
1475
1476 /*
1477  * Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
1478  *
1479  * The JEntry header for this node is returned in *header.  It is filled in
1480  * with the length of this value and appropriate type bits.  If we wish to
1481  * store an end offset rather than a length, it is the caller's responsibility
1482  * to adjust for that.
1483  *
1484  * If the value is an array or an object, this recurses. 'level' is only used
1485  * for debugging purposes.
1486  */
1487 static void
1488 convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
1489 {
1490         check_stack_depth();
1491
1492         if (!val)
1493                 return;
1494
1495         /*
1496          * A JsonbValue passed as val should never have a type of jbvBinary, and
1497          * neither should any of its sub-components. Those values will be produced
1498          * by convertJsonbArray and convertJsonbObject, the results of which will
1499          * not be passed back to this function as an argument.
1500          */
1501
1502         if (IsAJsonbScalar(val))
1503                 convertJsonbScalar(buffer, header, val);
1504         else if (val->type == jbvArray)
1505                 convertJsonbArray(buffer, header, val, level);
1506         else if (val->type == jbvObject)
1507                 convertJsonbObject(buffer, header, val, level);
1508         else
1509                 elog(ERROR, "unknown type of jsonb container to convert");
1510 }
1511
1512 static void
1513 convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1514 {
1515         int                     base_offset;
1516         int                     jentry_offset;
1517         int                     i;
1518         int                     totallen;
1519         uint32          header;
1520         int                     nElems = val->val.array.nElems;
1521
1522         /* Remember where in the buffer this array starts. */
1523         base_offset = buffer->len;
1524
1525         /* Align to 4-byte boundary (any padding counts as part of my data) */
1526         padBufferToInt(buffer);
1527
1528         /*
1529          * Construct the header Jentry and store it in the beginning of the
1530          * variable-length payload.
1531          */
1532         header = nElems | JB_FARRAY;
1533         if (val->val.array.rawScalar)
1534         {
1535                 Assert(nElems == 1);
1536                 Assert(level == 0);
1537                 header |= JB_FSCALAR;
1538         }
1539
1540         appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1541
1542         /* Reserve space for the JEntries of the elements. */
1543         jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
1544
1545         totallen = 0;
1546         for (i = 0; i < nElems; i++)
1547         {
1548                 JsonbValue *elem = &val->val.array.elems[i];
1549                 int                     len;
1550                 JEntry          meta;
1551
1552                 /*
1553                  * Convert element, producing a JEntry and appending its
1554                  * variable-length data to buffer
1555                  */
1556                 convertJsonbValue(buffer, &meta, elem, level + 1);
1557
1558                 len = JBE_OFFLENFLD(meta);
1559                 totallen += len;
1560
1561                 /*
1562                  * Bail out if total variable-length data exceeds what will fit in a
1563                  * JEntry length field.  We check this in each iteration, not just
1564                  * once at the end, to forestall possible integer overflow.
1565                  */
1566                 if (totallen > JENTRY_OFFLENMASK)
1567                         ereport(ERROR,
1568                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1569                                          errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1570                                                         JENTRY_OFFLENMASK)));
1571
1572                 /*
1573                  * Convert each JB_OFFSET_STRIDE'th length to an offset.
1574                  */
1575                 if ((i % JB_OFFSET_STRIDE) == 0)
1576                         meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1577
1578                 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1579                 jentry_offset += sizeof(JEntry);
1580         }
1581
1582         /* Total data size is everything we've appended to buffer */
1583         totallen = buffer->len - base_offset;
1584
1585         /* Check length again, since we didn't include the metadata above */
1586         if (totallen > JENTRY_OFFLENMASK)
1587                 ereport(ERROR,
1588                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1589                                  errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1590                                                 JENTRY_OFFLENMASK)));
1591
1592         /* Initialize the header of this node in the container's JEntry array */
1593         *pheader = JENTRY_ISCONTAINER | totallen;
1594 }
1595
1596 static void
1597 convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1598 {
1599         int                     base_offset;
1600         int                     jentry_offset;
1601         int                     i;
1602         int                     totallen;
1603         uint32          header;
1604         int                     nPairs = val->val.object.nPairs;
1605
1606         /* Remember where in the buffer this object starts. */
1607         base_offset = buffer->len;
1608
1609         /* Align to 4-byte boundary (any padding counts as part of my data) */
1610         padBufferToInt(buffer);
1611
1612         /*
1613          * Construct the header Jentry and store it in the beginning of the
1614          * variable-length payload.
1615          */
1616         header = nPairs | JB_FOBJECT;
1617         appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1618
1619         /* Reserve space for the JEntries of the keys and values. */
1620         jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
1621
1622         /*
1623          * Iterate over the keys, then over the values, since that is the ordering
1624          * we want in the on-disk representation.
1625          */
1626         totallen = 0;
1627         for (i = 0; i < nPairs; i++)
1628         {
1629                 JsonbPair  *pair = &val->val.object.pairs[i];
1630                 int                     len;
1631                 JEntry          meta;
1632
1633                 /*
1634                  * Convert key, producing a JEntry and appending its variable-length
1635                  * data to buffer
1636                  */
1637                 convertJsonbScalar(buffer, &meta, &pair->key);
1638
1639                 len = JBE_OFFLENFLD(meta);
1640                 totallen += len;
1641
1642                 /*
1643                  * Bail out if total variable-length data exceeds what will fit in a
1644                  * JEntry length field.  We check this in each iteration, not just
1645                  * once at the end, to forestall possible integer overflow.
1646                  */
1647                 if (totallen > JENTRY_OFFLENMASK)
1648                         ereport(ERROR,
1649                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1650                                          errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1651                                                         JENTRY_OFFLENMASK)));
1652
1653                 /*
1654                  * Convert each JB_OFFSET_STRIDE'th length to an offset.
1655                  */
1656                 if ((i % JB_OFFSET_STRIDE) == 0)
1657                         meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1658
1659                 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1660                 jentry_offset += sizeof(JEntry);
1661         }
1662         for (i = 0; i < nPairs; i++)
1663         {
1664                 JsonbPair  *pair = &val->val.object.pairs[i];
1665                 int                     len;
1666                 JEntry          meta;
1667
1668                 /*
1669                  * Convert value, producing a JEntry and appending its variable-length
1670                  * data to buffer
1671                  */
1672                 convertJsonbValue(buffer, &meta, &pair->value, level + 1);
1673
1674                 len = JBE_OFFLENFLD(meta);
1675                 totallen += len;
1676
1677                 /*
1678                  * Bail out if total variable-length data exceeds what will fit in a
1679                  * JEntry length field.  We check this in each iteration, not just
1680                  * once at the end, to forestall possible integer overflow.
1681                  */
1682                 if (totallen > JENTRY_OFFLENMASK)
1683                         ereport(ERROR,
1684                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1685                                          errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1686                                                         JENTRY_OFFLENMASK)));
1687
1688                 /*
1689                  * Convert each JB_OFFSET_STRIDE'th length to an offset.
1690                  */
1691                 if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
1692                         meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1693
1694                 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1695                 jentry_offset += sizeof(JEntry);
1696         }
1697
1698         /* Total data size is everything we've appended to buffer */
1699         totallen = buffer->len - base_offset;
1700
1701         /* Check length again, since we didn't include the metadata above */
1702         if (totallen > JENTRY_OFFLENMASK)
1703                 ereport(ERROR,
1704                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1705                                  errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1706                                                 JENTRY_OFFLENMASK)));
1707
1708         /* Initialize the header of this node in the container's JEntry array */
1709         *pheader = JENTRY_ISCONTAINER | totallen;
1710 }
1711
1712 static void
1713 convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
1714 {
1715         int                     numlen;
1716         short           padlen;
1717
1718         switch (scalarVal->type)
1719         {
1720                 case jbvNull:
1721                         *jentry = JENTRY_ISNULL;
1722                         break;
1723
1724                 case jbvString:
1725                         appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
1726
1727                         *jentry = scalarVal->val.string.len;
1728                         break;
1729
1730                 case jbvNumeric:
1731                         numlen = VARSIZE_ANY(scalarVal->val.numeric);
1732                         padlen = padBufferToInt(buffer);
1733
1734                         appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
1735
1736                         *jentry = JENTRY_ISNUMERIC | (padlen + numlen);
1737                         break;
1738
1739                 case jbvBool:
1740                         *jentry = (scalarVal->val.boolean) ?
1741                                 JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
1742                         break;
1743
1744                 default:
1745                         elog(ERROR, "invalid jsonb scalar type");
1746         }
1747 }
1748
1749 /*
1750  * Compare two jbvString JsonbValue values, a and b.
1751  *
1752  * This is a special qsort() comparator used to sort strings in certain
1753  * internal contexts where it is sufficient to have a well-defined sort order.
1754  * In particular, object pair keys are sorted according to this criteria to
1755  * facilitate cheap binary searches where we don't care about lexical sort
1756  * order.
1757  *
1758  * a and b are first sorted based on their length.  If a tie-breaker is
1759  * required, only then do we consider string binary equality.
1760  */
1761 static int
1762 lengthCompareJsonbStringValue(const void *a, const void *b)
1763 {
1764         const JsonbValue *va = (const JsonbValue *) a;
1765         const JsonbValue *vb = (const JsonbValue *) b;
1766         int                     res;
1767
1768         Assert(va->type == jbvString);
1769         Assert(vb->type == jbvString);
1770
1771         if (va->val.string.len == vb->val.string.len)
1772         {
1773                 res = memcmp(va->val.string.val, vb->val.string.val, va->val.string.len);
1774         }
1775         else
1776         {
1777                 res = (va->val.string.len > vb->val.string.len) ? 1 : -1;
1778         }
1779
1780         return res;
1781 }
1782
1783 /*
1784  * qsort_arg() comparator to compare JsonbPair values.
1785  *
1786  * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1787  * to true iff a and b have full binary equality, since some callers have an
1788  * interest in whether the two values are equal or merely equivalent.
1789  *
1790  * N.B: String comparisons here are "length-wise"
1791  *
1792  * Pairs with equals keys are ordered such that the order field is respected.
1793  */
1794 static int
1795 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1796 {
1797         const JsonbPair *pa = (const JsonbPair *) a;
1798         const JsonbPair *pb = (const JsonbPair *) b;
1799         int                     res;
1800
1801         res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
1802         if (res == 0 && binequal)
1803                 *((bool *) binequal) = true;
1804
1805         /*
1806          * Guarantee keeping order of equal pair.  Unique algorithm will prefer
1807          * first element as value.
1808          */
1809         if (res == 0)
1810                 res = (pa->order > pb->order) ? -1 : 1;
1811
1812         return res;
1813 }
1814
1815 /*
1816  * Sort and unique-ify pairs in JsonbValue object
1817  */
1818 static void
1819 uniqueifyJsonbObject(JsonbValue *object)
1820 {
1821         bool            hasNonUniq = false;
1822
1823         Assert(object->type == jbvObject);
1824
1825         if (object->val.object.nPairs > 1)
1826                 qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1827                                   lengthCompareJsonbPair, &hasNonUniq);
1828
1829         if (hasNonUniq)
1830         {
1831                 JsonbPair  *ptr = object->val.object.pairs + 1,
1832                                    *res = object->val.object.pairs;
1833
1834                 while (ptr - object->val.object.pairs < object->val.object.nPairs)
1835                 {
1836                         /* Avoid copying over duplicate */
1837                         if (lengthCompareJsonbStringValue(ptr, res) != 0)
1838                         {
1839                                 res++;
1840                                 if (ptr != res)
1841                                         memcpy(res, ptr, sizeof(JsonbPair));
1842                         }
1843                         ptr++;
1844                 }
1845
1846                 object->val.object.nPairs = res + 1 - object->val.object.pairs;
1847         }
1848 }