/*-------------------------------------------------------------------------
 *
 * jsonb_util.c
 *	  converting between Jsonb and JsonbValues, and iterating.
 *
 * Copyright (c) 2014-2015, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/jsonb_util.c
 *
 *-------------------------------------------------------------------------
 */
16 #include "access/hash.h"
17 #include "catalog/pg_collation.h"
18 #include "miscadmin.h"
19 #include "utils/builtins.h"
20 #include "utils/jsonb.h"
21 #include "utils/memutils.h"
/*
 * Maximum number of elements in an array (or key/value pairs in an object).
 * This is limited by two things: the size of the JEntry array must fit
 * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
 * reserved for that in the JsonbContainer.header field.
 *
 * (The total size of an array's or object's elements is also limited by
 * JENTRY_OFFLENMASK, but we're not concerned about that here.)
 */
#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
35 static void fillJsonbValue(JsonbContainer *container, int index,
36 char *base_addr, uint32 offset,
38 static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
39 static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
40 static Jsonb *convertToJsonb(JsonbValue *val);
41 static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
42 static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
43 static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
44 static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
46 static int reserveFromBuffer(StringInfo buffer, int len);
47 static void appendToBuffer(StringInfo buffer, const char *data, int len);
48 static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
49 static short padBufferToInt(StringInfo buffer);
51 static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
52 static JsonbIterator *freeAndGetParent(JsonbIterator *it);
53 static JsonbParseState *pushState(JsonbParseState **pstate);
54 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
55 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
56 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
57 static int lengthCompareJsonbStringValue(const void *a, const void *b);
58 static int lengthCompareJsonbPair(const void *a, const void *b, void *arg);
59 static void uniqueifyJsonbObject(JsonbValue *object);
60 static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
61 JsonbIteratorToken seq,
62 JsonbValue *scalarVal);
65 * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
67 * There isn't a JsonbToJsonbValue(), because generally we find it more
68 * convenient to directly iterate through the Jsonb representation and only
69 * really convert nested scalar values. JsonbIteratorNext() does this, so that
70 * clients of the iteration code don't have to directly deal with the binary
71 * representation (JsonbDeepContains() is a notable exception, although all
72 * exceptions are internal to this module). In general, functions that accept
73 * a JsonbValue argument are concerned with the manipulation of scalar values,
74 * or simple containers of scalar values, where it would be inconvenient to
75 * deal with a great amount of other state.
78 JsonbValueToJsonb(JsonbValue *val)
82 if (IsAJsonbScalar(val))
85 JsonbParseState *pstate = NULL;
87 JsonbValue scalarArray;
89 scalarArray.type = jbvArray;
90 scalarArray.val.array.rawScalar = true;
91 scalarArray.val.array.nElems = 1;
93 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
94 pushJsonbValue(&pstate, WJB_ELEM, val);
95 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
97 out = convertToJsonb(res);
99 else if (val->type == jbvObject || val->type == jbvArray)
101 out = convertToJsonb(val);
105 Assert(val->type == jbvBinary);
106 out = palloc(VARHDRSZ + val->val.binary.len);
107 SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
108 memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
115 * Get the offset of the variable-length portion of a Jsonb node within
116 * the variable-length-data part of its container. The node is identified
117 * by index within the container's JEntry array.
120 getJsonbOffset(const JsonbContainer *jc, int index)
126 * Start offset of this entry is equal to the end offset of the previous
127 * entry. Walk backwards to the most recent entry stored as an end
128 * offset, returning that offset plus any lengths in between.
130 for (i = index - 1; i >= 0; i--)
132 offset += JBE_OFFLENFLD(jc->children[i]);
133 if (JBE_HAS_OFF(jc->children[i]))
141 * Get the length of the variable-length portion of a Jsonb node.
142 * The node is identified by index within the container's JEntry array.
145 getJsonbLength(const JsonbContainer *jc, int index)
151 * If the length is stored directly in the JEntry, just return it.
152 * Otherwise, get the begin offset of the entry, and subtract that from
153 * the stored end+1 offset.
155 if (JBE_HAS_OFF(jc->children[index]))
157 off = getJsonbOffset(jc, index);
158 len = JBE_OFFLENFLD(jc->children[index]) - off;
161 len = JBE_OFFLENFLD(jc->children[index]);
167 * BT comparator worker function. Returns an integer less than, equal to, or
168 * greater than zero, indicating whether a is less than, equal to, or greater
169 * than b. Consistent with the requirements for a B-Tree operator class
171 * Strings are compared lexically, in contrast with other places where we use a
172 * much simpler comparator logic for searching through Strings. Since this is
173 * called from B-Tree support function 1, we're careful about not leaking
177 compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
183 ita = JsonbIteratorInit(a);
184 itb = JsonbIteratorInit(b);
193 ra = JsonbIteratorNext(&ita, &va, false);
194 rb = JsonbIteratorNext(&itb, &vb, false);
200 /* Decisively equal */
204 if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
207 * There is no array or object to compare at this stage of
208 * processing. jbvArray/jbvObject values are compared
209 * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
215 if (va.type == vb.type)
223 res = compareJsonbScalarValue(&va, &vb);
228 * This could be a "raw scalar" pseudo array. That's
229 * a special case here though, since we still want the
230 * general type-based comparisons to apply, and as far
231 * as we're concerned a pseudo array is just a scalar.
233 if (va.val.array.rawScalar != vb.val.array.rawScalar)
234 res = (va.val.array.rawScalar) ? -1 : 1;
235 if (va.val.array.nElems != vb.val.array.nElems)
236 res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
239 if (va.val.object.nPairs != vb.val.object.nPairs)
240 res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
243 elog(ERROR, "unexpected jbvBinary value");
248 /* Type-defined order */
249 res = (va.type > vb.type) ? 1 : -1;
255 * It's safe to assume that the types differed, and that the va
256 * and vb values passed were set.
258 * If the two values were of the same container type, then there'd
259 * have been a chance to observe the variation in the number of
260 * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
261 * either two heterogeneously-typed containers, or a container and
264 * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
265 * cases here, because we would have seen the corresponding
266 * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
267 * concluded that they don't match.
269 Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
270 Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
272 Assert(va.type != vb.type);
273 Assert(va.type != jbvBinary);
274 Assert(vb.type != jbvBinary);
275 /* Type-defined order */
276 res = (va.type > vb.type) ? 1 : -1;
283 JsonbIterator *i = ita->parent;
290 JsonbIterator *i = itb->parent;
300 * Find value in object (i.e. the "value" part of some key/value pair in an
301 * object), or find a matching element if we're looking through an array. Do
302 * so on the basis of equality of the object keys only, or alternatively
303 * element values only, with a caller-supplied value "key". The "flags"
304 * argument allows the caller to specify which container types are of interest.
306 * This exported utility function exists to facilitate various cases concerned
307 * with "containment". If asked to look through an object, the caller had
308 * better pass a Jsonb String, because their keys can only be strings.
309 * Otherwise, for an array, any type of JsonbValue will do.
311 * In order to proceed with the search, it is necessary for callers to have
312 * both specified an interest in exactly one particular container type with an
313 * appropriate flag, as well as having the pointed-to Jsonb container be of
314 * one of those same container types at the top level. (Actually, we just do
315 * whichever makes sense to save callers the trouble of figuring it out - at
316 * most one can make sense, because the container either points to an array
317 * (possibly a "raw scalar" pseudo array) or an object.)
319 * Note that we can return a jbvBinary JsonbValue if this is called on an
320 * object, but we never do so on an array. If the caller asks to look through
321 * a container type that is not of the type pointed to by the container,
322 * immediately fall through and return NULL. If we cannot find the value,
323 * return NULL. Otherwise, return palloc()'d copy of value.
326 findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
329 JEntry *children = container->children;
330 int count = (container->header & JB_CMASK);
333 Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
335 /* Quick out without a palloc cycle if object/array is empty */
339 result = palloc(sizeof(JsonbValue));
341 if (flags & JB_FARRAY & container->header)
343 char *base_addr = (char *) (children + count);
347 for (i = 0; i < count; i++)
349 fillJsonbValue(container, i, base_addr, offset, result);
351 if (key->type == result->type)
353 if (equalsJsonbScalarValue(key, result))
357 JBE_ADVANCE_OFFSET(offset, children[i]);
360 else if (flags & JB_FOBJECT & container->header)
362 /* Since this is an object, account for *Pairs* of Jentrys */
363 char *base_addr = (char *) (children + count * 2);
367 /* Object key passed by caller must be a string */
368 Assert(key->type == jbvString);
370 /* Binary search on object/pair keys *only* */
371 while (stopLow < stopHigh)
375 JsonbValue candidate;
377 stopMiddle = stopLow + (stopHigh - stopLow) / 2;
379 candidate.type = jbvString;
380 candidate.val.string.val =
381 base_addr + getJsonbOffset(container, stopMiddle);
382 candidate.val.string.len = getJsonbLength(container, stopMiddle);
384 difference = lengthCompareJsonbStringValue(&candidate, key);
388 /* Found our key, return corresponding value */
389 int index = stopMiddle + count;
391 fillJsonbValue(container, index, base_addr,
392 getJsonbOffset(container, index),
400 stopLow = stopMiddle + 1;
402 stopHigh = stopMiddle;
413 * Get i-th value of a Jsonb array.
415 * Returns palloc()'d copy of the value, or NULL if it does not exist.
418 getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
424 if ((container->header & JB_FARRAY) == 0)
425 elog(ERROR, "not a jsonb array");
427 nelements = container->header & JB_CMASK;
428 base_addr = (char *) &container->children[nelements];
433 result = palloc(sizeof(JsonbValue));
435 fillJsonbValue(container, i, base_addr,
436 getJsonbOffset(container, i),
443 * A helper function to fill in a JsonbValue to represent an element of an
444 * array, or a key or value of an object.
446 * The node's JEntry is at container->children[index], and its variable-length
447 * data is at base_addr + offset. We make the caller determine the offset
448 * since in many cases the caller can amortize that work across multiple
449 * children. When it can't, it can just call getJsonbOffset().
451 * A nested array or object will be returned as jbvBinary, ie. it won't be
455 fillJsonbValue(JsonbContainer *container, int index,
456 char *base_addr, uint32 offset,
459 JEntry entry = container->children[index];
461 if (JBE_ISNULL(entry))
463 result->type = jbvNull;
465 else if (JBE_ISSTRING(entry))
467 result->type = jbvString;
468 result->val.string.val = base_addr + offset;
469 result->val.string.len = getJsonbLength(container, index);
470 Assert(result->val.string.len >= 0);
472 else if (JBE_ISNUMERIC(entry))
474 result->type = jbvNumeric;
475 result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
477 else if (JBE_ISBOOL_TRUE(entry))
479 result->type = jbvBool;
480 result->val.boolean = true;
482 else if (JBE_ISBOOL_FALSE(entry))
484 result->type = jbvBool;
485 result->val.boolean = false;
489 Assert(JBE_ISCONTAINER(entry));
490 result->type = jbvBinary;
491 /* Remove alignment padding from data pointer and length */
492 result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
493 result->val.binary.len = getJsonbLength(container, index) -
494 (INTALIGN(offset) - offset);
499 * Push JsonbValue into JsonbParseState.
501 * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
502 * JsonbValue to a Jsonb.
504 * Initial state of *JsonbParseState is NULL, since it'll be allocated here
505 * originally (caller will get JsonbParseState back by reference).
507 * Only sequential tokens pertaining to non-container types should pass a
508 * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a
509 * "raw scalar" pseudo array to append it - the actual scalar should be passed
510 * next and it will be added as the only member of the array.
512 * Values of type jvbBinary, which are rolled up arrays and objects,
513 * are unpacked before being added to the result.
516 pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
520 JsonbValue *res = NULL;
522 JsonbIteratorToken tok;
524 if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
525 jbval->type != jbvBinary)
528 return pushJsonbValueScalar(pstate, seq, jbval);
531 /* unpack the binary and add each piece to the pstate */
532 it = JsonbIteratorInit(jbval->val.binary.data);
533 while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
534 res = pushJsonbValueScalar(pstate, tok,
535 tok < WJB_BEGIN_ARRAY ? &v : NULL);
541 * Do the actual pushing, with only scalar or pseudo-scalar-array values
545 pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
546 JsonbValue *scalarVal)
548 JsonbValue *result = NULL;
552 case WJB_BEGIN_ARRAY:
553 Assert(!scalarVal || scalarVal->val.array.rawScalar);
554 *pstate = pushState(pstate);
555 result = &(*pstate)->contVal;
556 (*pstate)->contVal.type = jbvArray;
557 (*pstate)->contVal.val.array.nElems = 0;
558 (*pstate)->contVal.val.array.rawScalar = (scalarVal &&
559 scalarVal->val.array.rawScalar);
560 if (scalarVal && scalarVal->val.array.nElems > 0)
562 /* Assume that this array is still really a scalar */
563 Assert(scalarVal->type == jbvArray);
564 (*pstate)->size = scalarVal->val.array.nElems;
570 (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
573 case WJB_BEGIN_OBJECT:
575 *pstate = pushState(pstate);
576 result = &(*pstate)->contVal;
577 (*pstate)->contVal.type = jbvObject;
578 (*pstate)->contVal.val.object.nPairs = 0;
580 (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
584 Assert(scalarVal->type == jbvString);
585 appendKey(*pstate, scalarVal);
588 Assert(IsAJsonbScalar(scalarVal));
589 appendValue(*pstate, scalarVal);
592 Assert(IsAJsonbScalar(scalarVal));
593 appendElement(*pstate, scalarVal);
596 uniqueifyJsonbObject(&(*pstate)->contVal);
599 /* Steps here common to WJB_END_OBJECT case */
601 result = &(*pstate)->contVal;
604 * Pop stack and push current array/object as value in parent
607 *pstate = (*pstate)->next;
610 switch ((*pstate)->contVal.type)
613 appendElement(*pstate, result);
616 appendValue(*pstate, result);
619 elog(ERROR, "invalid jsonb container type");
624 elog(ERROR, "unrecognized jsonb sequential processing token");
631 * pushJsonbValue() worker: Iteration-like forming of Jsonb
633 static JsonbParseState *
634 pushState(JsonbParseState **pstate)
636 JsonbParseState *ns = palloc(sizeof(JsonbParseState));
643 * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb
646 appendKey(JsonbParseState *pstate, JsonbValue *string)
648 JsonbValue *object = &pstate->contVal;
650 Assert(object->type == jbvObject);
651 Assert(string->type == jbvString);
653 if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
655 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
656 errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
659 if (object->val.object.nPairs >= pstate->size)
662 object->val.object.pairs = repalloc(object->val.object.pairs,
663 sizeof(JsonbPair) * pstate->size);
666 object->val.object.pairs[object->val.object.nPairs].key = *string;
667 object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
671 * pushJsonbValue() worker: Append a pair value to state when generating a
675 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
677 JsonbValue *object = &pstate->contVal;
679 Assert(object->type == jbvObject);
681 object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
685 * pushJsonbValue() worker: Append an element to state when generating a Jsonb
688 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
690 JsonbValue *array = &pstate->contVal;
692 Assert(array->type == jbvArray);
694 if (array->val.array.nElems >= JSONB_MAX_ELEMS)
696 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
697 errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
700 if (array->val.array.nElems >= pstate->size)
703 array->val.array.elems = repalloc(array->val.array.elems,
704 sizeof(JsonbValue) * pstate->size);
707 array->val.array.elems[array->val.array.nElems++] = *scalarVal;
711 * Given a JsonbContainer, expand to JsonbIterator to iterate over items
712 * fully expanded to in-memory representation for manipulation.
714 * See JsonbIteratorNext() for notes on memory management.
717 JsonbIteratorInit(JsonbContainer *container)
719 return iteratorFromContainer(container, NULL);
723 * Get next JsonbValue while iterating
725 * Caller should initially pass their own, original iterator. They may get
726 * back a child iterator palloc()'d here instead. The function can be relied
727 * on to free those child iterators, lest the memory allocated for highly
728 * nested objects become unreasonable, but only if callers don't end iteration
729 * early (by breaking upon having found something in a search, for example).
731 * Callers in such a scenario, that are particularly sensitive to leaking
732 * memory in a long-lived context may walk the ancestral tree from the final
733 * iterator we left them with to its oldest ancestor, pfree()ing as they go.
734 * They do not have to free any other memory previously allocated for iterators
735 * but not accessible as direct ancestors of the iterator they're last passed
738 * Returns "Jsonb sequential processing" token value. Iterator "state"
739 * reflects the current stage of the process in a less granular fashion, and is
740 * mostly used here to track things internally with respect to particular
743 * Clients of this function should not have to handle any jbvBinary values
744 * (since recursive calls will deal with this), provided skipNested is false.
745 * It is our job to expand the jbvBinary representation without bothering them
746 * with it. However, clients should not take it upon themselves to touch array
747 * or Object element/pair buffers, since their element/pair pointers are
748 * garbage. Also, *val will not be set when returning WJB_END_ARRAY or
749 * WJB_END_OBJECT, on the assumption that it's only useful to access values
753 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
759 * When stepping into a nested container, we jump back here to start
760 * processing the child. We will not recurse further in one call, because
761 * processing the child will always begin in JBI_ARRAY_START or
762 * JBI_OBJECT_START state.
765 switch ((*it)->state)
767 case JBI_ARRAY_START:
768 /* Set v to array on first array call */
769 val->type = jbvArray;
770 val->val.array.nElems = (*it)->nElems;
773 * v->val.array.elems is not actually set, because we aren't doing
776 val->val.array.rawScalar = (*it)->isScalar;
778 (*it)->curDataOffset = 0;
779 (*it)->curValueOffset = 0; /* not actually used */
780 /* Set state for next call */
781 (*it)->state = JBI_ARRAY_ELEM;
782 return WJB_BEGIN_ARRAY;
785 if ((*it)->curIndex >= (*it)->nElems)
788 * All elements within array already processed. Report this
789 * to caller, and give it back original parent iterator (which
790 * independently tracks iteration progress at its level of
793 *it = freeAndGetParent(*it);
794 return WJB_END_ARRAY;
797 fillJsonbValue((*it)->container, (*it)->curIndex,
798 (*it)->dataProper, (*it)->curDataOffset,
801 JBE_ADVANCE_OFFSET((*it)->curDataOffset,
802 (*it)->children[(*it)->curIndex]);
805 if (!IsAJsonbScalar(val) && !skipNested)
807 /* Recurse into container. */
808 *it = iteratorFromContainer(val->val.binary.data, *it);
814 * Scalar item in array, or a container and caller didn't want
815 * us to recurse into it.
820 case JBI_OBJECT_START:
821 /* Set v to object on first object call */
822 val->type = jbvObject;
823 val->val.object.nPairs = (*it)->nElems;
826 * v->val.object.pairs is not actually set, because we aren't
827 * doing a full conversion
830 (*it)->curDataOffset = 0;
831 (*it)->curValueOffset = getJsonbOffset((*it)->container,
833 /* Set state for next call */
834 (*it)->state = JBI_OBJECT_KEY;
835 return WJB_BEGIN_OBJECT;
838 if ((*it)->curIndex >= (*it)->nElems)
841 * All pairs within object already processed. Report this to
842 * caller, and give it back original containing iterator
843 * (which independently tracks iteration progress at its level
846 *it = freeAndGetParent(*it);
847 return WJB_END_OBJECT;
851 /* Return key of a key/value pair. */
852 fillJsonbValue((*it)->container, (*it)->curIndex,
853 (*it)->dataProper, (*it)->curDataOffset,
855 if (val->type != jbvString)
856 elog(ERROR, "unexpected jsonb type as object key");
858 /* Set state for next call */
859 (*it)->state = JBI_OBJECT_VALUE;
863 case JBI_OBJECT_VALUE:
864 /* Set state for next call */
865 (*it)->state = JBI_OBJECT_KEY;
867 fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
868 (*it)->dataProper, (*it)->curValueOffset,
871 JBE_ADVANCE_OFFSET((*it)->curDataOffset,
872 (*it)->children[(*it)->curIndex]);
873 JBE_ADVANCE_OFFSET((*it)->curValueOffset,
874 (*it)->children[(*it)->curIndex + (*it)->nElems]);
878 * Value may be a container, in which case we recurse with new,
879 * child iterator (unless the caller asked not to, by passing
882 if (!IsAJsonbScalar(val) && !skipNested)
884 *it = iteratorFromContainer(val->val.binary.data, *it);
891 elog(ERROR, "invalid iterator state");
896 * Initialize an iterator for iterating all elements in a container.
898 static JsonbIterator *
899 iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
903 it = palloc(sizeof(JsonbIterator));
904 it->container = container;
906 it->nElems = container->header & JB_CMASK;
908 /* Array starts just after header */
909 it->children = container->children;
911 switch (container->header & (JB_FARRAY | JB_FOBJECT))
915 (char *) it->children + it->nElems * sizeof(JEntry);
916 it->isScalar = (container->header & JB_FSCALAR) != 0;
917 /* This is either a "raw scalar", or an array */
918 Assert(!it->isScalar || it->nElems == 1);
920 it->state = JBI_ARRAY_START;
925 (char *) it->children + it->nElems * sizeof(JEntry) * 2;
926 it->state = JBI_OBJECT_START;
930 elog(ERROR, "unknown type of jsonb container");
937 * JsonbIteratorNext() worker: Return parent, while freeing memory for current
940 static JsonbIterator *
941 freeAndGetParent(JsonbIterator *it)
943 JsonbIterator *v = it->parent;
950 * Worker for "contains" operator's function
952 * Formally speaking, containment is top-down, unordered subtree isomorphism.
954 * Takes iterators that belong to some container type. These iterators
955 * "belong" to those values in the sense that they've just been initialized in
956 * respect of them by the caller (perhaps in a nested fashion).
958 * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
959 * We determine if mContained is contained within val.
962 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
970 * Guard against stack overflow due to overly complex Jsonb.
972 * Functions called here independently take this precaution, but that
973 * might not be sufficient since this is also a recursive function.
977 rval = JsonbIteratorNext(val, &vval, false);
978 rcont = JsonbIteratorNext(mContained, &vcontained, false);
983 * The differing return values can immediately be taken as indicating
984 * two differing container types at this nesting level, which is
985 * sufficient reason to give up entirely (but it should be the case
986 * that they're both some container type).
988 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
989 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
992 else if (rcont == WJB_BEGIN_OBJECT)
994 Assert(vval.type == jbvObject);
995 Assert(vcontained.type == jbvObject);
998 * If the lhs has fewer pairs than the rhs, it can't possibly contain
999 * the rhs. (This conclusion is safe only because we de-duplicate
1000 * keys in all Jsonb objects; thus there can be no corresponding
1001 * optimization in the array case.) The case probably won't arise
1002 * often, but since it's such a cheap check we may as well make it.
1004 if (vval.val.object.nPairs < vcontained.val.object.nPairs)
1007 /* Work through rhs "is it contained within?" object */
1010 JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
1012 rcont = JsonbIteratorNext(mContained, &vcontained, false);
1015 * When we get through caller's rhs "is it contained within?"
1016 * object without failing to find one of its values, it's
1019 if (rcont == WJB_END_OBJECT)
1022 Assert(rcont == WJB_KEY);
1024 /* First, find value by key... */
1025 lhsVal = findJsonbValueFromContainer((*val)->container,
1033 * ...at this stage it is apparent that there is at least a key
1034 * match for this rhs pair.
1036 rcont = JsonbIteratorNext(mContained, &vcontained, true);
1038 Assert(rcont == WJB_VALUE);
1041 * Compare rhs pair's value with lhs pair's value just found using
1044 if (lhsVal->type != vcontained.type)
1048 else if (IsAJsonbScalar(lhsVal))
1050 if (!equalsJsonbScalarValue(lhsVal, &vcontained))
1055 /* Nested container value (object or array) */
1056 JsonbIterator *nestval,
1059 Assert(lhsVal->type == jbvBinary);
1060 Assert(vcontained.type == jbvBinary);
1062 nestval = JsonbIteratorInit(lhsVal->val.binary.data);
1063 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1066 * Match "value" side of rhs datum object's pair recursively.
1067 * It's a nested structure.
1069 * Note that nesting still has to "match up" at the right
1070 * nesting sub-levels. However, there need only be zero or
1071 * more matching pairs (or elements) at each nesting level
1072 * (provided the *rhs* pairs/elements *all* match on each
1073 * level), which enables searching nested structures for a
1074 * single String or other primitive type sub-datum quite
1075 * effectively (provided the user constructed the rhs nested
1076 * structure such that we "know where to look").
1078 * In other words, the mapping of container nodes in the rhs
1079 * "vcontained" Jsonb to internal nodes on the lhs is
1080 * injective, and parent-child edges on the rhs must be mapped
1081 * to parent-child edges on the lhs to satisfy the condition
1082 * of containment (plus of course the mapped nodes must be
1085 if (!JsonbDeepContains(&nestval, &nestContained))
1090 else if (rcont == WJB_BEGIN_ARRAY)
1092 JsonbValue *lhsConts = NULL;
1093 uint32 nLhsElems = vval.val.array.nElems;
1095 Assert(vval.type == jbvArray);
1096 Assert(vcontained.type == jbvArray);
1099 * Handle distinction between "raw scalar" pseudo arrays, and real
1102 * A raw scalar may contain another raw scalar, and an array may
1103 * contain a raw scalar, but a raw scalar may not contain an array. We
1104 * don't do something like this for the object case, since objects can
1105 * only contain pairs, never raw scalars (a pair is represented by an
1106 * rhs object argument with a single contained pair).
1108 if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
1111 /* Work through rhs "is it contained within?" array */
1114 rcont = JsonbIteratorNext(mContained, &vcontained, true);
1117 * When we get through caller's rhs "is it contained within?"
1118 * array without failing to find one of its values, it's
1121 if (rcont == WJB_END_ARRAY)
1124 Assert(rcont == WJB_ELEM);
1126 if (IsAJsonbScalar(&vcontained))
1128 if (!findJsonbValueFromContainer((*val)->container,
1138 * If this is first container found in rhs array (at this
1139 * depth), initialize temp lhs array of containers
1141 if (lhsConts == NULL)
1145 /* Make room for all possible values */
1146 lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1148 for (i = 0; i < nLhsElems; i++)
1150 /* Store all lhs elements in temp array */
1151 rcont = JsonbIteratorNext(val, &vval, true);
1152 Assert(rcont == WJB_ELEM);
1154 if (vval.type == jbvBinary)
1155 lhsConts[j++] = vval;
1158 /* No container elements in temp array, so give up now */
1162 /* We may have only partially filled array */
1166 /* XXX: Nested array containment is O(N^2) */
1167 for (i = 0; i < nLhsElems; i++)
1169 /* Nested container value (object or array) */
1170 JsonbIterator *nestval,
1174 nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1175 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1177 contains = JsonbDeepContains(&nestval, &nestContained);
1182 pfree(nestContained);
1188 * Report rhs container value is not contained if couldn't
1189 * match rhs container to *some* lhs cont
1198 elog(ERROR, "invalid jsonb container type");
1201 elog(ERROR, "unexpectedly fell off end of jsonb container");
1206 * Hash a JsonbValue scalar value, mixing the hash value into an existing
1207 * hash provided by the caller.
1209 * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1213 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1217 /* Compute hash value for scalarVal */
1218 switch (scalarVal->type)
1224 tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1225 scalarVal->val.string.len));
1228 /* Must hash equal numerics to equal hash codes */
1229 tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1230 NumericGetDatum(scalarVal->val.numeric)));
1233 tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1237 elog(ERROR, "invalid jsonb scalar type");
1238 tmp = 0; /* keep compiler quiet */
1243 * Combine hash values of successive keys, values and elements by rotating
1244 * the previous value left 1 bit, then XOR'ing in the new
1245 * key/value/element's hash value.
1247 *hash = (*hash << 1) | (*hash >> 31);
1252 * Are two scalar JsonbValues of the same type a and b equal?
/*
 * equalsJsonbScalarValue: equality test for two scalar JsonbValues.
 *
 * aScalar, bScalar: the values to compare.  The per-type comparison is
 * selected by aScalar->type and is only entered when both type tags match.
 *
 * NOTE(review): braces, case labels and other short lines are omitted from
 * this listing; the mapping of each return statement to a type is inferred
 * from the field it reads.
 */
1255 equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1257 if (aScalar->type == bScalar->type)
1259 switch (aScalar->type)
/* Strings: equal exactly when the length-then-bytes comparator reports 0. */
1264 return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
/* Numerics: delegate to numeric_eq rather than comparing the stored bytes. */
1266 return DatumGetBool(DirectFunctionCall2(numeric_eq,
1267 PointerGetDatum(aScalar->val.numeric),
1268 PointerGetDatum(bScalar->val.numeric)));
/* Booleans: direct comparison of the two flags. */
1270 return aScalar->val.boolean == bScalar->val.boolean;
/* Type tag not handled by the switch. */
1273 elog(ERROR, "invalid jsonb scalar type");
/* Presumably reached when the two type tags differ -- confirm against the complete file. */
1276 elog(ERROR, "jsonb scalar type mismatch");
1281 * Compare two scalar JsonbValues, returning -1, 0, or 1.
1283 * Strings are compared using the default collation. Used by B-tree
1284 * operators, where a lexical sort order is generally expected.
/*
 * compareJsonbScalarValue: ordering comparison of two scalar JsonbValues.
 *
 * aScalar, bScalar: the values to compare.  The per-type comparison is
 * selected by aScalar->type and is only entered when both type tags match.
 *
 * NOTE(review): braces, case labels and the short return statements of the
 * boolean branch are omitted from this listing; the mapping of statements to
 * types is inferred from the fields they read.
 */
1287 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1289 if (aScalar->type == bScalar->type)
1291 switch (aScalar->type)
/* Strings: collation-aware comparison via varstr_cmp() using DEFAULT_COLLATION_OID. */
1296 return varstr_cmp(aScalar->val.string.val,
1297 aScalar->val.string.len,
1298 bScalar->val.string.val,
1299 bScalar->val.string.len,
1300 DEFAULT_COLLATION_OID);
/* Numerics: delegate to numeric_cmp and return its int32 result. */
1302 return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1303 PointerGetDatum(aScalar->val.numeric),
1304 PointerGetDatum(bScalar->val.numeric)));
/*
 * Booleans: three-way test (equal / a greater / otherwise).  The value
 * returned by each branch is not visible in this listing.
 */
1306 if (aScalar->val.boolean == bScalar->val.boolean)
1308 else if (aScalar->val.boolean >bScalar->val.boolean)
/* Type tag not handled by the switch. */
1313 elog(ERROR, "invalid jsonb scalar type");
/* Presumably reached when the two type tags differ -- confirm against the complete file. */
1316 elog(ERROR, "jsonb scalar type mismatch");
1322 * Functions for manipulating the resizable buffer used by convertToJsonb and
1327 * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
1328 * Returns the offset to the reserved area. The caller is expected to fill
1329 * the reserved area later with copyToBuffer().
/*
 * reserveFromBuffer: make room for 'len' more bytes at the end of 'buffer'.
 *
 * buffer: the StringInfo being built.
 * len: number of bytes to reserve.
 *
 * NOTE(review): the statement that advances buffer->len and the return
 * statement are not visible in this listing (short lines are omitted);
 * confirm against the complete file.
 */
1332 reserveFromBuffer(StringInfo buffer, int len)
1336 /* Make more room if needed */
1337 enlargeStringInfo(buffer, len);
1339 /* remember current offset */
/* Presumably the start of the reserved area that is handed back to the caller. */
1340 offset = buffer->len;
1342 /* reserve the space */
1346 * Keep a trailing null in place, even though it's not useful for us; it
1347 * seems best to preserve the invariants of StringInfos.
/* Writes the terminator at index buffer->len, i.e. just past the current contents. */
1349 buffer->data[buffer->len] = '\0';
1355 * Copy 'len' bytes to a previously reserved area in buffer.
/*
 * copyToBuffer: overwrite 'len' bytes of buffer->data, starting at byte
 * 'offset', with the bytes at 'data'.
 *
 * The only visible statement is the memcpy below: buffer->len is not touched
 * and no bounds check is made here, so the target range must already lie
 * within the buffer's data.
 */
1358 copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
1360 memcpy(buffer->data + offset, data, len);
1364 * A shorthand for reserveFromBuffer + copyToBuffer.
/*
 * appendToBuffer: append 'len' bytes from 'data' to 'buffer'.
 *
 * Done in two steps: reserve the space, then copy into the offset that the
 * reservation returned.
 */
1367 appendToBuffer(StringInfo buffer, const char *data, int len)
1371 offset = reserveFromBuffer(buffer, len);
1372 copyToBuffer(buffer, offset, data, len);
1377 * Append padding, so that the length of the StringInfo is int-aligned.
1378 * Returns the number of padding bytes appended.
/*
 * padBufferToInt: zero-pad 'buffer' so that its length is INTALIGN'ed.
 *
 * NOTE(review): the declarations and the return statement are not visible
 * in this listing (short lines are omitted).
 */
1381 padBufferToInt(StringInfo buffer)
/* Distance from the current length up to the next INTALIGN boundary. */
1387 padlen = INTALIGN(buffer->len) - buffer->len;
/* Reserve exactly that many bytes; 'offset' is where the padding begins. */
1389 offset = reserveFromBuffer(buffer, padlen);
1391 /* padlen must be small, so this is probably faster than a memset */
1392 for (p = 0; p < padlen; p++)
1393 buffer->data[offset + p] = '\0';
1399 * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
/*
 * convertToJsonb: serialize the JsonbValue tree rooted at 'val' into a
 * freshly built buffer and hand that buffer back as a Jsonb.
 *
 * val: root value; asserted not to be of type jbvBinary.
 *
 * NOTE(review): some declarations and the return statement are not visible
 * in this listing (short lines are omitted).
 */
1402 convertToJsonb(JsonbValue *val)
1404 StringInfoData buffer;
1408 /* Should not already have binary representation */
1409 Assert(val->type != jbvBinary);
1411 /* Allocate an output buffer. It will be enlarged as needed */
1412 initStringInfo(&buffer);
1414 /* Make room for the varlena header */
/* The returned offset is ignored; the header is filled in by SET_VARSIZE below. */
1415 reserveFromBuffer(&buffer, VARHDRSZ);
/* Serialize the whole tree, starting at nesting level 0. */
1417 convertJsonbValue(&buffer, &jentry, val, 0);
1420 * Note: the JEntry of the root is discarded. Therefore the root
1421 * JsonbContainer struct must contain enough information to tell what kind
/* The StringInfo's data area itself becomes the result; nothing is copied here. */
1425 res = (Jsonb *) buffer.data;
/* Total size includes the VARHDRSZ bytes reserved at the start. */
1427 SET_VARSIZE(res, buffer.len);
1433 * Subroutine of convertToJsonb: serialize a single JsonbValue into buffer.
1435 * The JEntry header for this node is returned in *header. It is filled in
1436 * with the length of this value and appropriate type bits. If we wish to
1437 * store an end offset rather than a length, it is the caller's responsibility
1438 * to adjust for that.
1440 * If the value is an array or an object, this recurses. 'level' is only used
1441 * for debugging purposes.
/*
 * convertJsonbValue: serialize one JsonbValue by dispatching on its kind.
 *
 * buffer: output buffer being appended to.
 * header: receives the JEntry produced by the chosen converter.
 * val: the value to serialize.
 * level: nesting depth, forwarded unchanged to the array/object converters.
 *
 * NOTE(review): short lines are omitted from this listing, so any early
 * exit between the stack-depth check and the dispatch is not visible.
 */
1444 convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
/* This function recurses through the array/object converters; guard the stack. */
1446 check_stack_depth();
1452 * A JsonbValue passed as val should never have a type of jbvBinary, and
1453 * neither should any of its sub-components. Those values will be produced
1454 * by convertJsonbArray and convertJsonbObject, the results of which will
1455 * not be passed back to this function as an argument.
/* Scalars take no 'level' argument; containers receive it. */
1458 if (IsAJsonbScalar(val))
1459 convertJsonbScalar(buffer, header, val);
1460 else if (val->type == jbvArray)
1461 convertJsonbArray(buffer, header, val, level);
1462 else if (val->type == jbvObject)
1463 convertJsonbObject(buffer, header, val, level);
/* Presumably the final else branch: anything not scalar, array or object is rejected. */
1465 elog(ERROR, "unknown type of jsonb container to convert");
/*
 * convertJsonbArray: serialize an array JsonbValue into 'buffer'.
 *
 * buffer: output buffer being appended to.
 * pheader: receives this array's JEntry (container flag plus total length).
 * val: the array; element count and elements are read from val->val.array.
 * level: nesting depth; elements are converted at level + 1.
 *
 * Order of writes as visible below: alignment padding, one uint32 header
 * word, a reserved run of nElems JEntry slots, then each element's data,
 * with its JEntry copied back into the reserved slot as it is produced.
 *
 * NOTE(review): short lines are omitted from this listing, including most
 * declarations, the initialization and accumulation of 'totallen', and the
 * opening of each ereport() call.
 */
1469 convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1476 int nElems = val->val.array.nElems;
1478 /* Remember where in the buffer this array starts. */
/* Taken before padding, so the padding is included in the final total length. */
1479 base_offset = buffer->len;
1481 /* Align to 4-byte boundary (any padding counts as part of my data) */
1482 padBufferToInt(buffer);
1485 * Construct the header Jentry and store it in the beginning of the
1486 * variable-length payload.
/* Element count combined with the array flag in a single word. */
1488 header = nElems | JB_FARRAY;
/* rawScalar arrays are asserted to hold exactly one element and get an extra flag. */
1489 if (val->val.array.rawScalar)
1491 Assert(nElems == 1);
1493 header |= JB_FSCALAR;
1496 appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1498 /* Reserve space for the JEntries of the elements. */
/* jentry_offset tracks the next unfilled slot in the reserved run. */
1499 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
1502 for (i = 0; i < nElems; i++)
1504 JsonbValue *elem = &val->val.array.elems[i];
1509 * Convert element, producing a JEntry and appending its
1510 * variable-length data to buffer
1512 convertJsonbValue(buffer, &meta, elem, level + 1);
/* Extract the offset/length field of the JEntry just produced. */
1514 len = JBE_OFFLENFLD(meta);
1518 * Bail out if total variable-length data exceeds what will fit in a
1519 * JEntry length field. We check this in each iteration, not just
1520 * once at the end, to forestall possible integer overflow.
1522 if (totallen > JENTRY_OFFLENMASK)
1524 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1525 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1526 JENTRY_OFFLENMASK)));
1529 * Convert each JB_OFFSET_STRIDE'th length to an offset.
/*
 * For those entries, keep only the bits selected by JENTRY_TYPEMASK, put
 * the running total in place of the length, and set JENTRY_HAS_OFF.
 */
1531 if ((i % JB_OFFSET_STRIDE) == 0)
1532 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
/* Fill this element's reserved slot, then advance to the next slot. */
1534 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1535 jentry_offset += sizeof(JEntry);
1538 /* Total data size is everything we've appended to buffer */
1539 totallen = buffer->len - base_offset;
1541 /* Check length again, since we didn't include the metadata above */
1542 if (totallen > JENTRY_OFFLENMASK)
1544 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1545 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1546 JENTRY_OFFLENMASK)));
1548 /* Initialize the header of this node in the container's JEntry array */
1549 *pheader = JENTRY_ISCONTAINER | totallen;
/*
 * convertJsonbObject: serialize an object JsonbValue into 'buffer'.
 *
 * buffer: output buffer being appended to.
 * pheader: receives this object's JEntry (container flag plus total length).
 * val: the object; pair count and pairs are read from val->val.object.
 * level: nesting depth; values are converted at level + 1.
 *
 * Order of writes as visible below: alignment padding, one uint32 header
 * word, a reserved run of 2 * nPairs JEntry slots, all keys, then all
 * values.  Each JEntry is copied back into its reserved slot as soon as it
 * is produced, so the slots end up in the same keys-then-values order.
 *
 * NOTE(review): short lines are omitted from this listing, including most
 * declarations, the initialization and accumulation of 'totallen', and the
 * opening of each ereport() call.
 */
1553 convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1560 int nPairs = val->val.object.nPairs;
1562 /* Remember where in the buffer this object starts. */
/* Taken before padding, so the padding is included in the final total length. */
1563 base_offset = buffer->len;
1565 /* Align to 4-byte boundary (any padding counts as part of my data) */
1566 padBufferToInt(buffer);
1569 * Construct the header Jentry and store it in the beginning of the
1570 * variable-length payload.
/* Pair count combined with the object flag in a single word. */
1572 header = nPairs | JB_FOBJECT;
1573 appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1575 /* Reserve space for the JEntries of the keys and values. */
/* One slot per key plus one per value; jentry_offset tracks the next unfilled slot. */
1576 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
1579 * Iterate over the keys, then over the values, since that is the ordering
1580 * we want in the on-disk representation.
1583 for (i = 0; i < nPairs; i++)
1585 JsonbPair *pair = &val->val.object.pairs[i];
1590 * Convert key, producing a JEntry and appending its variable-length
/* Keys go straight to the scalar converter; no recursion and no 'level'. */
1593 convertJsonbScalar(buffer, &meta, &pair->key);
/* Extract the offset/length field of the JEntry just produced. */
1595 len = JBE_OFFLENFLD(meta);
1599 * Bail out if total variable-length data exceeds what will fit in a
1600 * JEntry length field. We check this in each iteration, not just
1601 * once at the end, to forestall possible integer overflow.
1603 if (totallen > JENTRY_OFFLENMASK)
1605 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1606 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1607 JENTRY_OFFLENMASK)));
1610 * Convert each JB_OFFSET_STRIDE'th length to an offset.
/* Key i occupies slot i, so the stride test uses i directly. */
1612 if ((i % JB_OFFSET_STRIDE) == 0)
1613 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
/* Fill this key's reserved slot, then advance to the next slot. */
1615 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1616 jentry_offset += sizeof(JEntry);
/* Second pass over the same pairs, this time serializing the values. */
1618 for (i = 0; i < nPairs; i++)
1620 JsonbPair *pair = &val->val.object.pairs[i];
1625 * Convert value, producing a JEntry and appending its variable-length
/* Values may be containers, so they go through the general converter one level deeper. */
1628 convertJsonbValue(buffer, &meta, &pair->value, level + 1);
1630 len = JBE_OFFLENFLD(meta);
1634 * Bail out if total variable-length data exceeds what will fit in a
1635 * JEntry length field. We check this in each iteration, not just
1636 * once at the end, to forestall possible integer overflow.
1638 if (totallen > JENTRY_OFFLENMASK)
1640 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1641 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1642 JENTRY_OFFLENMASK)));
1645 * Convert each JB_OFFSET_STRIDE'th length to an offset.
/* Value i occupies slot i + nPairs (after all keys), hence the shifted stride test. */
1647 if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
1648 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
/* Fill this value's reserved slot, then advance to the next slot. */
1650 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1651 jentry_offset += sizeof(JEntry);
1654 /* Total data size is everything we've appended to buffer */
1655 totallen = buffer->len - base_offset;
1657 /* Check length again, since we didn't include the metadata above */
1658 if (totallen > JENTRY_OFFLENMASK)
1660 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1661 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1662 JENTRY_OFFLENMASK)));
1664 /* Initialize the header of this node in the container's JEntry array */
1665 *pheader = JENTRY_ISCONTAINER | totallen;
/*
 * convertJsonbScalar: serialize one scalar JsonbValue.
 *
 * buffer: output buffer; payload bytes (if any) are appended to it.
 * jentry: receives the JEntry describing the scalar.
 * scalarVal: the scalar; dispatch is on scalarVal->type.
 *
 * NOTE(review): case labels, break statements and declarations are omitted
 * from this listing; the mapping of statements to types is inferred from
 * the fields they read and the flags they set.
 */
1669 convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
1674 switch (scalarVal->type)
/* Null: only the flag is stored in the JEntry; no append is visible for this case. */
1677 *jentry = JENTRY_ISNULL;
/* String: append the raw bytes; the JEntry is the bare length with no flag OR'ed in. */
1681 appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
1683 *jentry = scalarVal->val.string.len;
/*
 * Numeric: pad to int alignment first, then append the numeric as-is.  The
 * recorded length covers the padding as well as the numeric.
 */
1687 numlen = VARSIZE_ANY(scalarVal->val.numeric);
1688 padlen = padBufferToInt(buffer);
1690 appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
1692 *jentry = JENTRY_ISNUMERIC | (padlen + numlen);
/* Boolean: the value is carried by the choice of flag; no append is visible for this case. */
1696 *jentry = (scalarVal->val.boolean) ?
1697 JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
/* Type tag not handled by the switch. */
1701 elog(ERROR, "invalid jsonb scalar type");
1706 * Compare two jbvString JsonbValue values, a and b.
1708 * This is a special qsort() comparator used to sort strings in certain
1709 * internal contexts where it is sufficient to have a well-defined sort order.
1710 * In particular, object pair keys are sorted according to this criterion to
1711 * facilitate cheap binary searches where we don't care about lexical sort
1714 * a and b are first sorted based on their length. If a tie-breaker is
1715 * required, only then do we consider string binary equality.
/*
 * lengthCompareJsonbStringValue: order two jbvString values by length
 * first, and by their bytes only when the lengths are equal.
 *
 * a, b: pointers treated as const JsonbValue *; both are asserted to be
 * of type jbvString.
 *
 * NOTE(review): the declaration of 'res', the else keyword and the return
 * statement are not visible in this listing (short lines are omitted).
 */
1718 lengthCompareJsonbStringValue(const void *a, const void *b)
1720 const JsonbValue *va = (const JsonbValue *) a;
1721 const JsonbValue *vb = (const JsonbValue *) b;
1724 Assert(va->type == jbvString);
1725 Assert(vb->type == jbvString);
1727 if (va->val.string.len == vb->val.string.len)
/* Same length: bytewise memcmp over that length decides. */
1729 res = memcmp(va->val.string.val, vb->val.string.val, va->val.string.len);
/* Presumably the else branch: the longer string compares as greater. */
1733 res = (va->val.string.len > vb->val.string.len) ? 1 : -1;
1740 * qsort_arg() comparator to compare JsonbPair values.
1742 * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1743 * to true iff a and b have full binary equality, since some callers have an
1744 * interest in whether the two values are equal or merely equivalent.
1746 * N.B: String comparisons here are "length-wise"
1748 * Pairs with equal keys are ordered such that the order field is respected.
/*
 * lengthCompareJsonbPair: compare two JsonbPair values by key.
 *
 * a, b: pointers treated as const JsonbPair *.
 * binequal: optional; when non-NULL it is treated as a bool * and set to
 * true if the two keys compare equal.  The visible code never clears it and
 * does not look at the pairs' values.
 *
 * NOTE(review): the declaration of 'res', the condition guarding the final
 * assignment and the return statement are not visible in this listing
 * (short lines are omitted).
 */
1751 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1753 const JsonbPair *pa = (const JsonbPair *) a;
1754 const JsonbPair *pb = (const JsonbPair *) b;
/* Keys are compared with the length-first string comparator. */
1757 res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
1758 if (res == 0 && binequal)
1759 *((bool *) binequal) = true;
1762 * Guarantee keeping order of equal pair. Unique algorithm will prefer
1763 * first element as value.
/* Tie-break on the 'order' field: the pair with the larger order sorts first. */
1766 res = (pa->order > pb->order) ? -1 : 1;
1772 * Sort and unique-ify pairs in JsonbValue object
1775 uniqueifyJsonbObject(JsonbValue *object)
1777 bool hasNonUniq = false;
1779 Assert(object->type == jbvObject);
1781 if (object->val.object.nPairs > 1)
1782 qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1783 lengthCompareJsonbPair, &hasNonUniq);
1787 JsonbPair *ptr = object->val.object.pairs + 1,
1788 *res = object->val.object.pairs;
1790 while (ptr - object->val.object.pairs < object->val.object.nPairs)
1792 /* Avoid copying over duplicate */
1793 if (lengthCompareJsonbStringValue(ptr, res) != 0)
1797 memcpy(res, ptr, sizeof(JsonbPair));
1802 object->val.object.nPairs = res + 1 - object->val.object.pairs;