1 /*-------------------------------------------------------------------------
4 * Declarations for jsonb data type support.
6 * Copyright (c) 1996-2014, PostgreSQL Global Development Group
8 * src/include/utils/jsonb.h
10 *-------------------------------------------------------------------------
15 #include "lib/stringinfo.h"
16 #include "utils/array.h"
17 #include "utils/numeric.h"
19 /* Tokens used when sequentially processing a jsonb value */
32 /* Strategy numbers for GIN index opclasses */
33 #define JsonbContainsStrategyNumber 7
34 #define JsonbExistsStrategyNumber 9
35 #define JsonbExistsAnyStrategyNumber 10
36 #define JsonbExistsAllStrategyNumber 11
39 * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
40 * keys and values. The storage format is text. The first byte of the text
41 * string distinguishes whether this is a key (always a string), null value,
42 * boolean value, numeric value, or string value. However, array elements
43 * that are strings are marked as though they were keys; this imprecision
44 * supports the definition of the "exists" operator, which treats array
45 * elements like keys. The remainder of the text string is empty for a null
46 * value, "t" or "f" for a boolean value, a normalized print representation of
47 * a numeric value, or the text of a string value. However, if the length of
48 * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
49 * the text representation and store an 8-hex-digit representation of the
50 * uint32 hash value, marking the prefix byte with an additional bit to
51 * distinguish that this has happened. Hashing long strings saves space and
52 * ensures that we won't overrun the maximum entry length for a GIN index.
53 * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
54 * to ensure that the on-disk text datum will have a short varlena header.)
55 * Note that when any hashed item appears in a query, we must recheck index
56 * matches against the heap tuple; currently, this costs nothing because we
57 * must always recheck for other reasons.
59 #define JGINFLAG_KEY 0x01 /* key (or string array element) */
60 #define JGINFLAG_NULL 0x02 /* null value */
61 #define JGINFLAG_BOOL 0x03 /* boolean value */
62 #define JGINFLAG_NUM 0x04 /* numeric value */
63 #define JGINFLAG_STR 0x05 /* string value (if not an array element) */
64 #define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed */
65 #define JGIN_MAXLENGTH 125 /* max length of text part before hashing */
67 /* Convenience macros */
68 #define DatumGetJsonb(d) ((Jsonb *) PG_DETOAST_DATUM(d))
69 #define JsonbGetDatum(p) PointerGetDatum(p)
70 #define PG_GETARG_JSONB(x) DatumGetJsonb(PG_GETARG_DATUM(x))
71 #define PG_RETURN_JSONB(x) PG_RETURN_POINTER(x)
73 typedef struct JsonbPair JsonbPair;
74 typedef struct JsonbValue JsonbValue;
77 * Jsonbs are varlena objects, so must meet the varlena convention that the
78 * first int32 of the object contains the total object size in bytes. Be sure
79 * to use VARSIZE() and SET_VARSIZE() to access it, though!
81 * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
82 * representation. Often, JsonbValues are just shims through which a Jsonb
83 * buffer is accessed, but they can also be deep copied and passed around.
85 * Jsonb is a tree structure. Each node in the tree consists of a JEntry
86 * header and a variable-length content (possibly of zero size). The JEntry
87 * header indicates what kind of a node it is, e.g. a string or an array,
88 * and provides the length of its variable-length portion.
90 * The JEntry and the content of a node are not stored physically together.
91 * Instead, the container array or object has an array that holds the JEntrys
92 * of all the child nodes, followed by their variable-length portions.
94 * The root node is an exception; it has no parent array or object that could
95 * hold its JEntry. Hence, no JEntry header is stored for the root node. It
96 * is implicitly known that the root node must be an array or an object,
97 * so we can get away without the type indicator as long as we can distinguish
98 * the two. For that purpose, both an array and an object begin with a uint32
99 * header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked
100 * scalar value needs to be stored as a Jsonb value, what we actually store is
101 * an array with one element, with the flags in the array's header field set
102 * to JB_FSCALAR | JB_FARRAY.
104 * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct,
105 * the variable-length portion of some node types is aligned to a 4-byte
106 * boundary, while others are not. When alignment is needed, the padding is
107 * in the beginning of the node that requires it. For example, if a numeric
108 * node is stored after a string node, so that the numeric node begins at
109 * offset 3, the variable-length portion of the numeric node will begin with
110 * one padding byte so that the actual numeric data is 4-byte aligned.
116 * The least significant 28 bits store either the data length of the entry,
117 * or its end+1 offset from the start of the variable-length portion of the
118 * containing object. The next three bits store the type of the entry, and
119 * the high-order bit tells whether the least significant bits store a length
122 * The reason for the offset-or-length complication is to compromise between
123 * access speed and data compressibility. In the initial design each JEntry
124 * always stored an offset, but this resulted in JEntry arrays with horrible
125 * compressibility properties, so that TOAST compression of a JSONB did not
126 * work well. Storing only lengths would greatly improve compressibility,
127 * but it makes random access into large arrays expensive (O(N) not O(1)).
128 * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
129 * a length in the rest. This results in reasonably compressible data (as
130 * long as the stride isn't too small). We may have to examine as many as
131 * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
132 * given item, but that's still O(1) no matter how large the container is.
134 * We could avoid eating a flag bit for this purpose if we were to store
135 * the stride in the container header, or if we were willing to treat the
136 * stride as an unchangeable constant. Neither of those options is very
139 typedef uint32 JEntry;
141 #define JENTRY_OFFLENMASK 0x0FFFFFFF
142 #define JENTRY_TYPEMASK 0x70000000
143 #define JENTRY_HAS_OFF 0x80000000
145 /* values stored in the type bits */
146 #define JENTRY_ISSTRING 0x00000000
147 #define JENTRY_ISNUMERIC 0x10000000
148 #define JENTRY_ISBOOL_FALSE 0x20000000
149 #define JENTRY_ISBOOL_TRUE 0x30000000
150 #define JENTRY_ISNULL 0x40000000
151 #define JENTRY_ISCONTAINER 0x50000000 /* array or object */
153 /* Access macros. Note possible multiple evaluations */
154 #define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK)
155 #define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0)
156 #define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
157 #define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
158 #define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
159 #define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
160 #define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
161 #define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
162 #define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
164 /* Macro for advancing an offset variable to the next JEntry */
165 #define JBE_ADVANCE_OFFSET(offset, je) \
168 if (JBE_HAS_OFF(je_)) \
169 (offset) = JBE_OFFLENFLD(je_); \
171 (offset) += JBE_OFFLENFLD(je_); \
175 * We store an offset, not a length, every JB_OFFSET_STRIDE children.
176 * Caution: this macro should only be referenced when creating a JSONB
177 * value. When examining an existing value, pay attention to the HAS_OFF
178 * bits instead. This allows changes in the offset-placement heuristic
179 * without breaking on-disk compatibility.
181 #define JB_OFFSET_STRIDE 32
184 * A jsonb array or object node, within a Jsonb Datum.
186 * An array has one child for each element, stored in array order.
188 * An object has two children for each key/value pair. The keys all appear
189 * first, in key sort order; then the values appear, in an order matching the
190 * key order. This arrangement keeps the keys compact in memory, making a
191 * search for a particular key more cache-friendly.
193 typedef struct JsonbContainer
195 uint32 header; /* number of elements or key/value pairs, and
197 JEntry children[1]; /* variable length */
199 /* the data for each child node follows. */
202 /* flags for the header-field in JsonbContainer */
203 #define JB_CMASK 0x0FFFFFFF /* mask for count field */
204 #define JB_FSCALAR 0x10000000 /* flag bits */
205 #define JB_FOBJECT 0x20000000
206 #define JB_FARRAY 0x40000000
208 /* The top-level on-disk format for a jsonb datum. */
211 int32 vl_len_; /* varlena header (do not touch directly!) */
215 /* convenience macros for accessing the root container in a Jsonb datum */
216 #define JB_ROOT_COUNT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_CMASK)
217 #define JB_ROOT_IS_SCALAR(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FSCALAR)
218 #define JB_ROOT_IS_OBJECT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FOBJECT)
219 #define JB_ROOT_IS_ARRAY(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FARRAY)
223 * JsonbValue: In-memory representation of Jsonb. This is a convenient
224 * deserialized representation, that can easily support using the "val"
225 * union across underlying types during manipulation. The Jsonb on-disk
226 * representation has various alignment considerations.
237 /* Composite types */
240 /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
242 } type; /* Influences sort order */
251 char *val; /* Not necessarily null-terminated */
252 } string; /* String primitive type */
258 bool rawScalar; /* Top-level "raw scalar" array? */
259 } array; /* Array container type */
263 int nPairs; /* 1 pair, 2 elements */
265 } object; /* Associative container type */
270 JsonbContainer *data;
271 } binary; /* Array or object, in on-disk format */
275 #define IsAJsonbScalar(jsonbval) ((jsonbval)->type >= jbvNull && \
276 (jsonbval)->type <= jbvBool)
279 * Key/value pair within an Object.
281 * This struct type is only used briefly while constructing a Jsonb; it is
282 * *not* the on-disk representation.
284 * Pairs with duplicate keys are de-duplicated. We store the originally
285 * observed pair ordering for the purpose of removing duplicates in a
286 * well-defined way (which is "last observed wins").
290 JsonbValue key; /* Must be a jbvString */
291 JsonbValue value; /* May be of any type */
292 uint32 order; /* Pair's index in original sequence */
295 /* Conversion state used when parsing Jsonb from text, or for type coercion */
296 typedef struct JsonbParseState
300 struct JsonbParseState *next;
304 * JsonbIterator holds details of the type for each iteration. It also stores a
305 * Jsonb varlena buffer, which can be directly accessed in some contexts.
316 typedef struct JsonbIterator
318 /* Container being iterated */
319 JsonbContainer *container;
320 uint32 nElems; /* Number of elements in children array (will
321 * be nPairs for objects) */
322 bool isScalar; /* Pseudo-array scalar value? */
323 JEntry *children; /* JEntrys for child nodes */
324 /* Data proper. This points to the beginning of the variable-length data */
327 /* Current item in buffer (up to nElems) */
330 /* Data offset corresponding to current item */
331 uint32 curDataOffset;
334 * If the container is an object, we want to return keys and values
335 * alternately; so curDataOffset points to the current key, and
336 * curValueOffset points to the current value.
338 uint32 curValueOffset;
341 JsonbIterState state;
343 struct JsonbIterator *parent;
347 extern Datum jsonb_in(PG_FUNCTION_ARGS);
348 extern Datum jsonb_out(PG_FUNCTION_ARGS);
349 extern Datum jsonb_recv(PG_FUNCTION_ARGS);
350 extern Datum jsonb_send(PG_FUNCTION_ARGS);
351 extern Datum jsonb_typeof(PG_FUNCTION_ARGS);
353 /* Indexing-related ops */
354 extern Datum jsonb_exists(PG_FUNCTION_ARGS);
355 extern Datum jsonb_exists_any(PG_FUNCTION_ARGS);
356 extern Datum jsonb_exists_all(PG_FUNCTION_ARGS);
357 extern Datum jsonb_contains(PG_FUNCTION_ARGS);
358 extern Datum jsonb_contained(PG_FUNCTION_ARGS);
359 extern Datum jsonb_ne(PG_FUNCTION_ARGS);
360 extern Datum jsonb_lt(PG_FUNCTION_ARGS);
361 extern Datum jsonb_gt(PG_FUNCTION_ARGS);
362 extern Datum jsonb_le(PG_FUNCTION_ARGS);
363 extern Datum jsonb_ge(PG_FUNCTION_ARGS);
364 extern Datum jsonb_eq(PG_FUNCTION_ARGS);
365 extern Datum jsonb_cmp(PG_FUNCTION_ARGS);
366 extern Datum jsonb_hash(PG_FUNCTION_ARGS);
368 /* GIN support functions for jsonb_ops */
369 extern Datum gin_compare_jsonb(PG_FUNCTION_ARGS);
370 extern Datum gin_extract_jsonb(PG_FUNCTION_ARGS);
371 extern Datum gin_extract_jsonb_query(PG_FUNCTION_ARGS);
372 extern Datum gin_consistent_jsonb(PG_FUNCTION_ARGS);
373 extern Datum gin_triconsistent_jsonb(PG_FUNCTION_ARGS);
375 /* GIN support functions for jsonb_path_ops */
376 extern Datum gin_extract_jsonb_path(PG_FUNCTION_ARGS);
377 extern Datum gin_extract_jsonb_query_path(PG_FUNCTION_ARGS);
378 extern Datum gin_consistent_jsonb_path(PG_FUNCTION_ARGS);
379 extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS);
381 /* Support functions */
382 extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
383 extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
384 extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
385 extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
388 extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
390 extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
391 JsonbIteratorToken seq, JsonbValue *scalarVal);
392 extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
393 extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
395 extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
396 extern bool JsonbDeepContains(JsonbIterator **val,
397 JsonbIterator **mContained);
398 extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
400 /* jsonb.c support function */
401 extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
404 #endif /* __JSONB_H__ */