From 12e611d43e6efbf0e36014a3055ed47366facee7 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 11 May 2014 12:06:04 -0400 Subject: [PATCH] Rename jsonb_hash_ops to jsonb_path_ops. There's no longer much pressure to switch the default GIN opclass for jsonb, but there was still some unhappiness with the name "jsonb_hash_ops", since hashing is no longer a distinguishing property of that opclass, and anyway it seems like a relatively minor detail. At the suggestion of Heikki Linnakangas, we'll use "jsonb_path_ops" instead; that captures the important characteristic that each index entry depends on the entire path from the document root to the indexed value. Also add a user-facing explanation of the implementation properties of these two opclasses. --- doc/src/sgml/gin.sgml | 4 +-- doc/src/sgml/json.sgml | 45 ++++++++++++++++++++++++--- src/backend/utils/adt/jsonb_gin.c | 27 ++++++++-------- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_amop.h | 6 ++-- src/include/catalog/pg_opclass.h | 2 +- src/include/catalog/pg_opfamily.h | 4 +-- src/include/catalog/pg_proc.h | 8 ++--- src/include/utils/jsonb.h | 12 +++---- src/test/regress/expected/jsonb.out | 4 +-- src/test/regress/expected/jsonb_1.out | 4 +-- src/test/regress/sql/jsonb.sql | 4 +-- 12 files changed, 78 insertions(+), 44 deletions(-) diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml index 0b3d6eeb63..1cbc73c70c 100644 --- a/doc/src/sgml/gin.sgml +++ b/doc/src/sgml/gin.sgml @@ -395,7 +395,7 @@ - jsonb_hash_ops + jsonb_path_ops jsonb @> @@ -415,7 +415,7 @@ Of the two operator classes for type jsonb, jsonb_ops - is the default. jsonb_hash_ops supports fewer operators but + is the default. jsonb_path_ops supports fewer operators but offers better performance for those operators. See for details. diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml index 518fe63873..66426189ca 100644 --- a/doc/src/sgml/json.sgml +++ b/doc/src/sgml/json.sgml @@ -156,7 +156,7 @@ - <type>jsonb</> Input and Output Syntax + JSON Input and Output Syntax The input/output syntax for the JSON data types is as specified in RFC 7159. @@ -366,11 +366,11 @@ SELECT '"foo"'::jsonb ? 'foo'; CREATE INDEX idxgin ON api USING gin (jdoc); - The non-default GIN operator class jsonb_hash_ops + The non-default GIN operator class jsonb_path_ops supports indexing the @> operator only. An example of creating an index with this operator class is: -CREATE INDEX idxginh ON api USING gin (jdoc jsonb_hash_ops); +CREATE INDEX idxginp ON api USING gin (jdoc jsonb_path_ops); @@ -444,10 +444,10 @@ SELECT jdoc->'guid', jdoc->'name' FROM api WHERE jdoc @> '{"tags": ["qu - Although the jsonb_hash_ops operator class supports + Although the jsonb_path_ops operator class supports only queries with the @> operator, it has notable performance advantages over the default operator - class jsonb_ops. A jsonb_hash_ops + class jsonb_ops. A jsonb_path_ops index is usually much smaller than a jsonb_ops index over the same data, and the specificity of searches is better, particularly when queries contain keys that appear frequently in the @@ -455,6 +455,41 @@ SELECT jdoc->'guid', jdoc->'name' FROM api WHERE jdoc @> '{"tags": ["qu than with the default operator class. + + The technical difference between a jsonb_ops + and a jsonb_path_ops GIN index is that the former + creates independent index items for each key and value in the data, + while the latter creates index items only for each value in the + data.For this purpose, the term value + includes array elements, though JSON terminology sometimes considers + array elements distinct from values within objects. + But in jsonb_path_ops, each index item is a hash + of both the value and the key(s) leading to it; for example to index + {"foo": {"bar": "baz"}}, a single index item would + be created incorporating all three of foo, bar, + and baz into the hash value. Thus a containment query + looking for this structure would result in an extremely specific index + search; but there is no way at all to find out whether foo + appears as a key. On the other hand, a jsonb_ops + index would create three index items representing foo, + bar, and baz separately; then to do the + containment query, it would look for rows containing all three of + these items. While GIN indexes can perform such an AND search fairly + efficiently, it will still be less specific and slower than the + equivalent jsonb_path_ops search, especially if + there are a very large number of rows containing any single one of the + three index items. + + + + A disadvantage of the jsonb_path_ops approach is + that it produces no index entries for JSON structures not containing + any values, such as {"a": {}}. If a search for + documents containing such a structure is requested, it will require a + full-index scan, which is quite slow. jsonb_path_ops is + therefore ill-suited for applications that often perform such searches. + + jsonb also supports btree and hash indexes. These are usually useful only if it's important to check diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c index 57a0b2c8a3..069ee03080 100644 --- a/src/backend/utils/adt/jsonb_gin.c +++ b/src/backend/utils/adt/jsonb_gin.c @@ -315,9 +315,9 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS) /* * - * jsonb_hash_ops GIN opclass support functions + * jsonb_path_ops GIN opclass support functions * - * In a jsonb_hash_ops index, the GIN keys are uint32 hashes, one per JSON + * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON * value; but the JSON key(s) leading to each value are also included in its * hash computation. This means we can only support containment queries, * but the index can distinguish, for example, {"foo": 42} from {"bar": 42} @@ -326,7 +326,7 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS) */ Datum -gin_extract_jsonb_hash(PG_FUNCTION_ARGS) +gin_extract_jsonb_path(PG_FUNCTION_ARGS) { Jsonb *jb = PG_GETARG_JSONB(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); @@ -349,7 +349,7 @@ gin_extract_jsonb_hash(PG_FUNCTION_ARGS) /* Otherwise, use 2 * root count as initial estimate of result size */ entries = (Datum *) palloc(sizeof(Datum) * total); - /* We keep a stack of hashes corresponding to parent key levels */ + /* We keep a stack of partial hashes corresponding to parent key levels */ tail.parent = NULL; tail.hash = 0; stack = &tail; @@ -439,7 +439,7 @@ gin_extract_jsonb_hash(PG_FUNCTION_ARGS) } Datum -gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS) +gin_extract_jsonb_query_path(PG_FUNCTION_ARGS) { int32 *nentries = (int32 *) PG_GETARG_POINTER(1); StrategyNumber strategy = PG_GETARG_UINT16(2); @@ -449,9 +449,9 @@ gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS) if (strategy != JsonbContainsStrategyNumber) elog(ERROR, "unrecognized strategy number: %d", strategy); - /* Query is a jsonb, so just apply gin_extract_jsonb_hash ... */ + /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */ entries = (Datum *) - DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_hash, + DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path, PG_GETARG_DATUM(0), PointerGetDatum(nentries))); @@ -463,7 +463,7 @@ gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS) } Datum -gin_consistent_jsonb_hash(PG_FUNCTION_ARGS) +gin_consistent_jsonb_path(PG_FUNCTION_ARGS) { bool *check = (bool *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); @@ -480,13 +480,12 @@ gin_consistent_jsonb_hash(PG_FUNCTION_ARGS) elog(ERROR, "unrecognized strategy number: %d", strategy); /* - * jsonb_hash_ops is necessarily lossy, not only because of hash + * jsonb_path_ops is necessarily lossy, not only because of hash * collisions but also because it doesn't preserve complete information * about the structure of the JSON object. Besides, there are some - * special rules around the containment of raw scalar arrays and regular - * arrays that are not handled here. So we must always recheck a match. - * However, if not all of the keys are present, the tuple certainly - * doesn't match. + * special rules around the containment of raw scalars in arrays that are + * not handled here. So we must always recheck a match. However, if not + * all of the keys are present, the tuple certainly doesn't match. */ *recheck = true; for (i = 0; i < nkeys; i++) @@ -502,7 +501,7 @@ gin_consistent_jsonb_hash(PG_FUNCTION_ARGS) } Datum -gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS) +gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS) { GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 76422e5587..c46a92b82f 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201405093 +#define CATALOG_VERSION_NO 201405111 #endif diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h index 8efd3be3c6..264059f17e 100644 --- a/src/include/catalog/pg_amop.h +++ b/src/include/catalog/pg_amop.h @@ -787,12 +787,12 @@ DATA(insert ( 4033 3802 3802 4 s 3245 403 0 )); DATA(insert ( 4033 3802 3802 5 s 3243 403 0 )); /* - * hash jsonb ops + * hash jsonb_ops */ DATA(insert ( 4034 3802 3802 1 s 3240 405 0 )); /* - * GIN jsonb ops + * GIN jsonb_ops */ DATA(insert ( 4036 3802 3802 7 s 3246 2742 0 )); DATA(insert ( 4036 3802 25 9 s 3247 2742 0 )); @@ -800,7 +800,7 @@ DATA(insert ( 4036 3802 1009 10 s 3248 2742 0 )); DATA(insert ( 4036 3802 1009 11 s 3249 2742 0 )); /* - * GIN jsonb hash ops + * GIN jsonb_path_ops */ DATA(insert ( 4037 3802 3802 7 s 3246 2742 0 )); diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h index ecf70639c8..369888665f 100644 --- a/src/include/catalog/pg_opclass.h +++ b/src/include/catalog/pg_opclass.h @@ -232,6 +232,6 @@ DATA(insert ( 4000 text_ops PGNSP PGUID 4017 25 t 0 )); DATA(insert ( 403 jsonb_ops PGNSP PGUID 4033 3802 t 0 )); DATA(insert ( 405 jsonb_ops PGNSP PGUID 4034 3802 t 0 )); DATA(insert ( 2742 jsonb_ops PGNSP PGUID 4036 3802 t 25 )); -DATA(insert ( 2742 jsonb_hash_ops PGNSP PGUID 4037 3802 f 23 )); +DATA(insert ( 2742 jsonb_path_ops PGNSP PGUID 4037 3802 f 23 )); #endif /* PG_OPCLASS_H */ diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index 9e8f4ac5b6..c83ac8c1a4 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -148,11 +148,11 @@ DATA(insert OID = 3474 ( 4000 range_ops PGNSP PGUID )); DATA(insert OID = 4015 ( 4000 quad_point_ops PGNSP PGUID )); DATA(insert OID = 4016 ( 4000 kd_point_ops PGNSP PGUID )); DATA(insert OID = 4017 ( 4000 text_ops PGNSP PGUID )); +#define TEXT_SPGIST_FAM_OID 4017 DATA(insert OID = 4033 ( 403 jsonb_ops PGNSP PGUID )); DATA(insert OID = 4034 ( 405 jsonb_ops PGNSP PGUID )); DATA(insert OID = 4035 ( 783 jsonb_ops PGNSP PGUID )); DATA(insert OID = 4036 ( 2742 jsonb_ops PGNSP PGUID )); -DATA(insert OID = 4037 ( 2742 jsonb_hash_ops PGNSP PGUID )); -#define TEXT_SPGIST_FAM_OID 4017 +DATA(insert OID = 4037 ( 2742 jsonb_path_ops PGNSP PGUID )); #endif /* PG_OPFAMILY_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index e601ccd09c..72170af0e8 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4645,13 +4645,13 @@ DATA(insert OID = 3484 ( gin_consistent_jsonb PGNSP PGUID 12 1 0 0 0 f f f f t DESCR("GIN support"); DATA(insert OID = 3488 ( gin_triconsistent_jsonb PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 16 "2281 21 2277 23 2281 2281 2281" _null_ _null_ _null_ _null_ gin_triconsistent_jsonb _null_ _null_ _null_ )); DESCR("GIN support"); -DATA(insert OID = 3485 ( gin_extract_jsonb_hash PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ gin_extract_jsonb_hash _null_ _null_ _null_ )); +DATA(insert OID = 3485 ( gin_extract_jsonb_path PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ gin_extract_jsonb_path _null_ _null_ _null_ )); DESCR("GIN support"); -DATA(insert OID = 3486 ( gin_extract_jsonb_query_hash PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 2281 "2277 2281 21 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gin_extract_jsonb_query_hash _null_ _null_ _null_ )); +DATA(insert OID = 3486 ( gin_extract_jsonb_query_path PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 2281 "2277 2281 21 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gin_extract_jsonb_query_path _null_ _null_ _null_ )); DESCR("GIN support"); -DATA(insert OID = 3487 ( gin_consistent_jsonb_hash PGNSP PGUID 12 1 0 0 0 f f f f t f i 8 0 16 "2281 21 2277 23 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gin_consistent_jsonb_hash _null_ _null_ _null_ )); +DATA(insert OID = 3487 ( gin_consistent_jsonb_path PGNSP PGUID 12 1 0 0 0 f f f f t f i 8 0 16 "2281 21 2277 23 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gin_consistent_jsonb_path _null_ _null_ _null_ )); DESCR("GIN support"); -DATA(insert OID = 3489 ( gin_triconsistent_jsonb_hash PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 16 "2281 21 2277 23 2281 2281 2281" _null_ _null_ _null_ _null_ gin_triconsistent_jsonb_hash _null_ _null_ _null_ )); +DATA(insert OID = 3489 ( gin_triconsistent_jsonb_path PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 16 "2281 21 2277 23 2281 2281 2281" _null_ _null_ _null_ _null_ gin_triconsistent_jsonb_path _null_ _null_ _null_ )); DESCR("GIN support"); /* txid */ diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h index bb8c380ee3..add76280ba 100644 --- a/src/include/utils/jsonb.h +++ b/src/include/utils/jsonb.h @@ -332,18 +332,18 @@ extern Datum jsonb_eq(PG_FUNCTION_ARGS); extern Datum jsonb_cmp(PG_FUNCTION_ARGS); extern Datum jsonb_hash(PG_FUNCTION_ARGS); -/* GIN support functions */ +/* GIN support functions for jsonb_ops */ extern Datum gin_compare_jsonb(PG_FUNCTION_ARGS); extern Datum gin_extract_jsonb(PG_FUNCTION_ARGS); extern Datum gin_extract_jsonb_query(PG_FUNCTION_ARGS); extern Datum gin_consistent_jsonb(PG_FUNCTION_ARGS); extern Datum gin_triconsistent_jsonb(PG_FUNCTION_ARGS); -/* GIN hash opclass functions */ -extern Datum gin_extract_jsonb_hash(PG_FUNCTION_ARGS); -extern Datum gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS); -extern Datum gin_consistent_jsonb_hash(PG_FUNCTION_ARGS); -extern Datum gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS); +/* GIN support functions for jsonb_path_ops */ +extern Datum gin_extract_jsonb_path(PG_FUNCTION_ARGS); +extern Datum gin_extract_jsonb_query_path(PG_FUNCTION_ARGS); +extern Datum gin_consistent_jsonb_path(PG_FUNCTION_ARGS); +extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS); /* Support functions */ extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b); diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index c5a7d64ae4..ae7c506811 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -1685,9 +1685,9 @@ SELECT count(*) FROM testjsonb WHERE j = '{"pos":98, "line":371, "node":"CBA", " 1 (1 row) ---gin hash +--gin path opclass DROP INDEX jidx; -CREATE INDEX jidx ON testjsonb USING gin (j jsonb_hash_ops); +CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops); SET enable_seqscan = off; SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}'; count diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out index 0e3ebd161e..38a95b43f8 100644 --- a/src/test/regress/expected/jsonb_1.out +++ b/src/test/regress/expected/jsonb_1.out @@ -1685,9 +1685,9 @@ SELECT count(*) FROM testjsonb WHERE j = '{"pos":98, "line":371, "node":"CBA", " 1 (1 row) ---gin hash +--gin path opclass DROP INDEX jidx; -CREATE INDEX jidx ON testjsonb USING gin (j jsonb_hash_ops); +CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops); SET enable_seqscan = off; SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}'; count diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 3e9048911b..7527925b2c 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -391,9 +391,9 @@ SET enable_seqscan = off; SELECT count(*) FROM testjsonb WHERE j > '{"p":1}'; SELECT count(*) FROM testjsonb WHERE j = '{"pos":98, "line":371, "node":"CBA", "indexed":true}'; ---gin hash +--gin path opclass DROP INDEX jidx; -CREATE INDEX jidx ON testjsonb USING gin (j jsonb_hash_ops); +CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops); SET enable_seqscan = off; SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}'; -- 2.40.0