granicus.if.org Git - postgresql/commitdiff
Fix erroneous hash calculations in gin_extract_jsonb_path().
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 5 Nov 2015 23:15:48 +0000 (18:15 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 5 Nov 2015 23:15:48 +0000 (18:15 -0500)
The jsonb_path_ops code calculated hash values inconsistently in some cases
involving nested arrays and objects.  This would result in queries possibly
not finding entries that they should find, when using a jsonb_path_ops GIN
index for the search.  The problem cases involve JSONB values that contain
both scalars and sub-objects at the same nesting level, for example an
array containing both scalars and sub-arrays.  To fix, reset the current
stack->hash after processing each value or sub-object, not before; and
don't try to be cute about the outermost level's initial hash.

Correcting this means that existing jsonb_path_ops indexes may now be
inconsistent with the new hash calculation code.  The symptom is the same
--- searches not finding entries they should find --- but the specific
rows affected are likely to be different.  Users will need to REINDEX
jsonb_path_ops indexes to make sure that all searches work as expected.

Per bug #13756 from Daniel Cheng.  Back-patch to 9.4 where the faulty
logic was introduced.

src/backend/utils/adt/jsonb_gin.c
src/test/regress/expected/jsonb.out
src/test/regress/sql/jsonb.sql

index 204fb8b5919c34dd73d1de0218571981fa9f8668..1a8d646d74da729f87fa2f10e5a21c383d527e8a 100644 (file)
@@ -375,51 +375,31 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS)
                                parent = stack;
                                stack = (PathHashStack *) palloc(sizeof(PathHashStack));
 
-                               if (parent->parent)
-                               {
-                                       /*
-                                        * We pass forward hashes from previous container nesting
-                                        * levels so that nested arrays with an outermost nested
-                                        * object will have element hashes mixed with the
-                                        * outermost key.  It's also somewhat useful to have
-                                        * nested objects' innermost values have hashes that are a
-                                        * function of not just their own key, but outer keys too.
-                                        *
-                                        * Nesting an array within another array will not alter
-                                        * innermost scalar element hash values, but that seems
-                                        * inconsequential.
-                                        */
-                                       stack->hash = parent->hash;
-                               }
-                               else
-                               {
-                                       /*
-                                        * At the outermost level, initialize hash with container
-                                        * type proxy value.  Note that this makes JB_FARRAY and
-                                        * JB_FOBJECT part of the on-disk representation, but they
-                                        * are that in the base jsonb object storage already.
-                                        */
-                                       stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
-                               }
+                               /*
+                                * We pass forward hashes from outer nesting levels so that
+                                * the hashes for nested values will include outer keys as
+                                * well as their own keys.
+                                *
+                                * Nesting an array within another array will not alter
+                                * innermost scalar element hash values, but that seems
+                                * inconsequential.
+                                */
+                               stack->hash = parent->hash;
                                stack->parent = parent;
                                break;
                        case WJB_KEY:
-                               /* initialize hash from parent */
-                               stack->hash = stack->parent->hash;
-                               /* and mix in this key */
+                               /* mix this key into the current outer hash */
                                JsonbHashScalarValue(&v, &stack->hash);
                                /* hash is now ready to incorporate the value */
                                break;
                        case WJB_ELEM:
-                               /* array elements use parent hash mixed with element's hash */
-                               stack->hash = stack->parent->hash;
-                               /* FALL THRU */
                        case WJB_VALUE:
                                /* mix the element or value's hash into the prepared hash */
                                JsonbHashScalarValue(&v, &stack->hash);
                                /* and emit an index entry */
                                entries[i++] = UInt32GetDatum(stack->hash);
-                               /* Note: we assume we'll see KEY before another VALUE */
+                               /* reset hash for next key, value, or sub-object */
+                               stack->hash = stack->parent->hash;
                                break;
                        case WJB_END_ARRAY:
                        case WJB_END_OBJECT:
@@ -427,6 +407,11 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS)
                                parent = stack->parent;
                                pfree(stack);
                                stack = parent;
+                               /* reset hash for next key, value, or sub-object */
+                               if (stack->parent)
+                                       stack->hash = stack->parent->hash;
+                               else
+                                       stack->hash = 0;
                                break;
                        default:
                                elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
index 58c2ab02736b3f75bdaba678d5c2665f6bda64f6..89c3efd198c130dc35b69519acf75d0e48cb25c8 100644 (file)
@@ -2420,6 +2420,56 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}';
  t
 (1 row)
 
+-- check some corner cases for indexed nested containment (bug #13756)
+create temp table nestjsonb (j jsonb);
+insert into nestjsonb (j) values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
+insert into nestjsonb (j) values ('[[14,2,3]]');
+insert into nestjsonb (j) values ('[1,[14,2,3]]');
+create index on nestjsonb using gin(j jsonb_path_ops);
+set enable_seqscan = on;
+set enable_bitmapscan = off;
+select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
+                         j                         
+---------------------------------------------------
+ {"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
+(1 row)
+
+select * from nestjsonb where j @> '{"c":3}';
+                         j                         
+---------------------------------------------------
+ {"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
+(1 row)
+
+select * from nestjsonb where j @> '[[14]]';
+        j        
+-----------------
+ [[14, 2, 3]]
+ [1, [14, 2, 3]]
+(2 rows)
+
+set enable_seqscan = off;
+set enable_bitmapscan = on;
+select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
+                         j                         
+---------------------------------------------------
+ {"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
+(1 row)
+
+select * from nestjsonb where j @> '{"c":3}';
+                         j                         
+---------------------------------------------------
+ {"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
+(1 row)
+
+select * from nestjsonb where j @> '[[14]]';
+        j        
+-----------------
+ [[14, 2, 3]]
+ [1, [14, 2, 3]]
+(2 rows)
+
+reset enable_seqscan;
+reset enable_bitmapscan;
 -- nested object field / array index lookup
 SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n';
  ?column? 
index a6b6d482053c0552d626961c3d8e01ee5c344b4f..b724f0ab1f70c45f0f34c55be3f06da06e07defd 100644 (file)
@@ -618,6 +618,26 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4}]}';
 SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},3]}';
 SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}';
 
+-- check some corner cases for indexed nested containment (bug #13756)
+create temp table nestjsonb (j jsonb);
+insert into nestjsonb (j) values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
+insert into nestjsonb (j) values ('[[14,2,3]]');
+insert into nestjsonb (j) values ('[1,[14,2,3]]');
+create index on nestjsonb using gin(j jsonb_path_ops);
+
+set enable_seqscan = on;
+set enable_bitmapscan = off;
+select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
+select * from nestjsonb where j @> '{"c":3}';
+select * from nestjsonb where j @> '[[14]]';
+set enable_seqscan = off;
+set enable_bitmapscan = on;
+select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
+select * from nestjsonb where j @> '{"c":3}';
+select * from nestjsonb where j @> '[[14]]';
+reset enable_seqscan;
+reset enable_bitmapscan;
+
 -- nested object field / array index lookup
 SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n';
 SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'a';