Commit b23af458 authored by Tom Lane's avatar Tom Lane

Fix erroneous hash calculations in gin_extract_jsonb_path().

The jsonb_path_ops code calculated hash values inconsistently in some cases
involving nested arrays and objects.  This would result in queries possibly
not finding entries that they should find, when using a jsonb_path_ops GIN
index for the search.  The problem cases involve JSONB values that contain
both scalars and sub-objects at the same nesting level, for example an
array containing both scalars and sub-arrays.  To fix, reset the current
stack->hash after processing each value or sub-object, not before; and
don't try to be cute about the outermost level's initial hash.

Correcting this means that existing jsonb_path_ops indexes may now be
inconsistent with the new hash calculation code.  The symptom is the same
--- searches not finding entries they should find --- but the specific
rows affected are likely to be different.  Users will need to REINDEX
jsonb_path_ops indexes to make sure that all searches work as expected.

Per bug #13756 from Daniel Cheng.  Back-patch to 9.4 where the faulty
logic was introduced.
parent 8c75ad43
...@@ -375,51 +375,31 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS) ...@@ -375,51 +375,31 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS)
parent = stack; parent = stack;
stack = (PathHashStack *) palloc(sizeof(PathHashStack)); stack = (PathHashStack *) palloc(sizeof(PathHashStack));
if (parent->parent)
{
/* /*
* We pass forward hashes from previous container nesting * We pass forward hashes from outer nesting levels so that
* levels so that nested arrays with an outermost nested * the hashes for nested values will include outer keys as
* object will have element hashes mixed with the * well as their own keys.
* outermost key. It's also somewhat useful to have
* nested objects' innermost values have hashes that are a
* function of not just their own key, but outer keys too.
* *
* Nesting an array within another array will not alter * Nesting an array within another array will not alter
* innermost scalar element hash values, but that seems * innermost scalar element hash values, but that seems
* inconsequential. * inconsequential.
*/ */
stack->hash = parent->hash; stack->hash = parent->hash;
}
else
{
/*
* At the outermost level, initialize hash with container
* type proxy value. Note that this makes JB_FARRAY and
* JB_FOBJECT part of the on-disk representation, but they
* are that in the base jsonb object storage already.
*/
stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
}
stack->parent = parent; stack->parent = parent;
break; break;
case WJB_KEY: case WJB_KEY:
/* initialize hash from parent */ /* mix this key into the current outer hash */
stack->hash = stack->parent->hash;
/* and mix in this key */
JsonbHashScalarValue(&v, &stack->hash); JsonbHashScalarValue(&v, &stack->hash);
/* hash is now ready to incorporate the value */ /* hash is now ready to incorporate the value */
break; break;
case WJB_ELEM: case WJB_ELEM:
/* array elements use parent hash mixed with element's hash */
stack->hash = stack->parent->hash;
/* FALL THRU */
case WJB_VALUE: case WJB_VALUE:
/* mix the element or value's hash into the prepared hash */ /* mix the element or value's hash into the prepared hash */
JsonbHashScalarValue(&v, &stack->hash); JsonbHashScalarValue(&v, &stack->hash);
/* and emit an index entry */ /* and emit an index entry */
entries[i++] = UInt32GetDatum(stack->hash); entries[i++] = UInt32GetDatum(stack->hash);
/* Note: we assume we'll see KEY before another VALUE */ /* reset hash for next key, value, or sub-object */
stack->hash = stack->parent->hash;
break; break;
case WJB_END_ARRAY: case WJB_END_ARRAY:
case WJB_END_OBJECT: case WJB_END_OBJECT:
...@@ -427,6 +407,11 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS) ...@@ -427,6 +407,11 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS)
parent = stack->parent; parent = stack->parent;
pfree(stack); pfree(stack);
stack = parent; stack = parent;
/* reset hash for next key, value, or sub-object */
if (stack->parent)
stack->hash = stack->parent->hash;
else
stack->hash = 0;
break; break;
default: default:
elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
......
...@@ -2420,6 +2420,56 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}'; ...@@ -2420,6 +2420,56 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}';
t t
(1 row) (1 row)
-- check some corner cases for indexed nested containment (bug #13756)
create temp table nestjsonb (j jsonb);
insert into nestjsonb (j) values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
insert into nestjsonb (j) values ('[[14,2,3]]');
insert into nestjsonb (j) values ('[1,[14,2,3]]');
create index on nestjsonb using gin(j jsonb_path_ops);
set enable_seqscan = on;
set enable_bitmapscan = off;
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
j
---------------------------------------------------
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
(1 row)
select * from nestjsonb where j @> '{"c":3}';
j
---------------------------------------------------
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
(1 row)
select * from nestjsonb where j @> '[[14]]';
j
-----------------
[[14, 2, 3]]
[1, [14, 2, 3]]
(2 rows)
set enable_seqscan = off;
set enable_bitmapscan = on;
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
j
---------------------------------------------------
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
(1 row)
select * from nestjsonb where j @> '{"c":3}';
j
---------------------------------------------------
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
(1 row)
select * from nestjsonb where j @> '[[14]]';
j
-----------------
[[14, 2, 3]]
[1, [14, 2, 3]]
(2 rows)
reset enable_seqscan;
reset enable_bitmapscan;
-- nested object field / array index lookup -- nested object field / array index lookup
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n'; SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n';
?column? ?column?
......
...@@ -618,6 +618,26 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4}]}'; ...@@ -618,6 +618,26 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4}]}';
SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},3]}'; SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},3]}';
SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}'; SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}';
-- check some corner cases for indexed nested containment (bug #13756)
create temp table nestjsonb (j jsonb);
insert into nestjsonb (j) values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
insert into nestjsonb (j) values ('[[14,2,3]]');
insert into nestjsonb (j) values ('[1,[14,2,3]]');
create index on nestjsonb using gin(j jsonb_path_ops);
set enable_seqscan = on;
set enable_bitmapscan = off;
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
select * from nestjsonb where j @> '{"c":3}';
select * from nestjsonb where j @> '[[14]]';
set enable_seqscan = off;
set enable_bitmapscan = on;
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
select * from nestjsonb where j @> '{"c":3}';
select * from nestjsonb where j @> '[[14]]';
reset enable_seqscan;
reset enable_bitmapscan;
-- nested object field / array index lookup -- nested object field / array index lookup
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n'; SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n';
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'a'; SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'a';
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment