Commit 563a053b authored by Teodor Sigaev

Fix behavior of ~> (cube, int) operator

The ~> (cube, int) operator was designed specifically for knn-gist search.
However, it turns out that knn-gist search can't work correctly with the
current behavior of this operator when the dataset contains cubes of variable
dimensionality. In this case, the same value of the second operator argument
can point to a different dimension depending on the dimensionality of the
particular cube. Such behavior is incompatible with gist indexing of cubes,
and knn-gist doesn't work correctly for it.

This patch changes the behavior of the ~> (cube, int) operator by introducing
a dimension numbering in which the value of the second argument unambiguously
identifies the dimension number. With the new behavior, this operator can be
correctly supported by knn-gist. Relevant changes to the cube operator class
are also included.

Backpatch to v9.6, where the operator was introduced.

Since the behavior of the ~> (cube, int) operator is changed, dependent
entities must be refreshed after upgrade. For example, expression indexes
using this operator must be reindexed, materialized views must be rebuilt, and
stored procedures and client code must be revised to correctly use the new
behavior. That should be mentioned in the release notes.

Noticed by: Tomas Vondra
Author: Alexander Korotkov
Reviewed by: Tomas Vondra, Andrey Borodin
Discussion: https://www.postgresql.org/message-id/flat/a9657f6a-b497-36ff-e56-482a2c7e3292@2ndquadrant.com
parent 3c1e9fd2
...@@ -1337,15 +1337,55 @@ g_cube_distance(PG_FUNCTION_ARGS) ...@@ -1337,15 +1337,55 @@ g_cube_distance(PG_FUNCTION_ARGS)
if (strategy == CubeKNNDistanceCoord) if (strategy == CubeKNNDistanceCoord)
{ {
/*
* Handle ordering by ~> operator. See comments of cube_coord_llur()
* for details
*/
int coord = PG_GETARG_INT32(1); int coord = PG_GETARG_INT32(1);
bool isLeaf = GistPageIsLeaf(entry->page);
if (DIM(cube) == 0) /* 0 is the only unsupported coordinate value */
retval = 0.0; if (coord <= 0)
else if (IS_POINT(cube)) ereport(ERROR,
retval = cube->x[(coord - 1) % DIM(cube)]; (errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
errmsg("cube index %d is out of bounds", coord)));
if (coord <= 2 * DIM(cube))
{
/* dimension index */
int index = (coord - 1) / 2;
/* whether this is upper bound (lower bound otherwise) */
bool upper = ((coord - 1) % 2 == 1);
if (IS_POINT(cube))
{
retval = cube->x[index];
}
else
{
if (isLeaf)
{
/* For leaf just return required upper/lower bound */
if (upper)
retval = Max(cube->x[index], cube->x[index + DIM(cube)]);
else
retval = Min(cube->x[index], cube->x[index + DIM(cube)]);
}
else
{
/*
* For non-leaf we should always return lower bound,
* because even upper bound of a child in the subtree can
* be as small as our lower bound.
*/
retval = Min(cube->x[index], cube->x[index + DIM(cube)]);
}
}
}
else else
retval = Min(cube->x[(coord - 1) % DIM(cube)], {
cube->x[(coord - 1) % DIM(cube) + DIM(cube)]); retval = 0.0;
}
} }
else else
{ {
...@@ -1492,43 +1532,73 @@ cube_coord(PG_FUNCTION_ARGS) ...@@ -1492,43 +1532,73 @@ cube_coord(PG_FUNCTION_ARGS)
} }
/* /*----
* This function works like cube_coord(), * This function works like cube_coord(), but rearranges coordinates in the
* but rearranges coordinates of corners to get cube representation * way suitable to support coordinate ordering using KNN-GiST. For historical
* in the form of (lower left, upper right). * reasons this extension allows us to create cubes in form ((2,1),(1,2)) and
* For historical reasons that extension allows us to create cubes in form * instead of normalizing such cube to ((1,1),(2,2)) it stores cube in original
* ((2,1),(1,2)) and instead of normalizing such cube to ((1,1),(2,2)) it * way. But in order to get cubes ordered by one of dimensions from the index
* stores cube in original way. But to get cubes ordered by one of dimensions * without explicit sort step we need this representation-independent coordinate
* directly from the index without extra sort step we need some * getter. Moreover, indexed dataset may contain cubes of different dimensions
* representation-independent coordinate getter. This function implements it. * number. Accordingly, this coordinate getter should be able to return
* lower/upper bound for particular dimension independently on number of cube
* dimensions.
*
* Long story short, this function uses following meaning of coordinates:
* # (2 * N - 1) -- lower bound of Nth dimension,
* # (2 * N) -- upper bound of Nth dimension.
*
* When given coordinate exceeds number of cube dimensions, then 0 returned
* (reproducing logic of GiST indexing of variable-length cubes).
*/ */
Datum Datum
cube_coord_llur(PG_FUNCTION_ARGS) cube_coord_llur(PG_FUNCTION_ARGS)
{ {
NDBOX *cube = PG_GETARG_NDBOX_P(0); NDBOX *cube = PG_GETARG_NDBOX_P(0);
int coord = PG_GETARG_INT32(1); int coord = PG_GETARG_INT32(1);
bool inverse = false;
float8 result;
if (coord <= 0 || coord > 2 * DIM(cube)) /* 0 is the only unsupported coordinate value */
if (coord <= 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_ARRAY_ELEMENT_ERROR), (errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
errmsg("cube index %d is out of bounds", coord))); errmsg("cube index %d is out of bounds", coord)));
if (coord <= DIM(cube)) if (coord <= 2 * DIM(cube))
{ {
/* dimension index */
int index = (coord - 1) / 2;
/* whether this is upper bound (lower bound otherwise) */
bool upper = ((coord - 1) % 2 == 1);
if (IS_POINT(cube)) if (IS_POINT(cube))
PG_RETURN_FLOAT8(cube->x[coord - 1]); {
result = cube->x[index];
}
else else
PG_RETURN_FLOAT8(Min(cube->x[coord - 1], {
cube->x[coord - 1 + DIM(cube)])); if (upper)
result = Max(cube->x[index], cube->x[index + DIM(cube)]);
else
result = Min(cube->x[index], cube->x[index + DIM(cube)]);
}
} }
else else
{ {
if (IS_POINT(cube)) /*
PG_RETURN_FLOAT8(cube->x[(coord - 1) % DIM(cube)]); * Return zero if coordinate is out of bound. That reproduces logic of
else * how cubes with low dimension number are expanded during GiST
PG_RETURN_FLOAT8(Max(cube->x[coord - 1], * indexing.
cube->x[coord - 1 - DIM(cube)])); */
result = 0.0;
} }
/* Inverse value if needed */
if (inverse)
result = -result;
PG_RETURN_FLOAT8(result);
} }
/* Increase or decrease box size by a radius in at least n dimensions. */ /* Increase or decrease box size by a radius in at least n dimensions. */
......
This diff is collapsed.
This diff is collapsed.
...@@ -389,20 +389,29 @@ SELECT c FROM test_cube WHERE c <@ '(3000,1000),(0,0)' ORDER BY c; ...@@ -389,20 +389,29 @@ SELECT c FROM test_cube WHERE c <@ '(3000,1000),(0,0)' ORDER BY c;
SELECT c FROM test_cube WHERE c <@ '(3000,1000),(0,0)' ORDER BY c; SELECT c FROM test_cube WHERE c <@ '(3000,1000),(0,0)' ORDER BY c;
RESET enable_bitmapscan; RESET enable_bitmapscan;
-- kNN with index -- Test kNN
INSERT INTO test_cube VALUES ('(1,1)'), ('(100000)'), ('(0, 100000)'); -- Some corner cases
SET enable_seqscan = false;
-- Test different metrics
SELECT *, c <-> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <-> '(100, 100),(500, 500)'::cube LIMIT 5; SELECT *, c <-> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <-> '(100, 100),(500, 500)'::cube LIMIT 5;
SELECT *, c <=> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <=> '(100, 100),(500, 500)'::cube LIMIT 5; SELECT *, c <=> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <=> '(100, 100),(500, 500)'::cube LIMIT 5;
SELECT *, c <#> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <#> '(100, 100),(500, 500)'::cube LIMIT 5; SELECT *, c <#> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <#> '(100, 100),(500, 500)'::cube LIMIT 5;
-- kNN-based sorting -- Test sorting by coordinates
SELECT * FROM test_cube ORDER BY c~>1 LIMIT 15; -- ascending by 1st coordinate of lower left corner SELECT c~>1, c FROM test_cube ORDER BY c~>1 LIMIT 15; -- ascending by left bound
SELECT * FROM test_cube ORDER BY c~>4 LIMIT 15; -- ascending by 2nd coordinate or upper right corner SELECT c~>2, c FROM test_cube ORDER BY c~>2 LIMIT 15; -- ascending by right bound
SELECT * FROM test_cube ORDER BY c~>1 DESC LIMIT 15; -- descending by 1st coordinate of lower left corner SELECT c~>3, c FROM test_cube ORDER BY c~>3 LIMIT 15; -- ascending by lower bound
SELECT * FROM test_cube ORDER BY c~>4 DESC LIMIT 15; -- descending by 2nd coordinate or upper right corner SELECT c~>4, c FROM test_cube ORDER BY c~>4 LIMIT 15; -- ascending by upper bound
-- same thing for index with points -- Same queries with sequential scan (should give the same results as above)
CREATE TABLE test_point(c cube); RESET enable_seqscan;
INSERT INTO test_point(SELECT cube(array[c->1,c->2,c->3,c->4]) FROM test_cube); SET enable_indexscan = OFF;
CREATE INDEX ON test_point USING gist(c); SELECT *, c <-> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <-> '(100, 100),(500, 500)'::cube LIMIT 5;
SELECT * FROM test_point ORDER BY c~>1, c~>2 LIMIT 15; -- ascending by 1st then by 2nd coordinate SELECT *, c <=> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <=> '(100, 100),(500, 500)'::cube LIMIT 5;
SELECT * FROM test_point ORDER BY c~>4 DESC LIMIT 15; -- descending by 1st coordinate SELECT *, c <#> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <#> '(100, 100),(500, 500)'::cube LIMIT 5;
SELECT c~>1, c FROM test_cube ORDER BY c~>1 LIMIT 15; -- ascending by left bound
SELECT c~>2, c FROM test_cube ORDER BY c~>2 LIMIT 15; -- ascending by right bound
SELECT c~>3, c FROM test_cube ORDER BY c~>3 LIMIT 15; -- ascending by lower bound
SELECT c~>4, c FROM test_cube ORDER BY c~>4 LIMIT 15; -- ascending by upper bound
RESET enable_indexscan;
...@@ -186,10 +186,11 @@ ...@@ -186,10 +186,11 @@
<entry><literal>a ~&gt; n</literal></entry> <entry><literal>a ~&gt; n</literal></entry>
<entry><type>float8</type></entry> <entry><type>float8</type></entry>
<entry> <entry>
Get <replaceable>n</replaceable>-th coordinate in <quote>normalized</quote> cube Get <replaceable>n</replaceable>-th coordinate of cube in following way:
representation, in which the coordinates have been rearranged into n = 2 * k - 1 means lower bound of <replaceable>k</replaceable>-th
the form <quote>lower left &mdash; upper right</quote>; that is, the dimension, n = 2 * k means upper bound of
smaller endpoint along each dimension appears first. <replaceable>k</replaceable>-th dimension. This operator is designed
for KNN-GiST support.
</entry> </entry>
</row> </row>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment