From 81ee726d87ec67c4f2846110c99f72e8a20dcd07 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 28 Dec 2015 14:39:09 -0500 Subject: [PATCH] Code and docs review for cube kNN support. Commit 33bd250f6c4cc309f4eeb657da80f1e7743b3e5c could have done with some more review: Adjust coding so that compilers unfamiliar with elog/ereport don't complain about uninitialized values. Fix misuse of PG_GETARG_INT16 to retrieve arguments declared as "integer" at the SQL level. (This was evidently copied from cube_ll_coord and cube_ur_coord, but those were wrong too.) Fix non-style-guide-conforming error messages. Fix underparenthesized if statements, which pgindent would have made a hash of, and remove some unnecessary parens elsewhere. Run pgindent over new code. Revise documentation: repeated accretion of more operators without any rethinking of the text already there had left things in a bit of a mess. Merge all the cube operators into one table and adjust surrounding text appropriately. David Rowley and Tom Lane --- contrib/cube/cube.c | 128 ++++++++++---------- contrib/cube/expected/cube.out | 14 +-- contrib/cube/expected/cube_1.out | 14 +-- contrib/cube/expected/cube_2.out | 14 +-- contrib/cube/expected/cube_3.out | 14 +-- doc/src/sgml/cube.sgml | 199 +++++++++++++++---------------- 6 files changed, 186 insertions(+), 197 deletions(-) diff --git a/contrib/cube/cube.c b/contrib/cube/cube.c index 35ffb6c3f7..3feddef8f3 100644 --- a/contrib/cube/cube.c +++ b/contrib/cube/cube.c @@ -1275,6 +1275,7 @@ distance_taxicab(PG_FUNCTION_ARGS) if (DIM(a) < DIM(b)) { NDBOX *tmp = b; + b = a; a = tmp; swapped = true; @@ -1283,11 +1284,13 @@ distance_taxicab(PG_FUNCTION_ARGS) distance = 0.0; /* compute within the dimensions of (b) */ for (i = 0; i < DIM(b); i++) - distance += fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i))); + distance += fabs(distance_1D(LL_COORD(a, i), UR_COORD(a, i), + LL_COORD(b, i), UR_COORD(b, i))); /* compute distance to zero for those dimensions in (a) absent in (b) */ for (i = DIM(b); i < DIM(a); i++) - distance += fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0)); + distance += fabs(distance_1D(LL_COORD(a, i), UR_COORD(a, i), + 0.0, 0.0)); if (swapped) { @@ -1309,13 +1312,15 @@ distance_chebyshev(PG_FUNCTION_ARGS) NDBOX *a = PG_GETARG_NDBOX(0), *b = PG_GETARG_NDBOX(1); bool swapped = false; - double d, distance; + double d, + distance; int i; /* swap the box pointers if needed */ if (DIM(a) < DIM(b)) { NDBOX *tmp = b; + b = a; a = tmp; swapped = true; @@ -1325,7 +1330,8 @@ distance_chebyshev(PG_FUNCTION_ARGS) /* compute within the dimensions of (b) */ for (i = 0; i < DIM(b); i++) { - d = fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i))); + d = fabs(distance_1D(LL_COORD(a, i), UR_COORD(a, i), + LL_COORD(b, i), UR_COORD(b, i))); if (d > distance) distance = d; } @@ -1333,7 +1339,7 @@ distance_chebyshev(PG_FUNCTION_ARGS) /* compute distance to zero for those dimensions in (a) absent in (b) */ for (i = DIM(b); i < DIM(a); i++) { - d = fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0)); + d = fabs(distance_1D(LL_COORD(a, i), UR_COORD(a, i), 0.0, 0.0)); if (d > distance) distance = d; } @@ -1357,44 +1363,41 @@ g_cube_distance(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); - NDBOX *cube = DatumGetNDBOX(entry->key); - double retval; + NDBOX *cube = DatumGetNDBOX(entry->key); + double retval; if (strategy == CubeKNNDistanceCoord) { - int coord = PG_GETARG_INT32(1); + int coord = PG_GETARG_INT32(1); - if IS_POINT(cube) - { - retval = (cube)->x[(coord-1)%DIM(cube)]; - } + if (IS_POINT(cube)) + retval = cube->x[(coord - 1) % DIM(cube)]; else - { - retval = Min( - (cube)->x[(coord-1)%DIM(cube)], - (cube)->x[(coord-1)%DIM(cube) + DIM(cube)] - ); - } + retval = Min(cube->x[(coord - 1) % DIM(cube)], + cube->x[(coord - 1) % DIM(cube) + DIM(cube)]); } else { - NDBOX *query = PG_GETARG_NDBOX(1); - switch(strategy) + NDBOX *query = PG_GETARG_NDBOX(1); + + switch (strategy) { - case CubeKNNDistanceTaxicab: - retval = DatumGetFloat8(DirectFunctionCall2(distance_taxicab, - PointerGetDatum(cube), PointerGetDatum(query))); - break; - case CubeKNNDistanceEuclid: - retval = DatumGetFloat8(DirectFunctionCall2(cube_distance, - PointerGetDatum(cube), PointerGetDatum(query))); - break; - case CubeKNNDistanceChebyshev: - retval = DatumGetFloat8(DirectFunctionCall2(distance_chebyshev, - PointerGetDatum(cube), PointerGetDatum(query))); - break; - default: - elog(ERROR, "Cube: unknown strategy number."); + case CubeKNNDistanceTaxicab: + retval = DatumGetFloat8(DirectFunctionCall2(distance_taxicab, + PointerGetDatum(cube), PointerGetDatum(query))); + break; + case CubeKNNDistanceEuclid: + retval = DatumGetFloat8(DirectFunctionCall2(cube_distance, + PointerGetDatum(cube), PointerGetDatum(query))); + break; + case CubeKNNDistanceChebyshev: + retval = DatumGetFloat8(DirectFunctionCall2(distance_chebyshev, + PointerGetDatum(cube), PointerGetDatum(query))); + break; + default: + elog(ERROR, "unrecognized cube strategy number: %d", strategy); + retval = 0; /* keep compiler quiet */ + break; } } PG_RETURN_FLOAT8(retval); @@ -1466,7 +1469,7 @@ Datum cube_ll_coord(PG_FUNCTION_ARGS) { NDBOX *c = PG_GETARG_NDBOX(0); - int n = PG_GETARG_INT16(1); + int n = PG_GETARG_INT32(1); double result; if (DIM(c) >= n && n > 0) @@ -1483,7 +1486,7 @@ Datum cube_ur_coord(PG_FUNCTION_ARGS) { NDBOX *c = PG_GETARG_NDBOX(0); - int n = PG_GETARG_INT16(1); + int n = PG_GETARG_INT32(1); double result; if (DIM(c) >= n && n > 0) @@ -1504,21 +1507,17 @@ Datum cube_coord(PG_FUNCTION_ARGS) { NDBOX *cube = PG_GETARG_NDBOX(0); - int coord = PG_GETARG_INT16(1); + int coord = PG_GETARG_INT32(1); - if ((coord > 0) && (coord <= 2*DIM(cube))) - { - if IS_POINT(cube) - PG_RETURN_FLOAT8( (cube)->x[(coord-1)%DIM(cube)] ); - else - PG_RETURN_FLOAT8( (cube)->x[coord-1] ); - } - else - { + if (coord <= 0 || coord > 2 * DIM(cube)) ereport(ERROR, - (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), - errmsg("Cube index out of bounds"))); - } + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("cube index %d is out of bounds", coord))); + + if (IS_POINT(cube)) + PG_RETURN_FLOAT8(cube->x[(coord - 1) % DIM(cube)]); + else + PG_RETURN_FLOAT8(cube->x[coord - 1]); } @@ -1536,27 +1535,28 @@ Datum cube_coord_llur(PG_FUNCTION_ARGS) { NDBOX *cube = PG_GETARG_NDBOX(0); - int coord = PG_GETARG_INT16(1); + int coord = PG_GETARG_INT32(1); - if ((coord > 0) && (coord <= DIM(cube))) - { - if IS_POINT(cube) - PG_RETURN_FLOAT8( (cube)->x[coord-1] ); - else - PG_RETURN_FLOAT8( Min((cube)->x[coord-1], (cube)->x[coord-1+DIM(cube)]) ); - } - else if ((coord > DIM(cube)) && (coord <= 2*DIM(cube))) + if (coord <= 0 || coord > 2 * DIM(cube)) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("cube index %d is out of bounds", coord))); + + if (coord <= DIM(cube)) { - if IS_POINT(cube) - PG_RETURN_FLOAT8( (cube)->x[(coord-1)%DIM(cube)] ); + if (IS_POINT(cube)) + PG_RETURN_FLOAT8(cube->x[coord - 1]); else - PG_RETURN_FLOAT8( Max((cube)->x[coord-1], (cube)->x[coord-1-DIM(cube)]) ); + PG_RETURN_FLOAT8(Min(cube->x[coord - 1], + cube->x[coord - 1 + DIM(cube)])); } else { - ereport(ERROR, - (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), - errmsg("Cube index out of bounds"))); + if (IS_POINT(cube)) + PG_RETURN_FLOAT8(cube->x[(coord - 1) % DIM(cube)]); + else + PG_RETURN_FLOAT8(Max(cube->x[coord - 1], + cube->x[coord - 1 - DIM(cube)])); } } diff --git a/contrib/cube/expected/cube.out b/contrib/cube/expected/cube.out index 769ad3a88d..e9e2c0f15b 100644 --- a/contrib/cube/expected/cube.out +++ b/contrib/cube/expected/cube.out @@ -1458,13 +1458,13 @@ SELECT cube(array[10,20,30], array[40,50,60])->6; (1 row) SELECT cube(array[10,20,30], array[40,50,60])->0; -ERROR: Cube index out of bounds +ERROR: cube index 0 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->7; -ERROR: Cube index out of bounds +ERROR: cube index 7 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->-1; -ERROR: Cube index out of bounds +ERROR: cube index -1 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->-6; -ERROR: Cube index out of bounds +ERROR: cube index -6 is out of bounds SELECT cube(array[10,20,30])->3; ?column? ---------- @@ -1478,7 +1478,7 @@ SELECT cube(array[10,20,30])->6; (1 row) SELECT cube(array[10,20,30])->-6; -ERROR: Cube index out of bounds +ERROR: cube index -6 is out of bounds -- "normalized" coordinate access SELECT cube(array[10,20,30], array[40,50,60])~>1; ?column? @@ -1517,7 +1517,7 @@ SELECT cube(array[40,50,60], array[10,20,30])~>3; (1 row) SELECT cube(array[40,50,60], array[10,20,30])~>0; -ERROR: Cube index out of bounds +ERROR: cube index 0 is out of bounds SELECT cube(array[40,50,60], array[10,20,30])~>4; ?column? ---------- @@ -1525,7 +1525,7 @@ SELECT cube(array[40,50,60], array[10,20,30])~>4; (1 row) SELECT cube(array[40,50,60], array[10,20,30])~>(-1); -ERROR: Cube index out of bounds +ERROR: cube index -1 is out of bounds -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); diff --git a/contrib/cube/expected/cube_1.out b/contrib/cube/expected/cube_1.out index 7178088e4a..c40fabcd46 100644 --- a/contrib/cube/expected/cube_1.out +++ b/contrib/cube/expected/cube_1.out @@ -1458,13 +1458,13 @@ SELECT cube(array[10,20,30], array[40,50,60])->6; (1 row) SELECT cube(array[10,20,30], array[40,50,60])->0; -ERROR: Cube index out of bounds +ERROR: cube index 0 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->7; -ERROR: Cube index out of bounds +ERROR: cube index 7 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->-1; -ERROR: Cube index out of bounds +ERROR: cube index -1 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->-6; -ERROR: Cube index out of bounds +ERROR: cube index -6 is out of bounds SELECT cube(array[10,20,30])->3; ?column? ---------- @@ -1478,7 +1478,7 @@ SELECT cube(array[10,20,30])->6; (1 row) SELECT cube(array[10,20,30])->-6; -ERROR: Cube index out of bounds +ERROR: cube index -6 is out of bounds -- "normalized" coordinate access SELECT cube(array[10,20,30], array[40,50,60])~>1; ?column? @@ -1517,7 +1517,7 @@ SELECT cube(array[40,50,60], array[10,20,30])~>3; (1 row) SELECT cube(array[40,50,60], array[10,20,30])~>0; -ERROR: Cube index out of bounds +ERROR: cube index 0 is out of bounds SELECT cube(array[40,50,60], array[10,20,30])~>4; ?column? ---------- @@ -1525,7 +1525,7 @@ SELECT cube(array[40,50,60], array[10,20,30])~>4; (1 row) SELECT cube(array[40,50,60], array[10,20,30])~>(-1); -ERROR: Cube index out of bounds +ERROR: cube index -1 is out of bounds -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); diff --git a/contrib/cube/expected/cube_2.out b/contrib/cube/expected/cube_2.out index c2421c5805..fef749c698 100644 --- a/contrib/cube/expected/cube_2.out +++ b/contrib/cube/expected/cube_2.out @@ -1458,13 +1458,13 @@ SELECT cube(array[10,20,30], array[40,50,60])->6; (1 row) SELECT cube(array[10,20,30], array[40,50,60])->0; -ERROR: Cube index out of bounds +ERROR: cube index 0 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->7; -ERROR: Cube index out of bounds +ERROR: cube index 7 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->-1; -ERROR: Cube index out of bounds +ERROR: cube index -1 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->-6; -ERROR: Cube index out of bounds +ERROR: cube index -6 is out of bounds SELECT cube(array[10,20,30])->3; ?column? ---------- @@ -1478,7 +1478,7 @@ SELECT cube(array[10,20,30])->6; (1 row) SELECT cube(array[10,20,30])->-6; -ERROR: Cube index out of bounds +ERROR: cube index -6 is out of bounds -- "normalized" coordinate access SELECT cube(array[10,20,30], array[40,50,60])~>1; ?column? @@ -1517,7 +1517,7 @@ SELECT cube(array[40,50,60], array[10,20,30])~>3; (1 row) SELECT cube(array[40,50,60], array[10,20,30])~>0; -ERROR: Cube index out of bounds +ERROR: cube index 0 is out of bounds SELECT cube(array[40,50,60], array[10,20,30])~>4; ?column? ---------- @@ -1525,7 +1525,7 @@ SELECT cube(array[40,50,60], array[10,20,30])~>4; (1 row) SELECT cube(array[40,50,60], array[10,20,30])~>(-1); -ERROR: Cube index out of bounds +ERROR: cube index -1 is out of bounds -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); diff --git a/contrib/cube/expected/cube_3.out b/contrib/cube/expected/cube_3.out index b6c961dcb9..31d2d1a64e 100644 --- a/contrib/cube/expected/cube_3.out +++ b/contrib/cube/expected/cube_3.out @@ -1458,13 +1458,13 @@ SELECT cube(array[10,20,30], array[40,50,60])->6; (1 row) SELECT cube(array[10,20,30], array[40,50,60])->0; -ERROR: Cube index out of bounds +ERROR: cube index 0 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->7; -ERROR: Cube index out of bounds +ERROR: cube index 7 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->-1; -ERROR: Cube index out of bounds +ERROR: cube index -1 is out of bounds SELECT cube(array[10,20,30], array[40,50,60])->-6; -ERROR: Cube index out of bounds +ERROR: cube index -6 is out of bounds SELECT cube(array[10,20,30])->3; ?column? ---------- @@ -1478,7 +1478,7 @@ SELECT cube(array[10,20,30])->6; (1 row) SELECT cube(array[10,20,30])->-6; -ERROR: Cube index out of bounds +ERROR: cube index -6 is out of bounds -- "normalized" coordinate access SELECT cube(array[10,20,30], array[40,50,60])~>1; ?column? @@ -1517,7 +1517,7 @@ SELECT cube(array[40,50,60], array[10,20,30])~>3; (1 row) SELECT cube(array[40,50,60], array[10,20,30])~>0; -ERROR: Cube index out of bounds +ERROR: cube index 0 is out of bounds SELECT cube(array[40,50,60], array[10,20,30])~>4; ?column? ---------- @@ -1525,7 +1525,7 @@ SELECT cube(array[40,50,60], array[10,20,30])~>4; (1 row) SELECT cube(array[40,50,60], array[10,20,30])~>(-1); -ERROR: Cube index out of bounds +ERROR: cube index -1 is out of bounds -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); diff --git a/doc/src/sgml/cube.sgml b/doc/src/sgml/cube.sgml index 666400800c..4a093706e4 100644 --- a/doc/src/sgml/cube.sgml +++ b/doc/src/sgml/cube.sgml @@ -75,8 +75,8 @@ entered in. The cube functions automatically swap values if needed to create a uniform lower left — upper right internal representation. - When corners coincide cube stores only one corner along with a - special flag in order to reduce size wasted. + When the corners coincide, cube stores only one corner + along with an is point flag to avoid wasting space. @@ -98,17 +98,17 @@ Usage - The cube module includes a GiST index operator class for - cube values. - The operators supported by the GiST operator class are shown in . + shows the operators provided for type + cube. - - Cube GiST Operators - +
+ Cube Operators + Operator + Result Description @@ -116,160 +116,149 @@ a = b + boolean The cubes a and b are identical. a && b + boolean The cubes a and b overlap. a @> b + boolean The cube a contains the cube b. a <@ b + boolean The cube a is contained in the cube b. - a -> n - Get n-th coordinate of cube. + a < b + boolean + The cube a is less than the cube b. - a ~> n - - Get n-th coordinate in 'normalized' cube representation. Noramlization - means coordinate rearrangement to form (lower left, upper right). - + a <= b + boolean + The cube a is less than or equal to the cube b. - - -
- - (Before PostgreSQL 8.2, the containment operators @> and <@ were - respectively called @ and ~. These names are still available, but are - deprecated and will eventually be retired. Notice that the old names - are reversed from the convention formerly followed by the core geometric - data types!) - + + a > b + boolean + The cube a is greater than the cube b. + - - GiST index can be used to retrieve nearest neighbours via several metric - operators. As always any of them can be used as ordinary function. - + + a >= b + boolean + The cube a is greater than or equal to the cube b. + - - Cube GiST-kNN Operators - - - Operator - Description + a <> b + boolean + The cube a is not equal to the cube b. - - + + + a -> n + float8 + Get n-th coordinate of cube (counting from 1). + + + + a ~> n + float8 + + Get n-th coordinate in normalized cube + representation, in which the coordinates have been rearranged into + the form lower left — upper right; that is, the + smaller endpoint along each dimension appears first. + + + a <-> b - Euclidean distance between a and b + float8 + Euclidean distance between a and b. a <#> b - Taxicab (L-1 metric) distance between a and b + float8 + Taxicab (L-1 metric) distance between a and b. a <=> b - Chebyshev (L-inf metric) distance between a and b + float8 + Chebyshev (L-inf metric) distance between a and b. +
- Selection of nearing neigbours can be done in the following way: + (Before PostgreSQL 8.2, the containment operators @> and <@ were + respectively called @ and ~. These names are still available, but are + deprecated and will eventually be retired. Notice that the old names + are reversed from the convention formerly followed by the core geometric + data types!) - -SELECT c FROM test -ORDER BY cube(array[0.5,0.5,0.5])<->c -LIMIT 1; - - - Also kNN framework allows us to cheat with metrics in order to get results - sorted by selected coodinate directly from the index without extra sorting - step. That technique significantly faster on small values of LIMIT, however - with bigger values of LIMIT planner will switch automatically to standart - index scan and sort. - That behavior can be achieved using coordinate operator - (cube c)~>(int offset). + The scalar ordering operators (<, >=, etc) + do not make a lot of sense for any practical purpose but sorting. These + operators first compare the first coordinates, and if those are equal, + compare the second coordinates, etc. They exist mainly to support the + b-tree index operator class for cube, which can be useful for + example if you would like a UNIQUE constraint on a cube column. - -=> select cube(array[0.41,0.42,0.43])~>2 as coord; - coord -------- - 0.42 -(1 row) - - So using that operator as kNN metric we can obtain cubes sorted by it's - coordinate. + The cube module also provides a GiST index operator class for + cube values. + A cube GiST index can be used to search for values using the + =, &&, @>, and + <@ operators in WHERE clauses. + - To get cubes ordered by first coordinate of lower left corner ascending - one can use the following query: - + In addition, a cube GiST index can be used to find nearest + neighbors using the metric operators + <->, <#>, and + <=> in ORDER BY clauses. + For example, the nearest neighbor of the 3-D point (0.5, 0.5, 0.5) + could be found efficiently with: -SELECT c FROM test ORDER BY c~>1 LIMIT 5; +SELECT c FROM test +ORDER BY cube(array[0.5,0.5,0.5]) <-> c +LIMIT 1; - - And to get cubes descending by first coordinate of upper right corner - of 2d-cube: - -SELECT c FROM test ORDER BY c~>3 DESC LIMIT 5; - - - - The standard B-tree operators are also provided, for example - - - - - - Operator - Description - - - - - - [a, b] < [c, d] - Less than - - - - [a, b] > [c, d] - Greater than - - - - - - These operators do not make a lot of sense for any practical - purpose but sorting. These operators first compare (a) to (c), - and if these are equal, compare (b) to (d). That results in - reasonably good sorting in most cases, which is useful if - you want to use ORDER BY with this type. + The ~> operator can also be used in this way to + efficiently retrieve the first few values sorted by a selected coordinate. + For example, to get the first few cubes ordered by the first coordinate + (lower left corner) ascending one could use the following query: + +SELECT c FROM test ORDER BY c ~> 1 LIMIT 5; + + And to get 2-D cubes ordered by the first coordinate of the upper right + corner descending: + +SELECT c FROM test ORDER BY c ~> 3 DESC LIMIT 5; + -- 2.40.0