From 33bd250f6c4cc309f4eeb657da80f1e7743b3e5c Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Fri, 18 Dec 2015 14:38:27 +0300 Subject: [PATCH] Cube extension kNN support Introduce distance operators over cubes: <#> taxicab distance <-> euclidean distance <=> chebyshev distance Also add kNN support of those distances in GiST opclass. Author: Stas Kelvich --- contrib/cube/Makefile | 2 +- contrib/cube/cube--1.0--1.1.sql | 60 ++++ contrib/cube/{cube--1.0.sql => cube--1.1.sql} | 58 +++- contrib/cube/cube.c | 208 ++++++++++++ contrib/cube/cube.control | 2 +- contrib/cube/cubedata.h | 5 + contrib/cube/expected/cube.out | 301 ++++++++++++++++++ contrib/cube/expected/cube_1.out | 301 ++++++++++++++++++ contrib/cube/expected/cube_2.out | 301 ++++++++++++++++++ contrib/cube/expected/cube_3.out | 301 ++++++++++++++++++ contrib/cube/sql/cube.sql | 53 +++ doc/src/sgml/cube.sgml | 96 ++++++ 12 files changed, 1684 insertions(+), 4 deletions(-) create mode 100644 contrib/cube/cube--1.0--1.1.sql rename contrib/cube/{cube--1.0.sql => cube--1.1.sql} (84%) diff --git a/contrib/cube/Makefile b/contrib/cube/Makefile index 67f7867761..e2a5d2c992 100644 --- a/contrib/cube/Makefile +++ b/contrib/cube/Makefile @@ -4,7 +4,7 @@ MODULE_big = cube OBJS= cube.o cubeparse.o $(WIN32RES) EXTENSION = cube -DATA = cube--1.0.sql cube--unpackaged--1.0.sql +DATA = cube--1.1.sql cube--1.0--1.1.sql cube--unpackaged--1.0.sql PGFILEDESC = "cube - multidimensional cube data type" REGRESS = cube diff --git a/contrib/cube/cube--1.0--1.1.sql b/contrib/cube/cube--1.0--1.1.sql new file mode 100644 index 0000000000..f032f73586 --- /dev/null +++ b/contrib/cube/cube--1.0--1.1.sql @@ -0,0 +1,60 @@ +/* contrib/cube/cube--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION cube UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION distance_chebyshev(cube, cube) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION distance_taxicab(cube, cube) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION cube_coord(cube, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION cube_coord_llur(cube, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR -> ( + LEFTARG = cube, RIGHTARG = int, PROCEDURE = cube_coord +); + +CREATE OPERATOR ~> ( + LEFTARG = cube, RIGHTARG = int, PROCEDURE = cube_coord_llur +); + +CREATE OPERATOR <#> ( + LEFTARG = cube, RIGHTARG = cube, PROCEDURE = distance_taxicab, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <-> ( + LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <=> ( + LEFTARG = cube, RIGHTARG = cube, PROCEDURE = distance_chebyshev, + COMMUTATOR = '<=>' +); + +CREATE FUNCTION g_cube_distance (internal, cube, smallint, oid) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +ALTER OPERATOR FAMILY gist_cube_ops USING gist ADD + OPERATOR 15 ~> (cube, int) FOR ORDER BY float_ops, + OPERATOR 16 <#> (cube, cube) FOR ORDER BY float_ops, + OPERATOR 17 <-> (cube, cube) FOR ORDER BY float_ops, + OPERATOR 18 <=> (cube, cube) FOR ORDER BY float_ops, + FUNCTION 8 (cube, cube) g_cube_distance (internal, cube, smallint, oid); + diff --git a/contrib/cube/cube--1.0.sql b/contrib/cube/cube--1.1.sql similarity index 84% rename from contrib/cube/cube--1.0.sql rename to contrib/cube/cube--1.1.sql index 0307811ceb..c944414576 100644 --- a/contrib/cube/cube--1.0.sql +++ b/contrib/cube/cube--1.1.sql @@ -1,4 +1,4 @@ -/* contrib/cube/cube--1.0.sql */ +/* contrib/cube/cube--1.1.sql */ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION cube" to load this file. \quit @@ -140,6 +140,16 @@ RETURNS float8 AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION distance_chebyshev(cube, cube) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION distance_taxicab(cube, cube) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + -- Extracting elements functions CREATE FUNCTION cube_dim(cube) @@ -157,6 +167,16 @@ RETURNS float8 AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION cube_coord(cube, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION cube_coord_llur(cube, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + CREATE FUNCTION cube(float8) RETURNS cube AS 'MODULE_PATHNAME', 'cube_f8' LANGUAGE C IMMUTABLE STRICT; @@ -246,6 +266,29 @@ CREATE OPERATOR <@ ( RESTRICT = contsel, JOIN = contjoinsel ); +CREATE OPERATOR -> ( + LEFTARG = cube, RIGHTARG = int, PROCEDURE = cube_coord +); + +CREATE OPERATOR ~> ( + LEFTARG = cube, RIGHTARG = int, PROCEDURE = cube_coord_llur +); + +CREATE OPERATOR <#> ( + LEFTARG = cube, RIGHTARG = cube, PROCEDURE = distance_taxicab, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <-> ( + LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <=> ( + LEFTARG = cube, RIGHTARG = cube, PROCEDURE = distance_chebyshev, + COMMUTATOR = '<=>' +); + -- these are obsolete/deprecated: CREATE OPERATOR @ ( LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_contains, @@ -296,6 +339,10 @@ RETURNS internal AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION g_cube_distance (internal, cube, smallint, oid) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; -- Create the operator classes for indexing @@ -316,10 +363,17 @@ CREATE OPERATOR CLASS gist_cube_ops OPERATOR 8 <@ , OPERATOR 13 @ , OPERATOR 14 ~ , + OPERATOR 15 ~> (cube, int) FOR ORDER BY float_ops, + OPERATOR 16 <#> (cube, cube) FOR ORDER BY float_ops, + OPERATOR 17 <-> (cube, cube) FOR ORDER BY float_ops, + OPERATOR 18 <=> (cube, cube) FOR ORDER BY float_ops, + FUNCTION 1 g_cube_consistent (internal, cube, int, oid, internal), FUNCTION 2 g_cube_union (internal, internal), FUNCTION 3 g_cube_compress (internal), FUNCTION 4 g_cube_decompress (internal), FUNCTION 5 g_cube_penalty (internal, internal, internal), FUNCTION 6 g_cube_picksplit (internal, internal), - FUNCTION 7 g_cube_same (cube, cube, internal); + FUNCTION 7 g_cube_same (cube, cube, internal), + FUNCTION 8 g_cube_distance (internal, cube, smallint, oid); + diff --git a/contrib/cube/cube.c b/contrib/cube/cube.c index a6be59ec93..35ffb6c3f7 100644 --- a/contrib/cube/cube.c +++ b/contrib/cube/cube.c @@ -40,6 +40,8 @@ PG_FUNCTION_INFO_V1(cube_c_f8_f8); PG_FUNCTION_INFO_V1(cube_dim); PG_FUNCTION_INFO_V1(cube_ll_coord); PG_FUNCTION_INFO_V1(cube_ur_coord); +PG_FUNCTION_INFO_V1(cube_coord); +PG_FUNCTION_INFO_V1(cube_coord_llur); PG_FUNCTION_INFO_V1(cube_subset); /* @@ -53,6 +55,7 @@ PG_FUNCTION_INFO_V1(g_cube_penalty); PG_FUNCTION_INFO_V1(g_cube_picksplit); PG_FUNCTION_INFO_V1(g_cube_union); PG_FUNCTION_INFO_V1(g_cube_same); +PG_FUNCTION_INFO_V1(g_cube_distance); /* ** B-tree support functions @@ -79,7 +82,9 @@ PG_FUNCTION_INFO_V1(cube_size); /* ** miscellaneous */ +PG_FUNCTION_INFO_V1(distance_taxicab); PG_FUNCTION_INFO_V1(cube_distance); +PG_FUNCTION_INFO_V1(distance_chebyshev); PG_FUNCTION_INFO_V1(cube_is_point); PG_FUNCTION_INFO_V1(cube_enlarge); @@ -1257,6 +1262,144 @@ cube_distance(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(sqrt(distance)); } +Datum +distance_taxicab(PG_FUNCTION_ARGS) +{ + NDBOX *a = PG_GETARG_NDBOX(0), + *b = PG_GETARG_NDBOX(1); + bool swapped = false; + double distance; + int i; + + /* swap the box pointers if needed */ + if (DIM(a) < DIM(b)) + { + NDBOX *tmp = b; + b = a; + a = tmp; + swapped = true; + } + + distance = 0.0; + /* compute within the dimensions of (b) */ + for (i = 0; i < DIM(b); i++) + distance += fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i))); + + /* compute distance to zero for those dimensions in (a) absent in (b) */ + for (i = DIM(b); i < DIM(a); i++) + distance += fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0)); + + if (swapped) + { + PG_FREE_IF_COPY(b, 0); + PG_FREE_IF_COPY(a, 1); + } + else + { + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + } + + PG_RETURN_FLOAT8(distance); +} + +Datum +distance_chebyshev(PG_FUNCTION_ARGS) +{ + NDBOX *a = PG_GETARG_NDBOX(0), + *b = PG_GETARG_NDBOX(1); + bool swapped = false; + double d, distance; + int i; + + /* swap the box pointers if needed */ + if (DIM(a) < DIM(b)) + { + NDBOX *tmp = b; + b = a; + a = tmp; + swapped = true; + } + + distance = 0.0; + /* compute within the dimensions of (b) */ + for (i = 0; i < DIM(b); i++) + { + d = fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i))); + if (d > distance) + distance = d; + } + + /* compute distance to zero for those dimensions in (a) absent in (b) */ + for (i = DIM(b); i < DIM(a); i++) + { + d = fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0)); + if (d > distance) + distance = d; + } + + if (swapped) + { + PG_FREE_IF_COPY(b, 0); + PG_FREE_IF_COPY(a, 1); + } + else + { + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + } + + PG_RETURN_FLOAT8(distance); +} + +Datum +g_cube_distance(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + NDBOX *cube = DatumGetNDBOX(entry->key); + double retval; + + if (strategy == CubeKNNDistanceCoord) + { + int coord = PG_GETARG_INT32(1); + + if IS_POINT(cube) + { + retval = (cube)->x[(coord-1)%DIM(cube)]; + } + else + { + retval = Min( + (cube)->x[(coord-1)%DIM(cube)], + (cube)->x[(coord-1)%DIM(cube) + DIM(cube)] + ); + } + } + else + { + NDBOX *query = PG_GETARG_NDBOX(1); + switch(strategy) + { + case CubeKNNDistanceTaxicab: + retval = DatumGetFloat8(DirectFunctionCall2(distance_taxicab, + PointerGetDatum(cube), PointerGetDatum(query))); + break; + case CubeKNNDistanceEuclid: + retval = DatumGetFloat8(DirectFunctionCall2(cube_distance, + PointerGetDatum(cube), PointerGetDatum(query))); + break; + case CubeKNNDistanceChebyshev: + retval = DatumGetFloat8(DirectFunctionCall2(distance_chebyshev, + PointerGetDatum(cube), PointerGetDatum(query))); + break; + default: + elog(ERROR, "Cube: unknown strategy number."); + } + } + PG_RETURN_FLOAT8(retval); +} + static double distance_1D(double a1, double a2, double b1, double b2) { @@ -1352,6 +1495,71 @@ cube_ur_coord(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(result); } +/* + * Function returns cube coordinate. + * Numbers from 1 to DIM denotes first corner coordinates. + * Numbers from DIM+1 to 2*DIM denotes second corner coordinates. + */ +Datum +cube_coord(PG_FUNCTION_ARGS) +{ + NDBOX *cube = PG_GETARG_NDBOX(0); + int coord = PG_GETARG_INT16(1); + + if ((coord > 0) && (coord <= 2*DIM(cube))) + { + if IS_POINT(cube) + PG_RETURN_FLOAT8( (cube)->x[(coord-1)%DIM(cube)] ); + else + PG_RETURN_FLOAT8( (cube)->x[coord-1] ); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("Cube index out of bounds"))); + } +} + + +/* + * This function works like cube_coord(), + * but rearranges coordinates of corners to get cube representation + * in the form of (lower left, upper right). + * For historical reasons that extension allows us to create cubes in form + * ((2,1),(1,2)) and instead of normalizing such cube to ((1,1),(2,2)) it + * stores cube in original way. But to get cubes ordered by one of dimensions + * directly from the index without extra sort step we need some + * representation-independent coordinate getter. This function implements it. + */ +Datum +cube_coord_llur(PG_FUNCTION_ARGS) +{ + NDBOX *cube = PG_GETARG_NDBOX(0); + int coord = PG_GETARG_INT16(1); + + if ((coord > 0) && (coord <= DIM(cube))) + { + if IS_POINT(cube) + PG_RETURN_FLOAT8( (cube)->x[coord-1] ); + else + PG_RETURN_FLOAT8( Min((cube)->x[coord-1], (cube)->x[coord-1+DIM(cube)]) ); + } + else if ((coord > DIM(cube)) && (coord <= 2*DIM(cube))) + { + if IS_POINT(cube) + PG_RETURN_FLOAT8( (cube)->x[(coord-1)%DIM(cube)] ); + else + PG_RETURN_FLOAT8( Max((cube)->x[coord-1], (cube)->x[coord-1-DIM(cube)]) ); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("Cube index out of bounds"))); + } +} + /* Increase or decrease box size by a radius in at least n dimensions. */ Datum cube_enlarge(PG_FUNCTION_ARGS) diff --git a/contrib/cube/cube.control b/contrib/cube/cube.control index ddc8d2e5d1..f84e6c582e 100644 --- a/contrib/cube/cube.control +++ b/contrib/cube/cube.control @@ -1,5 +1,5 @@ # cube extension comment = 'data type for multidimensional cubes' -default_version = '1.0' +default_version = '1.1' module_pathname = '$libdir/cube' relocatable = true diff --git a/contrib/cube/cubedata.h b/contrib/cube/cubedata.h index 59c23ded6a..7eaac39640 100644 --- a/contrib/cube/cubedata.h +++ b/contrib/cube/cubedata.h @@ -47,6 +47,11 @@ typedef struct NDBOX #define PG_GETARG_NDBOX(x) DatumGetNDBOX(PG_GETARG_DATUM(x)) #define PG_RETURN_NDBOX(x) PG_RETURN_POINTER(x) +#define CubeKNNDistanceCoord 15 /* ~> */ +#define CubeKNNDistanceTaxicab 16 /* <#> */ +#define CubeKNNDistanceEuclid 17 /* <-> */ +#define CubeKNNDistanceChebyshev 18 /* <=> */ + /* in cubescan.l */ extern int cube_yylex(void); extern void cube_yyerror(NDBOX **result, const char *message) pg_attribute_noreturn(); diff --git a/contrib/cube/expected/cube.out b/contrib/cube/expected/cube.out index ca9555ed4b..769ad3a88d 100644 --- a/contrib/cube/expected/cube.out +++ b/contrib/cube/expected/cube.out @@ -1381,6 +1381,151 @@ SELECT cube_size('(42,137)'::cube); 0 (1 row) +-- Test of distances +-- +SELECT cube_distance('(1,1)'::cube, '(4,5)'::cube); + cube_distance +--------------- + 5 +(1 row) + +SELECT '(1,1)'::cube <-> '(4,5)'::cube as d_e; + d_e +----- + 5 +(1 row) + +SELECT distance_chebyshev('(1,1)'::cube, '(4,5)'::cube); + distance_chebyshev +-------------------- + 4 +(1 row) + +SELECT '(1,1)'::cube <=> '(4,5)'::cube as d_c; + d_c +----- + 4 +(1 row) + +SELECT distance_taxicab('(1,1)'::cube, '(4,5)'::cube); + distance_taxicab +------------------ + 7 +(1 row) + +SELECT '(1,1)'::cube <#> '(4,5)'::cube as d_t; + d_t +----- + 7 +(1 row) + +-- zero for overlapping +SELECT cube_distance('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + cube_distance +--------------- + 0 +(1 row) + +SELECT distance_chebyshev('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + distance_chebyshev +-------------------- + 0 +(1 row) + +SELECT distance_taxicab('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + distance_taxicab +------------------ + 0 +(1 row) + +-- coordinate access +SELECT cube(array[10,20,30], array[40,50,60])->1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])->1; + ?column? +---------- + 40 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])->6; + ?column? +---------- + 60 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])->0; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->7; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->-1; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->-6; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30])->3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[10,20,30])->6; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[10,20,30])->-6; +ERROR: Cube index out of bounds +-- "normalized" coordinate access +SELECT cube(array[10,20,30], array[40,50,60])~>1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])~>2; + ?column? +---------- + 20 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>2; + ?column? +---------- + 20 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])~>3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>0; +ERROR: Cube index out of bounds +SELECT cube(array[40,50,60], array[10,20,30])~>4; + ?column? +---------- + 40 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>(-1); +ERROR: Cube index out of bounds -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); @@ -1407,3 +1552,159 @@ SELECT * FROM test_cube WHERE c && '(3000,1000),(0,0)' GROUP BY c ORDER BY c; (2424, 160),(2424, 81) (5 rows) +-- kNN with index +SELECT *, c <-> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <-> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------------------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (948, 1201),(907, 1156) | 772.000647668122 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 +(5 rows) + +SELECT *, c <=> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <=> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (948, 1201),(907, 1156) | 656 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 +(5 rows) + +SELECT *, c <#> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <#> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 + (948, 1201),(907, 1156) | 1063 +(5 rows) + +-- kNN-based sorting +SELECT * FROM test_cube ORDER BY c~>1 LIMIT 15; -- ascending by 1st coordinate of lower left corner + c +--------------------------- + (54, 38679),(3, 38602) + (83, 10271),(15, 10265) + (122, 46832),(64, 46762) + (167, 17214),(92, 17184) + (161, 24465),(107, 24374) + (162, 26040),(120, 25963) + (154, 4019),(138, 3990) + (259, 1850),(175, 1820) + (207, 40886),(179, 40879) + (288, 49588),(204, 49571) + (270, 32616),(226, 32607) + (318, 31489),(235, 31404) + (337, 455),(240, 359) + (270, 29508),(264, 29440) + (369, 1457),(278, 1409) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>4 LIMIT 15; -- ascending by 2nd coordinate or upper right corner + c +--------------------------- + (30333, 50),(30273, 6) + (43301, 75),(43227, 43) + (19650, 142),(19630, 51) + (2424, 160),(2424, 81) + (3449, 171),(3354, 108) + (18037, 155),(17941, 109) + (28511, 208),(28479, 114) + (19946, 217),(19941, 118) + (16906, 191),(16816, 139) + (759, 187),(662, 163) + (22684, 266),(22656, 181) + (24423, 255),(24360, 213) + (45989, 249),(45910, 222) + (11399, 377),(11360, 294) + (12162, 389),(12103, 309) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>1 DESC LIMIT 15; -- descending by 1st coordinate of lower left corner + c +------------------------------- + (50027, 49230),(49951, 49214) + (49980, 35004),(49937, 34963) + (49985, 6436),(49927, 6338) + (49999, 27218),(49908, 27176) + (49954, 1340),(49905, 1294) + (49944, 25163),(49902, 25153) + (49981, 34876),(49898, 34786) + (49957, 43390),(49897, 43384) + (49853, 18504),(49848, 18503) + (49902, 41752),(49818, 41746) + (49907, 30225),(49810, 30158) + (49843, 5175),(49808, 5145) + (49887, 24274),(49805, 24184) + (49847, 7128),(49798, 7067) + (49820, 7990),(49771, 7967) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>4 DESC LIMIT 15; -- descending by 2nd coordinate or upper right corner + c +------------------------------- + (36311, 50073),(36258, 49987) + (30746, 50040),(30727, 49992) + (2168, 50012),(2108, 49914) + (21551, 49983),(21492, 49885) + (17954, 49975),(17865, 49915) + (3531, 49962),(3463, 49934) + (19128, 49932),(19112, 49849) + (31287, 49923),(31236, 49913) + (43925, 49912),(43888, 49878) + (29261, 49910),(29247, 49818) + (14913, 49873),(14849, 49836) + (20007, 49858),(19921, 49778) + (38266, 49852),(38233, 49844) + (37595, 49849),(37581, 49834) + (46151, 49848),(46058, 49830) +(15 rows) + +-- same thing for index with points +CREATE TABLE test_point(c cube); +INSERT INTO test_point(SELECT cube(array[c->1,c->2,c->3,c->4]) FROM test_cube); +CREATE INDEX ON test_point USING gist(c); +SELECT * FROM test_point ORDER BY c~>1, c~>2 LIMIT 15; -- ascending by 1st then by 2nd coordinate + c +-------------------------- + (54, 38679, 3, 38602) + (83, 10271, 15, 10265) + (122, 46832, 64, 46762) + (154, 4019, 138, 3990) + (161, 24465, 107, 24374) + (162, 26040, 120, 25963) + (167, 17214, 92, 17184) + (207, 40886, 179, 40879) + (259, 1850, 175, 1820) + (270, 29508, 264, 29440) + (270, 32616, 226, 32607) + (288, 49588, 204, 49571) + (318, 31489, 235, 31404) + (326, 18837, 285, 18817) + (337, 455, 240, 359) +(15 rows) + +SELECT * FROM test_point ORDER BY c~>4 DESC LIMIT 15; -- descending by 1st coordinate + c +------------------------------ + (30746, 50040, 30727, 49992) + (36311, 50073, 36258, 49987) + (3531, 49962, 3463, 49934) + (17954, 49975, 17865, 49915) + (2168, 50012, 2108, 49914) + (31287, 49923, 31236, 49913) + (21551, 49983, 21492, 49885) + (43925, 49912, 43888, 49878) + (19128, 49932, 19112, 49849) + (38266, 49852, 38233, 49844) + (14913, 49873, 14849, 49836) + (37595, 49849, 37581, 49834) + (46151, 49848, 46058, 49830) + (29261, 49910, 29247, 49818) + (19233, 49824, 19185, 49794) +(15 rows) + diff --git a/contrib/cube/expected/cube_1.out b/contrib/cube/expected/cube_1.out index c07d61d0b0..7178088e4a 100644 --- a/contrib/cube/expected/cube_1.out +++ b/contrib/cube/expected/cube_1.out @@ -1381,6 +1381,151 @@ SELECT cube_size('(42,137)'::cube); 0 (1 row) +-- Test of distances +-- +SELECT cube_distance('(1,1)'::cube, '(4,5)'::cube); + cube_distance +--------------- + 5 +(1 row) + +SELECT '(1,1)'::cube <-> '(4,5)'::cube as d_e; + d_e +----- + 5 +(1 row) + +SELECT distance_chebyshev('(1,1)'::cube, '(4,5)'::cube); + distance_chebyshev +-------------------- + 4 +(1 row) + +SELECT '(1,1)'::cube <=> '(4,5)'::cube as d_c; + d_c +----- + 4 +(1 row) + +SELECT distance_taxicab('(1,1)'::cube, '(4,5)'::cube); + distance_taxicab +------------------ + 7 +(1 row) + +SELECT '(1,1)'::cube <#> '(4,5)'::cube as d_t; + d_t +----- + 7 +(1 row) + +-- zero for overlapping +SELECT cube_distance('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + cube_distance +--------------- + 0 +(1 row) + +SELECT distance_chebyshev('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + distance_chebyshev +-------------------- + 0 +(1 row) + +SELECT distance_taxicab('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + distance_taxicab +------------------ + 0 +(1 row) + +-- coordinate access +SELECT cube(array[10,20,30], array[40,50,60])->1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])->1; + ?column? +---------- + 40 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])->6; + ?column? +---------- + 60 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])->0; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->7; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->-1; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->-6; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30])->3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[10,20,30])->6; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[10,20,30])->-6; +ERROR: Cube index out of bounds +-- "normalized" coordinate access +SELECT cube(array[10,20,30], array[40,50,60])~>1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])~>2; + ?column? +---------- + 20 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>2; + ?column? +---------- + 20 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])~>3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>0; +ERROR: Cube index out of bounds +SELECT cube(array[40,50,60], array[10,20,30])~>4; + ?column? +---------- + 40 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>(-1); +ERROR: Cube index out of bounds -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); @@ -1407,3 +1552,159 @@ SELECT * FROM test_cube WHERE c && '(3000,1000),(0,0)' GROUP BY c ORDER BY c; (2424, 160),(2424, 81) (5 rows) +-- kNN with index +SELECT *, c <-> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <-> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------------------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (948, 1201),(907, 1156) | 772.000647668122 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 +(5 rows) + +SELECT *, c <=> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <=> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (948, 1201),(907, 1156) | 656 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 +(5 rows) + +SELECT *, c <#> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <#> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 + (948, 1201),(907, 1156) | 1063 +(5 rows) + +-- kNN-based sorting +SELECT * FROM test_cube ORDER BY c~>1 LIMIT 15; -- ascending by 1st coordinate of lower left corner + c +--------------------------- + (54, 38679),(3, 38602) + (83, 10271),(15, 10265) + (122, 46832),(64, 46762) + (167, 17214),(92, 17184) + (161, 24465),(107, 24374) + (162, 26040),(120, 25963) + (154, 4019),(138, 3990) + (259, 1850),(175, 1820) + (207, 40886),(179, 40879) + (288, 49588),(204, 49571) + (270, 32616),(226, 32607) + (318, 31489),(235, 31404) + (337, 455),(240, 359) + (270, 29508),(264, 29440) + (369, 1457),(278, 1409) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>4 LIMIT 15; -- ascending by 2nd coordinate or upper right corner + c +--------------------------- + (30333, 50),(30273, 6) + (43301, 75),(43227, 43) + (19650, 142),(19630, 51) + (2424, 160),(2424, 81) + (3449, 171),(3354, 108) + (18037, 155),(17941, 109) + (28511, 208),(28479, 114) + (19946, 217),(19941, 118) + (16906, 191),(16816, 139) + (759, 187),(662, 163) + (22684, 266),(22656, 181) + (24423, 255),(24360, 213) + (45989, 249),(45910, 222) + (11399, 377),(11360, 294) + (12162, 389),(12103, 309) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>1 DESC LIMIT 15; -- descending by 1st coordinate of lower left corner + c +------------------------------- + (50027, 49230),(49951, 49214) + (49980, 35004),(49937, 34963) + (49985, 6436),(49927, 6338) + (49999, 27218),(49908, 27176) + (49954, 1340),(49905, 1294) + (49944, 25163),(49902, 25153) + (49981, 34876),(49898, 34786) + (49957, 43390),(49897, 43384) + (49853, 18504),(49848, 18503) + (49902, 41752),(49818, 41746) + (49907, 30225),(49810, 30158) + (49843, 5175),(49808, 5145) + (49887, 24274),(49805, 24184) + (49847, 7128),(49798, 7067) + (49820, 7990),(49771, 7967) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>4 DESC LIMIT 15; -- descending by 2nd coordinate or upper right corner + c +------------------------------- + (36311, 50073),(36258, 49987) + (30746, 50040),(30727, 49992) + (2168, 50012),(2108, 49914) + (21551, 49983),(21492, 49885) + (17954, 49975),(17865, 49915) + (3531, 49962),(3463, 49934) + (19128, 49932),(19112, 49849) + (31287, 49923),(31236, 49913) + (43925, 49912),(43888, 49878) + (29261, 49910),(29247, 49818) + (14913, 49873),(14849, 49836) + (20007, 49858),(19921, 49778) + (38266, 49852),(38233, 49844) + (37595, 49849),(37581, 49834) + (46151, 49848),(46058, 49830) +(15 rows) + +-- same thing for index with points +CREATE TABLE test_point(c cube); +INSERT INTO test_point(SELECT cube(array[c->1,c->2,c->3,c->4]) FROM test_cube); +CREATE INDEX ON test_point USING gist(c); +SELECT * FROM test_point ORDER BY c~>1, c~>2 LIMIT 15; -- ascending by 1st then by 2nd coordinate + c +-------------------------- + (54, 38679, 3, 38602) + (83, 10271, 15, 10265) + (122, 46832, 64, 46762) + (154, 4019, 138, 3990) + (161, 24465, 107, 24374) + (162, 26040, 120, 25963) + (167, 17214, 92, 17184) + (207, 40886, 179, 40879) + (259, 1850, 175, 1820) + (270, 29508, 264, 29440) + (270, 32616, 226, 32607) + (288, 49588, 204, 49571) + (318, 31489, 235, 31404) + (326, 18837, 285, 18817) + (337, 455, 240, 359) +(15 rows) + +SELECT * FROM test_point ORDER BY c~>4 DESC LIMIT 15; -- descending by 1st coordinate + c +------------------------------ + (30746, 50040, 30727, 49992) + (36311, 50073, 36258, 49987) + (3531, 49962, 3463, 49934) + (17954, 49975, 17865, 49915) + (2168, 50012, 2108, 49914) + (31287, 49923, 31236, 49913) + (21551, 49983, 21492, 49885) + (43925, 49912, 43888, 49878) + (19128, 49932, 19112, 49849) + (38266, 49852, 38233, 49844) + (14913, 49873, 14849, 49836) + (37595, 49849, 37581, 49834) + (46151, 49848, 46058, 49830) + (29261, 49910, 29247, 49818) + (19233, 49824, 19185, 49794) +(15 rows) + diff --git a/contrib/cube/expected/cube_2.out b/contrib/cube/expected/cube_2.out index 3767d0ef9b..c2421c5805 100644 --- a/contrib/cube/expected/cube_2.out +++ b/contrib/cube/expected/cube_2.out @@ -1381,6 +1381,151 @@ SELECT cube_size('(42,137)'::cube); 0 (1 row) +-- Test of distances +-- +SELECT cube_distance('(1,1)'::cube, '(4,5)'::cube); + cube_distance +--------------- + 5 +(1 row) + +SELECT '(1,1)'::cube <-> '(4,5)'::cube as d_e; + d_e +----- + 5 +(1 row) + +SELECT distance_chebyshev('(1,1)'::cube, '(4,5)'::cube); + distance_chebyshev +-------------------- + 4 +(1 row) + +SELECT '(1,1)'::cube <=> '(4,5)'::cube as d_c; + d_c +----- + 4 +(1 row) + +SELECT distance_taxicab('(1,1)'::cube, '(4,5)'::cube); + distance_taxicab +------------------ + 7 +(1 row) + +SELECT '(1,1)'::cube <#> '(4,5)'::cube as d_t; + d_t +----- + 7 +(1 row) + +-- zero for overlapping +SELECT cube_distance('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + cube_distance +--------------- + 0 +(1 row) + +SELECT distance_chebyshev('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + distance_chebyshev +-------------------- + 0 +(1 row) + +SELECT distance_taxicab('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + distance_taxicab +------------------ + 0 +(1 row) + +-- coordinate access +SELECT cube(array[10,20,30], array[40,50,60])->1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])->1; + ?column? +---------- + 40 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])->6; + ?column? +---------- + 60 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])->0; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->7; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->-1; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->-6; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30])->3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[10,20,30])->6; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[10,20,30])->-6; +ERROR: Cube index out of bounds +-- "normalized" coordinate access +SELECT cube(array[10,20,30], array[40,50,60])~>1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])~>2; + ?column? +---------- + 20 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>2; + ?column? +---------- + 20 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])~>3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>0; +ERROR: Cube index out of bounds +SELECT cube(array[40,50,60], array[10,20,30])~>4; + ?column? +---------- + 40 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>(-1); +ERROR: Cube index out of bounds -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); @@ -1407,3 +1552,159 @@ SELECT * FROM test_cube WHERE c && '(3000,1000),(0,0)' GROUP BY c ORDER BY c; (2424, 160),(2424, 81) (5 rows) +-- kNN with index +SELECT *, c <-> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <-> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------------------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (948, 1201),(907, 1156) | 772.000647668122 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 +(5 rows) + +SELECT *, c <=> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <=> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (948, 1201),(907, 1156) | 656 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 +(5 rows) + +SELECT *, c <#> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <#> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 + (948, 1201),(907, 1156) | 1063 +(5 rows) + +-- kNN-based sorting +SELECT * FROM test_cube ORDER BY c~>1 LIMIT 15; -- ascending by 1st coordinate of lower left corner + c +--------------------------- + (54, 38679),(3, 38602) + (83, 10271),(15, 10265) + (122, 46832),(64, 46762) + (167, 17214),(92, 17184) + (161, 24465),(107, 24374) + (162, 26040),(120, 25963) + (154, 4019),(138, 3990) + (259, 1850),(175, 1820) + (207, 40886),(179, 40879) + (288, 49588),(204, 49571) + (270, 32616),(226, 32607) + (318, 31489),(235, 31404) + (337, 455),(240, 359) + (270, 29508),(264, 29440) + (369, 1457),(278, 1409) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>4 LIMIT 15; -- ascending by 2nd coordinate or upper right corner + c +--------------------------- + (30333, 50),(30273, 6) + (43301, 75),(43227, 43) + (19650, 142),(19630, 51) + (2424, 160),(2424, 81) + (3449, 171),(3354, 108) + (18037, 155),(17941, 109) + (28511, 208),(28479, 114) + (19946, 217),(19941, 118) + (16906, 191),(16816, 139) + (759, 187),(662, 163) + (22684, 266),(22656, 181) + (24423, 255),(24360, 213) + (45989, 249),(45910, 222) + (11399, 377),(11360, 294) + (12162, 389),(12103, 309) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>1 DESC LIMIT 15; -- descending by 1st coordinate of lower left corner + c +------------------------------- + (50027, 49230),(49951, 49214) + (49980, 35004),(49937, 34963) + (49985, 6436),(49927, 6338) + (49999, 27218),(49908, 27176) + (49954, 1340),(49905, 1294) + (49944, 25163),(49902, 25153) + (49981, 34876),(49898, 34786) + (49957, 43390),(49897, 43384) + (49853, 18504),(49848, 18503) + (49902, 41752),(49818, 41746) + (49907, 30225),(49810, 30158) + (49843, 5175),(49808, 5145) + (49887, 24274),(49805, 24184) + (49847, 7128),(49798, 7067) + (49820, 7990),(49771, 7967) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>4 DESC LIMIT 15; -- descending by 2nd coordinate or upper right corner + c +------------------------------- + (36311, 50073),(36258, 49987) + (30746, 50040),(30727, 49992) + (2168, 50012),(2108, 49914) + (21551, 49983),(21492, 49885) + (17954, 49975),(17865, 49915) + (3531, 49962),(3463, 49934) + (19128, 49932),(19112, 49849) + (31287, 49923),(31236, 49913) + (43925, 49912),(43888, 49878) + (29261, 49910),(29247, 49818) + (14913, 49873),(14849, 49836) + (20007, 49858),(19921, 49778) + (38266, 49852),(38233, 49844) + (37595, 49849),(37581, 49834) + (46151, 49848),(46058, 49830) +(15 rows) + +-- same thing for index with points +CREATE TABLE test_point(c cube); +INSERT INTO test_point(SELECT cube(array[c->1,c->2,c->3,c->4]) FROM test_cube); +CREATE INDEX ON test_point USING gist(c); +SELECT * FROM test_point ORDER BY c~>1, c~>2 LIMIT 15; -- ascending by 1st then by 2nd coordinate + c +-------------------------- + (54, 38679, 3, 38602) + (83, 10271, 15, 10265) + (122, 46832, 64, 46762) + (154, 4019, 138, 3990) + (161, 24465, 107, 24374) + (162, 26040, 120, 25963) + (167, 17214, 92, 17184) + (207, 40886, 179, 40879) + (259, 1850, 175, 1820) + (270, 29508, 264, 29440) + (270, 32616, 226, 32607) + (288, 49588, 204, 49571) + (318, 31489, 235, 31404) + (326, 18837, 285, 18817) + (337, 455, 240, 359) +(15 rows) + +SELECT * FROM test_point ORDER BY c~>4 DESC LIMIT 15; -- descending by 1st coordinate + c +------------------------------ + (30746, 50040, 30727, 49992) + (36311, 50073, 36258, 49987) + (3531, 49962, 3463, 49934) + (17954, 49975, 17865, 49915) + (2168, 50012, 2108, 49914) + (31287, 49923, 31236, 49913) + (21551, 49983, 21492, 49885) + (43925, 49912, 43888, 49878) + (19128, 49932, 19112, 49849) + (38266, 49852, 38233, 49844) + (14913, 49873, 14849, 49836) + (37595, 49849, 37581, 49834) + (46151, 49848, 46058, 49830) + (29261, 49910, 29247, 49818) + (19233, 49824, 19185, 49794) +(15 rows) + diff --git a/contrib/cube/expected/cube_3.out b/contrib/cube/expected/cube_3.out index 2aa42beb86..b6c961dcb9 100644 --- a/contrib/cube/expected/cube_3.out +++ b/contrib/cube/expected/cube_3.out @@ -1381,6 +1381,151 @@ SELECT cube_size('(42,137)'::cube); 0 (1 row) +-- Test of distances +-- +SELECT cube_distance('(1,1)'::cube, '(4,5)'::cube); + cube_distance +--------------- + 5 +(1 row) + +SELECT '(1,1)'::cube <-> '(4,5)'::cube as d_e; + d_e +----- + 5 +(1 row) + +SELECT distance_chebyshev('(1,1)'::cube, '(4,5)'::cube); + distance_chebyshev +-------------------- + 4 +(1 row) + +SELECT '(1,1)'::cube <=> '(4,5)'::cube as d_c; + d_c +----- + 4 +(1 row) + +SELECT distance_taxicab('(1,1)'::cube, '(4,5)'::cube); + distance_taxicab +------------------ + 7 +(1 row) + +SELECT '(1,1)'::cube <#> '(4,5)'::cube as d_t; + d_t +----- + 7 +(1 row) + +-- zero for overlapping +SELECT cube_distance('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + cube_distance +--------------- + 0 +(1 row) + +SELECT distance_chebyshev('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + distance_chebyshev +-------------------- + 0 +(1 row) + +SELECT distance_taxicab('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); + distance_taxicab +------------------ + 0 +(1 row) + +-- coordinate access +SELECT cube(array[10,20,30], array[40,50,60])->1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])->1; + ?column? +---------- + 40 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])->6; + ?column? +---------- + 60 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])->0; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->7; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->-1; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30], array[40,50,60])->-6; +ERROR: Cube index out of bounds +SELECT cube(array[10,20,30])->3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[10,20,30])->6; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[10,20,30])->-6; +ERROR: Cube index out of bounds +-- "normalized" coordinate access +SELECT cube(array[10,20,30], array[40,50,60])~>1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>1; + ?column? +---------- + 10 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])~>2; + ?column? +---------- + 20 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>2; + ?column? +---------- + 20 +(1 row) + +SELECT cube(array[10,20,30], array[40,50,60])~>3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>3; + ?column? +---------- + 30 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>0; +ERROR: Cube index out of bounds +SELECT cube(array[40,50,60], array[10,20,30])~>4; + ?column? +---------- + 40 +(1 row) + +SELECT cube(array[40,50,60], array[10,20,30])~>(-1); +ERROR: Cube index out of bounds -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); @@ -1407,3 +1552,159 @@ SELECT * FROM test_cube WHERE c && '(3000,1000),(0,0)' GROUP BY c ORDER BY c; (2424, 160),(2424, 81) (5 rows) +-- kNN with index +SELECT *, c <-> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <-> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------------------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (948, 1201),(907, 1156) | 772.000647668122 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 +(5 rows) + +SELECT *, c <=> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <=> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (948, 1201),(907, 1156) | 656 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 +(5 rows) + +SELECT *, c <#> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <#> '(100, 100),(500, 500)'::cube LIMIT 5; + c | dist +-------------------------+------ + (337, 455),(240, 359) | 0 + (759, 187),(662, 163) | 162 + (1444, 403),(1346, 344) | 846 + (369, 1457),(278, 1409) | 909 + (948, 1201),(907, 1156) | 1063 +(5 rows) + +-- kNN-based sorting +SELECT * FROM test_cube ORDER BY c~>1 LIMIT 15; -- ascending by 1st coordinate of lower left corner + c +--------------------------- + (54, 38679),(3, 38602) + (83, 10271),(15, 10265) + (122, 46832),(64, 46762) + (167, 17214),(92, 17184) + (161, 24465),(107, 24374) + (162, 26040),(120, 25963) + (154, 4019),(138, 3990) + (259, 1850),(175, 1820) + (207, 40886),(179, 40879) + (288, 49588),(204, 49571) + (270, 32616),(226, 32607) + (318, 31489),(235, 31404) + (337, 455),(240, 359) + (270, 29508),(264, 29440) + (369, 1457),(278, 1409) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>4 LIMIT 15; -- ascending by 2nd coordinate or upper right corner + c +--------------------------- + (30333, 50),(30273, 6) + (43301, 75),(43227, 43) + (19650, 142),(19630, 51) + (2424, 160),(2424, 81) + (3449, 171),(3354, 108) + (18037, 155),(17941, 109) + (28511, 208),(28479, 114) + (19946, 217),(19941, 118) + (16906, 191),(16816, 139) + (759, 187),(662, 163) + (22684, 266),(22656, 181) + (24423, 255),(24360, 213) + (45989, 249),(45910, 222) + (11399, 377),(11360, 294) + (12162, 389),(12103, 309) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>1 DESC LIMIT 15; -- descending by 1st coordinate of lower left corner + c +------------------------------- + (50027, 49230),(49951, 49214) + (49980, 35004),(49937, 34963) + (49985, 6436),(49927, 6338) + (49999, 27218),(49908, 27176) + (49954, 1340),(49905, 1294) + (49944, 25163),(49902, 25153) + (49981, 34876),(49898, 34786) + (49957, 43390),(49897, 43384) + (49853, 18504),(49848, 18503) + (49902, 41752),(49818, 41746) + (49907, 30225),(49810, 30158) + (49843, 5175),(49808, 5145) + (49887, 24274),(49805, 24184) + (49847, 7128),(49798, 7067) + (49820, 7990),(49771, 7967) +(15 rows) + +SELECT * FROM test_cube ORDER BY c~>4 DESC LIMIT 15; -- descending by 2nd coordinate or upper right corner + c +------------------------------- + (36311, 50073),(36258, 49987) + (30746, 50040),(30727, 49992) + (2168, 50012),(2108, 49914) + (21551, 49983),(21492, 49885) + (17954, 49975),(17865, 49915) + (3531, 49962),(3463, 49934) + (19128, 49932),(19112, 49849) + (31287, 49923),(31236, 49913) + (43925, 49912),(43888, 49878) + (29261, 49910),(29247, 49818) + (14913, 49873),(14849, 49836) + (20007, 49858),(19921, 49778) + (38266, 49852),(38233, 49844) + (37595, 49849),(37581, 49834) + (46151, 49848),(46058, 49830) +(15 rows) + +-- same thing for index with points +CREATE TABLE test_point(c cube); +INSERT INTO test_point(SELECT cube(array[c->1,c->2,c->3,c->4]) FROM test_cube); +CREATE INDEX ON test_point USING gist(c); +SELECT * FROM test_point ORDER BY c~>1, c~>2 LIMIT 15; -- ascending by 1st then by 2nd coordinate + c +-------------------------- + (54, 38679, 3, 38602) + (83, 10271, 15, 10265) + (122, 46832, 64, 46762) + (154, 4019, 138, 3990) + (161, 24465, 107, 24374) + (162, 26040, 120, 25963) + (167, 17214, 92, 17184) + (207, 40886, 179, 40879) + (259, 1850, 175, 1820) + (270, 29508, 264, 29440) + (270, 32616, 226, 32607) + (288, 49588, 204, 49571) + (318, 31489, 235, 31404) + (326, 18837, 285, 18817) + (337, 455, 240, 359) +(15 rows) + +SELECT * FROM test_point ORDER BY c~>4 DESC LIMIT 15; -- descending by 1st coordinate + c +------------------------------ + (30746, 50040, 30727, 49992) + (36311, 50073, 36258, 49987) + (3531, 49962, 3463, 49934) + (17954, 49975, 17865, 49915) + (2168, 50012, 2108, 49914) + (31287, 49923, 31236, 49913) + (21551, 49983, 21492, 49885) + (43925, 49912, 43888, 49878) + (19128, 49932, 19112, 49849) + (38266, 49852, 38233, 49844) + (14913, 49873, 14849, 49836) + (37595, 49849, 37581, 49834) + (46151, 49848, 46058, 49830) + (29261, 49910, 29247, 49818) + (19233, 49824, 19185, 49794) +(15 rows) + diff --git a/contrib/cube/sql/cube.sql b/contrib/cube/sql/cube.sql index d58974c408..e225fb7da1 100644 --- a/contrib/cube/sql/cube.sql +++ b/contrib/cube/sql/cube.sql @@ -325,6 +325,41 @@ SELECT cube_inter('(1,2,3)'::cube, '(5,6,3)'::cube); -- point args SELECT cube_size('(4,8),(15,16)'::cube); SELECT cube_size('(42,137)'::cube); +-- Test of distances +-- +SELECT cube_distance('(1,1)'::cube, '(4,5)'::cube); +SELECT '(1,1)'::cube <-> '(4,5)'::cube as d_e; +SELECT distance_chebyshev('(1,1)'::cube, '(4,5)'::cube); +SELECT '(1,1)'::cube <=> '(4,5)'::cube as d_c; +SELECT distance_taxicab('(1,1)'::cube, '(4,5)'::cube); +SELECT '(1,1)'::cube <#> '(4,5)'::cube as d_t; +-- zero for overlapping +SELECT cube_distance('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); +SELECT distance_chebyshev('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); +SELECT distance_taxicab('(2,2),(10,10)'::cube, '(0,0),(5,5)'::cube); +-- coordinate access +SELECT cube(array[10,20,30], array[40,50,60])->1; +SELECT cube(array[40,50,60], array[10,20,30])->1; +SELECT cube(array[10,20,30], array[40,50,60])->6; +SELECT cube(array[10,20,30], array[40,50,60])->0; +SELECT cube(array[10,20,30], array[40,50,60])->7; +SELECT cube(array[10,20,30], array[40,50,60])->-1; +SELECT cube(array[10,20,30], array[40,50,60])->-6; +SELECT cube(array[10,20,30])->3; +SELECT cube(array[10,20,30])->6; +SELECT cube(array[10,20,30])->-6; +-- "normalized" coordinate access +SELECT cube(array[10,20,30], array[40,50,60])~>1; +SELECT cube(array[40,50,60], array[10,20,30])~>1; +SELECT cube(array[10,20,30], array[40,50,60])~>2; +SELECT cube(array[40,50,60], array[10,20,30])~>2; +SELECT cube(array[10,20,30], array[40,50,60])~>3; +SELECT cube(array[40,50,60], array[10,20,30])~>3; + +SELECT cube(array[40,50,60], array[10,20,30])~>0; +SELECT cube(array[40,50,60], array[10,20,30])~>4; +SELECT cube(array[40,50,60], array[10,20,30])~>(-1); + -- Load some example data and build the index -- CREATE TABLE test_cube (c cube); @@ -336,3 +371,21 @@ SELECT * FROM test_cube WHERE c && '(3000,1000),(0,0)' ORDER BY c; -- Test sorting SELECT * FROM test_cube WHERE c && '(3000,1000),(0,0)' GROUP BY c ORDER BY c; + +-- kNN with index +SELECT *, c <-> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <-> '(100, 100),(500, 500)'::cube LIMIT 5; +SELECT *, c <=> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <=> '(100, 100),(500, 500)'::cube LIMIT 5; +SELECT *, c <#> '(100, 100),(500, 500)'::cube as dist FROM test_cube ORDER BY c <#> '(100, 100),(500, 500)'::cube LIMIT 5; + +-- kNN-based sorting +SELECT * FROM test_cube ORDER BY c~>1 LIMIT 15; -- ascending by 1st coordinate of lower left corner +SELECT * FROM test_cube ORDER BY c~>4 LIMIT 15; -- ascending by 2nd coordinate or upper right corner +SELECT * FROM test_cube ORDER BY c~>1 DESC LIMIT 15; -- descending by 1st coordinate of lower left corner +SELECT * FROM test_cube ORDER BY c~>4 DESC LIMIT 15; -- descending by 2nd coordinate or upper right corner + +-- same thing for index with points +CREATE TABLE test_point(c cube); +INSERT INTO test_point(SELECT cube(array[c->1,c->2,c->3,c->4]) FROM test_cube); +CREATE INDEX ON test_point USING gist(c); +SELECT * FROM test_point ORDER BY c~>1, c~>2 LIMIT 15; -- ascending by 1st then by 2nd coordinate +SELECT * FROM test_point ORDER BY c~>4 DESC LIMIT 15; -- descending by 1st coordinate diff --git a/doc/src/sgml/cube.sgml b/doc/src/sgml/cube.sgml index 0a226ca542..666400800c 100644 --- a/doc/src/sgml/cube.sgml +++ b/doc/src/sgml/cube.sgml @@ -75,6 +75,8 @@ entered in. The cube functions automatically swap values if needed to create a uniform lower left — upper right internal representation. + When corners coincide cube stores only one corner along with a + special flag in order to reduce size wasted. @@ -131,6 +133,19 @@ a <@ b The cube a is contained in the cube b. + + + a -> n + Get n-th coordinate of cube. + + + + a ~> n + + Get n-th coordinate in 'normalized' cube representation. Noramlization + means coordinate rearrangement to form (lower left, upper right). + + @@ -143,6 +158,87 @@ data types!) + + GiST index can be used to retrieve nearest neighbours via several metric + operators. As always any of them can be used as ordinary function. + + + + Cube GiST-kNN Operators + + + + Operator + Description + + + + + a <-> b + Euclidean distance between a and b + + + + a <#> b + Taxicab (L-1 metric) distance between a and b + + + + a <=> b + Chebyshev (L-inf metric) distance between a and b + + + +
+ + + Selection of nearing neigbours can be done in the following way: + + +SELECT c FROM test +ORDER BY cube(array[0.5,0.5,0.5])<->c +LIMIT 1; + + + + + Also kNN framework allows us to cheat with metrics in order to get results + sorted by selected coodinate directly from the index without extra sorting + step. That technique significantly faster on small values of LIMIT, however + with bigger values of LIMIT planner will switch automatically to standart + index scan and sort. + That behavior can be achieved using coordinate operator + (cube c)~>(int offset). + + +=> select cube(array[0.41,0.42,0.43])~>2 as coord; + coord +------- + 0.42 +(1 row) + + + + So using that operator as kNN metric we can obtain cubes sorted by it's + coordinate. + + + To get cubes ordered by first coordinate of lower left corner ascending + one can use the following query: + + +SELECT c FROM test ORDER BY c~>1 LIMIT 5; + + + And to get cubes descending by first coordinate of upper right corner + of 2d-cube: + + +SELECT c FROM test ORDER BY c~>3 DESC LIMIT 5; + + + + The standard B-tree operators are also provided, for example -- 2.40.0