From 8224de4f42ccf98e08db07b43d52fed72f962ebb Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Sat, 7 Apr 2018 23:00:39 +0300 Subject: [PATCH] Indexes with INCLUDE columns and their support in B-tree This patch introduces INCLUDE clause to index definition. This clause specifies a list of columns which will be included as a non-key part in the index. The INCLUDE columns exist solely to allow more queries to benefit from index-only scans. Also, such columns don't need to have appropriate operator classes. Expressions are not supported as INCLUDE columns since they cannot be used in index-only scans. Index access methods supporting INCLUDE are indicated by amcaninclude flag in IndexAmRoutine. For now, only B-tree indexes support INCLUDE clause. In B-tree indexes INCLUDE columns are truncated from pivot index tuples (tuples located in non-leaf pages and high keys). Therefore, B-tree indexes now might have variable number of attributes. This patch also provides generic facility to support that: pivot tuples contain number of their attributes in t_tid.ip_posid. Free 13th bit of t_info is used for indicating that. This facility will simplify further support of index suffix truncation. The changes of above are backward-compatible, pg_upgrade doesn't need special handling of B-tree indexes for that. Bump catalog version Author: Anastasia Lubennikova with contribition by Alexander Korotkov and me Reviewed by: Peter Geoghegan, Tomas Vondra, Antonin Houska, Jeff Janes, David Rowley, Alexander Korotkov Discussion: https://www.postgresql.org/message-id/flat/56168952.4010101@postgrespro.ru --- contrib/amcheck/expected/check_btree.out | 46 +++ contrib/amcheck/sql/check_btree.sql | 19 + contrib/amcheck/verify_nbtree.c | 98 ++++- contrib/bloom/blutils.c | 1 + contrib/dblink/dblink.c | 26 +- contrib/dblink/expected/dblink.out | 55 +++ contrib/dblink/sql/dblink.sql | 38 ++ contrib/tcn/tcn.c | 6 +- doc/src/sgml/btree.sgml | 17 + doc/src/sgml/catalogs.sgml | 12 +- doc/src/sgml/indexam.sgml | 5 +- doc/src/sgml/indices.sgml | 7 +- doc/src/sgml/ref/create_index.sgml | 62 +++- doc/src/sgml/ref/create_table.sgml | 33 +- src/backend/access/brin/brin.c | 1 + src/backend/access/common/indextuple.c | 31 ++ src/backend/access/gin/ginutil.c | 1 + src/backend/access/gist/gist.c | 1 + src/backend/access/hash/hash.c | 1 + src/backend/access/heap/heapam.c | 4 +- src/backend/access/index/genam.c | 19 +- src/backend/access/nbtree/README | 17 + src/backend/access/nbtree/nbtinsert.c | 119 ++++-- src/backend/access/nbtree/nbtpage.c | 23 +- src/backend/access/nbtree/nbtree.c | 1 + src/backend/access/nbtree/nbtsearch.c | 63 +++- src/backend/access/nbtree/nbtsort.c | 52 ++- src/backend/access/nbtree/nbtutils.c | 54 ++- src/backend/access/nbtree/nbtxlog.c | 34 +- src/backend/access/rmgrdesc/nbtdesc.c | 8 + src/backend/access/spgist/spgutils.c | 1 + src/backend/bootstrap/bootparse.y | 2 + src/backend/bootstrap/bootstrap.c | 2 +- src/backend/catalog/heap.c | 3 +- src/backend/catalog/index.c | 86 +++-- src/backend/catalog/indexing.c | 1 + src/backend/catalog/pg_constraint.c | 27 +- src/backend/catalog/toasting.c | 1 + src/backend/commands/indexcmds.c | 63 +++- src/backend/commands/matview.c | 6 +- src/backend/commands/tablecmds.c | 9 +- src/backend/commands/trigger.c | 1 + src/backend/commands/typecmds.c | 1 + src/backend/executor/execIndexing.c | 14 +- src/backend/executor/execReplication.c | 6 +- src/backend/executor/nodeIndexscan.c | 8 +- src/backend/nodes/copyfuncs.c | 2 + src/backend/nodes/equalfuncs.c | 2 + src/backend/nodes/outfuncs.c | 4 + src/backend/optimizer/README | 10 +- src/backend/optimizer/path/indxpath.c | 2 +- src/backend/optimizer/path/pathkeys.c | 13 +- src/backend/optimizer/util/plancat.c | 33 +- src/backend/parser/analyze.c | 6 +- src/backend/parser/gram.y | 65 ++-- src/backend/parser/parse_relation.c | 2 +- src/backend/parser/parse_target.c | 3 +- src/backend/parser/parse_utilcmd.c | 340 ++++++++++++----- src/backend/utils/adt/ruleutils.c | 31 ++ src/backend/utils/adt/selfuncs.c | 4 +- src/backend/utils/cache/relcache.c | 87 +++-- src/backend/utils/sort/tuplesort.c | 5 +- src/bin/pg_dump/pg_dump.c | 40 +- src/bin/pg_dump/pg_dump.h | 6 +- src/include/access/amapi.h | 2 + src/include/access/hash.h | 2 +- src/include/access/itup.h | 7 +- src/include/access/nbtree.h | 91 +++-- src/include/access/nbtxlog.h | 12 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_constraint.h | 23 +- src/include/catalog/pg_constraint_fn.h | 1 + src/include/catalog/pg_index.h | 38 +- src/include/nodes/execnodes.h | 9 +- src/include/nodes/parsenodes.h | 7 +- src/include/nodes/relation.h | 13 +- src/include/parser/kwlist.h | 1 + src/include/utils/rel.h | 16 +- .../specs/insert-conflict-do-nothing-2.spec | 2 +- .../specs/insert-conflict-do-update-2.spec | 2 +- .../specs/lock-committed-keyupdate.spec | 2 +- .../specs/lock-update-traversal.spec | 5 +- src/test/regress/expected/create_index.out | 19 + src/test/regress/expected/index_including.out | 346 ++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/create_index.sql | 20 + src/test/regress/sql/index_including.sql | 203 ++++++++++ src/test/subscription/t/001_rep_changes.pl | 19 +- 89 files changed, 2115 insertions(+), 470 deletions(-) create mode 100644 src/test/regress/expected/index_including.out create mode 100644 src/test/regress/sql/index_including.sql diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out index 6f5b91754d..2a06cce9a0 100644 --- a/contrib/amcheck/expected/check_btree.out +++ b/contrib/amcheck/expected/check_btree.out @@ -1,10 +1,14 @@ -- minimal test, basically just verifying that amcheck CREATE TABLE bttest_a(id int8); CREATE TABLE bttest_b(id int8); +CREATE TABLE bttest_multi(id int8, data int8); INSERT INTO bttest_a SELECT * FROM generate_series(1, 100000); INSERT INTO bttest_b SELECT * FROM generate_series(100000, 1, -1); +INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i; CREATE INDEX bttest_a_idx ON bttest_a USING btree (id); CREATE INDEX bttest_b_idx ON bttest_b USING btree (id); +CREATE UNIQUE INDEX bttest_multi_idx ON bttest_multi +USING btree (id) INCLUDE (data); CREATE ROLE bttest_role; -- verify permissions are checked (error due to function not callable) SET ROLE bttest_role; @@ -93,8 +97,50 @@ WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx (0 rows) COMMIT; +-- normal check outside of xact for index with included columns +SELECT bt_index_check('bttest_multi_idx'); + bt_index_check +---------------- + +(1 row) + +-- more expansive test for index with included columns +SELECT bt_index_parent_check('bttest_multi_idx', true); + bt_index_parent_check +----------------------- + +(1 row) + +SELECT bt_index_parent_check('bttest_multi_idx', true); + bt_index_parent_check +----------------------- + +(1 row) + +-- repeat same checks with index made by insertions +TRUNCATE bttest_multi; +INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i; +SELECT bt_index_check('bttest_multi_idx'); + bt_index_check +---------------- + +(1 row) + +SELECT bt_index_parent_check('bttest_multi_idx', true); + bt_index_parent_check +----------------------- + +(1 row) + +SELECT bt_index_parent_check('bttest_multi_idx', true); + bt_index_parent_check +----------------------- + +(1 row) + -- cleanup DROP TABLE bttest_a; DROP TABLE bttest_b; +DROP TABLE bttest_multi; DROP OWNED BY bttest_role; -- permissions DROP ROLE bttest_role; diff --git a/contrib/amcheck/sql/check_btree.sql b/contrib/amcheck/sql/check_btree.sql index 03f4c96b9e..da2f1314e5 100644 --- a/contrib/amcheck/sql/check_btree.sql +++ b/contrib/amcheck/sql/check_btree.sql @@ -1,12 +1,16 @@ -- minimal test, basically just verifying that amcheck CREATE TABLE bttest_a(id int8); CREATE TABLE bttest_b(id int8); +CREATE TABLE bttest_multi(id int8, data int8); INSERT INTO bttest_a SELECT * FROM generate_series(1, 100000); INSERT INTO bttest_b SELECT * FROM generate_series(100000, 1, -1); +INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i; CREATE INDEX bttest_a_idx ON bttest_a USING btree (id); CREATE INDEX bttest_b_idx ON bttest_b USING btree (id); +CREATE UNIQUE INDEX bttest_multi_idx ON bttest_multi +USING btree (id) INCLUDE (data); CREATE ROLE bttest_role; @@ -57,8 +61,23 @@ WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx AND pid = pg_backend_pid(); COMMIT; +-- normal check outside of xact for index with included columns +SELECT bt_index_check('bttest_multi_idx'); +-- more expansive test for index with included columns +SELECT bt_index_parent_check('bttest_multi_idx', true); +SELECT bt_index_parent_check('bttest_multi_idx', true); + +-- repeat same checks with index made by insertions +TRUNCATE bttest_multi; +INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i; +SELECT bt_index_check('bttest_multi_idx'); +SELECT bt_index_parent_check('bttest_multi_idx', true); +SELECT bt_index_parent_check('bttest_multi_idx', true); + + -- cleanup DROP TABLE bttest_a; DROP TABLE bttest_b; +DROP TABLE bttest_multi; DROP OWNED BY bttest_role; -- permissions DROP ROLE bttest_role; diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 52aa633056..be0206d58e 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -617,7 +617,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level) /* Internal page -- downlink gets leftmost on next level */ itemid = PageGetItemId(state->target, P_FIRSTDATAKEY(opaque)); itup = (IndexTuple) PageGetItem(state->target, itemid); - nextleveldown.leftmost = ItemPointerGetBlockNumber(&(itup->t_tid)); + nextleveldown.leftmost = ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)); nextleveldown.level = opaque->btpo.level - 1; } else @@ -722,6 +722,39 @@ bt_target_page_check(BtreeCheckState *state) elog(DEBUG2, "verifying %u items on %s block %u", max, P_ISLEAF(topaque) ? "leaf" : "internal", state->targetblock); + + /* Check the number of attributes in high key if any */ + if (!P_RIGHTMOST(topaque)) + { + if (!_bt_check_natts(state->rel, state->target, P_HIKEY)) + { + ItemId itemid; + IndexTuple itup; + char *itid, + *htid; + + itemid = PageGetItemId(state->target, P_HIKEY); + itup = (IndexTuple) PageGetItem(state->target, itemid); + itid = psprintf("(%u,%u)", state->targetblock, P_HIKEY); + htid = psprintf("(%u,%u)", + ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), + ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); + + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("wrong number of index tuple attributes for index \"%s\"", + RelationGetRelationName(state->rel)), + errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.", + itid, + BTreeTupGetNAtts(itup, state->rel), + P_ISLEAF(topaque) ? "heap" : "index", + htid, + (uint32) (state->targetlsn >> 32), + (uint32) state->targetlsn))); + } + } + + /* * Loop over page items, starting from first non-highkey item, not high * key (if any). Also, immediately skip "negative infinity" real item (if @@ -760,6 +793,30 @@ bt_target_page_check(BtreeCheckState *state) (uint32) state->targetlsn), errhint("This could be a torn page problem"))); + /* Check the number of index tuple attributes */ + if (!_bt_check_natts(state->rel, state->target, offset)) + { + char *itid, + *htid; + + itid = psprintf("(%u,%u)", state->targetblock, offset); + htid = psprintf("(%u,%u)", + ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), + ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); + + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("wrong number of index tuple attributes for index \"%s\"", + RelationGetRelationName(state->rel)), + errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.", + itid, + BTreeTupGetNAtts(itup, state->rel), + P_ISLEAF(topaque) ? "heap" : "index", + htid, + (uint32) (state->targetlsn >> 32), + (uint32) state->targetlsn))); + } + /* * Don't try to generate scankey using "negative infinity" garbage * data on internal pages @@ -802,8 +859,8 @@ bt_target_page_check(BtreeCheckState *state) itid = psprintf("(%u,%u)", state->targetblock, offset); htid = psprintf("(%u,%u)", - ItemPointerGetBlockNumber(&(itup->t_tid)), - ItemPointerGetOffsetNumber(&(itup->t_tid))); + ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), + ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -834,8 +891,8 @@ bt_target_page_check(BtreeCheckState *state) itid = psprintf("(%u,%u)", state->targetblock, offset); htid = psprintf("(%u,%u)", - ItemPointerGetBlockNumber(&(itup->t_tid)), - ItemPointerGetOffsetNumber(&(itup->t_tid))); + ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), + ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); nitid = psprintf("(%u,%u)", state->targetblock, OffsetNumberNext(offset)); @@ -843,8 +900,8 @@ bt_target_page_check(BtreeCheckState *state) itemid = PageGetItemId(state->target, OffsetNumberNext(offset)); itup = (IndexTuple) PageGetItem(state->target, itemid); nhtid = psprintf("(%u,%u)", - ItemPointerGetBlockNumber(&(itup->t_tid)), - ItemPointerGetOffsetNumber(&(itup->t_tid))); + ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), + ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -932,7 +989,7 @@ bt_target_page_check(BtreeCheckState *state) */ if (!P_ISLEAF(topaque) && state->readonly) { - BlockNumber childblock = ItemPointerGetBlockNumber(&(itup->t_tid)); + BlockNumber childblock = ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)); bt_downlink_check(state, childblock, skey); } @@ -1326,6 +1383,11 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, * or otherwise varied when or how compression was applied, our assumption * would break, leading to false positive reports of corruption. For now, * we don't decompress/normalize toasted values as part of fingerprinting. + * + * In future, non-pivot index tuples might get use of + * BT_N_KEYS_OFFSET_MASK. Then binary representation of index tuple linked + * to particular heap tuple might vary and meeds to be normalized before + * bloom filter lookup. */ itup = index_form_tuple(RelationGetDescr(index), values, isnull); itup->t_tid = htup->t_self; @@ -1336,8 +1398,8 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"", - ItemPointerGetBlockNumber(&(itup->t_tid)), - ItemPointerGetOffsetNumber(&(itup->t_tid)), + ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), + ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)), RelationGetRelationName(state->heaprel), RelationGetRelationName(state->rel)), !state->readonly @@ -1368,6 +1430,10 @@ offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset) * infinity item is either first or second line item, or there is none * within page. * + * "Negative infinity" tuple is a special corner case of pivot tuples, + * it has zero attributes while rest of pivot tuples have nkeyatts number + * of attributes. + * * Right-most pages don't have a high key, but could be said to * conceptually have a "positive infinity" high key. Thus, there is a * symmetry between down link items in parent pages, and high keys in @@ -1391,10 +1457,10 @@ static inline bool invariant_leq_offset(BtreeCheckState *state, ScanKey key, OffsetNumber upperbound) { - int16 natts = state->rel->rd_rel->relnatts; + int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel); int32 cmp; - cmp = _bt_compare(state->rel, natts, key, state->target, upperbound); + cmp = _bt_compare(state->rel, nkeyatts, key, state->target, upperbound); return cmp <= 0; } @@ -1410,10 +1476,10 @@ static inline bool invariant_geq_offset(BtreeCheckState *state, ScanKey key, OffsetNumber lowerbound) { - int16 natts = state->rel->rd_rel->relnatts; + int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel); int32 cmp; - cmp = _bt_compare(state->rel, natts, key, state->target, lowerbound); + cmp = _bt_compare(state->rel, nkeyatts, key, state->target, lowerbound); return cmp >= 0; } @@ -1433,10 +1499,10 @@ invariant_leq_nontarget_offset(BtreeCheckState *state, Page nontarget, ScanKey key, OffsetNumber upperbound) { - int16 natts = state->rel->rd_rel->relnatts; + int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel); int32 cmp; - cmp = _bt_compare(state->rel, natts, key, nontarget, upperbound); + cmp = _bt_compare(state->rel, nkeyatts, key, nontarget, upperbound); return cmp <= 0; } diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index bbe7183207..6b2b9e3742 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -120,6 +120,7 @@ blhandler(PG_FUNCTION_ARGS) amroutine->amclusterable = false; amroutine->ampredlocks = false; amroutine->amcanparallel = false; + amroutine->amcaninclude = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = blbuild; diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index 8e5af5a62f..c646068848 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -100,7 +100,7 @@ static remoteConn *getConnectionByName(const char *name); static HTAB *createConnHash(void); static void createNewConnection(const char *name, remoteConn *rconn); static void deleteConnection(const char *name); -static char **get_pkey_attnames(Relation rel, int16 *numatts); +static char **get_pkey_attnames(Relation rel, int16 *indnkeyatts); static char **get_text_array_contents(ArrayType *array, int *numitems); static char *get_sql_insert(Relation rel, int *pkattnums, int pknumatts, char **src_pkattvals, char **tgt_pkattvals); static char *get_sql_delete(Relation rel, int *pkattnums, int pknumatts, char **tgt_pkattvals); @@ -1493,7 +1493,7 @@ PG_FUNCTION_INFO_V1(dblink_get_pkey); Datum dblink_get_pkey(PG_FUNCTION_ARGS) { - int16 numatts; + int16 indnkeyatts; char **results; FuncCallContext *funcctx; int32 call_cntr; @@ -1519,7 +1519,7 @@ dblink_get_pkey(PG_FUNCTION_ARGS) rel = get_rel_from_relname(PG_GETARG_TEXT_PP(0), AccessShareLock, ACL_SELECT); /* get the array of attnums */ - results = get_pkey_attnames(rel, &numatts); + results = get_pkey_attnames(rel, &indnkeyatts); relation_close(rel, AccessShareLock); @@ -1539,9 +1539,9 @@ dblink_get_pkey(PG_FUNCTION_ARGS) attinmeta = TupleDescGetAttInMetadata(tupdesc); funcctx->attinmeta = attinmeta; - if ((results != NULL) && (numatts > 0)) + if ((results != NULL) && (indnkeyatts > 0)) { - funcctx->max_calls = numatts; + funcctx->max_calls = indnkeyatts; /* got results, keep track of them */ funcctx->user_fctx = results; @@ -2029,10 +2029,10 @@ dblink_fdw_validator(PG_FUNCTION_ARGS) * get_pkey_attnames * * Get the primary key attnames for the given relation. - * Return NULL, and set numatts = 0, if no primary key exists. + * Return NULL, and set indnkeyatts = 0, if no primary key exists. */ static char ** -get_pkey_attnames(Relation rel, int16 *numatts) +get_pkey_attnames(Relation rel, int16 *indnkeyatts) { Relation indexRelation; ScanKeyData skey; @@ -2042,8 +2042,8 @@ get_pkey_attnames(Relation rel, int16 *numatts) char **result = NULL; TupleDesc tupdesc; - /* initialize numatts to 0 in case no primary key exists */ - *numatts = 0; + /* initialize indnkeyatts to 0 in case no primary key exists */ + *indnkeyatts = 0; tupdesc = rel->rd_att; @@ -2064,12 +2064,12 @@ get_pkey_attnames(Relation rel, int16 *numatts) /* we're only interested if it is the primary key */ if (index->indisprimary) { - *numatts = index->indnatts; - if (*numatts > 0) + *indnkeyatts = index->indnkeyatts; + if (*indnkeyatts > 0) { - result = (char **) palloc(*numatts * sizeof(char *)); + result = (char **) palloc(*indnkeyatts * sizeof(char *)); - for (i = 0; i < *numatts; i++) + for (i = 0; i < *indnkeyatts; i++) result[i] = SPI_fname(tupdesc, index->indkey.values[i]); } break; diff --git a/contrib/dblink/expected/dblink.out b/contrib/dblink/expected/dblink.out index dbcc6b08db..dfd49b937e 100644 --- a/contrib/dblink/expected/dblink.out +++ b/contrib/dblink/expected/dblink.out @@ -54,6 +54,61 @@ SELECT dblink_build_sql_delete('foo','1 2',2,'{"0", "a"}'); -- too many pk fields, should fail SELECT dblink_build_sql_delete('foo','1 2 3 4',4,'{"0", "a", "{a0,b0,c0}"}'); ERROR: invalid attribute number 4 +-- repeat the test for table with primary key index with included columns +CREATE TABLE foo_1(f1 int, f2 text, f3 text[], primary key (f1,f2) include (f3)); +INSERT INTO foo_1 VALUES (0,'a','{"a0","b0","c0"}'); +INSERT INTO foo_1 VALUES (1,'b','{"a1","b1","c1"}'); +INSERT INTO foo_1 VALUES (2,'c','{"a2","b2","c2"}'); +INSERT INTO foo_1 VALUES (3,'d','{"a3","b3","c3"}'); +INSERT INTO foo_1 VALUES (4,'e','{"a4","b4","c4"}'); +INSERT INTO foo_1 VALUES (5,'f','{"a5","b5","c5"}'); +INSERT INTO foo_1 VALUES (6,'g','{"a6","b6","c6"}'); +INSERT INTO foo_1 VALUES (7,'h','{"a7","b7","c7"}'); +INSERT INTO foo_1 VALUES (8,'i','{"a8","b8","c8"}'); +INSERT INTO foo_1 VALUES (9,'j','{"a9","b9","c9"}'); +-- misc utilities +-- list the primary key fields +SELECT * +FROM dblink_get_pkey('foo_1'); + position | colname +----------+--------- + 1 | f1 + 2 | f2 +(2 rows) + +-- build an insert statement based on a local tuple, +-- replacing the primary key values with new ones +SELECT dblink_build_sql_insert('foo_1','1 2',2,'{"0", "a"}','{"99", "xyz"}'); + dblink_build_sql_insert +------------------------------------------------------------- + INSERT INTO foo_1(f1,f2,f3) VALUES('99','xyz','{a0,b0,c0}') +(1 row) + +-- too many pk fields, should fail +SELECT dblink_build_sql_insert('foo_1','1 2 3 4',4,'{"0", "a", "{a0,b0,c0}"}','{"99", "xyz", "{za0,zb0,zc0}"}'); +ERROR: invalid attribute number 4 +-- build an update statement based on a local tuple, +-- replacing the primary key values with new ones +SELECT dblink_build_sql_update('foo_1','1 2',2,'{"0", "a"}','{"99", "xyz"}'); + dblink_build_sql_update +------------------------------------------------------------------------------------------ + UPDATE foo_1 SET f1 = '99', f2 = 'xyz', f3 = '{a0,b0,c0}' WHERE f1 = '99' AND f2 = 'xyz' +(1 row) + +-- too many pk fields, should fail +SELECT dblink_build_sql_update('foo_1','1 2 3 4',4,'{"0", "a", "{a0,b0,c0}"}','{"99", "xyz", "{za0,zb0,zc0}"}'); +ERROR: invalid attribute number 4 +-- build a delete statement based on a local tuple, +SELECT dblink_build_sql_delete('foo_1','1 2',2,'{"0", "a"}'); + dblink_build_sql_delete +----------------------------------------------- + DELETE FROM foo_1 WHERE f1 = '0' AND f2 = 'a' +(1 row) + +-- too many pk fields, should fail +SELECT dblink_build_sql_delete('foo_1','1 2 3 4',4,'{"0", "a", "{a0,b0,c0}"}'); +ERROR: invalid attribute number 4 +DROP TABLE foo_1; -- retest using a quoted and schema qualified table CREATE SCHEMA "MySchema"; CREATE TABLE "MySchema"."Foo"(f1 int, f2 text, f3 text[], primary key (f1,f2)); diff --git a/contrib/dblink/sql/dblink.sql b/contrib/dblink/sql/dblink.sql index b093fa6722..3e96b98571 100644 --- a/contrib/dblink/sql/dblink.sql +++ b/contrib/dblink/sql/dblink.sql @@ -38,6 +38,44 @@ SELECT dblink_build_sql_delete('foo','1 2',2,'{"0", "a"}'); -- too many pk fields, should fail SELECT dblink_build_sql_delete('foo','1 2 3 4',4,'{"0", "a", "{a0,b0,c0}"}'); +-- repeat the test for table with primary key index with included columns +CREATE TABLE foo_1(f1 int, f2 text, f3 text[], primary key (f1,f2) include (f3)); +INSERT INTO foo_1 VALUES (0,'a','{"a0","b0","c0"}'); +INSERT INTO foo_1 VALUES (1,'b','{"a1","b1","c1"}'); +INSERT INTO foo_1 VALUES (2,'c','{"a2","b2","c2"}'); +INSERT INTO foo_1 VALUES (3,'d','{"a3","b3","c3"}'); +INSERT INTO foo_1 VALUES (4,'e','{"a4","b4","c4"}'); +INSERT INTO foo_1 VALUES (5,'f','{"a5","b5","c5"}'); +INSERT INTO foo_1 VALUES (6,'g','{"a6","b6","c6"}'); +INSERT INTO foo_1 VALUES (7,'h','{"a7","b7","c7"}'); +INSERT INTO foo_1 VALUES (8,'i','{"a8","b8","c8"}'); +INSERT INTO foo_1 VALUES (9,'j','{"a9","b9","c9"}'); + +-- misc utilities + +-- list the primary key fields +SELECT * +FROM dblink_get_pkey('foo_1'); + +-- build an insert statement based on a local tuple, +-- replacing the primary key values with new ones +SELECT dblink_build_sql_insert('foo_1','1 2',2,'{"0", "a"}','{"99", "xyz"}'); +-- too many pk fields, should fail +SELECT dblink_build_sql_insert('foo_1','1 2 3 4',4,'{"0", "a", "{a0,b0,c0}"}','{"99", "xyz", "{za0,zb0,zc0}"}'); + +-- build an update statement based on a local tuple, +-- replacing the primary key values with new ones +SELECT dblink_build_sql_update('foo_1','1 2',2,'{"0", "a"}','{"99", "xyz"}'); +-- too many pk fields, should fail +SELECT dblink_build_sql_update('foo_1','1 2 3 4',4,'{"0", "a", "{a0,b0,c0}"}','{"99", "xyz", "{za0,zb0,zc0}"}'); + +-- build a delete statement based on a local tuple, +SELECT dblink_build_sql_delete('foo_1','1 2',2,'{"0", "a"}'); +-- too many pk fields, should fail +SELECT dblink_build_sql_delete('foo_1','1 2 3 4',4,'{"0", "a", "{a0,b0,c0}"}'); + +DROP TABLE foo_1; + -- retest using a quoted and schema qualified table CREATE SCHEMA "MySchema"; CREATE TABLE "MySchema"."Foo"(f1 int, f2 text, f3 text[], primary key (f1,f2)); diff --git a/contrib/tcn/tcn.c b/contrib/tcn/tcn.c index 41186fdd8f..43bdd92749 100644 --- a/contrib/tcn/tcn.c +++ b/contrib/tcn/tcn.c @@ -138,9 +138,9 @@ triggered_change_notification(PG_FUNCTION_ARGS) /* we're only interested if it is the primary key and valid */ if (index->indisprimary && IndexIsValid(index)) { - int numatts = index->indnatts; + int indnkeyatts = index->indnkeyatts; - if (numatts > 0) + if (indnkeyatts > 0) { int i; @@ -150,7 +150,7 @@ triggered_change_notification(PG_FUNCTION_ARGS) appendStringInfoCharMacro(payload, ','); appendStringInfoCharMacro(payload, operation); - for (i = 0; i < numatts; i++) + for (i = 0; i < indnkeyatts; i++) { int colno = index->indkey.values[i]; Form_pg_attribute attr = TupleDescAttr(tupdesc, colno - 1); diff --git a/doc/src/sgml/btree.sgml b/doc/src/sgml/btree.sgml index 10abf90189..ca81fbbc84 100644 --- a/doc/src/sgml/btree.sgml +++ b/doc/src/sgml/btree.sgml @@ -433,6 +433,23 @@ returns bool + + Included attributes in B-tree indexes + + + As of PostgreSQL 11.0 there is an optional + INCLUDE clause, which allows to add non-key (included) attributes to index. + Those included attributes allow more queries to benefit from index-only scans. + We never use included attributes in ScanKeys for search. That allows us to + include into B-tree any datatypes, even those which don't have suitable + operator classes. Included columns only stored in regular tuples on leaf + pages. All pivot tuples on non-leaf pages and highkey tuples are truncated + to contain only key attributes. That helps to slightly reduce the size of + index. + + + + Implementation diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index e8efa13e8d..c304262fdb 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -3743,8 +3743,16 @@ SCRAM-SHA-256$<iteration count>:&l indnatts int2 - The number of columns in the index (duplicates - pg_class.relnatts) + The total number of columns in the index (duplicates + pg_class.relnatts). This number includes both key and included attributes. + + + + indnkeyatts + int2 + + The number of key columns in the index. "Key columns" are ordinary + index columns (as opposed to "included" columns). diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index a7f6c8dc6a..24c3405f91 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -114,6 +114,8 @@ typedef struct IndexAmRoutine bool amcanparallel; /* type of data stored in index, or InvalidOid if variable */ Oid amkeytype; + /* does AM support columns included with clause INCLUDE? */ + bool amcaninclude; /* interface functions */ ambuild_function ambuild; @@ -985,7 +987,8 @@ amparallelrescan (IndexScanDesc scan); using unique indexes, which are indexes that disallow multiple entries with identical keys. An access method that supports this feature sets amcanunique true. - (At present, only b-tree supports it.) + (At present, only b-tree supports it.) Columns listed in the + INCLUDE clause are not used to enforce uniqueness. diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index 0818196e76..14a1aa56cb 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -638,7 +638,8 @@ CREATE INDEX test3_desc_index ON test3 (id DESC NULLS LAST); Indexes can also be used to enforce uniqueness of a column's value, or the uniqueness of the combined values of more than one column. -CREATE UNIQUE INDEX name ON table (column , ...); +CREATE UNIQUE INDEX name ON table (column , ...) +[ INCLUDE (column , ...) ]; Currently, only B-tree indexes can be declared unique. @@ -647,7 +648,9 @@ CREATE UNIQUE INDEX name ON tableINCLUDE clause aren't used to enforce constraints + (UNIQUE, PRIMARY KEY, etc). diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 6a6490cac3..91692325a5 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -23,6 +23,7 @@ PostgreSQL documentation CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] name ] ON [ ONLY ] table_name [ USING method ] ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] ) + [ INCLUDE ( column_name [, ...] ) ] [ WITH ( storage_parameter = value [, ... ] ) ] [ TABLESPACE tablespace_name ] [ WHERE predicate ] @@ -143,6 +144,56 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] + + INCLUDE + + + The optional INCLUDE clause specifies a + list of columns which will be included as a non-key part in the index. + Columns listed in this clause cannot also be present as index key columns. + The INCLUDE columns exist solely to + allow more queries to benefit from index-only scans + by including the values of the specified columns in the index. These values + would otherwise have to be obtained by reading the table's heap. + + + + In UNIQUE indexes, uniqueness is only enforced + for key columns. Columns listed in the INCLUDE + clause have no effect on uniqueness enforcement. Other constraints + (PRIMARY KEY and EXCLUDE) work + the same way. + + + + Columns listed in the INCLUDE clause don't need + appropriate operator classes; the clause can contain non-key index + columns whose data types don't have operator classes defined for + a given access method. + + + + Expressions are not supported as included columns since they cannot be + used in index-only scans. + + + + Currently, only the B-tree index access method supports this feature. + In B-tree indexes, the values of columns listed in the + INCLUDE clause are included in leaf tuples which + are linked to the heap tuples, but are not included into pivot tuples + used for tree navigation. Therefore, moving columns from the list of + key columns to the INCLUDE clause can slightly + reduce index size and improve the tree branching factor. + + + + Indexes with columns listed in the INCLUDE clause + are also called covering indexes. + + + + name @@ -729,13 +780,22 @@ Indexes: Examples - To create a B-tree index on the column title in + To create a unique B-tree index on the column title in the table films: CREATE UNIQUE INDEX title_idx ON films (title); + + To create a unique B-tree index on the column title + and included columns director and rating + in the table films: + +CREATE UNIQUE INDEX title_idx ON films (title) INCLUDE (director, rating); + + + To create an index on the expression lower(title), allowing efficient case-insensitive searches: diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index be0effa5d9..cb3867dbd5 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -73,8 +73,8 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI [ CONSTRAINT constraint_name ] { CHECK ( expression ) [ NO INHERIT ] | - UNIQUE ( column_name [, ... ] ) index_parameters | - PRIMARY KEY ( column_name [, ... ] ) index_parameters | + UNIQUE ( column_name [, ... ] ) index_parameters INCLUDE (column_name [, ...]) | + PRIMARY KEY ( column_name [, ... ] ) index_parameters INCLUDE (column_name [, ...]) | EXCLUDE [ USING index_method ] ( exclude_element WITH operator [, ... ] ) index_parameters [ WHERE ( predicate ) ] | FOREIGN KEY ( column_name [, ... ] ) REFERENCES reftable [ ( refcolumn [, ... ] ) ] [ MATCH FULL | MATCH PARTIAL | MATCH SIMPLE ] [ ON DELETE action ] [ ON UPDATE action ] } @@ -769,7 +769,8 @@ WITH ( MODULUS numeric_literal, REM UNIQUE (column constraint) - UNIQUE ( column_name [, ... ] ) (table constraint) + UNIQUE ( column_name [, ... ] ) + INCLUDE ( column_name [, ...]) (table constraint) @@ -798,12 +799,25 @@ WITH ( MODULUS numeric_literal, REM partitioned table, as well as those of all its descendant partitioned tables, must be included in the constraint definition. + + + Adding a unique constraint will automatically create a unique btree + index on the column or group of columns used in the constraint. + The optional clause INCLUDE adds to that index + one or more columns on which the uniqueness is not enforced. + Note that although the constraint is not enforced on the included columns, + it still depends on them. Consequently, some operations on these columns + (e.g. DROP COLUMN) can cause cascade constraint and + index deletion. See paragraph about INCLUDE in + for more information. + PRIMARY KEY (column constraint) - PRIMARY KEY ( column_name [, ... ] ) (table constraint) + PRIMARY KEY ( column_name [, ... ] ) + INCLUDE ( column_name [, ...]) (table constraint) The PRIMARY KEY constraint specifies that a column or @@ -833,6 +847,17 @@ WITH ( MODULUS numeric_literal, REM tables. + + Adding a PRIMARY KEY constraint will automatically + create a unique btree index on the column or group of columns used in the + constraint. The optional INCLUDE clause allows a list + of columns to be specified which will be included in the non-key portion + of the index. Although uniqueness is not enforced on the included columns, + the constraint still depends on them. Consequently, some operations on the + included columns (e.g. DROP COLUMN) can cause cascade + constraint and index deletion. See paragraph about INCLUDE + in for more information. + diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 6ed115f81c..e716f51503 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -97,6 +97,7 @@ brinhandler(PG_FUNCTION_ARGS) amroutine->amclusterable = false; amroutine->ampredlocks = false; amroutine->amcanparallel = false; + amroutine->amcaninclude = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = brinbuild; diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index f7103e53bc..a9c0b620ec 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -19,6 +19,7 @@ #include "access/heapam.h" #include "access/itup.h" #include "access/tuptoaster.h" +#include "utils/rel.h" /* ---------------------------------------------------------------- @@ -445,3 +446,33 @@ CopyIndexTuple(IndexTuple source) memcpy(result, source, size); return result; } + +/* + * Truncate tailing attributes from given index tuple leaving it with + * new_indnatts number of attributes. + */ +IndexTuple +index_truncate_tuple(TupleDesc tupleDescriptor, IndexTuple olditup, + int new_indnatts) +{ + TupleDesc itupdesc = CreateTupleDescCopyConstr(tupleDescriptor); + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + IndexTuple newitup; + int indnatts = tupleDescriptor->natts; + + Assert(indnatts <= INDEX_MAX_KEYS); + Assert(new_indnatts > 0); + Assert(new_indnatts < indnatts); + + index_deform_tuple(olditup, tupleDescriptor, values, isnull); + + /* form new tuple that will contain only key attributes */ + itupdesc->natts = new_indnatts; + newitup = index_form_tuple(itupdesc, values, isnull); + newitup->t_tid = olditup->t_tid; + + FreeTupleDesc(itupdesc); + Assert(IndexTupleSize(newitup) <= IndexTupleSize(olditup)); + return newitup; +} diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 5632cc5a77..4367523dd9 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -52,6 +52,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amclusterable = false; amroutine->ampredlocks = true; amroutine->amcanparallel = false; + amroutine->amcaninclude = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = ginbuild; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 52c83b9cbf..9007d65ad2 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -74,6 +74,7 @@ gisthandler(PG_FUNCTION_ARGS) amroutine->amclusterable = true; amroutine->ampredlocks = true; amroutine->amcanparallel = false; + amroutine->amcaninclude = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = gistbuild; diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 4f2ea7955f..0002df30c0 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -70,6 +70,7 @@ hashhandler(PG_FUNCTION_ARGS) amroutine->amclusterable = false; amroutine->ampredlocks = true; amroutine->amcanparallel = false; + amroutine->amcaninclude = false; amroutine->amkeytype = INT4OID; amroutine->ambuild = hashbuild; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0bafb4fefc..201d1f5a1b 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8023,7 +8023,6 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool * TupleDesc desc = RelationGetDescr(relation); Oid replidindex; Relation idx_rel; - TupleDesc idx_desc; char replident = relation->rd_rel->relreplident; HeapTuple key_tuple = NULL; bool nulls[MaxHeapAttributeNumber]; @@ -8066,7 +8065,6 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool * } idx_rel = RelationIdGetRelation(replidindex); - idx_desc = RelationGetDescr(idx_rel); /* deform tuple, so we have fast access to columns */ heap_deform_tuple(tp, desc, values, nulls); @@ -8078,7 +8076,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool * * Now set all columns contained in the index to NOT NULL, they cannot * currently be NULL. */ - for (natt = 0; natt < idx_desc->natts; natt++) + for (natt = 0; natt < IndexRelationGetNumberOfKeyAttributes(idx_rel); natt++) { int attno = idx_rel->rd_index->indkey.values[natt]; diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 214825114e..58b4411796 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -158,7 +158,8 @@ IndexScanEnd(IndexScanDesc scan) * * Construct a string describing the contents of an index entry, in the * form "(key_name, ...)=(key_value, ...)". This is currently used - * for building unique-constraint and exclusion-constraint error messages. + * for building unique-constraint and exclusion-constraint error messages, + * so only key columns of the index are checked and printed. * * Note that if the user does not have permissions to view all of the * columns involved then a NULL is returned. Returning a partial key seems @@ -180,13 +181,15 @@ BuildIndexValueDescription(Relation indexRelation, StringInfoData buf; Form_pg_index idxrec; HeapTuple ht_idx; - int natts = indexRelation->rd_rel->relnatts; + int indnkeyatts; int i; int keyno; Oid indexrelid = RelationGetRelid(indexRelation); Oid indrelid; AclResult aclresult; + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation); + /* * Check permissions- if the user does not have access to view all of the * key columns then return NULL to avoid leaking data. @@ -224,7 +227,7 @@ BuildIndexValueDescription(Relation indexRelation, * No table-level access, so step through the columns in the index and * make sure the user has SELECT rights on all of them. */ - for (keyno = 0; keyno < idxrec->indnatts; keyno++) + for (keyno = 0; keyno < idxrec->indnkeyatts; keyno++) { AttrNumber attnum = idxrec->indkey.values[keyno]; @@ -250,7 +253,7 @@ BuildIndexValueDescription(Relation indexRelation, appendStringInfo(&buf, "(%s)=(", pg_get_indexdef_columns(indexrelid, true)); - for (i = 0; i < natts; i++) + for (i = 0; i < indnkeyatts; i++) { char *val; @@ -368,7 +371,7 @@ systable_beginscan(Relation heapRelation, { int j; - for (j = 0; j < irel->rd_index->indnatts; j++) + for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++) { if (key[i].sk_attno == irel->rd_index->indkey.values[j]) { @@ -376,7 +379,7 @@ systable_beginscan(Relation heapRelation, break; } } - if (j == irel->rd_index->indnatts) + if (j == IndexRelationGetNumberOfAttributes(irel)) elog(ERROR, "column is not in index"); } @@ -570,7 +573,7 @@ systable_beginscan_ordered(Relation heapRelation, { int j; - for (j = 0; j < indexRelation->rd_index->indnatts; j++) + for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++) { if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j]) { @@ -578,7 +581,7 @@ systable_beginscan_ordered(Relation heapRelation, break; } } - if (j == indexRelation->rd_index->indnatts) + if (j == IndexRelationGetNumberOfAttributes(indexRelation)) elog(ERROR, "column is not in index"); } diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index 34f78b2f50..aef455c122 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -590,6 +590,23 @@ original search scankey is consulted as each index entry is sequentially scanned to decide whether to return the entry and whether the scan can stop (see _bt_checkkeys()). +We use term "pivot" index tuples to distinguish tuples which don't point +to heap tuples, but rather used for tree navigation. Pivot tuples includes +all tuples on non-leaf pages and high keys on leaf pages. Note that pivot +index tuples are only used to represent which part of the key space belongs +on each page, and can have attribute values copied from non-pivot tuples +that were deleted and killed by VACUUM some time ago. In principle, we could +truncate away attributes that are not needed for a page high key during a leaf +page split, provided that the remaining attributes distinguish the last index +tuple on the post-split left page as belonging on the left page, and the first +index tuple on the post-split right page as belonging on the right page. This +optimization is sometimes called suffix truncation, and may appear in a future +release. Since the high key is subsequently reused as the downlink in the +parent page for the new right page, suffix truncation can increase index +fan-out considerably by keeping pivot tuples short. INCLUDE indexes similarly +truncate away non-key attributes at the time of a leaf page split, +increasing fan-out. + Notes About Data Representation ------------------------------- diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index fd7360278d..9bfa0e9ace 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -82,7 +82,7 @@ static void _bt_checksplitloc(FindSplitData *state, int dataitemstoleft, Size firstoldonrightsz); static bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off); -static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, +static bool _bt_isequal(Relation idxrel, Page page, OffsetNumber offnum, int keysz, ScanKey scankey); static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel); @@ -109,13 +109,16 @@ _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, Relation heapRel) { bool is_unique = false; - int natts = rel->rd_rel->relnatts; + int indnkeyatts; ScanKey itup_scankey; BTStack stack = NULL; Buffer buf; OffsetNumber offset; bool fastpath; + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); + Assert(indnkeyatts != 0); + /* we need an insertion scan key to do our search, so build one */ itup_scankey = _bt_mkscankey(rel, itup); @@ -173,12 +176,12 @@ top: * page. */ if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) && - !P_INCOMPLETE_SPLIT(lpageop) && - !P_IGNORE(lpageop) && - (PageGetFreeSpace(page) > itemsz) && - PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) && - _bt_compare(rel, natts, itup_scankey, page, - P_FIRSTDATAKEY(lpageop)) > 0) + !P_INCOMPLETE_SPLIT(lpageop) && + !P_IGNORE(lpageop) && + (PageGetFreeSpace(page) > itemsz) && + PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) && + _bt_compare(rel, indnkeyatts, itup_scankey, page, + P_FIRSTDATAKEY(lpageop)) > 0) { fastpath = true; } @@ -209,7 +212,7 @@ top: if (!fastpath) { /* find the first page containing this key */ - stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE, + stack = _bt_search(rel, indnkeyatts, itup_scankey, false, &buf, BT_WRITE, NULL); /* trade in our read lock for a write lock */ @@ -223,7 +226,7 @@ top: * need to move right in the tree. See Lehman and Yao for an * excruciatingly precise description. */ - buf = _bt_moveright(rel, buf, natts, itup_scankey, false, + buf = _bt_moveright(rel, buf, indnkeyatts, itup_scankey, false, true, stack, BT_WRITE, NULL); } @@ -253,7 +256,7 @@ top: TransactionId xwait; uint32 speculativeToken; - offset = _bt_binsrch(rel, buf, natts, itup_scankey, false); + offset = _bt_binsrch(rel, buf, indnkeyatts, itup_scankey, false); xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey, checkUnique, &is_unique, &speculativeToken); @@ -287,10 +290,12 @@ top: * actual location of the insert is hard to predict because of the * random search used to prevent O(N^2) performance when there are * many duplicate entries, we can just use the "first valid" page. + * This reasoning also applies to INCLUDE indexes, whose extra + * attributes are not considered part of the key space. */ CheckForSerializableConflictIn(rel, NULL, buf); /* do the insertion */ - _bt_findinsertloc(rel, &buf, &offset, natts, itup_scankey, itup, + _bt_findinsertloc(rel, &buf, &offset, indnkeyatts, itup_scankey, itup, stack, heapRel); _bt_insertonpg(rel, buf, InvalidBuffer, stack, itup, offset, false); } @@ -333,8 +338,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, IndexUniqueCheck checkUnique, bool *is_unique, uint32 *speculativeToken) { - TupleDesc itupdesc = RelationGetDescr(rel); - int natts = rel->rd_rel->relnatts; + int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); SnapshotData SnapshotDirty; OffsetNumber maxoff; Page page; @@ -393,7 +397,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, * in real comparison, but only for ordering/finding items on * pages. - vadim 03/24/97 */ - if (!_bt_isequal(itupdesc, page, offset, natts, itup_scankey)) + if (!_bt_isequal(rel, page, offset, indnkeyatts, itup_scankey)) break; /* we're past all the equal tuples */ /* okay, we gotta fetch the heap tuple ... */ @@ -557,8 +561,8 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, /* If scankey == hikey we gotta check the next page too */ if (P_RIGHTMOST(opaque)) break; - if (!_bt_isequal(itupdesc, page, P_HIKEY, - natts, itup_scankey)) + if (!_bt_isequal(rel, page, P_HIKEY, + indnkeyatts, itup_scankey)) break; /* Advance to next non-dead page --- there must be one */ for (;;) @@ -1087,6 +1091,9 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, OffsetNumber maxoff; OffsetNumber i; bool isleaf; + IndexTuple lefthikey; + int indnatts = IndexRelationGetNumberOfAttributes(rel); + int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); /* Acquire a new page to split into */ rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); @@ -1186,7 +1193,23 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, itemsz = ItemIdGetLength(itemid); item = (IndexTuple) PageGetItem(origpage, itemid); } - if (PageAddItem(leftpage, (Item) item, itemsz, leftoff, + + /* + * We must truncate included attributes of the "high key" item, before + * insert it onto the leaf page. It's the only point in insertion + * process, where we perform truncation. All other functions work with + * this high key and do not change it. + */ + if (indnatts != indnkeyatts && isleaf) + { + lefthikey = _bt_truncate_tuple(rel, item); + itemsz = IndexTupleSize(lefthikey); + itemsz = MAXALIGN(itemsz); + } + else + lefthikey = item; + + if (PageAddItem(leftpage, (Item) lefthikey, itemsz, leftoff, false, false) == InvalidOffsetNumber) { memset(rightpage, 0, BufferGetPageSize(rbuf)); @@ -1375,6 +1398,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, xl_btree_split xlrec; uint8 xlinfo; XLogRecPtr recptr; + bool loglhikey = false; xlrec.level = ropaque->btpo.level; xlrec.firstright = firstright; @@ -1404,18 +1428,20 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, XLogRegisterBufData(0, (char *) newitem, MAXALIGN(newitemsz)); /* Log left page */ - if (!isleaf) + if (!isleaf || indnatts != indnkeyatts) { /* - * We must also log the left page's high key, because the right - * page's leftmost key is suppressed on non-leaf levels. Show it - * as belonging to the left page buffer, so that it is not stored - * if XLogInsert decides it needs a full-page image of the left - * page. + * We must also log the left page's high key. There are two + * reasons for that: right page's leftmost key is suppressed on + * non-leaf levels and in covering indexes included columns are + * truncated from high keys. Show it as belonging to the left + * page buffer, so that it is not stored if XLogInsert decides it + * needs a full-page image of the left page. */ itemid = PageGetItemId(origpage, P_HIKEY); item = (IndexTuple) PageGetItem(origpage, itemid); XLogRegisterBufData(0, (char *) item, MAXALIGN(IndexTupleSize(item))); + loglhikey = true; } /* @@ -1434,7 +1460,9 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, (char *) rightpage + ((PageHeader) rightpage)->pd_upper, ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper); - xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R; + xlinfo = newitemonleft ? + (loglhikey ? XLOG_BTREE_SPLIT_L_HIGHKEY : XLOG_BTREE_SPLIT_L) : + (loglhikey ? XLOG_BTREE_SPLIT_R_HIGHKEY : XLOG_BTREE_SPLIT_R); recptr = XLogInsert(RM_BTREE_ID, xlinfo); PageSetLSN(origpage, recptr); @@ -1664,7 +1692,12 @@ _bt_checksplitloc(FindSplitData *state, /* * The first item on the right page becomes the high key of the left page; - * therefore it counts against left space as well as right space. + * therefore it counts against left space as well as right space. When + * index has included attribues, then those attributes of left page high + * key will be truncate leaving that page with slightly more free space. + * However, that shouldn't affect our ability to find valid split + * location, because anyway split location should exists even without high + * key truncation. */ leftfree -= firstrightitemsz; @@ -1787,18 +1820,18 @@ _bt_insert_parent(Relation rel, stack = &fakestack; stack->bts_blkno = BufferGetBlockNumber(pbuf); stack->bts_offset = InvalidOffsetNumber; - /* bts_btentry will be initialized below */ + stack->bts_btentry = InvalidBlockNumber; stack->bts_parent = NULL; _bt_relbuf(rel, pbuf); } - /* get high key from left page == lowest key on new right page */ + /* get high key from left page == lower bound for new right page */ ritem = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_HIKEY)); /* form an index tuple that points at the new right page */ new_item = CopyIndexTuple(ritem); - ItemPointerSet(&(new_item->t_tid), rbknum, P_HIKEY); + BTreeInnerTupleSetDownLink(new_item, rbknum); /* * Find the parent buffer and get the parent page. @@ -1807,7 +1840,7 @@ _bt_insert_parent(Relation rel, * want to find parent pointing to where we are, right ? - vadim * 05/27/97 */ - ItemPointerSet(&(stack->bts_btentry.t_tid), bknum, P_HIKEY); + stack->bts_btentry = bknum; pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); /* @@ -1962,7 +1995,8 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) { itemid = PageGetItemId(page, offnum); item = (IndexTuple) PageGetItem(page, itemid); - if (BTEntrySame(item, &stack->bts_btentry)) + + if (BTreeInnerTupleGetDownLink(item) == stack->bts_btentry) { /* Return accurate pointer to where link is now */ stack->bts_blkno = blkno; @@ -1977,7 +2011,8 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) { itemid = PageGetItemId(page, offnum); item = (IndexTuple) PageGetItem(page, itemid); - if (BTEntrySame(item, &stack->bts_btentry)) + + if (BTreeInnerTupleGetDownLink(item) == stack->bts_btentry) { /* Return accurate pointer to where link is now */ stack->bts_blkno = blkno; @@ -2067,7 +2102,8 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) left_item_sz = sizeof(IndexTupleData); left_item = (IndexTuple) palloc(left_item_sz); left_item->t_info = left_item_sz; - ItemPointerSet(&(left_item->t_tid), lbkno, P_HIKEY); + BTreeInnerTupleSetDownLink(left_item, lbkno); + BTreeTupSetNAtts(left_item, 0); /* * Create downlink item for right page. The key for it is obtained from @@ -2077,7 +2113,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) right_item_sz = ItemIdGetLength(itemid); item = (IndexTuple) PageGetItem(lpage, itemid); right_item = CopyIndexTuple(item); - ItemPointerSet(&(right_item->t_tid), rbkno, P_HIKEY); + BTreeInnerTupleSetDownLink(right_item, rbkno); /* NO EREPORT(ERROR) from here till newroot op is logged */ START_CRIT_SECTION(); @@ -2208,6 +2244,7 @@ _bt_pgaddtup(Page page, { trunctuple = *itup; trunctuple.t_info = sizeof(IndexTupleData); + BTreeTupSetNAtts(&trunctuple, 0); itup = &trunctuple; itemsize = sizeof(IndexTupleData); } @@ -2226,9 +2263,10 @@ _bt_pgaddtup(Page page, * Rule is simple: NOT_NULL not equal NULL, NULL not equal NULL too. */ static bool -_bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, +_bt_isequal(Relation idxrel, Page page, OffsetNumber offnum, int keysz, ScanKey scankey) { + TupleDesc itupdesc = RelationGetDescr(idxrel); IndexTuple itup; int i; @@ -2237,6 +2275,17 @@ _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + /* + * Index tuple shouldn't be truncated. Despite we technically could + * compare truncated tuple as well, this function should be only called + * for regular non-truncated leaf tuples and P_HIKEY tuple on + * rightmost leaf page. + */ + Assert((P_RIGHTMOST((BTPageOpaque) PageGetSpecialPointer(page)) || + offnum != P_HIKEY) + ? BTreeTupGetNAtts(itup, idxrel) == itupdesc->natts + : true); + for (i = 1; i <= keysz; i++) { AttrNumber attno; diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 019fe48cb6..ba68925912 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -1143,7 +1143,7 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack, * Locate the downlink of "child" in the parent (updating the stack entry * if needed) */ - ItemPointerSet(&(stack->bts_btentry.t_tid), child, P_HIKEY); + stack->bts_btentry = child; pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); if (pbuf == InvalidBuffer) elog(ERROR, "failed to re-find parent key in index \"%s\" for deletion target page %u", @@ -1414,8 +1414,9 @@ _bt_pagedel(Relation rel, Buffer buf) /* we need an insertion scan key for the search, so build one */ itup_scankey = _bt_mkscankey(rel, targetkey); /* find the leftmost leaf page containing this key */ - stack = _bt_search(rel, rel->rd_rel->relnatts, itup_scankey, - false, &lbuf, BT_READ, NULL); + stack = _bt_search(rel, + IndexRelationGetNumberOfKeyAttributes(rel), + itup_scankey, false, &lbuf, BT_READ, NULL); /* don't need a pin on the page */ _bt_relbuf(rel, lbuf); @@ -1551,15 +1552,15 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) #ifdef USE_ASSERT_CHECKING itemid = PageGetItemId(page, topoff); itup = (IndexTuple) PageGetItem(page, itemid); - Assert(ItemPointerGetBlockNumber(&(itup->t_tid)) == target); + Assert(BTreeInnerTupleGetDownLink(itup) == target); #endif nextoffset = OffsetNumberNext(topoff); itemid = PageGetItemId(page, nextoffset); itup = (IndexTuple) PageGetItem(page, itemid); - if (ItemPointerGetBlockNumber(&(itup->t_tid)) != rightsib) + if (BTreeInnerTupleGetDownLink(itup) != rightsib) elog(ERROR, "right sibling %u of block %u is not next child %u of block %u in index \"%s\"", - rightsib, target, ItemPointerGetBlockNumber(&(itup->t_tid)), + rightsib, target, BTreeInnerTupleGetDownLink(itup), BufferGetBlockNumber(topparent), RelationGetRelationName(rel)); /* @@ -1582,7 +1583,7 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) itemid = PageGetItemId(page, topoff); itup = (IndexTuple) PageGetItem(page, itemid); - ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY); + BTreeInnerTupleSetDownLink(itup, rightsib); nextoffset = OffsetNumberNext(topoff); PageIndexTupleDelete(page, nextoffset); @@ -1601,7 +1602,7 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (target != leafblkno) - ItemPointerSet(&trunctuple.t_tid, target, P_HIKEY); + ItemPointerSetBlockNumber(&trunctuple.t_tid, target); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, @@ -1713,7 +1714,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) */ if (ItemPointerIsValid(leafhikey)) { - target = ItemPointerGetBlockNumber(leafhikey); + target = ItemPointerGetBlockNumberNoCheck(leafhikey); Assert(target != leafblkno); /* fetch the block number of the topmost parent's left sibling */ @@ -1829,7 +1830,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) /* remember the next non-leaf child down in the branch. */ itemid = PageGetItemId(page, P_FIRSTDATAKEY(opaque)); - nextchild = ItemPointerGetBlockNumber(&((IndexTuple) PageGetItem(page, itemid))->t_tid); + nextchild = BTreeInnerTupleGetDownLink((IndexTuple) PageGetItem(page, itemid)); if (nextchild == leafblkno) nextchild = InvalidBlockNumber; } @@ -1920,7 +1921,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) if (nextchild == InvalidBlockNumber) ItemPointerSetInvalid(leafhikey); else - ItemPointerSet(leafhikey, nextchild, P_HIKEY); + ItemPointerSetBlockNumber(leafhikey, nextchild); } /* diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 66a66f2dad..d97f5249de 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -121,6 +121,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amclusterable = true; amroutine->ampredlocks = true; amroutine->amcanparallel = true; + amroutine->amcaninclude = true; amroutine->amkeytype = InvalidOid; amroutine->ambuild = btbuild; diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 51dca64e13..4c6fdcdd8a 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -147,7 +147,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, offnum = _bt_binsrch(rel, *bufP, keysz, scankey, nextkey); itemid = PageGetItemId(page, offnum); itup = (IndexTuple) PageGetItem(page, itemid); - blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); + blkno = BTreeInnerTupleGetDownLink(itup); par_blkno = BufferGetBlockNumber(*bufP); /* @@ -163,7 +163,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, new_stack = (BTStack) palloc(sizeof(BTStackData)); new_stack->bts_blkno = par_blkno; new_stack->bts_offset = offnum; - memcpy(&new_stack->bts_btentry, itup, sizeof(IndexTupleData)); + new_stack->bts_btentry = blkno; new_stack->bts_parent = stack_in; /* drop the read lock on the parent page, acquire one on the child */ @@ -436,6 +436,15 @@ _bt_compare(Relation rel, IndexTuple itup; int i; + /* + * Check tuple has correct number of attributes. + */ + if (unlikely(!_bt_check_natts(rel, page, offnum))) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("tuple has wrong number of attributes in index \"%s\"", + RelationGetRelationName(rel)))); + /* * Force result ">" if target item is first data item on an internal page * --- see NOTE above. @@ -1833,7 +1842,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost, offnum = P_FIRSTDATAKEY(opaque); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); - blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); + blkno = BTreeInnerTupleGetDownLink(itup); buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); @@ -1959,3 +1968,51 @@ _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir) so->numKilled = 0; /* just paranoia */ so->markItemIndex = -1; /* ditto */ } + +/* + * Check if index tuple have appropriate number of attributes. + */ +bool +_bt_check_natts(Relation index, Page page, OffsetNumber offnum) +{ + int16 natts = IndexRelationGetNumberOfAttributes(index); + int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(index); + ItemId itemid; + IndexTuple itup; + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* + * Assert that mask allocated for number of keys in index tuple can fit + * maximum number of index keys. + */ + StaticAssertStmt(BT_N_KEYS_OFFSET_MASK >= INDEX_MAX_KEYS, + "BT_N_KEYS_OFFSET_MASK can't fit INDEX_MAX_KEYS"); + + itemid = PageGetItemId(page, offnum); + itup = (IndexTuple) PageGetItem(page, itemid); + + if (P_ISLEAF(opaque) && offnum >= P_FIRSTDATAKEY(opaque)) + { + /* + * Regular leaf tuples have as every index attributes + */ + return (BTreeTupGetNAtts(itup, index) == natts); + } + else if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque)) + { + /* + * Leftmost tuples on non-leaf pages have no attributes, or haven't + * INDEX_ALT_TID_MASK set in pg_upgraded indexes. + */ + return (BTreeTupGetNAtts(itup, index) == 0 || + ((itup->t_info & INDEX_ALT_TID_MASK) == 0)); + } + else + { + /* + * Pivot tuples stored in non-leaf pages and hikeys of leaf pages + * contain only key attributes + */ + return (BTreeTupGetNAtts(itup, index) == nkeyatts); + } +} diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 098e0ce1be..feba5e1c8f 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -752,6 +752,7 @@ _bt_sortaddtup(Page page, { trunctuple = *itup; trunctuple.t_info = sizeof(IndexTupleData); + BTreeTupSetNAtts(&trunctuple, 0); itup = &trunctuple; itemsize = sizeof(IndexTupleData); } @@ -802,6 +803,9 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) OffsetNumber last_off; Size pgspc; Size itupsz; + BTPageOpaque pageop; + int indnatts = IndexRelationGetNumberOfAttributes(wstate->index); + int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(wstate->index); /* * This is a handy place to check for cancel interrupts during the btree @@ -856,6 +860,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) ItemId ii; ItemId hii; IndexTuple oitup; + IndexTuple keytup; + BTPageOpaque opageop = (BTPageOpaque) PageGetSpecialPointer(opage); /* Create new page of same level */ npage = _bt_blnewpage(state->btps_level); @@ -883,6 +889,29 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) ItemIdSetUnused(ii); /* redundant */ ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData); + if (indnkeyatts != indnatts && P_ISLEAF(opageop)) + { + /* + * We truncate included attributes of high key here. Subsequent + * insertions assume that hikey is already truncated, and so they + * need not worry about it, when copying the high key into the + * parent page as a downlink. + * + * The code above have just rearranged item pointers, but it + * didn't save any space. In order to save the space on page we + * have to truly shift index tuples on the page. But that's not + * so bad for performance, because we operating pd_upper and don't + * have to shift much of tuples memory. Shift of ItemId's is + * rather cheap, because they are small. + */ + keytup = _bt_truncate_tuple(wstate->index, oitup); + + /* delete "wrong" high key, insert keytup as P_HIKEY. */ + PageIndexTupleDelete(opage, P_HIKEY); + + _bt_sortaddtup(opage, IndexTupleSize(keytup), keytup, P_HIKEY); + } + /* * Link the old page into its parent, using its minimum key. If we * don't have a parent, we have to create one; this adds a new btree @@ -892,15 +921,18 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) state->btps_next = _bt_pagestate(wstate, state->btps_level + 1); Assert(state->btps_minkey != NULL); - ItemPointerSet(&(state->btps_minkey->t_tid), oblkno, P_HIKEY); + BTreeInnerTupleSetDownLink(state->btps_minkey, oblkno); _bt_buildadd(wstate, state->btps_next, state->btps_minkey); pfree(state->btps_minkey); /* * Save a copy of the minimum key for the new page. We have to copy * it off the old page, not the new one, in case we are not at leaf - * level. + * level. Despite oitup is already initialized, it's important to get + * high key from the page, since we could have replaced it with + * truncated copy. See comment above. */ + oitup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, P_HIKEY)); state->btps_minkey = CopyIndexTuple(oitup); /* @@ -927,6 +959,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) last_off = P_FIRSTKEY; } + pageop = (BTPageOpaque) PageGetSpecialPointer(npage); + /* * If the new item is the first for its page, stash a copy for later. Note * this will only happen for the first item on a level; on later pages, @@ -936,7 +970,15 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) if (last_off == P_HIKEY) { Assert(state->btps_minkey == NULL); - state->btps_minkey = CopyIndexTuple(itup); + + /* + * Truncate included attributes of the tuple that we're going to + * insert into the parent page as a downlink + */ + if (indnkeyatts != indnatts && P_ISLEAF(pageop)) + state->btps_minkey = _bt_truncate_tuple(wstate->index, itup); + else + state->btps_minkey = CopyIndexTuple(itup); } /* @@ -989,7 +1031,7 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state) else { Assert(s->btps_minkey != NULL); - ItemPointerSet(&(s->btps_minkey->t_tid), blkno, P_HIKEY); + BTreeInnerTupleSetDownLink(s->btps_minkey, blkno); _bt_buildadd(wstate, s->btps_next, s->btps_minkey); pfree(s->btps_minkey); s->btps_minkey = NULL; @@ -1029,7 +1071,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) bool load1; TupleDesc tupdes = RelationGetDescr(wstate->index); int i, - keysz = RelationGetNumberOfAttributes(wstate->index); + keysz = IndexRelationGetNumberOfKeyAttributes(wstate->index); ScanKey indexScanKey = NULL; SortSupport sortKeys; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 752667c885..12b636253e 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -63,17 +63,28 @@ _bt_mkscankey(Relation rel, IndexTuple itup) { ScanKey skey; TupleDesc itupdesc; - int natts; + int indnatts PG_USED_FOR_ASSERTS_ONLY; + int indnkeyatts; int16 *indoption; int i; itupdesc = RelationGetDescr(rel); - natts = RelationGetNumberOfAttributes(rel); + indnatts = IndexRelationGetNumberOfAttributes(rel); + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); indoption = rel->rd_indoption; - skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); + Assert(indnkeyatts != 0); + Assert(indnkeyatts <= indnatts); + Assert(BTreeTupGetNAtts(itup, rel) == indnatts || + BTreeTupGetNAtts(itup, rel) == indnkeyatts); - for (i = 0; i < natts; i++) + /* + * We'll execute search using ScanKey constructed on key columns. Non key + * (included) columns must be omitted. + */ + skey = (ScanKey) palloc(indnkeyatts * sizeof(ScanKeyData)); + + for (i = 0; i < indnkeyatts; i++) { FmgrInfo *procinfo; Datum arg; @@ -115,16 +126,16 @@ ScanKey _bt_mkscankey_nodata(Relation rel) { ScanKey skey; - int natts; + int indnkeyatts; int16 *indoption; int i; - natts = RelationGetNumberOfAttributes(rel); + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); indoption = rel->rd_indoption; - skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); + skey = (ScanKey) palloc(indnkeyatts * sizeof(ScanKeyData)); - for (i = 0; i < natts; i++) + for (i = 0; i < indnkeyatts; i++) { FmgrInfo *procinfo; int flags; @@ -2069,3 +2080,30 @@ btproperty(Oid index_oid, int attno, return false; /* punt to generic code */ } } + +/* + * _bt_truncate_tuple() -- remove non-key (INCLUDE) attributes from index + * tuple. + * + * Transforms an ordinal B-tree leaf index tuple into pivot tuple to be used + * as hikey or non-leaf page tuple with downlink. Note that t_tid offset + * will be overritten in order to represent number of present tuple attributes. + */ +IndexTuple +_bt_truncate_tuple(Relation idxrel, IndexTuple olditup) +{ + IndexTuple newitup; + int nkeyattrs = IndexRelationGetNumberOfKeyAttributes(idxrel); + + /* + * We're assuming to truncate only regular leaf index tuples which have + * both key and non-key attributes. + */ + Assert(BTreeTupGetNAtts(olditup, idxrel) == IndexRelationGetNumberOfAttributes(idxrel)); + + newitup = index_truncate_tuple(RelationGetDescr(idxrel), + olditup, nkeyattrs); + BTreeTupSetNAtts(newitup, nkeyattrs); + + return newitup; +} diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index b565bcb540..0986ef07cf 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -202,7 +202,7 @@ btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record) } static void -btree_xlog_split(bool onleft, XLogReaderState *record) +btree_xlog_split(bool onleft, bool lhighkey, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record); @@ -248,11 +248,14 @@ btree_xlog_split(bool onleft, XLogReaderState *record) _bt_restore_page(rpage, datapos, datalen); + /* Non-leaf page should always have its high key logged. */ + Assert(isleaf || lhighkey); + /* - * On leaf level, the high key of the left page is equal to the first key - * on the right page. + * When the high key isn't present is the wal record, then we assume it to + * be equal to the first key on the right page. */ - if (isleaf) + if (!lhighkey) { ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque)); @@ -296,13 +299,14 @@ btree_xlog_split(bool onleft, XLogReaderState *record) } /* Extract left hikey and its size (assuming 16-bit alignment) */ - if (!isleaf) + if (lhighkey) { left_hikey = (IndexTuple) datapos; left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey)); datapos += left_hikeysz; datalen -= left_hikeysz; } + Assert(datalen == 0); newlpage = PageGetTempPageCopySpecial(lpage); @@ -616,7 +620,7 @@ btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record) * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer. * Note that we are not looking at tuple data here, just headers. */ - hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid)); + hoffnum = ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)); hitemid = PageGetItemId(hpage, hoffnum); /* @@ -764,11 +768,11 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) nextoffset = OffsetNumberNext(poffset); itemid = PageGetItemId(page, nextoffset); itup = (IndexTuple) PageGetItem(page, itemid); - rightsib = ItemPointerGetBlockNumber(&itup->t_tid); + rightsib = BTreeInnerTupleGetDownLink(itup); itemid = PageGetItemId(page, poffset); itup = (IndexTuple) PageGetItem(page, itemid); - ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY); + BTreeInnerTupleSetDownLink(itup, rightsib); nextoffset = OffsetNumberNext(poffset); PageIndexTupleDelete(page, nextoffset); @@ -798,7 +802,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) - ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); + ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, @@ -908,7 +912,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record) MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) - ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); + ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, @@ -1004,10 +1008,16 @@ btree_redo(XLogReaderState *record) btree_xlog_insert(false, true, record); break; case XLOG_BTREE_SPLIT_L: - btree_xlog_split(true, record); + btree_xlog_split(true, false, record); + break; + case XLOG_BTREE_SPLIT_L_HIGHKEY: + btree_xlog_split(true, true, record); break; case XLOG_BTREE_SPLIT_R: - btree_xlog_split(false, record); + btree_xlog_split(false, false, record); + break; + case XLOG_BTREE_SPLIT_R_HIGHKEY: + btree_xlog_split(false, true, record); break; case XLOG_BTREE_VACUUM: btree_xlog_vacuum(record); diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index c8caf56368..0b996ea13a 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -35,6 +35,8 @@ btree_desc(StringInfo buf, XLogReaderState *record) } case XLOG_BTREE_SPLIT_L: case XLOG_BTREE_SPLIT_R: + case XLOG_BTREE_SPLIT_L_HIGHKEY: + case XLOG_BTREE_SPLIT_R_HIGHKEY: { xl_btree_split *xlrec = (xl_btree_split *) rec; @@ -119,6 +121,12 @@ btree_identify(uint8 info) case XLOG_BTREE_SPLIT_R: id = "SPLIT_R"; break; + case XLOG_BTREE_SPLIT_L_HIGHKEY: + id = "SPLIT_L_HIGHKEY"; + break; + case XLOG_BTREE_SPLIT_R_HIGHKEY: + id = "SPLIT_R_HIGHKEY"; + break; case XLOG_BTREE_VACUUM: id = "VACUUM"; break; diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index c4278b0160..4a9b5da268 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -50,6 +50,7 @@ spghandler(PG_FUNCTION_ARGS) amroutine->amclusterable = false; amroutine->ampredlocks = false; amroutine->amcanparallel = false; + amroutine->amcaninclude = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = spgbuild; diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 4ea3aa97cf..1ec0e5c8a9 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -302,6 +302,7 @@ Boot_DeclareIndexStmt: stmt->accessMethod = $8; stmt->tableSpace = NULL; stmt->indexParams = $10; + stmt->indexIncludingParams = NIL; stmt->options = NIL; stmt->whereClause = NULL; stmt->excludeOpNames = NIL; @@ -350,6 +351,7 @@ Boot_DeclareUniqueIndexStmt: stmt->accessMethod = $9; stmt->tableSpace = NULL; stmt->indexParams = $11; + stmt->indexIncludingParams = NIL; stmt->options = NIL; stmt->whereClause = NULL; stmt->excludeOpNames = NIL; diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 1430894ad2..644084d1c3 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -616,7 +616,7 @@ boot_openrel(char *relname) relname, (int) ATTRIBUTE_FIXED_PART_SIZE); boot_reldesc = heap_openrv(makeRangeVar(NULL, relname, -1), NoLock); - numattr = boot_reldesc->rd_rel->relnatts; + numattr = RelationGetNumberOfAttributes(boot_reldesc); for (i = 0; i < numattr; i++) { if (attrtypes[i] == NULL) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index a1def77944..faa12e0615 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -2268,7 +2268,8 @@ StoreRelCheck(Relation rel, const char *ccname, Node *expr, InvalidOid, /* no parent constraint */ RelationGetRelid(rel), /* relation */ attNos, /* attrs in the constraint */ - keycount, /* # attrs in the constraint */ + keycount, /* # key attrs in the constraint */ + keycount, /* # total attrs in the constraint */ InvalidOid, /* not a domain constraint */ InvalidOid, /* no associated index */ InvalidOid, /* Foreign key fields */ diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index b8e9f9f9c7..0966aec25f 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -238,7 +238,7 @@ index_check_primary_key(Relation heapRel, * null, otherwise attempt to ALTER TABLE .. SET NOT NULL */ cmds = NIL; - for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++) { AttrNumber attnum = indexInfo->ii_KeyAttrNumbers[i]; HeapTuple atttuple; @@ -447,32 +447,40 @@ ConstructTupleDescriptor(Relation heapRelation, /* * Check the opclass and index AM to see if either provides a keytype - * (overriding the attribute type). Opclass takes precedence. + * (overriding the attribute type). Opclass (if exists) takes + * precedence. */ - tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i])); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for opclass %u", - classObjectId[i]); - opclassTup = (Form_pg_opclass) GETSTRUCT(tuple); - if (OidIsValid(opclassTup->opckeytype)) - keyType = opclassTup->opckeytype; - else - keyType = amroutine->amkeytype; + keyType = amroutine->amkeytype; /* - * If keytype is specified as ANYELEMENT, and opcintype is ANYARRAY, - * then the attribute type must be an array (else it'd not have - * matched this opclass); use its element type. + * Code below is concerned to the opclasses which are not used with + * the included columns. */ - if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID) + if (i < indexInfo->ii_NumIndexKeyAttrs) { - keyType = get_base_element_type(to->atttypid); - if (!OidIsValid(keyType)) - elog(ERROR, "could not get element type of array type %u", - to->atttypid); - } + tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i])); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for opclass %u", + classObjectId[i]); + opclassTup = (Form_pg_opclass) GETSTRUCT(tuple); + if (OidIsValid(opclassTup->opckeytype)) + keyType = opclassTup->opckeytype; - ReleaseSysCache(tuple); + /* + * If keytype is specified as ANYELEMENT, and opcintype is + * ANYARRAY, then the attribute type must be an array (else it'd + * not have matched this opclass); use its element type. + */ + if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID) + { + keyType = get_base_element_type(to->atttypid); + if (!OidIsValid(keyType)) + elog(ERROR, "could not get element type of array type %u", + to->atttypid); + } + + ReleaseSysCache(tuple); + } /* * If a key type different from the heap value is specified, update @@ -602,7 +610,7 @@ UpdateIndexRelation(Oid indexoid, for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i]; indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs); - indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs); + indclass = buildoidvector(classOids, indexInfo->ii_NumIndexKeyAttrs); indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs); /* @@ -647,6 +655,7 @@ UpdateIndexRelation(Oid indexoid, values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid); values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid); values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs); + values[Anum_pg_index_indnkeyatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexKeyAttrs); values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique); values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary); values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion); @@ -1086,7 +1095,7 @@ index_create(Relation heapRelation, } /* Store dependency on operator classes */ - for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++) { referenced.classId = OperatorClassRelationId; referenced.objectId = classObjectId[i]; @@ -1142,6 +1151,8 @@ index_create(Relation heapRelation, else Assert(indexRelation->rd_indexcxt != NULL); + indexRelation->rd_index->indnkeyatts = indexInfo->ii_NumIndexKeyAttrs; + /* * If this is bootstrap (initdb) time, then we don't actually fill in the * index yet. We'll be creating more indexes and classes later, so we @@ -1287,6 +1298,7 @@ index_constraint_create(Relation heapRelation, parentConstraintId, RelationGetRelid(heapRelation), indexInfo->ii_KeyAttrNumbers, + indexInfo->ii_NumIndexKeyAttrs, indexInfo->ii_NumIndexAttrs, InvalidOid, /* no domain */ indexRelationId, /* index OID */ @@ -1732,15 +1744,19 @@ BuildIndexInfo(Relation index) IndexInfo *ii = makeNode(IndexInfo); Form_pg_index indexStruct = index->rd_index; int i; - int numKeys; + int numAtts; /* check the number of keys, and copy attr numbers into the IndexInfo */ - numKeys = indexStruct->indnatts; - if (numKeys < 1 || numKeys > INDEX_MAX_KEYS) + numAtts = indexStruct->indnatts; + if (numAtts < 1 || numAtts > INDEX_MAX_KEYS) elog(ERROR, "invalid indnatts %d for index %u", - numKeys, RelationGetRelid(index)); - ii->ii_NumIndexAttrs = numKeys; - for (i = 0; i < numKeys; i++) + numAtts, RelationGetRelid(index)); + ii->ii_NumIndexAttrs = numAtts; + ii->ii_NumIndexKeyAttrs = indexStruct->indnkeyatts; + Assert(ii->ii_NumIndexKeyAttrs != 0); + Assert(ii->ii_NumIndexKeyAttrs <= ii->ii_NumIndexAttrs); + + for (i = 0; i < numAtts; i++) ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i]; /* fetch any expressions needed for expressional indexes */ @@ -1911,9 +1927,11 @@ CompareIndexInfo(IndexInfo *info1, IndexInfo *info2, void BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii) { - int ncols = index->rd_rel->relnatts; + int indnkeyatts; int i; + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index); + /* * fetch info for checking unique indexes */ @@ -1922,16 +1940,16 @@ BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii) if (index->rd_rel->relam != BTREE_AM_OID) elog(ERROR, "unexpected non-btree speculative unique index"); - ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols); - ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols); - ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols); + ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts); /* * We have to look up the operator's strategy number. This provides a * cross-check that the operator does match the index. */ /* We need the func OIDs and strategy numbers too */ - for (i = 0; i < ncols; i++) + for (i = 0; i < indnkeyatts; i++) { ii->ii_UniqueStrats[i] = BTEqualStrategyNumber; ii->ii_UniqueOps[i] = diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index a84b7da114..5a361683da 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -119,6 +119,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple) Assert(indexInfo->ii_Predicate == NIL); Assert(indexInfo->ii_ExclusionOps == NULL); Assert(relationDescs[i]->rd_index->indimmediate); + Assert(indexInfo->ii_NumIndexKeyAttrs != 0); /* * FormIndexDatum fills in its values and isnull parameters with the diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index 153522782d..485fd37080 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -57,6 +57,7 @@ CreateConstraintEntry(const char *constraintName, Oid relId, const int16 *constraintKey, int constraintNKeys, + int constraintNTotalKeys, Oid domainId, Oid indexRelId, Oid foreignRelId, @@ -83,6 +84,7 @@ CreateConstraintEntry(const char *constraintName, bool nulls[Natts_pg_constraint]; Datum values[Natts_pg_constraint]; ArrayType *conkeyArray; + ArrayType *conincludingArray; ArrayType *confkeyArray; ArrayType *conpfeqopArray; ArrayType *conppeqopArray; @@ -113,6 +115,21 @@ CreateConstraintEntry(const char *constraintName, else conkeyArray = NULL; + if (constraintNTotalKeys > constraintNKeys) + { + Datum *conincluding; + int j = 0; + int constraintNIncludedKeys = constraintNTotalKeys - constraintNKeys; + + conincluding = (Datum *) palloc(constraintNIncludedKeys * sizeof(Datum)); + for (i = constraintNKeys; i < constraintNTotalKeys; i++) + conincluding[j++] = Int16GetDatum(constraintKey[i]); + conincludingArray = construct_array(conincluding, constraintNIncludedKeys, + INT2OID, 2, true, 's'); + } + else + conincludingArray = NULL; + if (foreignNKeys > 0) { Datum *fkdatums; @@ -186,6 +203,11 @@ CreateConstraintEntry(const char *constraintName, else nulls[Anum_pg_constraint_conkey - 1] = true; + if (conincludingArray) + values[Anum_pg_constraint_conincluding - 1] = PointerGetDatum(conincludingArray); + else + nulls[Anum_pg_constraint_conincluding - 1] = true; + if (confkeyArray) values[Anum_pg_constraint_confkey - 1] = PointerGetDatum(confkeyArray); else @@ -247,9 +269,9 @@ CreateConstraintEntry(const char *constraintName, relobject.classId = RelationRelationId; relobject.objectId = relId; - if (constraintNKeys > 0) + if (constraintNTotalKeys > 0) { - for (i = 0; i < constraintNKeys; i++) + for (i = 0; i < constraintNTotalKeys; i++) { relobject.objectSubId = constraintKey[i]; @@ -548,6 +570,7 @@ CloneForeignKeyConstraints(Oid parentId, Oid relationId, List **cloned) relationId, mapped_conkey, nelem, + nelem, InvalidOid, /* not a domain constraint */ constrForm->conindid, /* same index */ constrForm->confrelid, /* same foreign rel */ diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 9007dc6ebe..9fb2e6b06e 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -303,6 +303,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, indexInfo = makeNode(IndexInfo); indexInfo->ii_NumIndexAttrs = 2; + indexInfo->ii_NumIndexKeyAttrs = 2; indexInfo->ii_KeyAttrNumbers[0] = 1; indexInfo->ii_KeyAttrNumbers[1] = 2; indexInfo->ii_Expressions = NIL; diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index e224b91f53..10f01bf5b7 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -109,8 +109,10 @@ static void ReindexPartitionedIndex(Relation parentIdx); * indexes. We acknowledge this when all operator classes, collations and * exclusion operators match. Though we could further permit intra-opfamily * changes for btree and hash indexes, that adds subtle complexity with no - * concrete benefit for core types. - + * concrete benefit for core types. Note, that INCLUDE columns aren't + * checked by this function, for them it's enough that table rewrite is + * skipped. + * * When a comparison or exclusion operator has a polymorphic input type, the * actual input types must also match. This defends against the possibility * that operators could vary behavior in response to get_fn_expr_argtype(). @@ -224,7 +226,7 @@ CheckIndexCompatible(Oid oldId, } /* Any change in operator class or collation breaks compatibility. */ - old_natts = indexForm->indnatts; + old_natts = indexForm->indnkeyatts; Assert(old_natts == numberOfAttributes); d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull); @@ -351,6 +353,7 @@ DefineIndex(Oid relationId, bits16 flags; bits16 constr_flags; int numberOfAttributes; + int numberOfKeyAttributes; TransactionId limitXmin; VirtualTransactionId *old_snapshots; ObjectAddress address; @@ -361,10 +364,28 @@ DefineIndex(Oid relationId, Snapshot snapshot; int i; + if (list_intersection(stmt->indexParams, stmt->indexIncludingParams) != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("included columns must not intersect with key columns"))); + + /* + * count key attributes in index + */ + numberOfKeyAttributes = list_length(stmt->indexParams); + /* - * count attributes in index + * We append any INCLUDE columns onto the indexParams list so that we have + * one list with all columns. Later we can determine which of these are + * key columns, and which are just part of the INCLUDE list by checking + * the list position. A list item in a position less than + * ii_NumIndexKeyAttrs is part of the key columns, and anything equal to + * and over is part of the INCLUDE columns. */ + stmt->indexParams = list_concat(stmt->indexParams, + stmt->indexIncludingParams); numberOfAttributes = list_length(stmt->indexParams); + if (numberOfAttributes <= 0) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -568,6 +589,11 @@ DefineIndex(Oid relationId, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("access method \"%s\" does not support unique indexes", accessMethodName))); + if (list_length(stmt->indexIncludingParams) > 0 && !amRoutine->amcaninclude) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support included columns", + accessMethodName))); if (numberOfAttributes > 1 && !amRoutine->amcanmulticol) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -605,6 +631,7 @@ DefineIndex(Oid relationId, */ indexInfo = makeNode(IndexInfo); indexInfo->ii_NumIndexAttrs = numberOfAttributes; + indexInfo->ii_NumIndexKeyAttrs = numberOfKeyAttributes; indexInfo->ii_Expressions = NIL; /* for now */ indexInfo->ii_ExpressionsState = NIL; indexInfo->ii_Predicate = make_ands_implicit((Expr *) stmt->whereClause); @@ -624,7 +651,7 @@ DefineIndex(Oid relationId, typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); - classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + classObjectId = (Oid *) palloc(numberOfKeyAttributes * sizeof(Oid)); coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16)); ComputeIndexAttrs(indexInfo, typeObjectId, collationObjectId, classObjectId, @@ -1348,16 +1375,15 @@ ComputeIndexAttrs(IndexInfo *indexInfo, ListCell *nextExclOp; ListCell *lc; int attn; + int nkeycols = indexInfo->ii_NumIndexKeyAttrs; /* Allocate space for exclusion operator info, if needed */ if (exclusionOpNames) { - int ncols = list_length(attList); - - Assert(list_length(exclusionOpNames) == ncols); - indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * ncols); - indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * ncols); - indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * ncols); + Assert(list_length(exclusionOpNames) == nkeycols); + indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * nkeycols); + indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * nkeycols); + indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * nkeycols); nextExclOp = list_head(exclusionOpNames); } else @@ -1410,6 +1436,11 @@ ComputeIndexAttrs(IndexInfo *indexInfo, Node *expr = attribute->expr; Assert(expr != NULL); + + if (attn >= nkeycols) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("expressions are not supported in included columns"))); atttype = exprType(expr); attcollation = exprCollation(expr); @@ -1487,6 +1518,16 @@ ComputeIndexAttrs(IndexInfo *indexInfo, collationOidP[attn] = attcollation; + /* + * Skip opclass and ordering options for included columns. + */ + if (attn >= nkeycols) + { + colOptionP[attn] = 0; + attn++; + continue; + } + /* * Identify the opclass to use. */ diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 410d4e5a38..e1eb7c374b 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -602,7 +602,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, RelationGetRelationName(tempRel)); diffname = make_temptable_name_n(tempname, 2); - relnatts = matviewRel->rd_rel->relnatts; + relnatts = RelationGetNumberOfAttributes(matviewRel); /* Open SPI context. */ if (SPI_connect() != SPI_OK_CONNECT) @@ -680,7 +680,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, if (is_usable_unique_index(indexRel)) { Form_pg_index indexStruct = indexRel->rd_index; - int numatts = indexStruct->indnatts; + int indnkeyatts = indexStruct->indnkeyatts; oidvector *indclass; Datum indclassDatum; bool isnull; @@ -695,7 +695,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, indclass = (oidvector *) DatumGetPointer(indclassDatum); /* Add quals for all columns from this index. */ - for (i = 0; i < numatts; i++) + for (i = 0; i < indnkeyatts; i++) { int attnum = indexStruct->indkey.values[i]; Oid opclass = indclass->values[i]; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 801db12bee..846811d1b8 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -5942,7 +5942,7 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) * Loop over each attribute in the primary key and see if it * matches the to-be-altered attribute */ - for (i = 0; i < indexStruct->indnatts; i++) + for (i = 0; i < indexStruct->indnkeyatts; i++) { if (indexStruct->indkey.values[i] == attnum) ereport(ERROR, @@ -7641,6 +7641,7 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, RelationGetRelid(rel), fkattnum, numfks, + numfks, InvalidOid, /* not a domain constraint */ indexOid, RelationGetRelid(pkrel), @@ -8199,7 +8200,7 @@ transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid, * assume a primary key cannot have expressional elements) */ *attnamelist = NIL; - for (i = 0; i < indexStruct->indnatts; i++) + for (i = 0; i < indexStruct->indnkeyatts; i++) { int pkattno = indexStruct->indkey.values[i]; @@ -8277,7 +8278,7 @@ transformFkeyCheckAttrs(Relation pkrel, * partial index; forget it if there are any expressions, too. Invalid * indexes are out as well. */ - if (indexStruct->indnatts == numattrs && + if (indexStruct->indnkeyatts == numattrs && indexStruct->indisunique && IndexIsValid(indexStruct) && heap_attisnull(indexTuple, Anum_pg_index_indpred, NULL) && @@ -12529,7 +12530,7 @@ ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode RelationGetRelationName(indexRel)))); /* Check index for nullable columns. */ - for (key = 0; key < indexRel->rd_index->indnatts; key++) + for (key = 0; key < IndexRelationGetNumberOfKeyAttributes(indexRel); key++) { int16 attno = indexRel->rd_index->indkey.values[key]; Form_pg_attribute attr; diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index a189356cad..67f0b6c0ac 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -742,6 +742,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, RelationGetRelid(rel), NULL, /* no conkey */ 0, + 0, InvalidOid, /* no domain */ InvalidOid, /* no index */ InvalidOid, /* no foreign key */ diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 2fdcb7f3fd..04b8b907b5 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -3157,6 +3157,7 @@ domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid, InvalidOid, /* not a relation constraint */ NULL, 0, + 0, domainOid, /* domain constraint */ InvalidOid, /* no associated index */ InvalidOid, /* Foreign key fields */ diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 62e51f1ef3..903076ee3c 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -648,7 +648,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, Oid *constr_procs; uint16 *constr_strats; Oid *index_collations = index->rd_indcollation; - int index_natts = index->rd_index->indnatts; + int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index); IndexScanDesc index_scan; HeapTuple tup; ScanKeyData scankeys[INDEX_MAX_KEYS]; @@ -675,7 +675,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, * If any of the input values are NULL, the constraint check is assumed to * pass (i.e., we assume the operators are strict). */ - for (i = 0; i < index_natts; i++) + for (i = 0; i < indnkeyatts; i++) { if (isnull[i]) return true; @@ -687,7 +687,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, */ InitDirtySnapshot(DirtySnapshot); - for (i = 0; i < index_natts; i++) + for (i = 0; i < indnkeyatts; i++) { ScanKeyEntryInitialize(&scankeys[i], 0, @@ -719,8 +719,8 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, retry: conflict = false; found_self = false; - index_scan = index_beginscan(heap, index, &DirtySnapshot, index_natts, 0); - index_rescan(index_scan, scankeys, index_natts, NULL, 0); + index_scan = index_beginscan(heap, index, &DirtySnapshot, indnkeyatts, 0); + index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0); while ((tup = index_getnext(index_scan, ForwardScanDirection)) != NULL) @@ -881,10 +881,10 @@ index_recheck_constraint(Relation index, Oid *constr_procs, Datum *existing_values, bool *existing_isnull, Datum *new_values) { - int index_natts = index->rd_index->indnatts; + int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index); int i; - for (i = 0; i < index_natts; i++) + for (i = 0; i < indnkeyatts; i++) { /* Assume the exclusion operators are strict */ if (existing_isnull[i]) diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 971f92a938..6c5a5401c3 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -63,7 +63,7 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel, opclass = (oidvector *) DatumGetPointer(indclassDatum); /* Build scankey for every attribute in the index. */ - for (attoff = 0; attoff < RelationGetNumberOfAttributes(idxrel); attoff++) + for (attoff = 0; attoff < IndexRelationGetNumberOfKeyAttributes(idxrel); attoff++) { Oid operator; Oid opfamily; @@ -131,7 +131,7 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, /* Start an index scan. */ InitDirtySnapshot(snap); scan = index_beginscan(rel, idxrel, &snap, - RelationGetNumberOfAttributes(idxrel), + IndexRelationGetNumberOfKeyAttributes(idxrel), 0); /* Build scan key. */ @@ -140,7 +140,7 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, retry: found = false; - index_rescan(scan, skey, RelationGetNumberOfAttributes(idxrel), NULL, 0); + index_rescan(scan, skey, IndexRelationGetNumberOfKeyAttributes(idxrel), NULL, 0); /* Try to find the tuple */ if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL) diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 01c9de88f4..d6012192a1 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -1227,7 +1227,9 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Expr *leftop; /* expr on lhs of operator */ Expr *rightop; /* expr on rhs ... */ AttrNumber varattno; /* att number used in scan */ + int indnkeyatts; + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index); if (IsA(clause, OpExpr)) { /* indexkey op const or indexkey op expression */ @@ -1252,7 +1254,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; - if (varattno < 1 || varattno > index->rd_index->indnatts) + if (varattno < 1 || varattno > indnkeyatts) elog(ERROR, "bogus index qualification"); /* @@ -1375,7 +1377,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, opnos_cell = lnext(opnos_cell); if (index->rd_rel->relam != BTREE_AM_OID || - varattno < 1 || varattno > index->rd_index->indnatts) + varattno < 1 || varattno > indnkeyatts) elog(ERROR, "bogus RowCompare index qualification"); opfamily = index->rd_opfamily[varattno - 1]; @@ -1499,7 +1501,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; - if (varattno < 1 || varattno > index->rd_index->indnatts) + if (varattno < 1 || varattno > indnkeyatts) elog(ERROR, "bogus index qualification"); /* diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 9287baaedc..d11a6a82f6 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2889,6 +2889,7 @@ _copyConstraint(const Constraint *from) COPY_STRING_FIELD(cooked_expr); COPY_SCALAR_FIELD(generated_when); COPY_NODE_FIELD(keys); + COPY_NODE_FIELD(including); COPY_NODE_FIELD(exclusions); COPY_NODE_FIELD(options); COPY_STRING_FIELD(indexname); @@ -3464,6 +3465,7 @@ _copyIndexStmt(const IndexStmt *from) COPY_STRING_FIELD(accessMethod); COPY_STRING_FIELD(tableSpace); COPY_NODE_FIELD(indexParams); + COPY_NODE_FIELD(indexIncludingParams); COPY_NODE_FIELD(options); COPY_NODE_FIELD(whereClause); COPY_NODE_FIELD(excludeOpNames); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index d758515cfd..39946959af 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1368,6 +1368,7 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b) COMPARE_STRING_FIELD(accessMethod); COMPARE_STRING_FIELD(tableSpace); COMPARE_NODE_FIELD(indexParams); + COMPARE_NODE_FIELD(indexIncludingParams); COMPARE_NODE_FIELD(options); COMPARE_NODE_FIELD(whereClause); COMPARE_NODE_FIELD(excludeOpNames); @@ -2620,6 +2621,7 @@ _equalConstraint(const Constraint *a, const Constraint *b) COMPARE_STRING_FIELD(cooked_expr); COMPARE_SCALAR_FIELD(generated_when); COMPARE_NODE_FIELD(keys); + COMPARE_NODE_FIELD(including); COMPARE_NODE_FIELD(exclusions); COMPARE_NODE_FIELD(options); COMPARE_STRING_FIELD(indexname); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 03a91c3352..26c621c941 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2707,6 +2707,7 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) WRITE_STRING_FIELD(accessMethod); WRITE_STRING_FIELD(tableSpace); WRITE_NODE_FIELD(indexParams); + WRITE_NODE_FIELD(indexIncludingParams); WRITE_NODE_FIELD(options); WRITE_NODE_FIELD(whereClause); WRITE_NODE_FIELD(excludeOpNames); @@ -3535,6 +3536,7 @@ _outConstraint(StringInfo str, const Constraint *node) case CONSTR_PRIMARY: appendStringInfoString(str, "PRIMARY_KEY"); WRITE_NODE_FIELD(keys); + WRITE_NODE_FIELD(including); WRITE_NODE_FIELD(options); WRITE_STRING_FIELD(indexname); WRITE_STRING_FIELD(indexspace); @@ -3544,6 +3546,7 @@ _outConstraint(StringInfo str, const Constraint *node) case CONSTR_UNIQUE: appendStringInfoString(str, "UNIQUE"); WRITE_NODE_FIELD(keys); + WRITE_NODE_FIELD(including); WRITE_NODE_FIELD(options); WRITE_STRING_FIELD(indexname); WRITE_STRING_FIELD(indexspace); @@ -3553,6 +3556,7 @@ _outConstraint(StringInfo str, const Constraint *node) case CONSTR_EXCLUSION: appendStringInfoString(str, "EXCLUSION"); WRITE_NODE_FIELD(exclusions); + WRITE_NODE_FIELD(including); WRITE_NODE_FIELD(options); WRITE_STRING_FIELD(indexname); WRITE_STRING_FIELD(indexspace); diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index c29b79a0c3..87a38f9aaa 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -565,10 +565,12 @@ of scanning the relation and the resulting ordering of the tuples. Sequential scan Paths have NIL pathkeys, indicating no known ordering. Index scans have Path.pathkeys that represent the chosen index's ordering, if any. A single-key index would create a single-PathKey list, while a -multi-column index generates a list with one element per index column. -(Actually, since an index can be scanned either forward or backward, there -are two possible sort orders and two possible PathKey lists it can -generate.) +multi-column index generates a list with one element per key index column. +Non-key columns specified in the INCLUDE clause of covering indexes don't +have corresponding PathKeys in the list, because the have no influence on +index ordering. (Actually, since an index can be scanned either forward or +backward, there are two possible sort orders and two possible PathKey lists +it can generate.) Note that a bitmap scan has NIL pathkeys since we can say nothing about the overall order of its result. Also, an indexscan on an unordered type diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index ec3f60d311..cc607dcdfa 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -2162,7 +2162,7 @@ match_eclass_clauses_to_index(PlannerInfo *root, IndexOptInfo *index, if (!index->rel->has_eclass_joins) return; - for (indexcol = 0; indexcol < index->ncolumns; indexcol++) + for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++) { ec_member_matches_arg arg; List *clauses; diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 6d1cc3b8a0..ec66cb9c3c 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -447,8 +447,10 @@ get_cheapest_parallel_safe_total_inner(List *paths) * If 'scandir' is BackwardScanDirection, build pathkeys representing a * backwards scan of the index. * - * The result is canonical, meaning that redundant pathkeys are removed; - * it may therefore have fewer entries than there are index columns. + * We iterate only key columns of covering indexes, since non-key columns + * don't influence index ordering. The result is canonical, meaning that + * redundant pathkeys are removed; it may therefore have fewer entries than + * there are key columns in the index. * * Another reason for stopping early is that we may be able to tell that * an index column's sort order is uninteresting for this query. However, @@ -477,6 +479,13 @@ build_index_pathkeys(PlannerInfo *root, bool nulls_first; PathKey *cpathkey; + /* + * INCLUDE columns are stored in index unordered, so they don't + * support ordered index scan. + */ + if (i >= index->nkeycolumns) + break; + /* We assume we don't need to make a copy of the tlist item */ indexkey = indextle->expr; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 52e4cca49a..90bb0c2804 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -185,7 +185,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Form_pg_index index; IndexAmRoutine *amroutine; IndexOptInfo *info; - int ncolumns; + int ncolumns, + nkeycolumns; int i; /* @@ -238,19 +239,25 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelationGetForm(indexRelation)->reltablespace; info->rel = rel; info->ncolumns = ncolumns = index->indnatts; + info->nkeycolumns = nkeycolumns = index->indnkeyatts; + info->indexkeys = (int *) palloc(sizeof(int) * ncolumns); info->indexcollations = (Oid *) palloc(sizeof(Oid) * ncolumns); - info->opfamily = (Oid *) palloc(sizeof(Oid) * ncolumns); - info->opcintype = (Oid *) palloc(sizeof(Oid) * ncolumns); + info->opfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns); + info->opcintype = (Oid *) palloc(sizeof(Oid) * nkeycolumns); info->canreturn = (bool *) palloc(sizeof(bool) * ncolumns); for (i = 0; i < ncolumns; i++) { info->indexkeys[i] = index->indkey.values[i]; info->indexcollations[i] = indexRelation->rd_indcollation[i]; + info->canreturn[i] = index_can_return(indexRelation, i + 1); + } + + for (i = 0; i < nkeycolumns; i++) + { info->opfamily[i] = indexRelation->rd_opfamily[i]; info->opcintype[i] = indexRelation->rd_opcintype[i]; - info->canreturn[i] = index_can_return(indexRelation, i + 1); } info->relam = indexRelation->rd_rel->relam; @@ -279,10 +286,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Assert(amroutine->amcanorder); info->sortopfamily = info->opfamily; - info->reverse_sort = (bool *) palloc(sizeof(bool) * ncolumns); - info->nulls_first = (bool *) palloc(sizeof(bool) * ncolumns); + info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns); + info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns); - for (i = 0; i < ncolumns; i++) + for (i = 0; i < nkeycolumns; i++) { int16 opt = indexRelation->rd_indoption[i]; @@ -306,11 +313,11 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * of current or foreseeable amcanorder index types, it's not * worth expending more effort on now. */ - info->sortopfamily = (Oid *) palloc(sizeof(Oid) * ncolumns); - info->reverse_sort = (bool *) palloc(sizeof(bool) * ncolumns); - info->nulls_first = (bool *) palloc(sizeof(bool) * ncolumns); + info->sortopfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns); + info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns); + info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns); - for (i = 0; i < ncolumns; i++) + for (i = 0; i < nkeycolumns; i++) { int16 opt = indexRelation->rd_indoption[i]; Oid ltopr; @@ -731,7 +738,7 @@ infer_arbiter_indexes(PlannerInfo *root) /* Build BMS representation of plain (non expression) index attrs */ indexedAttrs = NULL; - for (natt = 0; natt < idxForm->indnatts; natt++) + for (natt = 0; natt < idxForm->indnkeyatts; natt++) { int attno = idxRel->rd_index->indkey.values[natt]; @@ -1798,7 +1805,7 @@ has_unique_index(RelOptInfo *rel, AttrNumber attno) * just the specified attr is unique. */ if (index->unique && - index->ncolumns == 1 && + index->nkeycolumns == 1 && index->indexkeys[0] == attno && (index->indpred == NIL || index->predOK)) return true; diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 7eb9544efe..606021bc94 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -1051,7 +1051,7 @@ transformOnConflictClause(ParseState *pstate, * relation. Have to be careful to use resnos that correspond to * attnos of the underlying relation. */ - for (attno = 0; attno < targetrel->rd_rel->relnatts; attno++) + for (attno = 0; attno < RelationGetNumberOfAttributes(targetrel); attno++) { Form_pg_attribute attr = TupleDescAttr(targetrel->rd_att, attno); char *name; @@ -2276,8 +2276,8 @@ transformUpdateTargetList(ParseState *pstate, List *origTlist) EXPR_KIND_UPDATE_SOURCE); /* Prepare to assign non-conflicting resnos to resjunk attributes */ - if (pstate->p_next_resno <= pstate->p_target_relation->rd_rel->relnatts) - pstate->p_next_resno = pstate->p_target_relation->rd_rel->relnatts + 1; + if (pstate->p_next_resno <= RelationGetNumberOfAttributes(pstate->p_target_relation)) + pstate->p_next_resno = RelationGetNumberOfAttributes(pstate->p_target_relation) + 1; /* Prepare non-junk columns for assignment to target table */ target_rte = pstate->p_target_rangetblentry; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 177906e083..dd0c26c11b 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -382,6 +382,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); oper_argtypes RuleActionList RuleActionMulti opt_column_list columnList opt_name_list sort_clause opt_sort_clause sortby_list index_params + opt_include opt_c_include index_including_params name_list role_list from_clause from_list opt_array_bounds qualified_name_list any_name any_name_list type_name_list any_operator expr_list attrs @@ -645,7 +646,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P - IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P + IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -3686,17 +3687,18 @@ ConstraintElem: n->initially_valid = !n->skip_validation; $$ = (Node *)n; } - | UNIQUE '(' columnList ')' opt_definition OptConsTableSpace + | UNIQUE '(' columnList ')' opt_c_include opt_definition OptConsTableSpace ConstraintAttributeSpec { Constraint *n = makeNode(Constraint); n->contype = CONSTR_UNIQUE; n->location = @1; n->keys = $3; - n->options = $5; + n->including = $5; + n->options = $6; n->indexname = NULL; - n->indexspace = $6; - processCASbits($7, @7, "UNIQUE", + n->indexspace = $7; + processCASbits($8, @8, "UNIQUE", &n->deferrable, &n->initdeferred, NULL, NULL, yyscanner); $$ = (Node *)n; @@ -3707,6 +3709,7 @@ ConstraintElem: n->contype = CONSTR_UNIQUE; n->location = @1; n->keys = NIL; + n->including = NIL; n->options = NIL; n->indexname = $2; n->indexspace = NULL; @@ -3715,17 +3718,18 @@ ConstraintElem: NULL, yyscanner); $$ = (Node *)n; } - | PRIMARY KEY '(' columnList ')' opt_definition OptConsTableSpace + | PRIMARY KEY '(' columnList ')' opt_c_include opt_definition OptConsTableSpace ConstraintAttributeSpec { Constraint *n = makeNode(Constraint); n->contype = CONSTR_PRIMARY; n->location = @1; n->keys = $4; - n->options = $6; + n->including = $6; + n->options = $7; n->indexname = NULL; - n->indexspace = $7; - processCASbits($8, @8, "PRIMARY KEY", + n->indexspace = $8; + processCASbits($9, @9, "PRIMARY KEY", &n->deferrable, &n->initdeferred, NULL, NULL, yyscanner); $$ = (Node *)n; @@ -3736,6 +3740,7 @@ ConstraintElem: n->contype = CONSTR_PRIMARY; n->location = @1; n->keys = NIL; + n->including = NIL; n->options = NIL; n->indexname = $3; n->indexspace = NULL; @@ -3745,7 +3750,7 @@ ConstraintElem: $$ = (Node *)n; } | EXCLUDE access_method_clause '(' ExclusionConstraintList ')' - opt_definition OptConsTableSpace ExclusionWhereClause + opt_c_include opt_definition OptConsTableSpace ExclusionWhereClause ConstraintAttributeSpec { Constraint *n = makeNode(Constraint); @@ -3753,11 +3758,12 @@ ConstraintElem: n->location = @1; n->access_method = $2; n->exclusions = $4; - n->options = $6; + n->including = $6; + n->options = $7; n->indexname = NULL; - n->indexspace = $7; - n->where_clause = $8; - processCASbits($9, @9, "EXCLUDE", + n->indexspace = $8; + n->where_clause = $9; + processCASbits($10, @10, "EXCLUDE", &n->deferrable, &n->initdeferred, NULL, NULL, yyscanner); $$ = (Node *)n; @@ -3803,6 +3809,10 @@ columnElem: ColId } ; +opt_c_include: INCLUDE '(' columnList ')' { $$ = $3; } + | /* EMPTY */ { $$ = NIL; } + ; + key_match: MATCH FULL { $$ = FKCONSTR_MATCH_FULL; @@ -7373,7 +7383,7 @@ defacl_privilege_target: IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name ON relation_expr access_method_clause '(' index_params ')' - opt_reloptions OptTableSpace where_clause + opt_include opt_reloptions OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); n->unique = $2; @@ -7383,9 +7393,10 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name n->relationId = InvalidOid; n->accessMethod = $8; n->indexParams = $10; - n->options = $12; - n->tableSpace = $13; - n->whereClause = $14; + n->indexIncludingParams = $12; + n->options = $13; + n->tableSpace = $14; + n->whereClause = $15; n->excludeOpNames = NIL; n->idxcomment = NULL; n->indexOid = InvalidOid; @@ -7400,7 +7411,7 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name } | CREATE opt_unique INDEX opt_concurrently IF_P NOT EXISTS index_name ON relation_expr access_method_clause '(' index_params ')' - opt_reloptions OptTableSpace where_clause + opt_include opt_reloptions OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); n->unique = $2; @@ -7410,9 +7421,10 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name n->relationId = InvalidOid; n->accessMethod = $11; n->indexParams = $13; - n->options = $15; - n->tableSpace = $16; - n->whereClause = $17; + n->indexIncludingParams = $15; + n->options = $16; + n->tableSpace = $17; + n->whereClause = $18; n->excludeOpNames = NIL; n->idxcomment = NULL; n->indexOid = InvalidOid; @@ -7491,6 +7503,14 @@ index_elem: ColId opt_collate opt_class opt_asc_desc opt_nulls_order } ; +opt_include: INCLUDE '(' index_including_params ')' { $$ = $3; } + | /* EMPTY */ { $$ = NIL; } + ; + +index_including_params: index_elem { $$ = list_make1($1); } + | index_including_params ',' index_elem { $$ = lappend($1, $3); } + ; + opt_collate: COLLATE any_name { $$ = $2; } | /*EMPTY*/ { $$ = NIL; } ; @@ -15206,6 +15226,7 @@ unreserved_keyword: | IMMUTABLE | IMPLICIT_P | IMPORT_P + | INCLUDE | INCLUDING | INCREMENT | INDEX diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index f7e11f969c..8b912eeea3 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -3089,7 +3089,7 @@ attnameAttNum(Relation rd, const char *attname, bool sysColOK) { int i; - for (i = 0; i < rd->rd_rel->relnatts; i++) + for (i = 0; i < RelationGetNumberOfAttributes(rd); i++) { Form_pg_attribute att = TupleDescAttr(rd->rd_att, i); diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index ea209cdab6..4932e58022 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -978,7 +978,8 @@ checkInsertTargets(ParseState *pstate, List *cols, List **attrnos) /* * Generate default column list for INSERT. */ - int numcol = pstate->p_target_relation->rd_rel->relnatts; + int numcol = RelationGetNumberOfAttributes(pstate->p_target_relation); + int i; for (i = 0; i < numcol; i++) diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 513a5dda26..bbbb1a8c1f 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1468,9 +1468,10 @@ generateClonedIndexStmt(RangeVar *heapRel, Oid heapRelid, Relation source_idx, /* Build the list of IndexElem */ index->indexParams = NIL; + index->indexIncludingParams = NIL; indexpr_item = list_head(indexprs); - for (keyno = 0; keyno < idxrec->indnatts; keyno++) + for (keyno = 0; keyno < idxrec->indnkeyatts; keyno++) { IndexElem *iparam; AttrNumber attnum = idxrec->indkey.values[keyno]; @@ -1559,6 +1560,40 @@ generateClonedIndexStmt(RangeVar *heapRel, Oid heapRelid, Relation source_idx, index->indexParams = lappend(index->indexParams, iparam); } + /* Handle included columns separately */ + for (keyno = idxrec->indnkeyatts; keyno < idxrec->indnatts; keyno++) + { + IndexElem *iparam; + AttrNumber attnum = idxrec->indkey.values[keyno]; + Form_pg_attribute attr = TupleDescAttr(RelationGetDescr(source_idx), + keyno); + + iparam = makeNode(IndexElem); + + if (AttributeNumberIsValid(attnum)) + { + /* Simple index column */ + char *attname; + + attname = get_attname(indrelid, attnum, false); + keycoltype = get_atttype(indrelid, attnum); + + iparam->name = attname; + iparam->expr = NULL; + } + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("expressions are not supported in included columns"))); + + /* Copy the original index column name */ + iparam->indexcolname = pstrdup(NameStr(attr->attname)); + + /* Add the collation name, if non-default */ + iparam->collation = get_collation(indcollation->values[keyno], keycoltype); + + index->indexIncludingParams = lappend(index->indexIncludingParams, iparam); + } /* Copy reloptions if any */ datum = SysCacheGetAttr(RELOID, ht_idxrel, Anum_pg_class_reloptions, &isnull); @@ -1829,6 +1864,7 @@ transformIndexConstraints(CreateStmtContext *cxt) IndexStmt *priorindex = lfirst(k); if (equal(index->indexParams, priorindex->indexParams) && + equal(index->indexIncludingParams, priorindex->indexIncludingParams) && equal(index->whereClause, priorindex->whereClause) && equal(index->excludeOpNames, priorindex->excludeOpNames) && strcmp(index->accessMethod, priorindex->accessMethod) == 0 && @@ -1900,6 +1936,7 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) index->tableSpace = constraint->indexspace; index->whereClause = constraint->where_clause; index->indexParams = NIL; + index->indexIncludingParams = NIL; index->excludeOpNames = NIL; index->idxcomment = NULL; index->indexOid = InvalidOid; @@ -2049,24 +2086,29 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) heap_rel->rd_rel->relhasoids); attname = pstrdup(NameStr(attform->attname)); - /* - * Insist on default opclass and sort options. While the index - * would still work as a constraint with non-default settings, it - * might not provide exactly the same uniqueness semantics as - * you'd get from a normally-created constraint; and there's also - * the dump/reload problem mentioned above. - */ - defopclass = GetDefaultOpClass(attform->atttypid, - index_rel->rd_rel->relam); - if (indclass->values[i] != defopclass || - index_rel->rd_indoption[i] != 0) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("index \"%s\" does not have default sorting behavior", index_name), - errdetail("Cannot create a primary key or unique constraint using such an index."), - parser_errposition(cxt->pstate, constraint->location))); + if (i < index_form->indnkeyatts) + { + /* + * Insist on default opclass and sort options. While the + * index would still work as a constraint with non-default + * settings, it might not provide exactly the same uniqueness + * semantics as you'd get from a normally-created constraint; + * and there's also the dump/reload problem mentioned above. + */ + defopclass = GetDefaultOpClass(attform->atttypid, + index_rel->rd_rel->relam); + if (indclass->values[i] != defopclass || + index_rel->rd_indoption[i] != 0) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("index \"%s\" does not have default sorting behavior", index_name), + errdetail("Cannot create a primary key or unique constraint using such an index."), + parser_errposition(cxt->pstate, constraint->location))); - constraint->keys = lappend(constraint->keys, makeString(attname)); + constraint->keys = lappend(constraint->keys, makeString(attname)); + } + else + constraint->including = lappend(constraint->including, makeString(attname)); } /* Close the index relation but keep the lock */ @@ -2095,8 +2137,6 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) index->indexParams = lappend(index->indexParams, elem); index->excludeOpNames = lappend(index->excludeOpNames, opname); } - - return index; } /* @@ -2107,7 +2147,136 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) * it to DefineIndex to mark the columns NOT NULL, it's more efficient to * get it right the first time.) */ - foreach(lc, constraint->keys) + else + { + foreach(lc, constraint->keys) + { + char *key = strVal(lfirst(lc)); + bool found = false; + ColumnDef *column = NULL; + ListCell *columns; + IndexElem *iparam; + + /* Make sure referenced column exist. */ + foreach(columns, cxt->columns) + { + column = castNode(ColumnDef, lfirst(columns)); + if (strcmp(column->colname, key) == 0) + { + found = true; + break; + } + } + if (found) + { + /* found column in the new table; force it to be NOT NULL */ + if (constraint->contype == CONSTR_PRIMARY) + column->is_not_null = true; + } + else if (SystemAttributeByName(key, cxt->hasoids) != NULL) + { + /* + * column will be a system column in the new table, so accept + * it. System columns can't ever be null, so no need to worry + * about PRIMARY/NOT NULL constraint. + */ + found = true; + } + else if (cxt->inhRelations) + { + /* try inherited tables */ + ListCell *inher; + + foreach(inher, cxt->inhRelations) + { + RangeVar *inh = castNode(RangeVar, lfirst(inher)); + Relation rel; + int count; + + rel = heap_openrv(inh, AccessShareLock); + /* check user requested inheritance from valid relkind */ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("inherited relation \"%s\" is not a table or foreign table", + inh->relname))); + for (count = 0; count < rel->rd_att->natts; count++) + { + Form_pg_attribute inhattr = TupleDescAttr(rel->rd_att, + count); + char *inhname = NameStr(inhattr->attname); + + if (inhattr->attisdropped) + continue; + if (strcmp(key, inhname) == 0) + { + found = true; + + /* + * We currently have no easy way to force an + * inherited column to be NOT NULL at creation, if + * its parent wasn't so already. We leave it to + * DefineIndex to fix things up in this case. + */ + break; + } + } + heap_close(rel, NoLock); + if (found) + break; + } + } + + /* + * In the ALTER TABLE case, don't complain about index keys not + * created in the command; they may well exist already. + * DefineIndex will complain about them if not, and will also take + * care of marking them NOT NULL. + */ + if (!found && !cxt->isalter) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" named in key does not exist", key), + parser_errposition(cxt->pstate, constraint->location))); + + /* Check for PRIMARY KEY(foo, foo) */ + foreach(columns, index->indexParams) + { + iparam = (IndexElem *) lfirst(columns); + if (iparam->name && strcmp(key, iparam->name) == 0) + { + if (index->primary) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" appears twice in primary key constraint", + key), + parser_errposition(cxt->pstate, constraint->location))); + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" appears twice in unique constraint", + key), + parser_errposition(cxt->pstate, constraint->location))); + } + } + + /* OK, add it to the index definition */ + iparam = makeNode(IndexElem); + iparam->name = pstrdup(key); + iparam->expr = NULL; + iparam->indexcolname = NULL; + iparam->collation = NIL; + iparam->opclass = NIL; + iparam->ordering = SORTBY_DEFAULT; + iparam->nulls_ordering = SORTBY_NULLS_DEFAULT; + index->indexParams = lappend(index->indexParams, iparam); + } + } + + /* Add included columns to index definition */ + foreach(lc, constraint->including) { char *key = strVal(lfirst(lc)); bool found = false; @@ -2124,65 +2293,63 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) break; } } - if (found) - { - /* found column in the new table; force it to be NOT NULL */ - if (constraint->contype == CONSTR_PRIMARY) - column->is_not_null = true; - } - else if (SystemAttributeByName(key, cxt->hasoids) != NULL) - { - /* - * column will be a system column in the new table, so accept it. - * System columns can't ever be null, so no need to worry about - * PRIMARY/NOT NULL constraint. - */ - found = true; - } - else if (cxt->inhRelations) - { - /* try inherited tables */ - ListCell *inher; - foreach(inher, cxt->inhRelations) + if (!found) + { + if (SystemAttributeByName(key, cxt->hasoids) != NULL) { - RangeVar *inh = lfirst_node(RangeVar, inher); - Relation rel; - int count; - - rel = heap_openrv(inh, AccessShareLock); - /* check user requested inheritance from valid relkind */ - if (rel->rd_rel->relkind != RELKIND_RELATION && - rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && - rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("inherited relation \"%s\" is not a table or foreign table", - inh->relname))); - for (count = 0; count < rel->rd_att->natts; count++) - { - Form_pg_attribute inhattr = TupleDescAttr(rel->rd_att, - count); - char *inhname = NameStr(inhattr->attname); + /* + * column will be a system column in the new table, so accept + * it. System columns can't ever be null, so no need to worry + * about PRIMARY/NOT NULL constraint. + */ + found = true; + } + else if (cxt->inhRelations) + { + /* try inherited tables */ + ListCell *inher; - if (inhattr->attisdropped) - continue; - if (strcmp(key, inhname) == 0) + foreach(inher, cxt->inhRelations) + { + RangeVar *inh = lfirst_node(RangeVar, inher); + Relation rel; + int count; + + rel = heap_openrv(inh, AccessShareLock); + /* check user requested inheritance from valid relkind */ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("inherited relation \"%s\" is not a table or foreign table", + inh->relname))); + for (count = 0; count < rel->rd_att->natts; count++) { - found = true; - - /* - * We currently have no easy way to force an inherited - * column to be NOT NULL at creation, if its parent - * wasn't so already. We leave it to DefineIndex to - * fix things up in this case. - */ - break; + Form_pg_attribute inhattr = TupleDescAttr(rel->rd_att, + count); + char *inhname = NameStr(inhattr->attname); + + if (inhattr->attisdropped) + continue; + if (strcmp(key, inhname) == 0) + { + found = true; + + /* + * We currently have no easy way to force an + * inherited column to be NOT NULL at creation, if + * its parent wasn't so already. We leave it to + * DefineIndex to fix things up in this case. + */ + break; + } } + heap_close(rel, NoLock); + if (found) + break; } - heap_close(rel, NoLock); - if (found) - break; } } @@ -2198,27 +2365,6 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) errmsg("column \"%s\" named in key does not exist", key), parser_errposition(cxt->pstate, constraint->location))); - /* Check for PRIMARY KEY(foo, foo) */ - foreach(columns, index->indexParams) - { - iparam = (IndexElem *) lfirst(columns); - if (iparam->name && strcmp(key, iparam->name) == 0) - { - if (index->primary) - ereport(ERROR, - (errcode(ERRCODE_DUPLICATE_COLUMN), - errmsg("column \"%s\" appears twice in primary key constraint", - key), - parser_errposition(cxt->pstate, constraint->location))); - else - ereport(ERROR, - (errcode(ERRCODE_DUPLICATE_COLUMN), - errmsg("column \"%s\" appears twice in unique constraint", - key), - parser_errposition(cxt->pstate, constraint->location))); - } - } - /* OK, add it to the index definition */ iparam = makeNode(IndexElem); iparam->name = pstrdup(key); @@ -2226,9 +2372,7 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) iparam->indexcolname = NULL; iparam->collation = NIL; iparam->opclass = NIL; - iparam->ordering = SORTBY_DEFAULT; - iparam->nulls_ordering = SORTBY_NULLS_DEFAULT; - index->indexParams = lappend(index->indexParams, iparam); + index->indexIncludingParams = lappend(index->indexIncludingParams, iparam); } return index; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index f8fc7f83f9..b75a224ee8 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1296,6 +1296,21 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, Oid keycoltype; Oid keycolcollation; + /* + * attrsOnly flag is used for building unique-constraint and + * exclusion-constraint error messages. Included attrs are meaningless + * there, so do not include them in the message. + */ + if (attrsOnly && keyno >= idxrec->indnkeyatts) + break; + + /* Report the INCLUDED attributes, if any. */ + if ((!attrsOnly) && keyno == idxrec->indnkeyatts) + { + appendStringInfoString(&buf, ") INCLUDE ("); + sep = ""; + } + if (!colno) appendStringInfoString(&buf, sep); sep = ", "; @@ -1347,6 +1362,9 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, appendStringInfo(&buf, " COLLATE %s", generate_collation_name((indcoll))); + if (keyno >= idxrec->indnkeyatts) + continue; + /* Add the operator class name, if not default */ get_opclass_name(indclass->values[keyno], keycoltype, &buf); @@ -2047,6 +2065,19 @@ pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, appendStringInfoChar(&buf, ')'); + /* Fetch and build including column list */ + isnull = true; + val = SysCacheGetAttr(CONSTROID, tup, + Anum_pg_constraint_conincluding, &isnull); + if (!isnull) + { + appendStringInfoString(&buf, " INCLUDE ("); + + decompile_column_index_array(val, conForm->conrelid, &buf); + + appendStringInfoChar(&buf, ')'); + } + indexId = get_constraint_index(constraintId); /* XXX why do we only print these bits if fullCommand? */ diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index f998d859c1..fe606d7279 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4902,7 +4902,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, * should match has_unique_index(). */ if (index->unique && - index->ncolumns == 1 && + index->nkeycolumns == 1 && (index->indpred == NIL || index->predOK)) vardata->isunique = true; @@ -7053,7 +7053,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * NullTest invalidates that theory, even though it sets eqQualHere. */ if (index->unique && - indexcol == index->ncolumns - 1 && + indexcol == index->nkeycolumns - 1 && eqQualHere && !found_saop && !found_is_null_op) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 40a2c1df04..e81c4691ec 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -538,7 +538,7 @@ RelationBuildTupleDesc(Relation relation) /* * add attribute data to relation->rd_att */ - need = relation->rd_rel->relnatts; + need = RelationGetNumberOfAttributes(relation); while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan))) { @@ -548,7 +548,7 @@ RelationBuildTupleDesc(Relation relation) attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple); attnum = attp->attnum; - if (attnum <= 0 || attnum > relation->rd_rel->relnatts) + if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation)) elog(ERROR, "invalid attribute number %d for %s", attp->attnum, RelationGetRelationName(relation)); @@ -567,7 +567,7 @@ RelationBuildTupleDesc(Relation relation) if (attrdef == NULL) attrdef = (AttrDefault *) MemoryContextAllocZero(CacheMemoryContext, - relation->rd_rel->relnatts * + RelationGetNumberOfAttributes(relation) * sizeof(AttrDefault)); attrdef[ndef].adnum = attnum; attrdef[ndef].adbin = NULL; @@ -650,7 +650,7 @@ RelationBuildTupleDesc(Relation relation) { int i; - for (i = 0; i < relation->rd_rel->relnatts; i++) + for (i = 0; i < RelationGetNumberOfAttributes(relation); i++) Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1); } #endif @@ -660,7 +660,7 @@ RelationBuildTupleDesc(Relation relation) * attribute: it must be zero. This eliminates the need for special cases * for attnum=1 that used to exist in fastgetattr() and index_getattr(). */ - if (relation->rd_rel->relnatts > 0) + if (RelationGetNumberOfAttributes(relation) > 0) TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0; /* @@ -673,7 +673,7 @@ RelationBuildTupleDesc(Relation relation) if (ndef > 0) /* DEFAULTs */ { - if (ndef < relation->rd_rel->relnatts) + if (ndef < RelationGetNumberOfAttributes(relation)) constr->defval = (AttrDefault *) repalloc(attrdef, ndef * sizeof(AttrDefault)); else @@ -1557,7 +1557,8 @@ RelationInitIndexAccessInfo(Relation relation) int2vector *indoption; MemoryContext indexcxt; MemoryContext oldcontext; - int natts; + int indnatts; + int indnkeyatts; uint16 amsupport; /* @@ -1587,10 +1588,11 @@ RelationInitIndexAccessInfo(Relation relation) relation->rd_amhandler = aform->amhandler; ReleaseSysCache(tuple); - natts = relation->rd_rel->relnatts; - if (natts != relation->rd_index->indnatts) + indnatts = RelationGetNumberOfAttributes(relation); + if (indnatts != IndexRelationGetNumberOfAttributes(relation)) elog(ERROR, "relnatts disagrees with indnatts for index %u", RelationGetRelid(relation)); + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation); /* * Make the private context to hold index access info. The reason we need @@ -1610,17 +1612,18 @@ RelationInitIndexAccessInfo(Relation relation) InitIndexAmRoutine(relation); /* - * Allocate arrays to hold data + * Allocate arrays to hold data. Opclasses are not used for included + * columns, so allocate them for indnkeyatts only. */ relation->rd_opfamily = (Oid *) - MemoryContextAllocZero(indexcxt, natts * sizeof(Oid)); + MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid)); relation->rd_opcintype = (Oid *) - MemoryContextAllocZero(indexcxt, natts * sizeof(Oid)); + MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid)); amsupport = relation->rd_amroutine->amsupport; if (amsupport > 0) { - int nsupport = natts * amsupport; + int nsupport = indnatts * amsupport; relation->rd_support = (RegProcedure *) MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure)); @@ -1634,10 +1637,10 @@ RelationInitIndexAccessInfo(Relation relation) } relation->rd_indcollation = (Oid *) - MemoryContextAllocZero(indexcxt, natts * sizeof(Oid)); + MemoryContextAllocZero(indexcxt, indnatts * sizeof(Oid)); relation->rd_indoption = (int16 *) - MemoryContextAllocZero(indexcxt, natts * sizeof(int16)); + MemoryContextAllocZero(indexcxt, indnatts * sizeof(int16)); /* * indcollation cannot be referenced directly through the C struct, @@ -1650,7 +1653,7 @@ RelationInitIndexAccessInfo(Relation relation) &isnull); Assert(!isnull); indcoll = (oidvector *) DatumGetPointer(indcollDatum); - memcpy(relation->rd_indcollation, indcoll->values, natts * sizeof(Oid)); + memcpy(relation->rd_indcollation, indcoll->values, indnatts * sizeof(Oid)); /* * indclass cannot be referenced directly through the C struct, because it @@ -1671,7 +1674,7 @@ RelationInitIndexAccessInfo(Relation relation) */ IndexSupportInitialize(indclass, relation->rd_support, relation->rd_opfamily, relation->rd_opcintype, - amsupport, natts); + amsupport, indnkeyatts); /* * Similarly extract indoption and copy it to the cache entry @@ -1682,7 +1685,7 @@ RelationInitIndexAccessInfo(Relation relation) &isnull); Assert(!isnull); indoption = (int2vector *) DatumGetPointer(indoptionDatum); - memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16)); + memcpy(relation->rd_indoption, indoption->values, indnatts * sizeof(int16)); /* * expressions, predicate, exclusion caches will be filled later @@ -5064,20 +5067,28 @@ restart: { int attrnum = indexInfo->ii_KeyAttrNumbers[i]; + /* + * Since we have covering indexes with non-key columns, we must + * handle them accurately here. non-key columns must be added into + * indexattrs, since they are in index, and HOT-update shouldn't + * miss them. Obviously, non-key columns couldn't be referenced by + * foreign key or identity key. Hence we do not include them into + * uindexattrs, pkindexattrs and idindexattrs bitmaps. + */ if (attrnum != 0) { indexattrs = bms_add_member(indexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); - if (isKey) + if (isKey && i < indexInfo->ii_NumIndexKeyAttrs) uindexattrs = bms_add_member(uindexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); - if (isPK) + if (isPK && i < indexInfo->ii_NumIndexKeyAttrs) pkindexattrs = bms_add_member(pkindexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); - if (isIDKey) + if (isIDKey && i < indexInfo->ii_NumIndexKeyAttrs) idindexattrs = bms_add_member(idindexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); } @@ -5195,7 +5206,7 @@ RelationGetExclusionInfo(Relation indexRelation, Oid **procs, uint16 **strategies) { - int ncols = indexRelation->rd_rel->relnatts; + int indnkeyatts; Oid *ops; Oid *funcs; uint16 *strats; @@ -5207,17 +5218,19 @@ RelationGetExclusionInfo(Relation indexRelation, MemoryContext oldcxt; int i; + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation); + /* Allocate result space in caller context */ - *operators = ops = (Oid *) palloc(sizeof(Oid) * ncols); - *procs = funcs = (Oid *) palloc(sizeof(Oid) * ncols); - *strategies = strats = (uint16 *) palloc(sizeof(uint16) * ncols); + *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts); /* Quick exit if we have the data cached already */ if (indexRelation->rd_exclstrats != NULL) { - memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * ncols); - memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * ncols); - memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * ncols); + memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts); + memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts); + memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts); return; } @@ -5266,12 +5279,12 @@ RelationGetExclusionInfo(Relation indexRelation, arr = DatumGetArrayTypeP(val); /* ensure not toasted */ nelem = ARR_DIMS(arr)[0]; if (ARR_NDIM(arr) != 1 || - nelem != ncols || + nelem != indnkeyatts || ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != OIDOID) elog(ERROR, "conexclop is not a 1-D Oid array"); - memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * ncols); + memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts); } systable_endscan(conscan); @@ -5282,7 +5295,7 @@ RelationGetExclusionInfo(Relation indexRelation, RelationGetRelationName(indexRelation)); /* We need the func OIDs and strategy numbers too */ - for (i = 0; i < ncols; i++) + for (i = 0; i < indnkeyatts; i++) { funcs[i] = get_opcode(ops[i]); strats[i] = get_op_opfamily_strategy(ops[i], @@ -5295,12 +5308,12 @@ RelationGetExclusionInfo(Relation indexRelation, /* Save a copy of the results in the relcache entry. */ oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt); - indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * ncols); - indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * ncols); - indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * ncols); - memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * ncols); - memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * ncols); - memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * ncols); + indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts); + memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts); + memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts); + memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts); MemoryContextSwitchTo(oldcxt); } diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index e433faad86..a0c0d6f701 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -900,7 +900,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc, workMem, randomAccess ? 't' : 'f'); #endif - state->nKeys = RelationGetNumberOfAttributes(indexRel); + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, false, /* no unique check */ @@ -995,7 +995,7 @@ tuplesort_begin_index_btree(Relation heapRel, workMem, randomAccess ? 't' : 'f'); #endif - state->nKeys = RelationGetNumberOfAttributes(indexRel); + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); TRACE_POSTGRESQL_SORT_START(INDEX_SORT, enforceUnique, @@ -1015,7 +1015,6 @@ tuplesort_begin_index_btree(Relation heapRel, state->enforceUnique = enforceUnique; indexScanKey = _bt_mkscankey_nodata(indexRel); - state->nKeys = RelationGetNumberOfAttributes(indexRel); /* Prepare SortSupport data for each column */ state->sortKeys = (SortSupport) palloc0(state->nKeys * diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 69016a6c4d..d4c1b3261e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -6726,7 +6726,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_indexname, i_parentidx, i_indexdef, - i_indnkeys, + i_indnnkeyatts, + i_indnatts, i_indkey, i_indisclustered, i_indisreplident, @@ -6783,6 +6784,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) "t.relname AS indexname, " "inh.inhparent AS parentidx, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + "i.indnkeyatts AS indnkeyatts, " + "i.indnatts AS indnatts, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " "i.indisreplident, t.relpages, " @@ -6819,6 +6822,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) "t.relname AS indexname, " "0 AS parentidx, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + "i.indnatts AS indnkeyatts, " + "i.indnatts AS indnatts, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " "i.indisreplident, t.relpages, " @@ -6851,6 +6856,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) "t.relname AS indexname, " "0 AS parentidx, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + "i.indnatts AS indnkeyatts, " + "i.indnatts AS indnatts, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " "false AS indisreplident, t.relpages, " @@ -6879,6 +6886,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) "t.relname AS indexname, " "0 AS parentidx, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + "i.indnatts AS indnkeyatts, " + "i.indnatts AS indnatts, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " "false AS indisreplident, t.relpages, " @@ -6943,7 +6952,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_indexname = PQfnumber(res, "indexname"); i_parentidx = PQfnumber(res, "parentidx"); i_indexdef = PQfnumber(res, "indexdef"); - i_indnkeys = PQfnumber(res, "indnkeys"); + i_indnnkeyatts = PQfnumber(res, "indnkeyatts"); + i_indnatts = PQfnumber(res, "indnatts"); i_indkey = PQfnumber(res, "indkey"); i_indisclustered = PQfnumber(res, "indisclustered"); i_indisreplident = PQfnumber(res, "indisreplident"); @@ -6976,12 +6986,13 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) indxinfo[j].dobj.namespace = tbinfo->dobj.namespace; indxinfo[j].indextable = tbinfo; indxinfo[j].indexdef = pg_strdup(PQgetvalue(res, j, i_indexdef)); - indxinfo[j].indnkeys = atoi(PQgetvalue(res, j, i_indnkeys)); + indxinfo[j].indnkeyattrs = atoi(PQgetvalue(res, j, i_indnnkeyatts)); + indxinfo[j].indnattrs = atoi(PQgetvalue(res, j, i_indnatts)); indxinfo[j].tablespace = pg_strdup(PQgetvalue(res, j, i_tablespace)); indxinfo[j].indreloptions = pg_strdup(PQgetvalue(res, j, i_indreloptions)); - indxinfo[j].indkeys = (Oid *) pg_malloc(indxinfo[j].indnkeys * sizeof(Oid)); + indxinfo[j].indkeys = (Oid *) pg_malloc(indxinfo[j].indnattrs * sizeof(Oid)); parseOidArray(PQgetvalue(res, j, i_indkey), - indxinfo[j].indkeys, indxinfo[j].indnkeys); + indxinfo[j].indkeys, indxinfo[j].indnattrs); indxinfo[j].indisclustered = (PQgetvalue(res, j, i_indisclustered)[0] == 't'); indxinfo[j].indisreplident = (PQgetvalue(res, j, i_indisreplident)[0] == 't'); indxinfo[j].parentidx = atooid(PQgetvalue(res, j, i_parentidx)); @@ -16342,7 +16353,7 @@ dumpConstraint(Archive *fout, ConstraintInfo *coninfo) { appendPQExpBuffer(q, "%s (", coninfo->contype == 'p' ? "PRIMARY KEY" : "UNIQUE"); - for (k = 0; k < indxinfo->indnkeys; k++) + for (k = 0; k < indxinfo->indnkeyattrs; k++) { int indkey = (int) indxinfo->indkeys[k]; const char *attname; @@ -16356,6 +16367,23 @@ dumpConstraint(Archive *fout, ConstraintInfo *coninfo) fmtId(attname)); } + if (indxinfo->indnkeyattrs < indxinfo->indnattrs) + appendPQExpBuffer(q, ") INCLUDE ("); + + for (k = indxinfo->indnkeyattrs; k < indxinfo->indnattrs; k++) + { + int indkey = (int) indxinfo->indkeys[k]; + const char *attname; + + if (indkey == InvalidAttrNumber) + break; + attname = getAttrName(indkey, tbinfo); + + appendPQExpBuffer(q, "%s%s", + (k == indxinfo->indnkeyattrs) ? "" : ", ", + fmtId(attname)); + } + appendPQExpBufferChar(q, ')'); if (nonemptyReloptions(indxinfo->indreloptions)) diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index c2314758de..e96c662b1e 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -360,8 +360,10 @@ typedef struct _indxInfo char *indexdef; char *tablespace; /* tablespace in which index is stored */ char *indreloptions; /* options specified by WITH (...) */ - int indnkeys; - Oid *indkeys; + int indnkeyattrs; /* number of index key attributes */ + int indnattrs; /* total number of index attributes */ + Oid *indkeys; /* In spite of the name 'indkeys' this field + * contains both key and nonkey attributes */ bool indisclustered; bool indisreplident; Oid parentidx; /* if partitioned, parent index OID */ diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 8d7bc246e6..d16fa6823b 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -191,6 +191,8 @@ typedef struct IndexAmRoutine bool ampredlocks; /* does AM support parallel scan? */ bool amcanparallel; + /* does AM support columns included with clause INCLUDE? */ + bool amcaninclude; /* type of data stored in index, or InvalidOid if variable */ Oid amkeytype; diff --git a/src/include/access/hash.h b/src/include/access/hash.h index f94bcf9e29..d6c306e969 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -280,7 +280,7 @@ typedef HashMetaPageData *HashMetaPage; sizeof(ItemIdData) - \ MAXALIGN(sizeof(HashPageOpaqueData))) -#define INDEX_MOVED_BY_SPLIT_MASK 0x2000 +#define INDEX_MOVED_BY_SPLIT_MASK INDEX_AM_RESERVED_BIT #define HASH_MIN_FILLFACTOR 10 #define HASH_DEFAULT_FILLFACTOR 75 diff --git a/src/include/access/itup.h b/src/include/access/itup.h index 9be3442c66..04526a8e59 100644 --- a/src/include/access/itup.h +++ b/src/include/access/itup.h @@ -41,7 +41,7 @@ typedef struct IndexTupleData * * 15th (high) bit: has nulls * 14th bit: has var-width attributes - * 13th bit: unused + * 13th bit: AM-defined meaning * 12-0 bit: size of tuple * --------------- */ @@ -63,7 +63,8 @@ typedef IndexAttributeBitMapData * IndexAttributeBitMap; * t_info manipulation macros */ #define INDEX_SIZE_MASK 0x1FFF -/* bit 0x2000 is reserved for index-AM specific usage */ +#define INDEX_AM_RESERVED_BIT 0x2000 /* reserved for index-AM specific + * usage */ #define INDEX_VAR_MASK 0x4000 #define INDEX_NULL_MASK 0x8000 @@ -146,5 +147,7 @@ extern Datum nocache_index_getattr(IndexTuple tup, int attnum, extern void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull); extern IndexTuple CopyIndexTuple(IndexTuple source); +extern IndexTuple index_truncate_tuple(TupleDesc tupleDescriptor, + IndexTuple olditup, int new_indnatts); #endif /* ITUP_H */ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index f532f3ffff..36619b220f 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -139,31 +139,6 @@ typedef struct BTMetaPageData #define BTREE_DEFAULT_FILLFACTOR 90 #define BTREE_NONLEAF_FILLFACTOR 70 -/* - * Test whether two btree entries are "the same". - * - * Old comments: - * In addition, we must guarantee that all tuples in the index are unique, - * in order to satisfy some assumptions in Lehman and Yao. The way that we - * do this is by generating a new OID for every insertion that we do in the - * tree. This adds eight bytes to the size of btree index tuples. Note - * that we do not use the OID as part of a composite key; the OID only - * serves as a unique identifier for a given index tuple (logical position - * within a page). - * - * New comments: - * actually, we must guarantee that all tuples in A LEVEL - * are unique, not in ALL INDEX. So, we can use the t_tid - * as unique identifier for a given index tuple (logical position - * within a level). - vadim 04/09/97 - */ -#define BTTidSame(i1, i2) \ - ((ItemPointerGetBlockNumber(&(i1)) == ItemPointerGetBlockNumber(&(i2))) && \ - (ItemPointerGetOffsetNumber(&(i1)) == ItemPointerGetOffsetNumber(&(i2)))) -#define BTEntrySame(i1, i2) \ - BTTidSame((i1)->t_tid, (i2)->t_tid) - - /* * In general, the btree code tries to localize its knowledge about * page layout to a couple of routines. However, we need a special @@ -212,6 +187,68 @@ typedef struct BTMetaPageData #define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY) +/* + * B-tree index with INCLUDE clause has non-key (included) attributes, which + * are used solely in index-only scans. Those non-key attributes are present + * in leaf index tuples which point to corresponding heap tuples. However, + * tree also contains "pivot" tuples. Pivot tuples are used for navigation + * during tree traversal. Pivot tuples include tuples on non-leaf pages and + * high key tuples. Such, tuples don't need to included attributes, because + * they have no use during tree traversal. This is why we truncate them in + * order to save some space. Therefore, B-tree index with INCLUDE clause + * contain tuples with variable number of attributes. + * + * In order to keep on-disk compatibility with upcoming suffix truncation of + * pivot tuples, we store number of attributes present inside tuple itself. + * Thankfully, offset number is always unused in pivot tuple. So, we use free + * bit of index tuple flags as sign that offset have alternative meaning: it + * stores number of keys present in index tuple (12 bit is far enough for that). + * And we have 4 bits reserved for future usage. + * + * Right now INDEX_ALT_TID_MASK is set only on truncation of non-key + * attributes of included indexes. But potentially every pivot index tuple + * might have INDEX_ALT_TID_MASK set. Then this tuple should have number of + * attributes correctly set in BT_N_KEYS_OFFSET_MASK, and in future it might + * use some bits of BT_RESERVED_OFFSET_MASK. + * + * Non-pivot tuples might also use bit of BT_RESERVED_OFFSET_MASK. Despite + * they store heap tuple offset, higher bits of offset are always free. + */ +#define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT /* flag indicating t_tid + * offset has an + * alternative meaning */ +#define BT_RESERVED_OFFSET_MASK 0xF000 /* mask of bits in t_tid offset + * reserved for future usage */ +#define BT_N_KEYS_OFFSET_MASK 0x0FFF /* mask of bits in t_tid offset + * holding number of attributes + * actually present in index tuple */ + +/* Acess to downlink block number */ +#define BTreeInnerTupleGetDownLink(itup) \ + ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid)) + +#define BTreeInnerTupleSetDownLink(itup, blkno) \ + ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)) + +/* Set number of attributes to B-tree index tuple overriding t_tid offset */ +#define BTreeTupSetNAtts(itup, n) \ + do { \ + (itup)->t_info |= INDEX_ALT_TID_MASK; \ + ItemPointerSetOffsetNumber(&(itup)->t_tid, n); \ + } while(0) + +/* Get number of attributes in B-tree index tuple */ +#define BTreeTupGetNAtts(itup, index) \ + ( \ + (itup)->t_info & INDEX_ALT_TID_MASK ? \ + ( \ + AssertMacro((ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_RESERVED_OFFSET_MASK) == 0), \ + ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_N_KEYS_OFFSET_MASK \ + ) \ + : \ + IndexRelationGetNumberOfAttributes(index) \ + ) + /* * Operator strategy numbers for B-tree have been moved to access/stratnum.h, * because many places need to use them in ScanKeyInit() calls. @@ -265,7 +302,7 @@ typedef struct BTStackData { BlockNumber bts_blkno; OffsetNumber bts_offset; - IndexTupleData bts_btentry; + BlockNumber bts_btentry; struct BTStackData *bts_parent; } BTStackData; @@ -524,6 +561,7 @@ extern bool _bt_first(IndexScanDesc scan, ScanDirection dir); extern bool _bt_next(IndexScanDesc scan, ScanDirection dir); extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost, Snapshot snapshot); +extern bool _bt_check_natts(Relation index, Page page, OffsetNumber offnum); /* * prototypes for functions in nbtutils.c @@ -552,6 +590,7 @@ extern bytea *btoptions(Datum reloptions, bool validate); extern bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull); +extern IndexTuple _bt_truncate_tuple(Relation idxrel, IndexTuple olditup); /* * prototypes for functions in nbtvalidate.c diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h index a8ccdcec42..c55b618ff7 100644 --- a/src/include/access/nbtxlog.h +++ b/src/include/access/nbtxlog.h @@ -28,7 +28,8 @@ #define XLOG_BTREE_INSERT_META 0x20 /* same, plus update metapage */ #define XLOG_BTREE_SPLIT_L 0x30 /* add index tuple with split */ #define XLOG_BTREE_SPLIT_R 0x40 /* as above, new item on right */ -/* 0x50 and 0x60 are unused */ +#define XLOG_BTREE_SPLIT_L_HIGHKEY 0x50 /* as above, include truncated highkey */ +#define XLOG_BTREE_SPLIT_R_HIGHKEY 0x60 /* as above, include truncated highkey */ #define XLOG_BTREE_DELETE 0x70 /* delete leaf index tuples for a page */ #define XLOG_BTREE_UNLINK_PAGE 0x80 /* delete a half-dead page */ #define XLOG_BTREE_UNLINK_PAGE_META 0x90 /* same, and update metapage */ @@ -82,10 +83,11 @@ typedef struct xl_btree_insert * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record. * The _L and _R variants indicate whether the inserted tuple went into the * left or right split page (and thus, whether newitemoff and the new item - * are stored or not). The _ROOT variants indicate that we are splitting - * the root page, and thus that a newroot record rather than an insert or - * split record should follow. Note that a split record never carries a - * metapage update --- we'll do that in the parent-level update. + * are stored or not). The _HIGHKEY variants indicate that we've logged + * explicitly left page high key value, otherwise redo should use right page + * leftmost key as a left page high key. _HIGHKEY is specified for internal + * pages where right page leftmost key is suppressed, and for leaf pages + * of covering indexes where high key have non-key attributes truncated. * * Backup Blk 0: original page / new left page * diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 5641c60593..dd69816f9e 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201804072 +#define CATALOG_VERSION_NO 201804073 #endif diff --git a/src/include/catalog/pg_constraint.h b/src/include/catalog/pg_constraint.h index 773713b49d..a0fb5f8243 100644 --- a/src/include/catalog/pg_constraint.h +++ b/src/include/catalog/pg_constraint.h @@ -104,6 +104,12 @@ CATALOG(pg_constraint,2606) */ int16 conkey[1]; + /* + * Columns of conrelid that the constraint does not apply to, but included + * into the same index with key columns. + */ + int16 conincluding[1]; + /* * If a foreign key, the referenced columns of confrelid */ @@ -156,7 +162,7 @@ typedef FormData_pg_constraint *Form_pg_constraint; * compiler constants for pg_constraint * ---------------- */ -#define Natts_pg_constraint 25 +#define Natts_pg_constraint 26 #define Anum_pg_constraint_conname 1 #define Anum_pg_constraint_connamespace 2 #define Anum_pg_constraint_contype 3 @@ -175,13 +181,14 @@ typedef FormData_pg_constraint *Form_pg_constraint; #define Anum_pg_constraint_coninhcount 16 #define Anum_pg_constraint_connoinherit 17 #define Anum_pg_constraint_conkey 18 -#define Anum_pg_constraint_confkey 19 -#define Anum_pg_constraint_conpfeqop 20 -#define Anum_pg_constraint_conppeqop 21 -#define Anum_pg_constraint_conffeqop 22 -#define Anum_pg_constraint_conexclop 23 -#define Anum_pg_constraint_conbin 24 -#define Anum_pg_constraint_consrc 25 +#define Anum_pg_constraint_conincluding 19 +#define Anum_pg_constraint_confkey 20 +#define Anum_pg_constraint_conpfeqop 21 +#define Anum_pg_constraint_conppeqop 22 +#define Anum_pg_constraint_conffeqop 23 +#define Anum_pg_constraint_conexclop 24 +#define Anum_pg_constraint_conbin 25 +#define Anum_pg_constraint_consrc 26 /* ---------------- * initial contents of pg_constraint diff --git a/src/include/catalog/pg_constraint_fn.h b/src/include/catalog/pg_constraint_fn.h index 0170e08c45..5f64409f3d 100644 --- a/src/include/catalog/pg_constraint_fn.h +++ b/src/include/catalog/pg_constraint_fn.h @@ -50,6 +50,7 @@ extern Oid CreateConstraintEntry(const char *constraintName, Oid relId, const int16 *constraintKey, int constraintNKeys, + int constraintNTotalKeys, Oid domainId, Oid indexRelId, Oid foreignRelId, diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index 057a9f7fe4..6ae03dbcbb 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -32,7 +32,8 @@ CATALOG(pg_index,2610) BKI_WITHOUT_OIDS BKI_SCHEMA_MACRO { Oid indexrelid; /* OID of the index */ Oid indrelid; /* OID of the relation it indexes */ - int16 indnatts; /* number of columns in index */ + int16 indnatts; /* total number of columns in index */ + int16 indnkeyatts; /* number of key columns in index */ bool indisunique; /* is this a unique index? */ bool indisprimary; /* is this index for primary key? */ bool indisexclusion; /* is this index for exclusion constraint? */ @@ -70,26 +71,27 @@ typedef FormData_pg_index *Form_pg_index; * compiler constants for pg_index * ---------------- */ -#define Natts_pg_index 19 +#define Natts_pg_index 20 #define Anum_pg_index_indexrelid 1 #define Anum_pg_index_indrelid 2 #define Anum_pg_index_indnatts 3 -#define Anum_pg_index_indisunique 4 -#define Anum_pg_index_indisprimary 5 -#define Anum_pg_index_indisexclusion 6 -#define Anum_pg_index_indimmediate 7 -#define Anum_pg_index_indisclustered 8 -#define Anum_pg_index_indisvalid 9 -#define Anum_pg_index_indcheckxmin 10 -#define Anum_pg_index_indisready 11 -#define Anum_pg_index_indislive 12 -#define Anum_pg_index_indisreplident 13 -#define Anum_pg_index_indkey 14 -#define Anum_pg_index_indcollation 15 -#define Anum_pg_index_indclass 16 -#define Anum_pg_index_indoption 17 -#define Anum_pg_index_indexprs 18 -#define Anum_pg_index_indpred 19 +#define Anum_pg_index_indnkeyatts 4 +#define Anum_pg_index_indisunique 5 +#define Anum_pg_index_indisprimary 6 +#define Anum_pg_index_indisexclusion 7 +#define Anum_pg_index_indimmediate 8 +#define Anum_pg_index_indisclustered 9 +#define Anum_pg_index_indisvalid 10 +#define Anum_pg_index_indcheckxmin 11 +#define Anum_pg_index_indisready 12 +#define Anum_pg_index_indislive 13 +#define Anum_pg_index_indisreplident 14 +#define Anum_pg_index_indkey 15 +#define Anum_pg_index_indcollation 16 +#define Anum_pg_index_indclass 17 +#define Anum_pg_index_indoption 18 +#define Anum_pg_index_indexprs 19 +#define Anum_pg_index_indpred 20 /* * Index AMs that support ordered scans must support these two indoption diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 538e679cdf..4ad5131aa9 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -118,9 +118,11 @@ typedef struct ExprState * entries for a particular index. Used for both index_build and * retail creation of index entries. * - * NumIndexAttrs number of columns in this index + * NumIndexAttrs total number of columns in this index + * NumIndexKeyAttrs number of key columns in index * KeyAttrNumbers underlying-rel attribute numbers used as keys - * (zeroes indicate expressions) + * (zeroes indicate expressions). It also contains + * info about included columns. * Expressions expr trees for expression entries, or NIL if none * ExpressionsState exec state for expressions, or NIL if none * Predicate partial-index predicate, or NIL if none @@ -146,7 +148,8 @@ typedef struct ExprState typedef struct IndexInfo { NodeTag type; - int ii_NumIndexAttrs; + int ii_NumIndexAttrs; /* total number of columns in index */ + int ii_NumIndexKeyAttrs; /* number of key columns in index */ AttrNumber ii_KeyAttrNumbers[INDEX_MAX_KEYS]; List *ii_Expressions; /* list of Expr */ List *ii_ExpressionsState; /* list of ExprState */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 06abb70e94..c8405386cf 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2147,7 +2147,10 @@ typedef struct Constraint char generated_when; /* Fields used for unique constraints (UNIQUE and PRIMARY KEY): */ - List *keys; /* String nodes naming referenced column(s) */ + List *keys; /* String nodes naming referenced key + * column(s) */ + List *including; /* String nodes naming referenced nonkey + * column(s) */ /* Fields used for EXCLUSION constraints: */ List *exclusions; /* list of (IndexElem, operator name) pairs */ @@ -2760,6 +2763,8 @@ typedef struct IndexStmt char *accessMethod; /* name of access method (eg. btree) */ char *tableSpace; /* tablespace, or NULL for default */ List *indexParams; /* columns to index: a list of IndexElem */ + List *indexIncludingParams; /* additional columns to index: a list + * of IndexElem */ List *options; /* WITH clause options: a list of DefElem */ Node *whereClause; /* qualification (partial-index predicate) */ List *excludeOpNames; /* exclusion operator names, or NIL if none */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index acb8814924..73a41c5475 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -707,11 +707,12 @@ typedef struct RelOptInfo * IndexOptInfo * Per-index information for planning/optimization * - * indexkeys[], indexcollations[], opfamily[], and opcintype[] - * each have ncolumns entries. + * indexkeys[], indexcollations[] each have ncolumns entries. + * opfamily[], and opcintype[] each have nkeycolumns entries. They do + * not contain any information about included attributes. * - * sortopfamily[], reverse_sort[], and nulls_first[] likewise have - * ncolumns entries, if the index is ordered; but if it is unordered, + * sortopfamily[], reverse_sort[], and nulls_first[] have + * nkeycolumns entries, if the index is ordered; but if it is unordered, * those pointers are NULL. * * Zeroes in the indexkeys[] array indicate index columns that are @@ -748,7 +749,9 @@ typedef struct IndexOptInfo /* index descriptor information */ int ncolumns; /* number of columns in index */ - int *indexkeys; /* column numbers of index's keys, or 0 */ + int nkeycolumns; /* number of key columns in index */ + int *indexkeys; /* column numbers of index's attributes both + * key and included columns, or 0 */ Oid *indexcollations; /* OIDs of collations of index columns */ Oid *opfamily; /* OIDs of operator families for columns */ Oid *opcintype; /* OIDs of opclass declared input data types */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 4dff55a8e9..81f758afbf 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -196,6 +196,7 @@ PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD) PG_KEYWORD("implicit", IMPLICIT_P, UNRESERVED_KEYWORD) PG_KEYWORD("import", IMPORT_P, UNRESERVED_KEYWORD) PG_KEYWORD("in", IN_P, RESERVED_KEYWORD) +PG_KEYWORD("include", INCLUDE, UNRESERVED_KEYWORD) PG_KEYWORD("including", INCLUDING, UNRESERVED_KEYWORD) PG_KEYWORD("increment", INCREMENT, UNRESERVED_KEYWORD) PG_KEYWORD("index", INDEX, UNRESERVED_KEYWORD) diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 9826c67fc4..ffffde01da 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -438,10 +438,24 @@ typedef struct ViewOptions /* * RelationGetNumberOfAttributes - * Returns the number of attributes in a relation. + * Returns the total number of attributes in a relation. */ #define RelationGetNumberOfAttributes(relation) ((relation)->rd_rel->relnatts) +/* + * IndexRelationGetNumberOfAttributes + * Returns the number of attributes in an index. + */ +#define IndexRelationGetNumberOfAttributes(relation) \ + ((relation)->rd_index->indnatts) + +/* + * IndexRelationGetNumberOfKeyAttributes + * Returns the number of key attributes in an index. + */ +#define IndexRelationGetNumberOfKeyAttributes(relation) \ + ((relation)->rd_index->indnkeyatts) + /* * RelationGetDescr * Returns tuple descriptor for a relation. diff --git a/src/test/isolation/specs/insert-conflict-do-nothing-2.spec b/src/test/isolation/specs/insert-conflict-do-nothing-2.spec index f1e5bde357..8a8ec94447 100644 --- a/src/test/isolation/specs/insert-conflict-do-nothing-2.spec +++ b/src/test/isolation/specs/insert-conflict-do-nothing-2.spec @@ -3,7 +3,7 @@ setup { - CREATE TABLE ints (key int primary key, val text); + CREATE TABLE ints (key int, val text, PRIMARY KEY (key) INCLUDE (val)); } teardown diff --git a/src/test/isolation/specs/insert-conflict-do-update-2.spec b/src/test/isolation/specs/insert-conflict-do-update-2.spec index cd7e3f42fe..f5b4f601b5 100644 --- a/src/test/isolation/specs/insert-conflict-do-update-2.spec +++ b/src/test/isolation/specs/insert-conflict-do-update-2.spec @@ -7,7 +7,7 @@ setup { CREATE TABLE upsert (key text not null, payload text); - CREATE UNIQUE INDEX ON upsert(lower(key)); + CREATE UNIQUE INDEX ON upsert(lower(key)) INCLUDE (payload); } teardown diff --git a/src/test/isolation/specs/lock-committed-keyupdate.spec b/src/test/isolation/specs/lock-committed-keyupdate.spec index 1630282d0f..3fb424af0e 100644 --- a/src/test/isolation/specs/lock-committed-keyupdate.spec +++ b/src/test/isolation/specs/lock-committed-keyupdate.spec @@ -8,7 +8,7 @@ setup { DROP TABLE IF EXISTS lcku_table; - CREATE TABLE lcku_table (id INTEGER PRIMARY KEY, value TEXT); + CREATE TABLE lcku_table (id INTEGER, value TEXT, PRIMARY KEY (id) INCLUDE (value)); INSERT INTO lcku_table VALUES (1, 'one'); INSERT INTO lcku_table VALUES (3, 'two'); } diff --git a/src/test/isolation/specs/lock-update-traversal.spec b/src/test/isolation/specs/lock-update-traversal.spec index 7042b9399c..2ffe87d152 100644 --- a/src/test/isolation/specs/lock-update-traversal.spec +++ b/src/test/isolation/specs/lock-update-traversal.spec @@ -7,8 +7,9 @@ setup { CREATE TABLE foo ( - key int PRIMARY KEY, - value int + key int, + value int, + PRIMARY KEY (key) INCLUDE (value) ); INSERT INTO foo VALUES (1, 1); diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 09757c5a74..fe5b698669 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -2433,6 +2433,25 @@ DETAIL: Key ((f1 || f2))=(ABCDEF) already exists. -- but this shouldn't: INSERT INTO func_index_heap VALUES('QWERTY'); -- +-- Test unique index with included columns +-- +CREATE TABLE covering_index_heap (f1 int, f2 int, f3 text); +CREATE UNIQUE INDEX covering_index_index on covering_index_heap (f1,f2) INCLUDE(f3); +INSERT INTO covering_index_heap VALUES(1,1,'AAA'); +INSERT INTO covering_index_heap VALUES(1,2,'AAA'); +-- this should fail because of unique index on f1,f2: +INSERT INTO covering_index_heap VALUES(1,2,'BBB'); +ERROR: duplicate key value violates unique constraint "covering_index_index" +DETAIL: Key (f1, f2)=(1, 2) already exists. +-- and this shouldn't: +INSERT INTO covering_index_heap VALUES(1,4,'AAA'); +-- Try to build index on table that already contains data +CREATE UNIQUE INDEX covering_pkey on covering_index_heap (f1,f2) INCLUDE(f3); +-- Try to use existing covering index as primary key +ALTER TABLE covering_index_heap ADD CONSTRAINT covering_pkey PRIMARY KEY USING INDEX +covering_pkey; +DROP TABLE covering_index_heap; +-- -- Also try building functional, expressional, and partial indexes on -- tables that already contain data. -- diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out new file mode 100644 index 0000000000..1d253ee77d --- /dev/null +++ b/src/test/regress/expected/index_including.out @@ -0,0 +1,346 @@ +/* + * 1.test CREATE INDEX + */ +-- Regular index with included columns +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c3,c4); +-- must fail because of intersection of key and included columns +CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c1,c3); +ERROR: included columns must not intersect with key columns +SELECT pg_get_indexdef(i.indexrelid) +FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid +WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; + pg_get_indexdef +-------------------------------------------------------------------------- + CREATE INDEX tbl_idx ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +(1 row) + +DROP TABLE tbl; +-- Unique index and unique constraint +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl add UNIQUE USING INDEX tbl_idx_unique; +ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4); +SELECT pg_get_indexdef(i.indexrelid) +FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid +WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; + pg_get_indexdef +--------------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_c1_c2_c3_c4_key ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) + CREATE UNIQUE INDEX tbl_idx_unique ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +(2 rows) + +DROP TABLE tbl; +-- Unique index and unique constraint. Both must fail. +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); +ERROR: could not create unique index "tbl_idx_unique" +DETAIL: Key (c1, c2)=(1, 2) is duplicated. +ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4); +ERROR: could not create unique index "tbl_c1_c2_c3_c4_key" +DETAIL: Key (c1, c2)=(1, 2) is duplicated. +DROP TABLE tbl; +-- PK constraint +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); +SELECT pg_get_indexdef(i.indexrelid) +FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid +WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; + pg_get_indexdef +---------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_pkey ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +(1 row) + +DROP TABLE tbl; +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl add PRIMARY KEY USING INDEX tbl_idx_unique; +SELECT pg_get_indexdef(i.indexrelid) +FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid +WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; + pg_get_indexdef +---------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_idx_unique ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +(1 row) + +DROP TABLE tbl; +-- PK constraint. Must fail. +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); +ERROR: could not create unique index "tbl_pkey" +DETAIL: Key (c1, c2)=(1, 2) is duplicated. +DROP TABLE tbl; +/* + * 2. Test CREATE TABLE with constraint + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + CONSTRAINT covering UNIQUE(c1,c2) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; + indexrelid | indnatts | indnkeyatts | indisunique | indisprimary | indkey | indclass +------------+----------+-------------+-------------+--------------+---------+----------- + covering | 4 | 2 | t | f | 1 2 3 4 | 1978 1978 +(1 row) + +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; + pg_get_constraintdef | conname | conkey | conincluding +----------------------------------+----------+--------+-------------- + UNIQUE (c1, c2) INCLUDE (c3, c4) | covering | {1,2} | {3,4} +(1 row) + +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ERROR: duplicate key value violates unique constraint "covering" +DETAIL: Key (c1, c2)=(1, 2) already exists. +DROP TABLE tbl; +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + CONSTRAINT covering PRIMARY KEY(c1,c2) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; + indexrelid | indnatts | indnkeyatts | indisunique | indisprimary | indkey | indclass +------------+----------+-------------+-------------+--------------+---------+----------- + covering | 4 | 2 | t | t | 1 2 3 4 | 1978 1978 +(1 row) + +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; + pg_get_constraintdef | conname | conkey | conincluding +---------------------------------------+----------+--------+-------------- + PRIMARY KEY (c1, c2) INCLUDE (c3, c4) | covering | {1,2} | {3,4} +(1 row) + +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ERROR: duplicate key value violates unique constraint "covering" +DETAIL: Key (c1, c2)=(1, 2) already exists. +INSERT INTO tbl SELECT 1, NULL, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ERROR: null value in column "c2" violates not-null constraint +DETAIL: Failing row contains (1, null, 3, (4,4),(4,4)). +INSERT INTO tbl SELECT x, 2*x, NULL, NULL FROM generate_series(1,10) AS x; +DROP TABLE tbl; +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + UNIQUE(c1,c2) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; + indexrelid | indnatts | indnkeyatts | indisunique | indisprimary | indkey | indclass +---------------------+----------+-------------+-------------+--------------+---------+----------- + tbl_c1_c2_c3_c4_key | 4 | 2 | t | f | 1 2 3 4 | 1978 1978 +(1 row) + +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; + pg_get_constraintdef | conname | conkey | conincluding +----------------------------------+---------------------+--------+-------------- + UNIQUE (c1, c2) INCLUDE (c3, c4) | tbl_c1_c2_c3_c4_key | {1,2} | {3,4} +(1 row) + +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ERROR: duplicate key value violates unique constraint "tbl_c1_c2_c3_c4_key" +DETAIL: Key (c1, c2)=(1, 2) already exists. +DROP TABLE tbl; +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + PRIMARY KEY(c1,c2) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; + indexrelid | indnatts | indnkeyatts | indisunique | indisprimary | indkey | indclass +------------+----------+-------------+-------------+--------------+---------+----------- + tbl_pkey | 4 | 2 | t | t | 1 2 3 4 | 1978 1978 +(1 row) + +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; + pg_get_constraintdef | conname | conkey | conincluding +---------------------------------------+----------+--------+-------------- + PRIMARY KEY (c1, c2) INCLUDE (c3, c4) | tbl_pkey | {1,2} | {3,4} +(1 row) + +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ERROR: duplicate key value violates unique constraint "tbl_pkey" +DETAIL: Key (c1, c2)=(1, 2) already exists. +INSERT INTO tbl SELECT 1, NULL, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ERROR: null value in column "c2" violates not-null constraint +DETAIL: Failing row contains (1, null, 3, (4,4),(4,4)). +INSERT INTO tbl SELECT x, 2*x, NULL, NULL FROM generate_series(1,10) AS x; +DROP TABLE tbl; +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + EXCLUDE USING btree (c1 WITH =) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; + indexrelid | indnatts | indnkeyatts | indisunique | indisprimary | indkey | indclass +-------------------+----------+-------------+-------------+--------------+--------+---------- + tbl_c1_c3_c4_excl | 3 | 1 | f | f | 1 3 4 | 1978 +(1 row) + +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; + pg_get_constraintdef | conname | conkey | conincluding +--------------------------------------------------+-------------------+--------+-------------- + EXCLUDE USING btree (c1 WITH =) INCLUDE (c3, c4) | tbl_c1_c3_c4_excl | {1} | {3,4} +(1 row) + +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ERROR: conflicting key value violates exclusion constraint "tbl_c1_c3_c4_excl" +DETAIL: Key (c1)=(1) conflicts with existing key (c1)=(1). +INSERT INTO tbl SELECT x, 2*x, NULL, NULL FROM generate_series(1,10) AS x; +DROP TABLE tbl; +/* + * 3.0 Test ALTER TABLE DROP COLUMN. + * Any column deletion leads to index deletion. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 int); +CREATE UNIQUE INDEX tbl_idx ON tbl using btree(c1, c2, c3, c4); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +------------------------------------------------------------------------ + CREATE UNIQUE INDEX tbl_idx ON public.tbl USING btree (c1, c2, c3, c4) +(1 row) + +ALTER TABLE tbl DROP COLUMN c3; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +---------- +(0 rows) + +DROP TABLE tbl; +/* + * 3.1 Test ALTER TABLE DROP COLUMN. + * Included column deletion leads to the index deletion, + * AS well AS key columns deletion. It's explained in documentation. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box); +CREATE UNIQUE INDEX tbl_idx ON tbl using btree(c1, c2) INCLUDE(c3,c4); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +--------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_idx ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +(1 row) + +ALTER TABLE tbl DROP COLUMN c3; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +---------- +(0 rows) + +DROP TABLE tbl; +/* + * 3.2 Test ALTER TABLE DROP COLUMN. + * Included column deletion leads to the index deletion. + * AS well AS key columns deletion. It's explained in documentation. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, UNIQUE(c1, c2) INCLUDE(c3,c4)); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +--------------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_c1_c2_c3_c4_key ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +(1 row) + +ALTER TABLE tbl DROP COLUMN c3; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +---------- +(0 rows) + +ALTER TABLE tbl DROP COLUMN c1; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +---------- +(0 rows) + +DROP TABLE tbl; +/* + * 4. CREATE INDEX CONCURRENTLY + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, UNIQUE(c1, c2) INCLUDE(c3,c4)); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,1000) AS x; +CREATE UNIQUE INDEX CONCURRENTLY on tbl (c1, c2) INCLUDE (c3, c4); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +--------------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_c1_c2_c3_c4_idx ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) + CREATE UNIQUE INDEX tbl_c1_c2_c3_c4_key ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +(2 rows) + +DROP TABLE tbl; +/* + * 5. REINDEX + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, UNIQUE(c1, c2) INCLUDE(c3,c4)); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +--------------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_c1_c2_c3_c4_key ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +(1 row) + +ALTER TABLE tbl DROP COLUMN c3; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +---------- +(0 rows) + +REINDEX INDEX tbl_c1_c2_c3_c4_key; +ERROR: relation "tbl_c1_c2_c3_c4_key" does not exist +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +---------- +(0 rows) + +ALTER TABLE tbl DROP COLUMN c1; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; + indexdef +---------- +(0 rows) + +DROP TABLE tbl; +/* + * 7. Check various AMs. All but btree must fail. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 box, c4 box); +CREATE INDEX on tbl USING brin(c1, c2) INCLUDE (c3, c4); +ERROR: access method "brin" does not support included columns +CREATE INDEX on tbl USING gist(c3) INCLUDE (c4); +ERROR: access method "gist" does not support included columns +CREATE INDEX on tbl USING spgist(c3) INCLUDE (c4); +ERROR: access method "spgist" does not support included columns +CREATE INDEX on tbl USING gin(c1, c2) INCLUDE (c3, c4); +ERROR: access method "gin" does not support included columns +CREATE INDEX on tbl USING hash(c1, c2) INCLUDE (c3, c4); +ERROR: access method "hash" does not support included columns +CREATE INDEX on tbl USING rtree(c1, c2) INCLUDE (c3, c4); +NOTICE: substituting access method "gist" for obsolete method "rtree" +ERROR: access method "gist" does not support included columns +CREATE INDEX on tbl USING btree(c1, c2) INCLUDE (c3, c4); +DROP TABLE tbl; +/* + * 8. Update, delete values in indexed table. + */ +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree(c1, c2) INCLUDE (c3,c4); +UPDATE tbl SET c1 = 100 WHERE c1 = 2; +UPDATE tbl SET c1 = 1 WHERE c1 = 3; +-- should fail +UPDATE tbl SET c2 = 2 WHERE c1 = 1; +ERROR: duplicate key value violates unique constraint "tbl_idx_unique" +DETAIL: Key (c1, c2)=(1, 2) already exists. +UPDATE tbl SET c3 = 1; +DELETE FROM tbl WHERE c1 = 5 OR c3 = 12; +DROP TABLE tbl; +/* + * 9. Alter column type. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, UNIQUE(c1, c2) INCLUDE(c3,c4)); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl ALTER c1 TYPE bigint; +ALTER TABLE tbl ALTER c3 TYPE bigint; +\d tbl + Table "public.tbl" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | bigint | | | + c2 | integer | | | + c3 | bigint | | | + c4 | box | | | +Indexes: + "tbl_c1_c2_c3_c4_key" UNIQUE CONSTRAINT, btree (c1, c2) INCLUDE (c3, c4) + +DROP TABLE tbl; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 00c324dd44..0d3a27ed41 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -55,7 +55,7 @@ test: copy copyselect copydml # ---------- test: create_misc create_operator create_procedure # These depend on the above two -test: create_index create_view +test: create_index create_view index_including # ---------- # Another group of parallel tests diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 39c3fa9c85..20027c131c 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -65,6 +65,7 @@ test: create_misc test: create_operator test: create_procedure test: create_index +test: index_including test: create_view test: create_aggregate test: create_function_3 diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index c9671a4e13..f7731265a0 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -741,6 +741,26 @@ INSERT INTO func_index_heap VALUES('ABCD', 'EF'); -- but this shouldn't: INSERT INTO func_index_heap VALUES('QWERTY'); +-- +-- Test unique index with included columns +-- +CREATE TABLE covering_index_heap (f1 int, f2 int, f3 text); +CREATE UNIQUE INDEX covering_index_index on covering_index_heap (f1,f2) INCLUDE(f3); + +INSERT INTO covering_index_heap VALUES(1,1,'AAA'); +INSERT INTO covering_index_heap VALUES(1,2,'AAA'); +-- this should fail because of unique index on f1,f2: +INSERT INTO covering_index_heap VALUES(1,2,'BBB'); +-- and this shouldn't: +INSERT INTO covering_index_heap VALUES(1,4,'AAA'); +-- Try to build index on table that already contains data +CREATE UNIQUE INDEX covering_pkey on covering_index_heap (f1,f2) INCLUDE(f3); +-- Try to use existing covering index as primary key +ALTER TABLE covering_index_heap ADD CONSTRAINT covering_pkey PRIMARY KEY USING INDEX +covering_pkey; +DROP TABLE covering_index_heap; + + -- -- Also try building functional, expressional, and partial indexes on -- tables that already contain data. diff --git a/src/test/regress/sql/index_including.sql b/src/test/regress/sql/index_including.sql new file mode 100644 index 0000000000..caedc9866d --- /dev/null +++ b/src/test/regress/sql/index_including.sql @@ -0,0 +1,203 @@ +/* + * 1.test CREATE INDEX + */ + +-- Regular index with included columns +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c3,c4); +-- must fail because of intersection of key and included columns +CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c1,c3); +SELECT pg_get_indexdef(i.indexrelid) +FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid +WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; +DROP TABLE tbl; + +-- Unique index and unique constraint +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl add UNIQUE USING INDEX tbl_idx_unique; +ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4); +SELECT pg_get_indexdef(i.indexrelid) +FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid +WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; +DROP TABLE tbl; + +-- Unique index and unique constraint. Both must fail. +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4); +DROP TABLE tbl; + +-- PK constraint +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); +SELECT pg_get_indexdef(i.indexrelid) +FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid +WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; +DROP TABLE tbl; + +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl add PRIMARY KEY USING INDEX tbl_idx_unique; +SELECT pg_get_indexdef(i.indexrelid) +FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid +WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; +DROP TABLE tbl; + +-- PK constraint. Must fail. +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); +DROP TABLE tbl; + + +/* + * 2. Test CREATE TABLE with constraint + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + CONSTRAINT covering UNIQUE(c1,c2) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +DROP TABLE tbl; + +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + CONSTRAINT covering PRIMARY KEY(c1,c2) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +INSERT INTO tbl SELECT 1, NULL, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +INSERT INTO tbl SELECT x, 2*x, NULL, NULL FROM generate_series(1,10) AS x; +DROP TABLE tbl; + +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + UNIQUE(c1,c2) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +DROP TABLE tbl; + +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + PRIMARY KEY(c1,c2) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +INSERT INTO tbl SELECT 1, NULL, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +INSERT INTO tbl SELECT x, 2*x, NULL, NULL FROM generate_series(1,10) AS x; +DROP TABLE tbl; + +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, + EXCLUDE USING btree (c1 WITH =) INCLUDE(c3,c4)); +SELECT indexrelid::regclass, indnatts, indnkeyatts, indisunique, indisprimary, indkey, indclass FROM pg_index WHERE indrelid = 'tbl'::regclass::oid; +SELECT pg_get_constraintdef(oid), conname, conkey, conincluding FROM pg_constraint WHERE conrelid = 'tbl'::regclass::oid; +-- ensure that constraint works +INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +INSERT INTO tbl SELECT x, 2*x, NULL, NULL FROM generate_series(1,10) AS x; +DROP TABLE tbl; + +/* + * 3.0 Test ALTER TABLE DROP COLUMN. + * Any column deletion leads to index deletion. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 int); +CREATE UNIQUE INDEX tbl_idx ON tbl using btree(c1, c2, c3, c4); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +ALTER TABLE tbl DROP COLUMN c3; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +DROP TABLE tbl; + +/* + * 3.1 Test ALTER TABLE DROP COLUMN. + * Included column deletion leads to the index deletion, + * AS well AS key columns deletion. It's explained in documentation. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box); +CREATE UNIQUE INDEX tbl_idx ON tbl using btree(c1, c2) INCLUDE(c3,c4); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +ALTER TABLE tbl DROP COLUMN c3; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +DROP TABLE tbl; + +/* + * 3.2 Test ALTER TABLE DROP COLUMN. + * Included column deletion leads to the index deletion. + * AS well AS key columns deletion. It's explained in documentation. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, UNIQUE(c1, c2) INCLUDE(c3,c4)); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +ALTER TABLE tbl DROP COLUMN c3; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +ALTER TABLE tbl DROP COLUMN c1; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +DROP TABLE tbl; + + +/* + * 4. CREATE INDEX CONCURRENTLY + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, UNIQUE(c1, c2) INCLUDE(c3,c4)); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,1000) AS x; +CREATE UNIQUE INDEX CONCURRENTLY on tbl (c1, c2) INCLUDE (c3, c4); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +DROP TABLE tbl; + + +/* + * 5. REINDEX + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, UNIQUE(c1, c2) INCLUDE(c3,c4)); +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +ALTER TABLE tbl DROP COLUMN c3; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +REINDEX INDEX tbl_c1_c2_c3_c4_key; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +ALTER TABLE tbl DROP COLUMN c1; +SELECT indexdef FROM pg_indexes WHERE tablename = 'tbl' ORDER BY indexname; +DROP TABLE tbl; + +/* + * 7. Check various AMs. All but btree must fail. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 box, c4 box); +CREATE INDEX on tbl USING brin(c1, c2) INCLUDE (c3, c4); +CREATE INDEX on tbl USING gist(c3) INCLUDE (c4); +CREATE INDEX on tbl USING spgist(c3) INCLUDE (c4); +CREATE INDEX on tbl USING gin(c1, c2) INCLUDE (c3, c4); +CREATE INDEX on tbl USING hash(c1, c2) INCLUDE (c3, c4); +CREATE INDEX on tbl USING rtree(c1, c2) INCLUDE (c3, c4); +CREATE INDEX on tbl USING btree(c1, c2) INCLUDE (c3, c4); +DROP TABLE tbl; + +/* + * 8. Update, delete values in indexed table. + */ +CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree(c1, c2) INCLUDE (c3,c4); +UPDATE tbl SET c1 = 100 WHERE c1 = 2; +UPDATE tbl SET c1 = 1 WHERE c1 = 3; +-- should fail +UPDATE tbl SET c2 = 2 WHERE c1 = 1; +UPDATE tbl SET c3 = 1; +DELETE FROM tbl WHERE c1 = 5 OR c3 = 12; +DROP TABLE tbl; + +/* + * 9. Alter column type. + */ +CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box, UNIQUE(c1, c2) INCLUDE(c3,c4)); +INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl ALTER c1 TYPE bigint; +ALTER TABLE tbl ALTER c3 TYPE bigint; +\d tbl +DROP TABLE tbl; + diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl index e0104cd8d0..4050e82bc9 100644 --- a/src/test/subscription/t/001_rep_changes.pl +++ b/src/test/subscription/t/001_rep_changes.pl @@ -3,7 +3,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 16; +use Test::More tests => 17; # Initialize publisher node my $node_publisher = get_new_node('publisher'); @@ -31,6 +31,8 @@ $node_publisher->safe_psql('postgres', "CREATE TABLE tab_mixed (a int primary key, b text)"); $node_publisher->safe_psql('postgres', "INSERT INTO tab_mixed (a, b) VALUES (1, 'foo')"); +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab_include (a int, b text, CONSTRAINT covering PRIMARY KEY(a) INCLUDE(b))"); # Setup structure on subscriber $node_subscriber->safe_psql('postgres', "CREATE TABLE tab_notrep (a int)"); @@ -44,13 +46,17 @@ $node_subscriber->safe_psql('postgres', $node_subscriber->safe_psql('postgres', "CREATE TABLE tab_mixed (c text, b text, a int primary key)"); +# replication of the table with included index +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab_include (a int, b text, CONSTRAINT covering PRIMARY KEY(a) INCLUDE(b))"); + # Setup logical replication my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; $node_publisher->safe_psql('postgres', "CREATE PUBLICATION tap_pub"); $node_publisher->safe_psql('postgres', "CREATE PUBLICATION tap_pub_ins_only WITH (publish = insert)"); $node_publisher->safe_psql('postgres', -"ALTER PUBLICATION tap_pub ADD TABLE tab_rep, tab_full, tab_full2, tab_mixed" +"ALTER PUBLICATION tap_pub ADD TABLE tab_rep, tab_full, tab_full2, tab_mixed, tab_include" ); $node_publisher->safe_psql('postgres', "ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_ins"); @@ -89,6 +95,11 @@ $node_publisher->safe_psql('postgres', "UPDATE tab_rep SET a = -a"); $node_publisher->safe_psql('postgres', "INSERT INTO tab_mixed VALUES (2, 'bar')"); +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_include SELECT generate_series(1,50)"); +$node_publisher->safe_psql('postgres', "DELETE FROM tab_include WHERE a > 20"); +$node_publisher->safe_psql('postgres', "UPDATE tab_include SET a = -a"); + $node_publisher->wait_for_catchup($appname); $result = $node_subscriber->safe_psql('postgres', @@ -104,6 +115,10 @@ $result = is( $result, qq(|foo|1 |bar|2), 'check replicated changes with different column order'); +$result = $node_subscriber->safe_psql('postgres', + "SELECT count(*), min(a), max(a) FROM tab_include"); +is($result, qq(20|-20|-1), 'check replicated changes with primary key index with included columns'); + # insert some duplicate rows $node_publisher->safe_psql('postgres', "INSERT INTO tab_full SELECT generate_series(1,10)"); -- 2.40.0