From: Fujii Masao Date: Thu, 28 Jan 2016 03:57:52 +0000 (+0900) Subject: Add gin_clean_pending_list function to clean up GIN pending list X-Git-Tag: REL9_6_BETA1~811 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7f46eaf;p=postgresql Add gin_clean_pending_list function to clean up GIN pending list This function cleans up the pending list of the GIN index by moving entries in it to the main GIN data structure in bulk. It returns the number of pages cleaned up from the pending list. This function is useful, for example, when the pending list needs to be cleaned up *quickly* to improve the performance of the search using GIN index. VACUUM can do the same thing, too, but it may take days to run on a large table. Jeff Janes, reviewed by Julien Rouhaud, Jaime Casanova, Alvaro Herrera and me. Discussion: CAMkU=1x8zFkpfnozXyt40zmR3Ub_kHu58LtRmwHUKRgQss7=iQ@mail.gmail.com --- diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9c143b2a63..139aa2b811 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -18036,9 +18036,16 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); brin_summarize_new_values + + gin_clean_pending_list + + shows the functions available for index maintenance tasks. + These functions cannot be executed during recovery. + Use of these functions is restricted to superusers and the owner + of the given index. @@ -18056,6 +18063,13 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); integersummarize page ranges not already summarized + + + gin_clean_pending_list(index regclass) + + bigint + move GIN pending list entries into main index structure +
@@ -18069,6 +18083,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); into the index. + + gin_clean_pending_list accepts the OID or name of + a GIN index and cleans up the pending list of the specified GIN index + by moving entries in it to the main GIN data structure in bulk. + It returns the number of pages cleaned up from the pending list. + Note that if the argument is a GIN index built with fastupdate + option disabled, the cleanup does not happen and the return value is 0 + because the index doesn't have a pending list. + Please see <xref linkend="gin-fast-update"> and <xref linkend="gin-tips"> + for details of the pending list and fastupdate option. + + diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml index 9eb0b5a957..a392f949ff 100644 --- a/doc/src/sgml/gin.sgml +++ b/doc/src/sgml/gin.sgml @@ -734,7 +734,9 @@ from the indexed item). As of PostgreSQL 8.4, GIN is capable of postponing much of this work by inserting new tuples into a temporary, unsorted list of pending entries. - When the table is vacuumed, or if the pending list becomes larger than + When the table is vacuumed or autoanalyzed, or when + gin_clean_pending_list function is called, or if the + pending list becomes larger than <xref linkend="guc-gin-pending-list-limit">, the entries are moved to the main GIN data structure using the same bulk insert techniques used during initial index creation. This greatly improves diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index ce36a1ba48..ec4146f751 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -362,8 +362,8 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] fastupdate off via ALTER INDEX prevents future insertions from going into the list of pending index entries, but does not in itself flush previous entries. You might want to - VACUUM the table afterward to ensure the pending list is - emptied. + VACUUM the table or call gin_clean_pending_list + function afterward to ensure the pending list is emptied. 
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 681ce09814..09f41f55ac 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -20,10 +20,13 @@ #include "access/gin_private.h" #include "access/xloginsert.h" +#include "access/xlog.h" #include "commands/vacuum.h" +#include "catalog/pg_am.h" #include "miscadmin.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/acl.h" #include "storage/indexfsm.h" /* GUC parameter */ @@ -958,3 +961,52 @@ ginInsertCleanup(GinState *ginstate, MemoryContextSwitchTo(oldCtx); MemoryContextDelete(opCtx); } + +/* + * SQL-callable function to clean the insert pending list + */ +Datum +gin_clean_pending_list(PG_FUNCTION_ARGS) +{ + Oid indexoid = PG_GETARG_OID(0); + Relation indexRel = index_open(indexoid, AccessShareLock); + IndexBulkDeleteResult stats; + GinState ginstate; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("GIN pending list cannot be cleaned up during recovery."))); + + /* Must be a GIN index */ + if (indexRel->rd_rel->relkind != RELKIND_INDEX || + indexRel->rd_rel->relam != GIN_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a GIN index", + RelationGetRelationName(indexRel)))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. 
+ */ + if (RELATION_IS_OTHER_TEMP(indexRel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary indexes of other sessions"))); + + /* User must own the index (comparable to privileges needed for VACUUM) */ + if (!pg_class_ownercheck(indexoid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS, + RelationGetRelationName(indexRel)); + + memset(&stats, 0, sizeof(stats)); + initGinState(&ginstate, indexRel); + ginInsertCleanup(&ginstate, true, &stats); + + index_close(indexRel, AccessShareLock); + + PG_RETURN_INT64((int64) stats.pages_deleted); +} diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 695959c5b1..d2ea58832c 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -881,6 +881,9 @@ extern void ginFreeScanKeys(GinScanOpaque so); /* ginget.c */ extern int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm); +/* ginfast.c */ +extern Datum gin_clean_pending_list(PG_FUNCTION_ARGS); + /* ginlogic.c */ extern void ginInitConsistentFunction(GinState *ginstate, GinScanKey key); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 1f18806fb5..5c480b7d3a 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201601271 +#define CATALOG_VERSION_NO 201601281 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 3a066abc82..ba8760b37d 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4517,6 +4517,8 @@ DATA(insert OID = 3087 ( gin_extract_tsquery PGNSP PGUID 12 1 0 0 0 f f f f t f DESCR("GIN tsvector support (obsolete)"); DATA(insert OID = 3088 ( gin_tsquery_consistent PGNSP PGUID 12 1 0 0 0 f f f f t f i s 6 0 16 "2281 21 3615 23 2281 2281" _null_ _null_ _null_ _null_ _null_ gin_tsquery_consistent_6args _null_ _null_ _null_ )); DESCR("GIN tsvector support 
(obsolete)"); +DATA(insert OID = 3789 ( gin_clean_pending_list PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 20 "2205" _null_ _null_ _null_ _null_ _null_ gin_clean_pending_list _null_ _null_ _null_ )); +DESCR("clean up GIN pending list"); DATA(insert OID = 3662 ( tsquery_lt PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_lt _null_ _null_ _null_ )); DATA(insert OID = 3663 ( tsquery_le PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_le _null_ _null_ _null_ )); diff --git a/src/test/regress/expected/gin.out b/src/test/regress/expected/gin.out index c015fe7861..cc7601c667 100644 --- a/src/test/regress/expected/gin.out +++ b/src/test/regress/expected/gin.out @@ -8,7 +8,20 @@ create table gin_test_tbl(i int4[]); create index gin_test_idx on gin_test_tbl using gin (i) with (fastupdate = on); insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 20000) g; insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g; +select gin_clean_pending_list('gin_test_idx')>10 as many; -- flush the fastupdate buffers + many +------ + t +(1 row) + +insert into gin_test_tbl select array[3, 1, g] from generate_series(1, 1000) g; vacuum gin_test_tbl; -- flush the fastupdate buffers +select gin_clean_pending_list('gin_test_idx'); -- nothing to flush + gin_clean_pending_list +------------------------ + 0 +(1 row) + -- Test vacuuming delete from gin_test_tbl where i @> array[2]; vacuum gin_test_tbl; diff --git a/src/test/regress/sql/gin.sql b/src/test/regress/sql/gin.sql index 4b35560036..31890b46d8 100644 --- a/src/test/regress/sql/gin.sql +++ b/src/test/regress/sql/gin.sql @@ -10,8 +10,14 @@ create index gin_test_idx on gin_test_tbl using gin (i) with (fastupdate = on); insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 20000) g; insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g; +select 
gin_clean_pending_list('gin_test_idx')>10 as many; -- flush the fastupdate buffers + +insert into gin_test_tbl select array[3, 1, g] from generate_series(1, 1000) g; + vacuum gin_test_tbl; -- flush the fastupdate buffers +select gin_clean_pending_list('gin_test_idx'); -- nothing to flush + -- Test vacuuming delete from gin_test_tbl where i @> array[2]; vacuum gin_test_tbl;