granicus.if.org Git - postgresql/commitdiff
Add gin_clean_pending_list function to clean up GIN pending list
author Fujii Masao <fujii@postgresql.org>
Thu, 28 Jan 2016 03:57:52 +0000 (12:57 +0900)
committer Fujii Masao <fujii@postgresql.org>
Thu, 28 Jan 2016 03:57:52 +0000 (12:57 +0900)
This function cleans up the pending list of the GIN index by
moving entries in it to the main GIN data structure in bulk.
It returns the number of pages cleaned up from the pending list.

This function is useful, for example, when the pending list
needs to be cleaned up *quickly* to improve the performance of
searches using the GIN index. VACUUM can do the same thing, too,
but it may take days to run on a large table.

Jeff Janes,
reviewed by Julien Rouhaud, Jaime Casanova, Alvaro Herrera and me.

Discussion: CAMkU=1x8zFkpfnozXyt40zmR3Ub_kHu58LtRmwHUKRgQss7=iQ@mail.gmail.com

doc/src/sgml/func.sgml
doc/src/sgml/gin.sgml
doc/src/sgml/ref/create_index.sgml
src/backend/access/gin/ginfast.c
src/include/access/gin_private.h
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/test/regress/expected/gin.out
src/test/regress/sql/gin.sql

index 9c143b2a634a7eaba8672c38dbf9ad45cfa90d66..139aa2b811e88f92719904f8da727c21bd0bc22d 100644 (file)
@@ -18036,9 +18036,16 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
     <primary>brin_summarize_new_values</primary>
    </indexterm>
 
+   <indexterm>
+    <primary>gin_clean_pending_list</primary>
+   </indexterm>
+
    <para>
     <xref linkend="functions-admin-index-table"> shows the functions
     available for index maintenance tasks.
+    These functions cannot be executed during recovery.
+    Use of these functions is restricted to superusers and the owner
+    of the given index.
    </para>
 
    <table id="functions-admin-index-table">
@@ -18056,6 +18063,13 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
        <entry><type>integer</type></entry>
        <entry>summarize page ranges not already summarized</entry>
       </row>
+      <row>
+       <entry>
+        <literal><function>gin_clean_pending_list(<parameter>index</> <type>regclass</>)</function></literal>
+       </entry>
+       <entry><type>bigint</type></entry>
+       <entry>move GIN pending list entries into main index structure</entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
@@ -18069,6 +18083,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
     into the index.
    </para>
 
+   <para>
+    <function>gin_clean_pending_list</> accepts the OID or name of
+    a GIN index and cleans up the pending list of the specified GIN index
+    by moving entries in it to the main GIN data structure in bulk.
+    It returns the number of pages cleaned up from the pending list.
+    Note that if the argument is a GIN index built with the <literal>fastupdate</>
+    option disabled, the cleanup does not happen and the return value is 0
+    because the index doesn't have a pending list.
+    Please see <xref linkend="gin-fast-update"> and <xref linkend="gin-tips">
+    for details of the pending list and the <literal>fastupdate</> option.
+   </para>
+
   </sect2>
 
   <sect2 id="functions-admin-genfile">
index 9eb0b5a957ffac2deeee30fd2342883230eb6650..a392f949ffd49fd65fe350b09f09d33348971884 100644 (file)
    from the indexed item). As of <productname>PostgreSQL</productname> 8.4,
    <acronym>GIN</> is capable of postponing much of this work by inserting
    new tuples into a temporary, unsorted list of pending entries.
-   When the table is vacuumed, or if the pending list becomes larger than
+   When the table is vacuumed or autoanalyzed, or when the
+   <function>gin_clean_pending_list</function> function is called, or if the
+   pending list becomes larger than
    <xref linkend="guc-gin-pending-list-limit">, the entries are moved to the
    main <acronym>GIN</acronym> data structure using the same bulk insert
    techniques used during initial index creation.  This greatly improves
index ce36a1ba48006c055c5415d7a8740deb013a6d6d..ec4146f75150570a0da7e7cbdcab6c010defb2fe 100644 (file)
@@ -362,8 +362,8 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
       Turning <literal>fastupdate</> off via <command>ALTER INDEX</> prevents
       future insertions from going into the list of pending index entries,
       but does not in itself flush previous entries.  You might want to
-      <command>VACUUM</> the table afterward to ensure the pending list is
-      emptied.
+      <command>VACUUM</> the table or call the <function>gin_clean_pending_list</>
+      function afterward to ensure the pending list is emptied.
      </para>
     </note>
     </listitem>
index 681ce098144f41600dd47684c5b22994e5e7bacd..09f41f55ac77b58733fd6b99e42e51cb27896058 100644 (file)
 
 #include "access/gin_private.h"
 #include "access/xloginsert.h"
+#include "access/xlog.h"
 #include "commands/vacuum.h"
+#include "catalog/pg_am.h"
 #include "miscadmin.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
+#include "utils/acl.h"
 #include "storage/indexfsm.h"
 
 /* GUC parameter */
@@ -958,3 +961,52 @@ ginInsertCleanup(GinState *ginstate,
        MemoryContextSwitchTo(oldCtx);
        MemoryContextDelete(opCtx);
 }
+
+/*
+ * SQL-callable function to clean the insert pending list
+ *
+ * Argument 0 is the OID of the index to process.  The index is opened,
+ * validated, and then handed to ginInsertCleanup(); the value returned to
+ * SQL is stats.pages_deleted, widened to int64.
+ *
+ * An error is raised if:
+ *   - recovery is in progress,
+ *   - the relation is not an index using the GIN access method,
+ *   - the index is a temporary relation belonging to another session, or
+ *   - the pg_class_ownercheck() test for the current user fails.
+ */
+Datum
+gin_clean_pending_list(PG_FUNCTION_ARGS)
+{
+       Oid                     indexoid = PG_GETARG_OID(0);
+
+       /*
+        * Flushing the pending list moves entries into the main GIN structure,
+        * i.e. it modifies the index.  Take RowExclusiveLock, the lock mode used
+        * by data-modifying operations, rather than the read-only
+        * AccessShareLock.
+        */
+       Relation        indexRel = index_open(indexoid, RowExclusiveLock);
+       IndexBulkDeleteResult stats;
+       GinState        ginstate;
+
+       if (RecoveryInProgress())
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("recovery is in progress"),
+                                errhint("GIN pending list cannot be cleaned up during recovery.")));
+
+       /* Must be a GIN index */
+       if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
+               indexRel->rd_rel->relam != GIN_AM_OID)
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("\"%s\" is not a GIN index",
+                                               RelationGetRelationName(indexRel))));
+
+       /*
+        * Reject attempts to read non-local temporary relations; we would be
+        * likely to get wrong data since we have no visibility into the owning
+        * session's local buffers.
+        */
+       if (RELATION_IS_OTHER_TEMP(indexRel))
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                          errmsg("cannot access temporary indexes of other sessions")));
+
+       /* User must own the index (comparable to privileges needed for VACUUM) */
+       if (!pg_class_ownercheck(indexoid, GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
+                                          RelationGetRelationName(indexRel));
+
+       /* Start from zeroed stats so pages_deleted reflects only this call */
+       memset(&stats, 0, sizeof(stats));
+       initGinState(&ginstate, indexRel);
+       ginInsertCleanup(&ginstate, true, &stats);
+
+       /* Release with the same lock mode that index_open() acquired */
+       index_close(indexRel, RowExclusiveLock);
+
+       PG_RETURN_INT64((int64) stats.pages_deleted);
+}
index 695959c5b184e8895891a60e1160229927ffa6de..d2ea58832cee6c6eb16e1da5de43e1eaf8444e26 100644 (file)
@@ -881,6 +881,9 @@ extern void ginFreeScanKeys(GinScanOpaque so);
 /* ginget.c */
 extern int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
 
+/* ginfast.c */
+extern Datum gin_clean_pending_list(PG_FUNCTION_ARGS);
+
 /* ginlogic.c */
 extern void ginInitConsistentFunction(GinState *ginstate, GinScanKey key);
 
index 1f18806fb52e9286f87e6e67c23426a1b0b818a5..5c480b7d3ab98fa6450e60ac4d62c4ff50b12053 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201601271
+#define CATALOG_VERSION_NO     201601281
 
 #endif
index 3a066abc82fc54dc346724ff353efc12a6e197e8..ba8760b37d46362c3dacbf765a93df19214dfcb0 100644 (file)
@@ -4517,6 +4517,8 @@ DATA(insert OID = 3087 (  gin_extract_tsquery     PGNSP PGUID 12 1 0 0 0 f f f f t f
 DESCR("GIN tsvector support (obsolete)");
 DATA(insert OID = 3088 (  gin_tsquery_consistent PGNSP PGUID 12 1 0 0 0 f f f f t f i s 6 0 16 "2281 21 3615 23 2281 2281" _null_ _null_ _null_ _null_ _null_ gin_tsquery_consistent_6args _null_ _null_ _null_ ));
 DESCR("GIN tsvector support (obsolete)");
+DATA(insert OID = 3789 (  gin_clean_pending_list PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 20 "2205" _null_ _null_ _null_ _null_ _null_ gin_clean_pending_list _null_ _null_ _null_ ));
+DESCR("clean up GIN pending list");
 
 DATA(insert OID = 3662 (  tsquery_lt                   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_lt _null_ _null_ _null_ ));
 DATA(insert OID = 3663 (  tsquery_le                   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_le _null_ _null_ _null_ ));
index c015fe7861a111990d64bb9dbbf0df4875071d06..cc7601c667762687fbcf5727c58dce98be73f2b6 100644 (file)
@@ -8,7 +8,20 @@ create table gin_test_tbl(i int4[]);
 create index gin_test_idx on gin_test_tbl using gin (i) with (fastupdate = on);
 insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 20000) g;
 insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g;
+select gin_clean_pending_list('gin_test_idx')>10 as many; -- flush the fastupdate buffers
+ many 
+------
+ t
+(1 row)
+
+insert into gin_test_tbl select array[3, 1, g] from generate_series(1, 1000) g;
 vacuum gin_test_tbl; -- flush the fastupdate buffers
+select gin_clean_pending_list('gin_test_idx'); -- nothing to flush
+ gin_clean_pending_list 
+------------------------
+                      0
+(1 row)
+
 -- Test vacuuming
 delete from gin_test_tbl where i @> array[2];
 vacuum gin_test_tbl;
index 4b35560036ad6020552c04c6e8bf7f7655b8bc45..31890b46d8c0e4c8ea536d9f515557191979c29d 100644 (file)
@@ -10,8 +10,14 @@ create index gin_test_idx on gin_test_tbl using gin (i) with (fastupdate = on);
 insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 20000) g;
 insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g;
 
+select gin_clean_pending_list('gin_test_idx')>10 as many; -- flush the fastupdate buffers
+
+insert into gin_test_tbl select array[3, 1, g] from generate_series(1, 1000) g;
+
 vacuum gin_test_tbl; -- flush the fastupdate buffers
 
+select gin_clean_pending_list('gin_test_idx'); -- nothing to flush
+
 -- Test vacuuming
 delete from gin_test_tbl where i @> array[2];
 vacuum gin_test_tbl;