granicus.if.org Git - postgresql/commitdiff
Implement "fastupdate" support for GIN indexes, in which we try to accumulate
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 24 Mar 2009 20:17:18 +0000 (20:17 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 24 Mar 2009 20:17:18 +0000 (20:17 +0000)
multiple index entries in a holding area before adding them to the main index
structure.  This helps because bulk insert is (usually) significantly faster
than retail insert for GIN.

This patch also removes GIN support for amgettuple-style index scans.  The
API defined for amgettuple is difficult to support with fastupdate, and
the previously committed partial-match feature didn't really work with
it either.  We might eventually figure a way to put back amgettuple
support, but it won't happen for 8.4.

catversion bumped because of change in GIN's pg_am entry, and because
the format of GIN indexes changed on-disk (there's a metapage now,
and possibly a pending list).

Teodor Sigaev

30 files changed:
doc/src/sgml/gin.sgml
doc/src/sgml/indexam.sgml
doc/src/sgml/ref/create_index.sgml
doc/src/sgml/ref/vacuum.sgml
doc/src/sgml/textsearch.sgml
src/backend/access/common/reloptions.c
src/backend/access/gin/Makefile
src/backend/access/gin/ginbulk.c
src/backend/access/gin/gindatapage.c
src/backend/access/gin/ginfast.c [new file with mode: 0644]
src/backend/access/gin/ginget.c
src/backend/access/gin/gininsert.c
src/backend/access/gin/ginutil.c
src/backend/access/gin/ginvacuum.c
src/backend/access/gin/ginxlog.c
src/backend/access/gist/gistvacuum.c
src/backend/access/hash/hash.c
src/backend/access/index/indexam.c
src/backend/access/nbtree/nbtree.c
src/backend/catalog/index.c
src/backend/commands/analyze.c
src/backend/commands/vacuum.c
src/backend/commands/vacuumlazy.c
src/backend/nodes/tidbitmap.c
src/include/access/genam.h
src/include/access/gin.h
src/include/catalog/catversion.h
src/include/catalog/pg_am.h
src/include/catalog/pg_proc.h
src/include/nodes/tidbitmap.h

index c28195af9942390466edb9e7ed1e4e82081dc16f..9cdc4ed528131695750875e18ca865c6c757e012 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/gin.sgml,v 2.16 2008/07/22 22:05:24 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/gin.sgml,v 2.17 2009/03/24 20:17:07 tgl Exp $ -->
 
 <chapter id="GIN">
 <title>GIN Indexes</title>
        to consult <literal>n</> to determine the data type of
        <literal>query</> and the key values that need to be extracted.
        The number of returned keys must be stored into <literal>*nkeys</>.
-       If the query contains no keys then <function>extractQuery</> 
+       If the query contains no keys then <function>extractQuery</>
        should store 0 or -1 into <literal>*nkeys</>, depending on the
        semantics of the operator.  0 means that every
-       value matches the <literal>query</> and a sequential scan should be 
-       produced.  -1 means nothing can match the <literal>query</>. 
+       value matches the <literal>query</> and a sequential scan should be
+       produced.  -1 means nothing can match the <literal>query</>.
        <literal>pmatch</> is an output argument for use when partial match
        is supported.  To use it, <function>extractQuery</> must allocate
        an array of <literal>*nkeys</> booleans and store its address at
   list of heap pointers (PL, posting list) if the list is small enough.
  </para>
 
+ <sect2 id="gin-fast-update">
+  <title>GIN fast update technique</title>
+
+  <para>
+   Updating a <acronym>GIN</acronym> index tends to be slow because of the
+   intrinsic nature of inverted indexes: inserting or updating one heap row
+   can cause many inserts into the index (one for each key extracted
+   from the indexed value). As of <productname>PostgreSQL</productname> 8.4,
+   <acronym>GIN</> is capable of postponing much of this work by inserting
+   new tuples into a temporary, unsorted list of pending entries.
+   When the table is vacuumed, or if the pending list becomes too large
+   (larger than <xref linkend="guc-work-mem">), the entries are moved to the
+   main <acronym>GIN</acronym> data structure using the same bulk insert
+   techniques used during initial index creation.  This greatly improves
+   <acronym>GIN</acronym> index update speed, even counting the additional
+   vacuum overhead.  Moreover the overhead work can be done by a background
+   process instead of in foreground query processing.
+  </para>
+
+  <para>
+   The main disadvantage of this approach is that searches must scan the list
+   of pending entries in addition to searching the regular index, and so
+   a large list of pending entries will slow searches significantly.
+   Another disadvantage is that, while most updates are fast, an update
+   that causes the pending list to become <quote>too large</> will incur an
+   immediate cleanup cycle and thus be much slower than other updates.
+   Proper use of autovacuum can minimize both of these problems.
+  </para>
+
+  <para>
+   If consistent response time is more important than update speed,
+   use of pending entries can be disabled by turning off the
+   <literal>FASTUPDATE</literal> storage parameter for a
+   <acronym>GIN</acronym> index.  See <xref linkend="sql-createindex"
+   endterm="sql-createindex-title"> for details.
+  </para>
+ </sect2>
+
  <sect2 id="gin-partial-match">
   <title>Partial match algorithm</title>
-  
+
   <para>
    GIN can support <quote>partial match</> queries, in which the query
    does not determine an exact match for one or more keys, but the possible
    to be searched, or greater than zero if the index key is past the range
    that could match.
   </para>
-
-  <para>
-   During a partial-match scan, all <literal>itemPointer</>s for matching keys
-   are OR'ed into a <literal>TIDBitmap</>.
-   The scan fails if the <literal>TIDBitmap</> becomes lossy.
-   In this case an error message will be reported with advice
-   to increase <literal>work_mem</>.
-  </para>
  </sect2>
 
 </sect1>
    <term>Create vs insert</term>
    <listitem>
     <para>
-     In most cases, insertion into a <acronym>GIN</acronym> index is slow
+     Insertion into a <acronym>GIN</acronym> index can be slow
      due to the likelihood of many keys being inserted for each value.
      So, for bulk insertions into a table it is advisable to drop the GIN
      index and recreate it after finishing bulk insertion.
     </para>
+
+    <para>
+     As of <productname>PostgreSQL</productname> 8.4, this advice is less
+     necessary since delayed indexing is used (see <xref
+     linkend="gin-fast-update"> for details).  But for very large updates
+     it may still be best to drop and recreate the index.
+    </para>
    </listitem>
   </varlistentry>
 
    </listitem>
   </varlistentry>
 
+  <varlistentry>
+   <term><xref linkend="guc-work-mem"></term>
+   <listitem>
+    <para>
+     During a series of insertions into an existing <acronym>GIN</acronym>
+     index that has <literal>FASTUPDATE</> enabled, the system will clean up
+     the pending-entry list whenever it grows larger than
+     <varname>work_mem</>.  To avoid fluctuations in observed response time,
+     it's desirable to have pending-list cleanup occur in the background
+     (i.e., via autovacuum).  Foreground cleanup operations can be avoided by
+     increasing <varname>work_mem</> or making autovacuum more aggressive.
+     However, enlarging <varname>work_mem</> means that if a foreground
+     cleanup does occur, it will take even longer.
+    </para>
+   </listitem>
+  </varlistentry>
+
   <varlistentry>
    <term><xref linkend="guc-gin-fuzzy-search-limit"></term>
    <listitem>
   <function>extractQuery</function> must convert an unrestricted search into
   a partial-match query that will scan the whole index.  This is inefficient
   but might be necessary to avoid corner-case failures with operators such
-  as LIKE.  Note however that failure could still occur if the intermediate
-  <literal>TIDBitmap</> becomes lossy.
+  as <literal>LIKE</>.
  </para>
 </sect1>
 
index 3643d706735ba6d35c1a9be45fd4142c99d161e4..19b3c70814ecf003eebf6bf029b099b36924b519 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.29 2009/03/05 23:06:45 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.30 2009/03/24 20:17:08 tgl Exp $ -->
 
 <chapter id="indexam">
  <title>Index Access Method Interface Definition</title>
@@ -79,7 +79,7 @@
   </para>
 
   <para>
-   An individual index is defined by a 
+   An individual index is defined by a
    <link linkend="catalog-pg-class"><structname>pg_class</structname></link>
    entry that describes it as a physical relation, plus a
    <link linkend="catalog-pg-index"><structname>pg_index</structname></link>
@@ -239,6 +239,16 @@ amvacuumcleanup (IndexVacuumInfo *info,
    be returned.
   </para>
 
+  <para>
+   As of <productname>PostgreSQL</productname> 8.4,
+   <function>amvacuumcleanup</> will also be called at completion of an
+   <command>ANALYZE</> operation.  In this case <literal>stats</> is always
+   NULL and any return value will be ignored.  This case can be distinguished
+   by checking <literal>info-&gt;analyze_only</literal>.  It is recommended
+   that the access method do nothing except post-insert cleanup in such a
+   call, and that only in an autovacuum worker process.
+  </para>
+
   <para>
 <programlisting>
 void
@@ -344,7 +354,8 @@ amgetbitmap (IndexScanDesc scan,
 </programlisting>
    Fetch all tuples in the given scan and add them to the caller-supplied
    TIDBitmap (that is, OR the set of tuple IDs into whatever set is already
-   in the bitmap).  The number of tuples fetched is returned. 
+   in the bitmap).  The number of tuples fetched is returned (this might be
+   just an approximate count, for instance some AMs do not detect duplicates).
    While inserting tuple IDs into the bitmap, <function>amgetbitmap</> can
    indicate that rechecking of the scan conditions is required for specific
    tuple IDs.  This is analogous to the <literal>xs_recheck</> output parameter
@@ -521,14 +532,14 @@ amrestrpos (IndexScanDesc scan);
   </para>
 
   <para>
-   Instead of using <function>amgettuple</>, an index scan can be done with 
+   Instead of using <function>amgettuple</>, an index scan can be done with
    <function>amgetbitmap</> to fetch all tuples in one call.  This can be
    noticeably more efficient than <function>amgettuple</> because it allows
    avoiding lock/unlock cycles within the access method.  In principle
    <function>amgetbitmap</> should have the same effects as repeated
    <function>amgettuple</> calls, but we impose several restrictions to
-   simplify matters.  First of all, <function>amgetbitmap</> returns all 
-   tuples at once and marking or restoring scan positions isn't 
+   simplify matters.  First of all, <function>amgetbitmap</> returns all
+   tuples at once and marking or restoring scan positions isn't
    supported. Secondly, the tuples are returned in a bitmap which doesn't
    have any specific ordering, which is why <function>amgetbitmap</> doesn't
    take a <literal>direction</> argument.  Finally, <function>amgetbitmap</>
@@ -572,7 +583,7 @@ amrestrpos (IndexScanDesc scan);
    Aside from the index's own internal consistency requirements, concurrent
    updates create issues about consistency between the parent table (the
    <firstterm>heap</>) and the index.  Because
-   <productname>PostgreSQL</productname> separates accesses 
+   <productname>PostgreSQL</productname> separates accesses
    and updates of the heap from those of the index, there are windows in
    which the index might be inconsistent with the heap.  We handle this problem
    with the following rules:
@@ -701,7 +712,7 @@ amrestrpos (IndexScanDesc scan);
    no error should be raised.  (This case cannot occur during the
    ordinary scenario of inserting a row that's just been created by
    the current transaction.  It can happen during
-   <command>CREATE UNIQUE INDEX CONCURRENTLY</>, however.) 
+   <command>CREATE UNIQUE INDEX CONCURRENTLY</>, however.)
   </para>
 
   <para>
index 3596b5df288f7e9de22f0e6f6ba313acef6fece8..49352accae002cf6789cf198a81cd914b021028b 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/create_index.sgml,v 1.70 2009/02/02 19:31:38 alvherre Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/create_index.sgml,v 1.71 2009/03/24 20:17:08 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -294,6 +294,37 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] <replaceable class="parameter">name</re
 
    </variablelist>
 
+   <para>
+    <literal>GIN</literal> indexes accept a different parameter:
+   </para>
+
+   <variablelist>
+
+   <varlistentry>
+    <term><literal>FASTUPDATE</></term>
+    <listitem>
+    <para>
+     This setting controls usage of the fast update technique described in
+     <xref linkend="gin-fast-update">.  It is a Boolean parameter:
+     <literal>ON</> enables fast update, <literal>OFF</> disables it.
+     (Alternative spellings of <literal>ON</> and <literal>OFF</> are
+     allowed as described in <xref linkend="config-setting">.)  The
+     default is <literal>ON</>.
+    </para>
+
+    <note>
+     <para>
+      Turning <literal>FASTUPDATE</> off via <command>ALTER INDEX</> prevents
+      future insertions from going into the list of pending index entries,
+      but does not in itself flush previous entries.  You might want to
+      <command>VACUUM</> the table afterward to ensure the pending list is
+      emptied.
+     </para>
+    </note>
+    </listitem>
+   </varlistentry>
+
+   </variablelist>
   </refsect2>
 
   <refsect2 id="SQL-CREATEINDEX-CONCURRENTLY">
@@ -501,6 +532,13 @@ CREATE UNIQUE INDEX title_idx ON films (title) WITH (fillfactor = 70);
 </programlisting>
   </para>
 
+  <para>
+   To create a <acronym>GIN</> index with fast updates disabled:
+<programlisting>
+CREATE INDEX gin_idx ON documents_table (locations) WITH (fastupdate = off);
+</programlisting>
+  </para>
+
   <para>
    To create an index on the column <literal>code</> in the table
    <literal>films</> and have the index reside in the tablespace
index cf32ac71b2f1fb0e021cad45ed8817e0735a1cc5..205165e7176338053558a00a266d4f3a7c05a287 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.54 2008/12/11 18:16:18 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.55 2009/03/24 20:17:08 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -160,6 +160,13 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">
     <command>VACUUM</> cannot be executed inside a transaction block.
    </para>
 
+   <para>
+    For tables with <acronym>GIN</> indexes, <command>VACUUM</command> (in
+    any form) also completes any pending index insertions, by moving pending
+    index entries to the appropriate places in the main <acronym>GIN</> index
+    structure.  See <xref linkend="gin-fast-update"> for details.
+   </para>
+
    <para>
     We recommend that active production databases be
     vacuumed frequently (at least nightly), in order to
index 4b18b06c528d69a687a358dbbb5541d015dc97a4..b841183db0041f068b77ac106be98d44b22f63c6 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.47 2009/01/07 22:40:49 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.48 2009/03/24 20:17:08 tgl Exp $ -->
 
 <chapter id="textsearch">
  <title id="textsearch-title">Full Text Search</title>
@@ -3224,12 +3224,14 @@ SELECT plainto_tsquery('supernovae stars');
     </listitem>
     <listitem>
      <para>
-      GIN indexes are about ten times slower to update than GiST
+      GIN indexes are moderately slower to update than GiST indexes, but
+      about 10 times slower if fast-update support was disabled
+      (see <xref linkend="gin-fast-update"> for details)
      </para>
     </listitem>
     <listitem>
      <para>
-      GIN indexes are two-to-three times larger than GiST
+      GIN indexes are two-to-three times larger than GiST indexes
      </para>
     </listitem>
    </itemizedlist>
index b926689c5cbee8e6ddc521c89b6207b0a85d12f7..880f2db52663a3f094923f4cd722c26079855e82 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.23 2009/03/23 16:36:27 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.24 2009/03/24 20:17:09 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -56,6 +56,14 @@ static relopt_bool boolRelOpts[] =
                },
                true
        },
+       {
+               {
+                       "fastupdate",
+                       "Enables \"fast update\" feature for this GIN index",
+                       RELOPT_KIND_GIN
+               },
+               true
+       },
        /* list terminator */
        { { NULL } }
 };
index 08946c88a73026f789c633684773cfe575901c01..23b75fc1d80c6200fc0c3494cd656c56e09543cf 100644 (file)
@@ -4,7 +4,7 @@
 #    Makefile for access/gin
 #
 # IDENTIFICATION
-#    $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.3 2008/02/19 10:30:06 petere Exp $
+#    $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.4 2009/03/24 20:17:10 tgl Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -14,6 +14,6 @@ include $(top_builddir)/src/Makefile.global
 
 OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
        ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
-       ginbulk.o
+       ginbulk.o ginfast.o
 
 include $(top_srcdir)/src/backend/common.mk
index 136f80d9977513e155c5388c7dbe156841d4de6b..a7258619aee1f0908a2f7c8da072739c97e120bb 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.14 2009/01/01 17:23:34 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.15 2009/03/24 20:17:10 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -197,6 +197,8 @@ ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber att
        if (nentry <= 0)
                return;
 
+       Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
+
        i = nentry - 1;
        for (; i > 0; i >>= 1)
                nbit++;
index d0e426c65604e81f0ccf35636b93300351ada1b2..a872d44880c7f04ffafbff40a98ad95e9a4a71ff 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.13 2009/01/01 17:23:34 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.14 2009/03/24 20:17:10 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -43,8 +43,16 @@ MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPoint
 
        while (aptr - a < na && bptr - b < nb)
        {
-               if (compareItemPointers(aptr, bptr) > 0)
+               int             cmp = compareItemPointers(aptr, bptr);
+
+               if (cmp > 0)
+                       *dptr++ = *bptr++;
+               else if (cmp == 0)
+               {
+                       /* we want only one copy of the identical items */
                        *dptr++ = *bptr++;
+                       aptr++;
+               }
                else
                        *dptr++ = *aptr++;
        }
@@ -630,11 +638,16 @@ insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
                gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
 
                if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
-                       elog(ERROR, "item pointer (%u,%d) already exists",
-                       ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
-                                ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
-
-               ginInsertValue(&(gdi->btree), gdi->stack);
+               {
+                       /*
+                        * gdi->btree.items[gdi->btree.curitem] already exists in index
+                        */
+                       gdi->btree.curitem++;
+                       LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
+                       freeGinBtreeStack(gdi->stack);
+               }
+               else
+                       ginInsertValue(&(gdi->btree), gdi->stack);
 
                gdi->stack = NULL;
        }
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
new file mode 100644 (file)
index 0000000..d862423
--- /dev/null
@@ -0,0 +1,866 @@
+/*-------------------------------------------------------------------------
+ *
+ * ginfast.c
+ *       Fast insert routines for the Postgres inverted index access method.
+ *       Pending entries are stored in linear list of pages.  Later on
+ *       (typically during VACUUM), ginInsertCleanup() will be invoked to
+ *       transfer pending entries into the regular index structure.  This
+ *       wins because bulk insertion is much more efficient than retail.
+ *
+ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.1 2009/03/24 20:17:10 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gin.h"
+#include "access/tuptoaster.h"
+#include "catalog/index.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "utils/memutils.h"
+
+
+#define GIN_PAGE_FREESIZE /* BLCKSZ less the aligned page header and the aligned GIN opaque area */ \
+       ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )
+
+typedef struct DatumArray              /* growable array of Datum values */
+{
+       Datum   *values;                        /* expansible array of Datums */
+       int32    nvalues;                       /* current number of valid entries in values[] */
+       int32    maxvalues;                     /* allocated size of the values[] array */
+} DatumArray;
+
+
+/*
+ * Build a pending-list page from the given array of tuples, and write it out.
+ *
+ * The page in "buffer" is initialized as a GIN_LIST page, the "ntuples"
+ * index tuples in "tuples" are added to it at consecutive offsets starting
+ * from FirstOffsetNumber, and its rightlink is set to "rightlink".  Unless
+ * the index is temporary (rd_istemp), the result is WAL-logged as an
+ * XLOG_GIN_INSERT_LISTPAGE record whose payload is a copy of the tuples.
+ *
+ * The buffer is unlocked and released before returning, so the caller must
+ * not touch it afterwards.  (Presumably it arrives pinned and exclusively
+ * locked, as obtained from GinNewBuffer -- TODO confirm.)
+ *
+ * Returns the free space left on the page, as reported by PageGetFreeSpace().
+ *
+ * NOTE(review): the elog(ERROR) for a PageAddItem failure is raised while
+ * inside the critical section.
+ */
+static int32
+writeListPage(Relation index, Buffer buffer,
+                         IndexTuple *tuples, int32 ntuples, BlockNumber rightlink)
+{
+       Page                    page = BufferGetPage(buffer);
+       int                     i, freesize, size=0;
+       OffsetNumber    l, off;
+       char               *workspace;
+       char               *ptr;
+
+       /*
+        * workspace receives a back-to-back copy of the tuples, which becomes
+        * the payload of the WAL record built below.  It could be a local
+        * array; we use palloc for alignment.  Note that it is allocated before
+        * the critical section is entered.
+        */
+       workspace = palloc(BLCKSZ);
+
+       START_CRIT_SECTION();
+
+       GinInitBuffer(buffer, GIN_LIST);
+
+       off = FirstOffsetNumber;
+       ptr = workspace;
+
+       for(i=0; i<ntuples; i++)
+       {
+               int             this_size = IndexTupleSize(tuples[i]);
+
+               memcpy(ptr, tuples[i], this_size);      /* copy kept for the WAL record */
+               ptr += this_size;
+               size += this_size;      /* running length of the WAL payload */
+
+               l = PageAddItem(page, (Item)tuples[i], this_size, off, false, false);
+
+               if (l == InvalidOffsetNumber)
+                       elog(ERROR, "failed to add item to index page in \"%s\"",
+                                RelationGetRelationName(index));
+
+               off++;
+       }
+
+       Assert(size <= BLCKSZ);         /* else we overran workspace */
+
+       GinPageGetOpaque(page)->rightlink = rightlink;
+
+       /*
+        * A tail page (one without a rightlink) may contain only whole row(s)
+        * or the final part of a row whose earlier parts were placed on
+        * previous pages; it is flagged as "full row" and its maxoff is set
+        * to 1.  Any other page gets maxoff = 0.  (On list pages maxoff is
+        * incremented together with nPendingHeapTuples in
+        * ginHeapTupleFastInsert, i.e. it is used as a heap-tuple counter.)
+        */
+       if ( rightlink == InvalidBlockNumber )
+       {
+               GinPageSetFullRow(page);
+               GinPageGetOpaque(page)->maxoff = 1;
+       }
+       else
+       {
+               GinPageGetOpaque(page)->maxoff = 0;
+       }
+
+       freesize = PageGetFreeSpace(page);      /* measured after all tuples were added */
+
+       MarkBufferDirty(buffer);
+
+       if (!index->rd_istemp)
+       {
+               XLogRecData                             rdata[2];
+               ginxlogInsertListPage   data;
+               XLogRecPtr                      recptr;
+
+               rdata[0].buffer = buffer;
+               rdata[0].buffer_std = true;
+               rdata[0].data = (char*)&data;   /* fixed-size part: blkno, rightlink, ntuples */
+               rdata[0].len = sizeof(ginxlogInsertListPage);
+               rdata[0].next = rdata+1;
+
+               rdata[1].buffer = InvalidBuffer;
+               rdata[1].data = workspace;      /* concatenated tuple images */
+               rdata[1].len = size;
+               rdata[1].next = NULL;
+
+               data.blkno = BufferGetBlockNumber(buffer);
+               data.rightlink = rightlink;
+               data.ntuples = ntuples;
+
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata);
+               PageSetLSN(page, recptr);
+               PageSetTLI(page, ThisTimeLineID);
+       }
+
+       UnlockReleaseBuffer(buffer);    /* buffer must not be used by the caller after this */
+
+       END_CRIT_SECTION();
+
+       pfree(workspace);
+
+       return freesize;
+}
+/*
+ * Build a separate chain ("sublist") of pending-list pages holding the
+ * given tuples, in order, and describe that chain in *res.
+ *
+ * Tuples are packed onto pages obtained from GinNewBuffer().  A page is
+ * written out with writeListPage() only after its successor has been
+ * allocated, because the successor's block number is needed as the
+ * rightlink.  The final page is written with an invalid rightlink.
+ *
+ * On return res->head and res->tail are the first and last blocks of the
+ * chain, res->tailFreeSize is the free space on the last page,
+ * res->nPendingPages has been incremented once per page written, and
+ * res->nPendingHeapTuples is 1.  nPendingPages is only incremented here,
+ * never initialized; the caller visible in this file
+ * (ginHeapTupleFastInsert) zeroes *res with memset beforehand.
+ *
+ * Nothing here touches the metapage or the existing pending list; linking
+ * the new chain in is left to the caller.
+ *
+ * NOTE(review): a tuple for which tupsize >= GinListPageSize even on an
+ * empty page would make the loop below allocate pages forever; presumably
+ * the tuple size is bounded elsewhere -- TODO confirm.
+ */
+static void
+makeSublist(Relation index, IndexTuple *tuples, int32 ntuples,
+                       GinMetaPageData *res)
+{
+       Buffer                  curBuffer = InvalidBuffer;
+       Buffer                  prevBuffer = InvalidBuffer;
+       int                     i, size = 0, tupsize;
+       int                     startTuple = 0;
+
+       Assert(ntuples > 0);
+
+       /*
+        * Split tuples into pages.  curBuffer == InvalidBuffer means "start a
+        * new page before placing tuples[i]"; startTuple is the index of the
+        * first tuple assigned to the page currently being filled, and size is
+        * the space accumulated for that page so far (aligned tuple size plus
+        * one line pointer per tuple).
+        */
+       for(i=0;i<ntuples;i++)
+       {
+               if ( curBuffer == InvalidBuffer )
+               {
+                       curBuffer = GinNewBuffer(index);
+
+                       if ( prevBuffer != InvalidBuffer )
+                       {
+                               res->nPendingPages++;
+                               writeListPage(index, prevBuffer,
+                                                         tuples+startTuple, i-startTuple,
+                                                         BufferGetBlockNumber(curBuffer));
+                       }
+                       else
+                       {
+                               res->head = BufferGetBlockNumber(curBuffer);    /* very first page of the chain */
+                       }
+
+                       prevBuffer = curBuffer;
+                       startTuple = i;
+                       size = 0;
+               }
+
+               tupsize = MAXALIGN(IndexTupleSize(tuples[i])) + sizeof(ItemIdData);
+
+               if ( size + tupsize >= GinListPageSize )
+               {
+                       /*
+                        * won't fit, force a new page and reprocess tuples[i]
+                        * (the i-- cancels the loop's own increment)
+                        */
+                       i--;
+                       curBuffer = InvalidBuffer;
+               }
+               else
+               {
+                       size += tupsize;
+               }
+       }
+
+       /*
+        * Write last page: it holds tuples[startTuple .. ntuples-1] and has no
+        * rightlink; the free space that writeListPage reports for it is
+        * recorded as the chain's tailFreeSize.
+        */
+       res->tail = BufferGetBlockNumber(curBuffer);
+       res->tailFreeSize = writeListPage(index, curBuffer,
+                                                                         tuples+startTuple, ntuples-startTuple,
+                                                                         InvalidBlockNumber);
+       res->nPendingPages++;
+       /* that was only one heap tuple, however many index tuples it produced */
+       res->nPendingHeapTuples = 1;
+}
+
+/*
+ * Inserts collected values during normal insertion.  The function guarantees
+ * that all values of a heap tuple will be stored sequentially, preserving order.
+ */
+void
+ginHeapTupleFastInsert(Relation index, GinState *ginstate,
+                                          GinTupleCollector *collector)
+{
+       Buffer                          metabuffer;
+       Page                            metapage;
+       GinMetaPageData    *metadata = NULL;
+       XLogRecData                     rdata[2];
+       Buffer                          buffer = InvalidBuffer;
+       Page                            page = NULL;
+       ginxlogUpdateMeta       data;
+       bool                            separateList = false;
+       bool                            needCleanup = false;
+
+       if ( collector->ntuples == 0 )
+               return;
+
+       data.node = index->rd_node;
+       data.ntuples = 0;
+       data.newRightlink = data.prevTail = InvalidBlockNumber;
+
+       rdata[0].buffer = InvalidBuffer;
+       rdata[0].data = (char *) &data;
+       rdata[0].len = sizeof(ginxlogUpdateMeta);
+       rdata[0].next = NULL;
+
+       metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+       metapage = BufferGetPage(metabuffer);
+
+       if ( collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GIN_PAGE_FREESIZE )
+       {
+               /*
+                * Total size is greater than one page => make sublist
+                */
+               separateList = true;
+       }
+       else
+       {
+               LockBuffer(metabuffer, GIN_EXCLUSIVE);
+               metadata = GinPageGetMeta(metapage);
+
+               if ( metadata->head == InvalidBlockNumber ||
+                       collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize )
+               {
+                       /*
+                        * Pending list is empty or total size is greater than freespace
+                        * on tail page => make sublist
+                        *
+                        * We unlock metabuffer to keep high concurrency
+                        */
+                       separateList = true;
+                       LockBuffer(metabuffer, GIN_UNLOCK);
+               }
+       }
+
+       if ( separateList )
+       {
+               GinMetaPageData         sublist;
+
+               /*
+                * We should make sublist separately and append it to the tail
+                */
+               memset( &sublist, 0, sizeof(GinMetaPageData) );
+
+               makeSublist(index, collector->tuples, collector->ntuples, &sublist);
+
+               /*
+                * metapage was unlocked, see above
+                */
+               LockBuffer(metabuffer, GIN_EXCLUSIVE);
+               metadata = GinPageGetMeta(metapage);
+
+               if ( metadata->head == InvalidBlockNumber )
+               {
+                       /*
+                        * Sublist becomes main list
+                        */
+                       START_CRIT_SECTION();
+                       memcpy(metadata, &sublist, sizeof(GinMetaPageData) );
+                       memcpy(&data.metadata, &sublist, sizeof(GinMetaPageData) );
+               }
+               else
+               {
+                       /*
+                        * merge lists
+                        */
+
+                       data.prevTail = metadata->tail;
+                       buffer = ReadBuffer(index, metadata->tail);
+                       LockBuffer(buffer, GIN_EXCLUSIVE);
+                       page = BufferGetPage(buffer);
+                       Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
+
+                       START_CRIT_SECTION();
+
+                       GinPageGetOpaque(page)->rightlink = sublist.head;
+                       metadata->tail = sublist.tail;
+                       metadata->tailFreeSize = sublist.tailFreeSize;
+
+                       metadata->nPendingPages += sublist.nPendingPages;
+                       metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
+
+                       memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+                       data.newRightlink = sublist.head;
+
+                       MarkBufferDirty(buffer);
+               }
+       }
+       else
+       {
+               /*
+                * Insert into tail page, metapage is already locked
+                */
+
+               OffsetNumber    l, off;
+               int                             i, tupsize;
+               char                    *ptr;
+
+               buffer = ReadBuffer(index, metadata->tail);
+               LockBuffer(buffer, GIN_EXCLUSIVE);
+               page = BufferGetPage(buffer);
+               off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+                               OffsetNumberNext(PageGetMaxOffsetNumber(page));
+
+               rdata[0].next = rdata + 1;
+
+               rdata[1].buffer = buffer;
+               rdata[1].buffer_std = true;
+               ptr = rdata[1].data = (char *) palloc( collector->sumsize );
+               rdata[1].len = collector->sumsize;
+               rdata[1].next = NULL;
+
+               data.ntuples = collector->ntuples;
+
+               START_CRIT_SECTION();
+
+               /*
+                * Increase counter of heap tuples
+                */
+               Assert( GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples );
+               GinPageGetOpaque(page)->maxoff++;
+               metadata->nPendingHeapTuples++;
+
+               for(i=0; i<collector->ntuples; i++)
+               {
+                       tupsize = IndexTupleSize(collector->tuples[i]);
+                       l = PageAddItem(page, (Item)collector->tuples[i], tupsize, off, false, false);
+
+                       if (l == InvalidOffsetNumber)
+                               elog(ERROR, "failed to add item to index page in \"%s\"",
+                                                RelationGetRelationName(index));
+
+                       memcpy(ptr, collector->tuples[i], tupsize);
+                       ptr+=tupsize;
+
+                       off++;
+               }
+
+               metadata->tailFreeSize -= collector->sumsize + collector->ntuples * sizeof(ItemIdData);
+               memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+               MarkBufferDirty(buffer);
+       }
+
+       /*
+        *  Make real write
+        */
+
+       MarkBufferDirty(metabuffer);
+       if ( !index->rd_istemp )
+       {
+               XLogRecPtr  recptr;
+
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+               PageSetLSN(metapage, recptr);
+               PageSetTLI(metapage, ThisTimeLineID);
+
+               if ( buffer != InvalidBuffer )
+               {
+                       PageSetLSN(page, recptr);
+                       PageSetTLI(page, ThisTimeLineID);
+               }
+       }
+
+       if (buffer != InvalidBuffer)
+               UnlockReleaseBuffer(buffer);
+
+       /*
+        * Force pending list cleanup when it becomes too long.
+        * And, ginInsertCleanup could take significant amount of
+        * time, so we prefer to call it when it can do all the work in a
+        * single collection cycle. In non-vacuum mode, it shouldn't
+        * require maintenance_work_mem, so fire it while pending list is
+        * still small enough to fit into work_mem.
+        *
+        * ginInsertCleanup() should not be called inside our CRIT_SECTION.
+        */
+       if ( metadata->nPendingPages * GIN_PAGE_FREESIZE > work_mem * 1024L )
+               needCleanup = true;
+
+       UnlockReleaseBuffer(metabuffer);
+
+       END_CRIT_SECTION();
+
+       if ( needCleanup )
+               ginInsertCleanup(index, ginstate, false, NULL);
+}
+
+/*
+ * Collect the index entries for one column value of one heap tuple.
+ *
+ * The entries are extracted from "value" with extractEntriesSU(), each one
+ * is turned into an index tuple with GinFormTuple(), stamped with the heap
+ * pointer "item", and appended to collector->tuples.  collector->ntuples
+ * and collector->sumsize (total size of the collected tuples) are advanced
+ * accordingly.  The tuples array is allocated on first use and doubled
+ * whenever it is too small for the new entries.
+ *
+ * All values for one heap tuple should be written at once - to guarantee
+ * a consistent state.
+ *
+ * Returns the number of entries collected (0 if there was nothing to
+ * insert).  Raises ERROR if a formed tuple is larger than
+ * TOAST_INDEX_TARGET or not smaller than GinMaxItemSize.
+ */
+uint32
+ginHeapTupleFastCollect(Relation index, GinState *ginstate,
+                                               GinTupleCollector *collector,
+                                               OffsetNumber attnum, Datum value, ItemPointer item)
+{
+       Datum      *entries;
+       int32           i,
+                               nentries;
+
+       entries = extractEntriesSU(ginstate, attnum, value, &nentries);
+
+       if (nentries == 0)
+               /* nothing to insert */
+               return 0;
+
+       /*
+        * Allocate/reallocate memory for storing collected tuples.
+        * The first allocation is sized from this call's entry count times
+        * the number of attributes in the index's tuple descriptor; after
+        * that the capacity is doubled until the new entries fit.
+        */
+       if ( collector->tuples == NULL )
+       {
+               collector->lentuples = nentries * index->rd_att->natts;
+               collector->tuples = (IndexTuple*)palloc(sizeof(IndexTuple) * collector->lentuples);
+       }
+
+       while ( collector->ntuples + nentries > collector->lentuples )
+       {
+               collector->lentuples *= 2;
+               collector->tuples = (IndexTuple*)repalloc( collector->tuples,
+                                                                                                       sizeof(IndexTuple) * collector->lentuples);
+       }
+
+       /*
+        * Form one index tuple per entry and append it to the array.
+        * ntuples itself is only advanced after the loop, so the new tuples
+        * are stored at offsets ntuples + i.
+        */
+       for (i = 0; i < nentries; i++)
+       {
+               int32 tupsize;
+
+               collector->tuples[collector->ntuples + i] = GinFormTuple(ginstate, attnum, entries[i], NULL, 0);
+               collector->tuples[collector->ntuples + i]->t_tid = *item;      /* remember owning heap tuple */
+               tupsize = IndexTupleSize(collector->tuples[collector->ntuples + i]);
+
+               if ( tupsize > TOAST_INDEX_TARGET || tupsize >= GinMaxItemSize)
+                       elog(ERROR, "huge tuple");
+
+               collector->sumsize += tupsize;
+       }
+
+       collector->ntuples += nentries;
+
+       return nentries;
+}
+
+/*
+ * Deletes pending list pages up to (not including) newHead page.
+ * If newHead == InvalidBlockNumber then function drops the whole list.
+ *
+ * metapage is pinned and exclusive-locked throughout this function.
+ *
+ * The pages are removed in batches of at most GIN_NDELETE_AT_ONCE.  For
+ * each batch the pages are read and exclusive-locked while following the
+ * rightlinks from the current list head; then, inside one critical
+ * section, the metapage's head and counters are updated, every page of
+ * the batch is flagged GIN_DELETED, and (unless index->rd_istemp is set)
+ * a single XLOG_GIN_DELETE_LISTPAGE record is written whose LSN is stamped
+ * on the metapage and on each deleted page.
+ *
+ * If stats isn't null, the number of pages in each batch is added to
+ * stats->pages_deleted.
+ *
+ * Returns true if another cleanup process is running concurrently
+ * (if so, we can just abandon our own efforts).  This is detected by
+ * finding a page that is already flagged as deleted; the batch being
+ * assembled is then released unchanged, but batches completed earlier
+ * stay deleted.
+ */
+static bool
+shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
+                 IndexBulkDeleteResult *stats)
+{
+       Page                                    metapage;
+       GinMetaPageData            *metadata;
+       BlockNumber                             blknoToDelete;
+
+       metapage = BufferGetPage(metabuffer);
+       metadata = GinPageGetMeta(metapage);
+       blknoToDelete = metadata->head;
+
+       do
+       {
+               Page                                    page;
+               int                                             i;
+               int64                                   nDeletedHeapTuples = 0;
+               ginxlogDeleteListPages  data;
+               XLogRecData                             rdata[1];
+               Buffer                                  buffers[GIN_NDELETE_AT_ONCE];
+
+               data.node = index->rd_node;
+
+               rdata[0].buffer = InvalidBuffer;
+               rdata[0].data = (char *) &data;
+               rdata[0].len = sizeof(ginxlogDeleteListPages);
+               rdata[0].next = NULL;
+
+               data.ndeleted = 0;
+               while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
+               {
+                       data.toDelete[ data.ndeleted ] = blknoToDelete;
+                       buffers[ data.ndeleted ] = ReadBuffer(index, blknoToDelete);
+                       LockBuffer( buffers[ data.ndeleted ], GIN_EXCLUSIVE );
+                       page = BufferGetPage( buffers[ data.ndeleted ] );
+
+                       data.ndeleted++;        /* counted first so the release loop below covers this buffer too */
+
+                       if ( GinPageIsDeleted(page) )
+                       {
+                               /* concurrent cleanup process is detected */
+                               for(i=0;i<data.ndeleted;i++)
+                                       UnlockReleaseBuffer( buffers[i] );
+
+                               return true;
+                       }
+
+                       nDeletedHeapTuples += GinPageGetOpaque(page)->maxoff;
+                       blknoToDelete = GinPageGetOpaque( page )->rightlink;    /* step to the next list page */
+               }
+
+               if (stats)
+                       stats->pages_deleted += data.ndeleted;
+
+               START_CRIT_SECTION();
+
+               metadata->head = blknoToDelete; /* first page that survives this batch */
+
+               Assert( metadata->nPendingPages >= data.ndeleted );
+               metadata->nPendingPages -= data.ndeleted;
+               Assert( metadata->nPendingHeapTuples >= nDeletedHeapTuples );
+               metadata->nPendingHeapTuples -= nDeletedHeapTuples;
+
+               if ( blknoToDelete == InvalidBlockNumber )
+               {
+                       /* the list is now empty: reset the remaining list state */
+                       metadata->tail = InvalidBlockNumber;
+                       metadata->tailFreeSize = 0;
+                       metadata->nPendingPages = 0;
+                       metadata->nPendingHeapTuples = 0;
+               }
+               memcpy( &data.metadata, metadata, sizeof(GinMetaPageData));     /* WAL record carries the new metadata */
+
+               MarkBufferDirty( metabuffer );
+
+               for(i=0; i<data.ndeleted; i++)
+               {
+                       page = BufferGetPage( buffers[ i ] );
+                       GinPageGetOpaque( page )->flags = GIN_DELETED;
+                       MarkBufferDirty( buffers[ i ] );
+               }
+
+               if ( !index->rd_istemp )
+               {
+                       XLogRecPtr  recptr;
+
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+                       PageSetLSN(metapage, recptr);
+                       PageSetTLI(metapage, ThisTimeLineID);
+
+                       for(i=0; i<data.ndeleted; i++)
+                       {
+                               page = BufferGetPage( buffers[ i ] );
+                               PageSetLSN(page, recptr);
+                               PageSetTLI(page, ThisTimeLineID);
+                       }
+               }
+
+               for(i=0; i<data.ndeleted; i++)
+                       UnlockReleaseBuffer( buffers[ i ] );
+
+               END_CRIT_SECTION();
+       } while( blknoToDelete != newHead );
+
+       return false;
+}
+
+/*
+ * Add datum to DatumArray, resizing if needed.
+ *
+ * When the array is full (nvalues has reached maxvalues) its capacity is
+ * doubled with repalloc() before the datum is stored.  Because growth is
+ * by doubling, maxvalues has to be nonzero and values has to be an
+ * allocated array on entry.
+ */
+static void
+addDatum(DatumArray *datums, Datum datum)
+{
+       if ( datums->nvalues >= datums->maxvalues)
+       {
+               datums->maxvalues *= 2;
+               datums->values = (Datum*)repalloc(datums->values,
+                                                                                 sizeof(Datum)*datums->maxvalues);
+       }
+
+       datums->values[ datums->nvalues++ ] = datum;
+}
+
+/*
+ * Go through all tuples >= startoff on page and collect values in memory
+ *
+ * The tuples are visited in offset order.  Each run of consecutive tuples
+ * that share the same heap pointer (t_tid) and the same attribute number
+ * is gathered into *da and then passed to ginInsertRecordBA() in a single
+ * call; the last run is passed after the loop ends.
+ *
+ * Note that da is just workspace --- it does not carry any state across
+ * calls.
+ */
+static void
+processPendingPage(BuildAccumulator *accum, DatumArray *da,
+                                  Page page, OffsetNumber startoff)
+{
+       ItemPointerData heapptr;
+       OffsetNumber    i,maxoff;
+       OffsetNumber    attrnum, curattnum;
+
+       /* reset *da to empty */
+       da->nvalues = 0;
+
+       maxoff = PageGetMaxOffsetNumber(page);
+       Assert( maxoff >= FirstOffsetNumber );  /* the page must hold at least one item */
+       ItemPointerSetInvalid(&heapptr);        /* invalid means "no run started yet" */
+       attrnum = 0;
+
+       for (i = startoff; i <= maxoff; i = OffsetNumberNext(i))
+       {
+               IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+
+               curattnum = gintuple_get_attrnum(accum->ginstate, itup);
+
+               if ( !ItemPointerIsValid(&heapptr) )
+               {
+                       /* first tuple seen: it starts the first run */
+                       heapptr = itup->t_tid;
+                       attrnum = curattnum;
+               }
+               else if ( !(ItemPointerEquals(&heapptr, &itup->t_tid) &&
+                                       curattnum == attrnum) )
+               {
+                       /*
+                        * We can insert several datums per call, but only for one heap
+                        * tuple and one column.  So hand over the run collected so far
+                        * and start a new one with this tuple.
+                        */
+                       ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);
+                       da->nvalues = 0;
+                       heapptr = itup->t_tid;
+                       attrnum = curattnum;
+               }
+               addDatum(da, gin_index_getattr(accum->ginstate, itup));
+       }
+
+       ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);   /* hand over the final run */
+}
+
+/*
+ * Move tuples from pending pages into regular GIN structure.
+ *
+ * This can be called concurrently by multiple backends, so it must cope.
+ * At first glance it looks completely not concurrent-safe and not crash-safe
+ * either.  The reason it's okay is that multiple insertion of the same entry
+ * is detected and treated as a no-op by gininsert.c.  If we crash after
+ * posting entries to the main index and before removing them from the
+ * pending list, it's okay because when we redo the posting later on, nothing
+ * bad will happen.  Likewise, if two backends simultaneously try to post
+ * a pending entry into the main index, one will succeed and one will do
+ * nothing.  We try to notice when someone else is a little bit ahead of
+ * us in the process, but that's just to avoid wasting cycles.  Only the
+ * action of removing a page from the pending list really needs exclusive
+ * lock.
+ *
+ * Outline: pending pages are read one at a time under share lock and their
+ * contents accumulated in memory.  When the end of the list is reached, or
+ * the accumulated data gets too large at a page that has a full row, the
+ * accumulated entries are inserted into the main index with the pending
+ * page unlocked, the metapage is locked exclusively, and the processed
+ * pages are removed with shiftList().  We stop early whenever a page
+ * already marked deleted shows that another cleanup is ahead of us.
+ *
+ * ginstate is the GIN state used for accumulating and inserting entries.
+ * vac_delay indicates that ginInsertCleanup is called from vacuum process,
+ * so call vacuum_delay_point() periodically.
+ * If stats isn't null, we count deleted pending pages into the counts.
+ */
+void
+ginInsertCleanup(Relation index, GinState *ginstate,
+                                bool vac_delay, IndexBulkDeleteResult *stats)
+{
+       Buffer                          metabuffer, buffer;
+       Page                            metapage, page;
+       GinMetaPageData    *metadata;
+       MemoryContext           opCtx, oldCtx;
+       BuildAccumulator        accum;
+       DatumArray                      datums;
+       BlockNumber                     blkno;
+
+       metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+       LockBuffer(metabuffer, GIN_SHARE);
+       metapage = BufferGetPage(metabuffer);
+       metadata = GinPageGetMeta(metapage);
+
+       if ( metadata->head == InvalidBlockNumber )
+       {
+               /* Nothing to do */
+               UnlockReleaseBuffer(metabuffer);
+               return;
+       }
+
+       /*
+        * Read and lock head of pending list
+        */
+       blkno = metadata->head;
+       buffer = ReadBuffer(index, blkno);
+       LockBuffer(buffer, GIN_SHARE);
+       page = BufferGetPage(buffer);
+
+       LockBuffer(metabuffer, GIN_UNLOCK);     /* drop the lock but keep the pin */
+
+       /*
+        * Initialize.  All temporary space will be in opCtx
+        */
+       opCtx = AllocSetContextCreate(CurrentMemoryContext,
+                                                                 "GIN insert cleanup temporary context",
+                                                                 ALLOCSET_DEFAULT_MINSIZE,
+                                                                 ALLOCSET_DEFAULT_INITSIZE,
+                                                                 ALLOCSET_DEFAULT_MAXSIZE);
+
+       oldCtx = MemoryContextSwitchTo(opCtx);
+
+       datums.maxvalues=128;
+       datums.nvalues = 0;
+       datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+
+       ginInitBA(&accum);
+       accum.ginstate = ginstate;
+
+       /*
+        * At the top of this loop, we have pin and lock on the current page
+        * of the pending list.  However, we'll release that before exiting
+        * the loop.  Note we also have pin but not lock on the metapage.
+        */
+       for(;;)
+       {
+               if ( GinPageIsDeleted(page) )
+               {
+                       /* another cleanup process is running concurrently */
+                       UnlockReleaseBuffer( buffer );
+                       break;
+               }
+
+               /*
+                * read page's datums into memory
+                */
+               processPendingPage(&accum, &datums, page, FirstOffsetNumber);
+
+               if (vac_delay)
+                       vacuum_delay_point();
+
+               /*
+                * Is it time to flush memory to disk?  Flush if we are at the end
+                * of the pending list, or if we have a full row and memory is
+                * getting full.
+                *
+                * XXX using up maintenance_work_mem here is probably unreasonably
+                * much, since vacuum might already be using that much.
+                */
+               if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
+                        ( GinPageHasFullRow(page) &&
+                          accum.allocatedMemory > maintenance_work_mem * 1024L ) )
+               {
+                       ItemPointerData    *list;
+                       uint32                  nlist;
+                       Datum                   entry;
+                       OffsetNumber            maxoff, attnum;
+
+                       /*
+                        * Unlock current page to increase performance.
+                        * Changes of page will be checked later by comparing
+                        * maxoff after completion of memory flush.
+                        */
+                       maxoff = PageGetMaxOffsetNumber(page);
+                       LockBuffer(buffer, GIN_UNLOCK);
+
+                       /*
+                        * Moving collected data into regular structure can take
+                        * significant amount of time - so, run it without locking pending
+                        * list.
+                        */
+                       while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+                       {
+                               ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+                               if (vac_delay)
+                                       vacuum_delay_point();
+                       }
+
+                       /*
+                        * Lock the whole list to remove pages
+                        */
+                       LockBuffer(metabuffer, GIN_EXCLUSIVE);
+                       LockBuffer(buffer, GIN_SHARE);
+
+                       if ( GinPageIsDeleted(page) )
+                       {
+                               /* another cleanup process is running concurrently */
+                               UnlockReleaseBuffer(buffer);
+                               LockBuffer(metabuffer, GIN_UNLOCK);
+                               break;
+                       }
+
+                       /*
+                        * While we left the page unlocked, more stuff might have gotten
+                        * added to it.  If so, process those entries immediately.  There
+                        * shouldn't be very many, so we don't worry about the fact that
+                        * we're doing this with exclusive lock. Insertion algorithm
+                        * guarantees that inserted row(s) will not continue on next page.
+                        * NOTE: intentionally no vacuum_delay_point in this loop.
+                        */
+                       if ( PageGetMaxOffsetNumber(page) != maxoff )
+                       {
+                               ginInitBA(&accum);
+                               processPendingPage(&accum, &datums, page, maxoff+1);    /* only the tuples added meanwhile */
+
+                               while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+                                       ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+                       }
+
+                       /*
+                        * Remember next page - it will become the new list head
+                        */
+                       blkno = GinPageGetOpaque(page)->rightlink;
+                       UnlockReleaseBuffer(buffer); /* shiftList will do exclusive locking */
+
+                       /*
+                        * remove the pages we have read from the pending list; at this
+                        * point all content of those pages is in the regular structure
+                        */
+                       if ( shiftList(index, metabuffer, blkno, stats) )
+                       {
+                               /* another cleanup process is running concurrently */
+                               LockBuffer(metabuffer, GIN_UNLOCK);
+                               break;
+                       }
+
+                       Assert( blkno == metadata->head );
+                       LockBuffer(metabuffer, GIN_UNLOCK);
+
+                       /*
+                        * if we removed the whole pending list just exit
+                        */
+                       if ( blkno == InvalidBlockNumber )
+                               break;
+
+                       /*
+                        * release memory used so far and reinit state
+                        * (the datums workspace lived in opCtx, so it must be
+                        * allocated again after the reset)
+                        */
+                       MemoryContextReset(opCtx);
+                       ginInitBA(&accum);
+                       datums.nvalues = 0;
+                       datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+               }
+               else
+               {
+                       blkno = GinPageGetOpaque(page)->rightlink;
+                       UnlockReleaseBuffer(buffer);
+               }
+
+               /*
+                * Read next page in pending list
+                */
+               CHECK_FOR_INTERRUPTS();
+               buffer = ReadBuffer(index, blkno);
+               LockBuffer(buffer, GIN_SHARE);
+               page = BufferGetPage(buffer);
+       }
+
+       ReleaseBuffer(metabuffer);      /* give up the pin held since the start */
+
+       /* Clean up temporary space */
+       MemoryContextSwitchTo(oldCtx);
+       MemoryContextDelete(opCtx);
+}
index 182981498c10ddb1c437934d4da5a4b0d4f5849d..7f9f123660519827728b01ae67ef68a249092425 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.22 2009/01/10 21:08:36 tgl Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.23 2009/03/24 20:17:10 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
 #include "utils/memutils.h"
 
 
+typedef struct pendingPosition
+{
+       Buffer                  pendingBuffer;
+       OffsetNumber    firstOffset;
+       OffsetNumber    lastOffset;
+       ItemPointerData item;
+} pendingPosition;
+
+
 /*
  * Tries to refind previously taken ItemPointer on page.
  */
@@ -258,7 +267,7 @@ computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry
 }
 
 /*
- * Start* functions setup begining state of searches: finds correct buffer and pins it.
+ * Start* functions setup beginning state of searches: finds correct buffer and pins it.
  */
 static void
 startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
@@ -268,6 +277,15 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
        Page                    page;
        bool                    needUnlock = TRUE;
 
+       entry->buffer = InvalidBuffer;
+       entry->offset = InvalidOffsetNumber;
+       entry->list = NULL;
+       entry->nlist = 0;
+       entry->partialMatch = NULL;
+       entry->partialMatchResult = NULL;
+       entry->reduceResult = FALSE;
+       entry->predictNumberResult = 0;
+
        if (entry->master != NULL)
        {
                entry->isFinished = entry->master->isFinished;
@@ -285,15 +303,6 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
        page = BufferGetPage(stackEntry->buffer);
 
        entry->isFinished = TRUE;
-       entry->buffer = InvalidBuffer;
-       entry->offset = InvalidOffsetNumber;
-       entry->list = NULL;
-       entry->nlist = 0;
-       entry->partialMatch = NULL;
-       entry->partialMatchIterator = NULL;
-       entry->partialMatchResult = NULL;
-       entry->reduceResult = FALSE;
-       entry->predictNumberResult = 0;
 
        if ( entry->isPartialMatch )
        {
@@ -354,9 +363,10 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
 
                        entry->buffer = scanBeginPostingTree(gdi);
                        /*
-                        * We keep buffer pinned because we need to prevent deletition
+                        * We keep buffer pinned because we need to prevent deletion of
                         * page during scan. See GIN's vacuum implementation. RefCount
-                        * is increased to keep buffer pinned after freeGinBtreeStack() call.
+                        * is increased to keep buffer pinned after freeGinBtreeStack()
+                        * call.
                         */
                        IncrBufferRefCount(entry->buffer);
 
@@ -536,9 +546,10 @@ entryGetItem(Relation index, GinScanEntry entry)
        {
                do
                {
-                       if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples )
+                       if (entry->partialMatchResult == NULL ||
+                               entry->offset >= entry->partialMatchResult->ntuples)
                        {
-                               entry->partialMatchResult = tbm_iterate( entry->partialMatchIterator );
+                               entry->partialMatchResult = tbm_iterate(entry->partialMatchIterator);
 
                                if ( entry->partialMatchResult == NULL )
                                {
@@ -548,23 +559,37 @@ entryGetItem(Relation index, GinScanEntry entry)
                                        entry->isFinished = TRUE;
                                        break;
                                }
-                               else if ( entry->partialMatchResult->ntuples < 0 )
-                               {
-                                       /* bitmap became lossy */
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_OUT_OF_MEMORY),
-                                                       errmsg("not enough memory to store result of partial match operator" ),
-                                                       errhint("Increase the \"work_mem\" parameter.")));
-                               }
+
+                               /*
+                                * reset counter to the beginning of entry->partialMatchResult.
+                                * Note: entry->offset is still greater than
+                                * partialMatchResult->ntuples if partialMatchResult is
+                                * lossy. So, on next call we will get next result from
+                                * TIDBitmap.
+                                */
                                entry->offset = 0;
                        }
 
-                       ItemPointerSet(&entry->curItem,
-                                                       entry->partialMatchResult->blockno,
-                                                       entry->partialMatchResult->offsets[ entry->offset ]);
-                       entry->offset ++;
+                       if ( entry->partialMatchResult->ntuples < 0 )
+                       {
+                               /*
+                                * lossy result, so we need to check the whole page
+                                */
+                               ItemPointerSetLossyPage(&entry->curItem,
+                                                                               entry->partialMatchResult->blockno);
+                               /*
+                                * We might as well fall out of the loop; we could not
+                                * estimate number of results on this page to support correct
+                                * reducing of result even if it's enabled
+                                */
+                               break;
+                       }
 
-               } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
+                       ItemPointerSet(&entry->curItem,
+                                                  entry->partialMatchResult->blockno,
+                                                  entry->partialMatchResult->offsets[entry->offset]);
+                       entry->offset++;
+               } while (entry->reduceResult == TRUE && dropItem(entry));
        }
        else if (!BufferIsValid(entry->buffer))
        {
@@ -618,6 +643,10 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
 
                        if (key->entryRes[i])
                        {
+                               /*
+                                * Advance only those entries that were the least
+                                * on the previous call
+                                */
                                if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE)
                                {
                                        if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
@@ -664,6 +693,13 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
                 */
                *keyrecheck = true;
 
+               /*
+                * If one of the entry scans returned a lossy result, return it without
+                * checking - we can't tell consistentFn anything helpful.
+                */
+               if (ItemPointerIsLossyPage(&key->curItem))
+                       return FALSE;
+
                oldCtx = MemoryContextSwitchTo(tempCtx);
                res = DatumGetBool(FunctionCall4(&ginstate->consistentFn[key->attnum-1],
                                                                                 PointerGetDatum(key->entryRes),
@@ -677,6 +713,337 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
        return FALSE;
 }
 
+
+/*
+ * Position "pos" on the next heap row stored in the pending list.
+ *
+ * On success, pos->item is the row's heap ItemPointer and the row's tuples
+ * on the current page occupy offsets [pos->firstOffset, pos->lastOffset).
+ * Returns false when the list is exhausted; pos->item is then invalid.
+ *
+ * The pendingBuffer is presumed pinned and share-locked on entry, and is
+ * pinned and share-locked on success exit.  On failure exit it has been
+ * released and pos->pendingBuffer is set to InvalidBuffer.
+ */
+static bool
+scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
+{
+       Page                    page;
+       OffsetNumber    maxoff;
+       OffsetNumber    off;
+       IndexTuple              itup;
+
+       ItemPointerSetInvalid( &pos->item );
+
+       page = BufferGetPage(pos->pendingBuffer);
+       maxoff = PageGetMaxOffsetNumber(page);
+
+       /*
+        * Step to the right until we stand on a page that still has tuples
+        * at or after firstOffset.
+        */
+       while ( pos->firstOffset > maxoff )
+       {
+               BlockNumber     nextBlkno = GinPageGetOpaque(page)->rightlink;
+               Buffer          nextBuffer;
+
+               if ( nextBlkno == InvalidBlockNumber )
+               {
+                       /* No page to the right: the pending list is finished */
+                       UnlockReleaseBuffer(pos->pendingBuffer);
+                       pos->pendingBuffer = InvalidBuffer;
+                       return false;
+               }
+
+               /*
+                * The next page must be locked before the current one is let go.
+                * Otherwise the insertcleanup process, which may be waiting for
+                * an exclusive lock on the current page, could delete the next
+                * page before we reach it.
+                */
+               nextBuffer = ReadBuffer(scan->indexRelation, nextBlkno);
+               LockBuffer(nextBuffer, GIN_SHARE);
+               UnlockReleaseBuffer(pos->pendingBuffer);
+
+               pos->pendingBuffer = nextBuffer;
+               pos->firstOffset = FirstOffsetNumber;
+
+               page = BufferGetPage(nextBuffer);
+               maxoff = PageGetMaxOffsetNumber(page);
+       }
+
+       /* The tuple at firstOffset identifies the heap row */
+       itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
+       pos->item = itup->t_tid;
+
+       if ( !GinPageHasFullRow(page) )
+       {
+               /* Every tuple on such a page carries the same itempointer */
+               pos->lastOffset = maxoff + 1;
+               return true;
+       }
+
+       /* Find where the following row starts (or the end of the page) */
+       off = pos->firstOffset + 1;
+       while ( off <= maxoff )
+       {
+               itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+               if ( !ItemPointerEquals(&pos->item, &itup->t_tid) )
+                       break;
+               off++;
+       }
+       pos->lastOffset = off;
+
+       return true;
+}
+
+/*
+ * Check whether any pending-list tuple at offsets [off, maxoff) of "page"
+ * is accepted by the opclass partial-match comparison against "value".
+ *
+ * attrnum is the 1-based index column the scan entry belongs to; the scan
+ * stops with "false" as soon as a tuple of a different column is reached.
+ * datum[] and datumExtracted[] are the caller's cache of key values,
+ * indexed by offset-1; missing entries are filled in here on first use.
+ *
+ * The comparePartial result is interpreted as:
+ *             0       the tuple matches, return true
+ *             > 0     stop scanning, return false
+ *             < 0     no match, move on to the next tuple
+ * Returns false when the range is exhausted without a match.
+ */
+static bool
+matchPartialInPendingList(GinState *ginstate, Page page,
+                                                 OffsetNumber off, OffsetNumber maxoff,
+                                                 Datum value, OffsetNumber attrnum,
+                                                 Datum *datum, bool *datumExtracted,
+                                                 StrategyNumber strategy)
+{
+       IndexTuple              itup;
+       int                             res;
+
+       /*
+        * Advancing "off" on every iteration is essential: a negative
+        * comparison result must step to the next tuple, not retest this one.
+        */
+       for ( ; off < maxoff; off++ )
+       {
+               itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+               if ( attrnum != gintuple_get_attrnum(ginstate, itup) )
+                       return false;
+
+               if (datumExtracted[ off-1 ] == false)
+               {
+                       datum[ off-1 ] = gin_index_getattr(ginstate, itup);
+                       datumExtracted[ off-1 ] = true;
+               }
+
+               /*
+                * Support-function arrays are zero-based while attrnum counts
+                * from one, hence attrnum-1 (same convention as consistentFn).
+                */
+               res = DatumGetInt32(FunctionCall3(&ginstate->comparePartialFn[attrnum-1],
+                                                 value,
+                                                 datum[ off-1 ],
+                                                 UInt16GetDatum(strategy)));
+               if ( res == 0 )
+                       return true;
+               else if (res>0)
+                       return false;
+       }
+
+       return false;
+}
+
+/*
+ * Fill in the entryRes[] array of every scan key for the heap row that
+ * "pos" currently points at, by examining the pending-list tuples at
+ * offsets [pos->firstOffset, pos->lastOffset) of the current page.
+ * Returns true if at least one scan entry matched one of the row's datums.
+ *
+ * Each scan entry is looked up by binary search over that offset range,
+ * comparing first on attribute number and then, via compareEntries(), on
+ * the key value (this presumes the row's tuples are stored in that order).
+ * Key datums are pulled out of the tuples lazily and remembered in the
+ * local datum[]/datumExtracted[] arrays, indexed by offset-1, so a tuple
+ * is decoded at most once per page.  For partial-match entries the final
+ * decision is delegated to matchPartialInPendingList(), starting either at
+ * the exactly-equal tuple or at the position where the search ended.
+ *
+ * If the page is not marked GinPageHasFullRow(), the row continues on the
+ * following page: we advance with scanGetCandidate() and repeat, keeping
+ * the matches already recorded, and raise an error if the next candidate
+ * is missing or belongs to a different heap row.
+ *
+ * The pendingBuffer is presumed pinned and share-locked on entry.  On
+ * return pos->firstOffset has been advanced to pos->lastOffset.
+ */
+static bool
+collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
+{
+       GinScanOpaque           so = (GinScanOpaque) scan->opaque;
+       OffsetNumber            attrnum;
+       Page                            page;
+       IndexTuple              itup;
+       int                                     i, j;
+       bool                            hasMatch = false;
+
+       /*
+        * Reset entryRes of every key: nothing has matched yet.  This is done
+        * once per heap row, not once per page, so matches found on an
+        * earlier page survive when the row spans several pages.
+        */
+       for (i = 0; i < so->nkeys; i++)
+       {
+               GinScanKey      key = so->keys + i;
+               memset( key->entryRes, FALSE, key->nentries );
+       }
+
+       for(;;)
+       {
+               Datum                           datum[ BLCKSZ/sizeof(IndexTupleData) ];
+               bool                            datumExtracted[ BLCKSZ/sizeof(IndexTupleData) ];
+
+               Assert( pos->lastOffset > pos->firstOffset );
+               memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset ));
+
+               page = BufferGetPage(pos->pendingBuffer);
+
+               for(i = 0; i < so->nkeys; i++)
+               {
+                       GinScanKey  key = so->keys + i;
+
+                       for(j=0; j<key->nentries; j++)
+                       {
+                               OffsetNumber            StopLow = pos->firstOffset,
+                                                                       StopHigh = pos->lastOffset,
+                                                                       StopMiddle;
+                               GinScanEntry            entry = key->scanEntry + j;
+
+                               if ( key->entryRes[j] )
+                                       continue;
+
+                               while (StopLow < StopHigh)
+                               {
+                                       StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+
+                                       itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
+                                       attrnum = gintuple_get_attrnum(&so->ginstate, itup);
+
+                                       if (key->attnum < attrnum)
+                                               StopHigh = StopMiddle;
+                                       else if (key->attnum > attrnum)
+                                               StopLow = StopMiddle + 1;
+                                       else
+                                       {
+                                               int res;
+
+                                               if (datumExtracted[ StopMiddle-1 ] == false)
+                                               {
+                                                       datum[ StopMiddle-1 ] = gin_index_getattr(&so->ginstate, itup);
+                                                       datumExtracted[  StopMiddle-1 ] = true;
+                                               }
+                                               res =  compareEntries(&so->ginstate,
+                                                                       entry->attnum,
+                                                                       entry->entry,
+                                                                       datum[ StopMiddle-1 ]);
+
+                                               if ( res == 0 )
+                                               {
+                                                       if ( entry->isPartialMatch )
+                                                               key->entryRes[j] =
+                                                                       matchPartialInPendingList(&so->ginstate,
+                                                                                                                         page, StopMiddle,
+                                                                                                                         pos->lastOffset,
+                                                                                                                         entry->entry,
+                                                                                                                         entry->attnum,
+                                                                                                                         datum,
+                                                                                                                         datumExtracted,
+                                                                                                                         entry->strategy);
+                                                       else
+                                                               key->entryRes[j] = true;
+                                                       break;
+                                               }
+                                               else if ( res < 0  )
+                                                       StopHigh = StopMiddle;
+                                               else
+                                                       StopLow = StopMiddle + 1;
+                                       }
+                               }
+
+                               if ( StopLow>=StopHigh && entry->isPartialMatch )
+                                       key->entryRes[j] =
+                                               matchPartialInPendingList(&so->ginstate,
+                                                                                                 page, StopHigh,
+                                                                                                 pos->lastOffset,
+                                                                                                 entry->entry,
+                                                                                                 entry->attnum,
+                                                                                                 datum,
+                                                                                                 datumExtracted,
+                                                                                                 entry->strategy);
+
+                               hasMatch |= key->entryRes[j];
+                       }
+               }
+
+               pos->firstOffset = pos->lastOffset;
+
+               if ( GinPageHasFullRow(page) )
+               {
+                       /*
+                        * This page holds the rest of the row, so every value of
+                        * the heap tuple has now been examined.  Report the outcome
+                        * and let the caller go on to the next row.
+                        */
+
+                       return hasMatch;
+               }
+               else
+               {
+                       ItemPointerData item = pos->item;
+
+                       if ( scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item) )
+                               elog(ERROR,"Could not process tuple");  /* XXX should not happen: the row must continue on the next page */
+               }
+       }
+
+       return hasMatch;
+}
+
+/*
+ * Walk the whole pending list and add every heap row that satisfies all
+ * scan keys to the bitmap "tbm".  *ntids is set to the number of rows
+ * added (zero when the index has no pending list).
+ */
+static void
+scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
+{
+       GinScanOpaque   so = (GinScanOpaque) scan->opaque;
+       Buffer                  metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
+       BlockNumber             headBlkno;
+       pendingPosition pos;
+
+       *ntids = 0;
+
+       LockBuffer(metabuffer, GIN_SHARE);
+       headBlkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
+
+       if ( headBlkno == InvalidBlockNumber )
+       {
+               /* Pending list is empty; only the main index needs scanning */
+               UnlockReleaseBuffer( metabuffer );
+               return;
+       }
+
+       /*
+        * Pin and share-lock the head page while the metapage is still
+        * locked, so the head page can't be deleted by a vacuum process
+        * before we get to it.
+        */
+       pos.pendingBuffer = ReadBuffer(scan->indexRelation, headBlkno);
+       LockBuffer(pos.pendingBuffer, GIN_SHARE);
+       pos.firstOffset = FirstOffsetNumber;
+       UnlockReleaseBuffer( metabuffer );
+
+       /* One iteration per heap row found in the pending list */
+       while( scanGetCandidate(scan, &pos) )
+       {
+               MemoryContext   savedCtx;
+               bool                    needRecheck = false;
+               bool                    allKeysOk = true;
+               int                             keyno;
+
+               /* Fill each key's entryRes[]; skip rows where nothing matched */
+               if ( collectDatumForItem(scan, &pos) == false )
+                       continue;
+
+               /* Ask consistentFn about each key, working in the temp context */
+               savedCtx = MemoryContextSwitchTo(so->tempCtx);
+
+               for (keyno = 0; keyno < so->nkeys; keyno++)
+               {
+                       GinScanKey      key = &so->keys[keyno];
+                       bool            keyrecheck = true;
+                       Datum           verdict;
+
+                       verdict = FunctionCall4(&so->ginstate.consistentFn[ key->attnum-1 ],
+                                                                       PointerGetDatum(key->entryRes),
+                                                                       UInt16GetDatum(key->strategy),
+                                                                       key->query,
+                                                                       PointerGetDatum(&keyrecheck));
+
+                       needRecheck |= keyrecheck;
+
+                       /* The first failing key rejects the row */
+                       if ( !DatumGetBool(verdict) )
+                       {
+                               allKeysOk = false;
+                               break;
+                       }
+               }
+
+               MemoryContextSwitchTo(savedCtx);
+               MemoryContextReset(so->tempCtx);
+
+               if ( allKeysOk )
+               {
+                       tbm_add_tuples(tbm, &pos.item, 1, needRecheck);
+                       (*ntids)++;
+               }
+       }
+}
+
 /*
  * Get heap item pointer from scan
  * returns true if found
@@ -720,6 +1087,18 @@ scanGetItem(IndexScanDesc scan, ItemPointerData *item, bool *recheck)
                {
                        int                     cmp = compareItemPointers(item, &key->curItem);
 
+                       if ( cmp != 0 && (ItemPointerIsLossyPage(item) || ItemPointerIsLossyPage(&key->curItem)) )
+                       {
+                               /*
+                                * if one of ItemPointers points to the whole page then
+                                * compare only page's number
+                                */
+                               if ( ItemPointerGetBlockNumber(item) == ItemPointerGetBlockNumber(&key->curItem) )
+                                       cmp = 0;
+                               else
+                                       cmp = (ItemPointerGetBlockNumber(item) > ItemPointerGetBlockNumber(&key->curItem)) ? 1 : -1;
+                       }
+
                        if (cmp == 0)
                                break;
                        else if (cmp > 0)
@@ -757,9 +1136,26 @@ gingetbitmap(PG_FUNCTION_ARGS)
        if (GinIsVoidRes(scan))
                PG_RETURN_INT64(0);
 
+       ntids = 0;
+
+       /*
+        * First, scan the pending list and collect any matching entries into
+        * the bitmap.  After we scan a pending item, some other backend could
+        * post it into the main index, and so we might visit it a second time
+        * during the main scan.  This is okay because we'll just re-set the
+        * same bit in the bitmap.  (The possibility of duplicate visits is a
+        * major reason why GIN can't support the amgettuple API, however.)
+        * Note that it would not do to scan the main index before the pending
+        * list, since concurrent cleanup could then make us miss entries
+        * entirely.
+        */
+       scanPendingInsert(scan, tbm, &ntids);
+
+       /*
+        * Now scan the main index.
+        */
        startScan(scan);
 
-       ntids = 0;
        for (;;)
        {
                ItemPointerData iptr;
@@ -770,31 +1166,12 @@ gingetbitmap(PG_FUNCTION_ARGS)
                if (!scanGetItem(scan, &iptr, &recheck))
                        break;
 
-               tbm_add_tuples(tbm, &iptr, 1, recheck);
+               if ( ItemPointerIsLossyPage(&iptr) )
+                       tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
+               else
+                       tbm_add_tuples(tbm, &iptr, 1, recheck);
                ntids++;
        }
 
        PG_RETURN_INT64(ntids);
 }
-
-Datum
-gingettuple(PG_FUNCTION_ARGS)
-{
-       IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
-       ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
-       bool            res;
-
-       if (dir != ForwardScanDirection)
-               elog(ERROR, "GIN doesn't support other scan directions than forward");
-
-       if (GinIsNewKey(scan))
-               newScanKey(scan);
-
-       if (GinIsVoidRes(scan))
-               PG_RETURN_BOOL(false);
-
-       startScan(scan);
-       res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck);
-
-       PG_RETURN_BOOL(res);
-}
index 2ab1105423c1591501b4253138532e6329cbf41a..d05882cdb944f53bde3c1d0c34bc52f442983267 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.18 2009/01/01 17:23:34 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.19 2009/03/24 20:17:11 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -138,9 +138,11 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
 /*
  * Inserts only one entry to the index, but it can add more than 1 ItemPointer.
  */
-static void
-ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, 
-                               ItemPointerData *items, uint32 nitem, bool isBuild)
+void
+ginEntryInsert(Relation index, GinState *ginstate,
+                          OffsetNumber attnum, Datum value,
+                          ItemPointerData *items, uint32 nitem,
+                          bool isBuild)
 {
        GinBtreeData btree;
        GinBtreeStack *stack;
@@ -273,7 +275,7 @@ ginbuild(PG_FUNCTION_ARGS)
        IndexBuildResult *result;
        double          reltuples;
        GinBuildState buildstate;
-       Buffer          buffer;
+       Buffer          RootBuffer, MetaBuffer;
        ItemPointerData *list;
        Datum           entry;
        uint32          nlist;
@@ -286,11 +288,17 @@ ginbuild(PG_FUNCTION_ARGS)
 
        initGinState(&buildstate.ginstate, index);
 
+       /* initialize the meta page */
+       MetaBuffer = GinNewBuffer(index);
+
        /* initialize the root page */
-       buffer = GinNewBuffer(index);
+       RootBuffer = GinNewBuffer(index);
+
        START_CRIT_SECTION();
-       GinInitBuffer(buffer, GIN_LEAF);
-       MarkBufferDirty(buffer);
+       GinInitMetabuffer(MetaBuffer);
+       MarkBufferDirty(MetaBuffer);
+       GinInitBuffer(RootBuffer, GIN_LEAF);
+       MarkBufferDirty(RootBuffer);
 
        if (!index->rd_istemp)
        {
@@ -303,16 +311,19 @@ ginbuild(PG_FUNCTION_ARGS)
                rdata.len = sizeof(RelFileNode);
                rdata.next = NULL;
 
-               page = BufferGetPage(buffer);
-
-
                recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+               
+               page = BufferGetPage(RootBuffer);
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
 
+               page = BufferGetPage(MetaBuffer);
+               PageSetLSN(page, recptr);
+               PageSetTLI(page, ThisTimeLineID);
        }
 
-       UnlockReleaseBuffer(buffer);
+       UnlockReleaseBuffer(MetaBuffer);
+       UnlockReleaseBuffer(RootBuffer);
        END_CRIT_SECTION();
 
        /* build the index */
@@ -417,9 +428,26 @@ gininsert(PG_FUNCTION_ARGS)
 
        initGinState(&ginstate, index);
 
-       for(i=0; i<ginstate.origTupdesc->natts;i++)
-               if ( !isnull[i] )
-                       res += ginHeapTupleInsert(index, &ginstate, (OffsetNumber)(i+1), values[i], ht_ctid);
+       if ( GinGetUseFastUpdate(index) )
+       {
+               GinTupleCollector       collector;
+
+               memset(&collector, 0, sizeof(GinTupleCollector));
+               for(i=0; i<ginstate.origTupdesc->natts;i++)
+                       if ( !isnull[i] )
+                               res += ginHeapTupleFastCollect(index, &ginstate, &collector,
+                                                                                               (OffsetNumber)(i+1), values[i], ht_ctid);
+
+               ginHeapTupleFastInsert(index, &ginstate, &collector);
+       }
+       else
+       {
+               for(i=0; i<ginstate.origTupdesc->natts;i++)
+                       if ( !isnull[i] ) 
+                               res += ginHeapTupleInsert(index, &ginstate, 
+                                                                                               (OffsetNumber)(i+1), values[i], ht_ctid);
+
+       }
 
        MemoryContextSwitchTo(oldCtx);
        MemoryContextDelete(insertCtx);
index 222ea677883f7f730365ff37511eb4d4f47a6619..e0951a6a4f8bb66ddbca8f59a74b90a5d5b98bc2 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.20 2009/01/05 17:14:28 alvherre Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.21 2009/03/24 20:17:11 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -57,7 +57,7 @@ initGinState(GinState *state, Relation index)
                                                CurrentMemoryContext);
 
                /*
-                * Check opclass capability to do partial match. 
+                * Check opclass capability to do partial match.
                 */
                if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
                {
@@ -88,7 +88,7 @@ gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
                bool    isnull;
 
                /*
-                * First attribute is always int16, so we can safely use any 
+                * First attribute is always int16, so we can safely use any
                 * tuple descriptor to obtain first attribute of tuple
                 */
                res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
@@ -213,6 +213,22 @@ GinInitBuffer(Buffer b, uint32 f)
        GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
 }
 
+/*
+ * Initialize the page held in buffer b as a GIN metapage with an empty
+ * pending list: no head or tail page and all counters zero.
+ */
+void
+GinInitMetabuffer(Buffer b)
+{
+       Page                     page = BufferGetPage(b);
+       GinMetaPageData *meta;
+
+       GinInitPage(page, GIN_META, BufferGetPageSize(b));
+       meta = GinPageGetMeta(page);
+
+       /* no pending-list pages exist yet */
+       meta->tail = InvalidBlockNumber;
+       meta->head = InvalidBlockNumber;
+       meta->tailFreeSize = 0;
+
+       /* and nothing is queued */
+       meta->nPendingPages = 0;
+       meta->nPendingHeapTuples = 0;
+}
+
 int
 compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b)
 {
@@ -315,10 +331,26 @@ ginoptions(PG_FUNCTION_ARGS)
 {
        Datum           reloptions = PG_GETARG_DATUM(0);
        bool            validate = PG_GETARG_BOOL(1);
-       bytea      *result;
+       relopt_value *options;
+       GinOptions *rdopts;
+       int                     numoptions;
+       static const relopt_parse_elt tab[] = {
+               {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}
+       };
+
+       options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN,
+                                                         &numoptions);
+
+       /* if none set, we're done */
+       if (numoptions == 0)
+               PG_RETURN_NULL();
+
+       rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
+
+       fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
+                                       validate, tab, lengthof(tab));
+
+       pfree(options);
 
-       result = default_reloptions(reloptions, validate, RELOPT_KIND_GIN);
-       if (result)
-               PG_RETURN_BYTEA_P(result);
-       PG_RETURN_NULL();
+       PG_RETURN_BYTEA_P(rdopts);
 }
index be614a3c9c80ece7ce382f1cae7c382ae3e42e16..dd98b9fd2842ad7960d16ef9e975378c1f997f84 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.27 2009/01/01 17:23:34 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.28 2009/03/24 20:17:11 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -19,8 +19,8 @@
 #include "catalog/storage.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
+#include "postmaster/autovacuum.h"
 #include "storage/bufmgr.h"
-#include "storage/freespace.h"
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
 
@@ -593,18 +593,24 @@ ginbulkdelete(PG_FUNCTION_ARGS)
        BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
        uint32          nRoot;
 
+       gvs.index = index;
+       gvs.callback = callback;
+       gvs.callback_state = callback_state;
+       gvs.strategy = info->strategy;
+       initGinState(&gvs.ginstate, index);
+
        /* first time through? */
        if (stats == NULL)
+       {
+               /* Yes, so initialize stats to zeroes */
                stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+               /* and cleanup any pending inserts */
+               ginInsertCleanup(index, &gvs.ginstate, true, stats);
+       }
+
        /* we'll re-count the tuples each time */
        stats->num_index_tuples = 0;
-
-       gvs.index = index;
        gvs.result = stats;
-       gvs.callback = callback;
-       gvs.callback_state = callback_state;
-       gvs.strategy = info->strategy;
-       initGinState(&gvs.ginstate, index);
 
        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                                                RBM_NORMAL, info->strategy);
@@ -702,10 +708,32 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
        BlockNumber totFreePages;
        BlockNumber lastBlock = GIN_ROOT_BLKNO,
                                lastFilledBlock = GIN_ROOT_BLKNO;
+       GinState        ginstate;
 
-       /* Set up all-zero stats if ginbulkdelete wasn't called */
+       /*
+        * In an autovacuum analyze, we want to clean up pending insertions.
+        * Otherwise, an ANALYZE-only call is a no-op.
+        */
+       if (info->analyze_only)
+       {
+               if (IsAutoVacuumWorkerProcess())
+               {
+                       initGinState(&ginstate, index);
+                       ginInsertCleanup(index, &ginstate, true, stats);
+               }
+               PG_RETURN_POINTER(stats);
+       }
+
+       /* 
+        * Set up all-zero stats and cleanup pending inserts
+        * if ginbulkdelete wasn't called
+        */
        if (stats == NULL)
+       {
                stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+               initGinState(&ginstate, index);
+               ginInsertCleanup(index, &ginstate, true, stats);
+       }
 
        /*
         * XXX we always report the heap tuple count as the number of index
index 362709de330bbb36d1dc30dd76f7fc0e4e454e15..03cdc1129cfae1b0898ee423e574c7a930984728 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                      $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.17 2009/01/20 18:59:36 heikki Exp $
+ *                      $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.18 2009/03/24 20:17:11 tgl Exp $
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -71,20 +71,30 @@ static void
 ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
 {
        RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
-       Buffer          buffer;
+       Buffer          RootBuffer, MetaBuffer;
        Page            page;
 
-       buffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
-       Assert(BufferIsValid(buffer));
-       page = (Page) BufferGetPage(buffer);
+       MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
+       Assert(BufferIsValid(MetaBuffer));
+       GinInitMetabuffer(MetaBuffer);
+
+       page = (Page) BufferGetPage(MetaBuffer);
+       PageSetLSN(page, lsn);
+       PageSetTLI(page, ThisTimeLineID);
 
-       GinInitBuffer(buffer, GIN_LEAF);
+       RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
+       Assert(BufferIsValid(RootBuffer));
+       page = (Page) BufferGetPage(RootBuffer);
+
+       GinInitBuffer(RootBuffer, GIN_LEAF);
 
        PageSetLSN(page, lsn);
        PageSetTLI(page, ThisTimeLineID);
 
-       MarkBufferDirty(buffer);
-       UnlockReleaseBuffer(buffer);
+       MarkBufferDirty(MetaBuffer);
+       UnlockReleaseBuffer(MetaBuffer);
+       MarkBufferDirty(RootBuffer);
+       UnlockReleaseBuffer(RootBuffer);
 }
 
 static void
@@ -433,6 +443,174 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
        }
 }
 
+/*
+ * Replay an XLOG_GIN_UPDATE_META_PAGE record.
+ *
+ * The record carries a full copy of the metapage data, which is installed
+ * first.  After that, one of two things is replayed:
+ *
+ *     ntuples > 0: the tuples stored after the record header are appended
+ *     to the pending-list tail page (metadata.tail), unless the record has
+ *     XLR_BKP_BLOCK_1 set.
+ *
+ *     otherwise, if prevTail is valid: prevTail's rightlink is set to
+ *     newRightlink (a new sublist was inserted after the old tail).
+ *
+ * Each page is modified only if its LSN shows that this record has not been
+ * applied to it yet.
+ */
+static void
+ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
+{
+       ginxlogUpdateMeta *data = (ginxlogUpdateMeta*) XLogRecGetData(record);
+       Buffer          metabuffer;
+       Page            metapage;
+
+       metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+       if (!BufferIsValid(metabuffer))
+               return;                 /* assume index was deleted, nothing to do */
+       metapage = BufferGetPage(metabuffer);
+
+       if (!XLByteLE(lsn, PageGetLSN(metapage)))
+       {
+               memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+               PageSetLSN(metapage, lsn);
+               PageSetTLI(metapage, ThisTimeLineID);
+               MarkBufferDirty(metabuffer);
+       }
+
+       if ( data->ntuples > 0 )
+       {
+               /*
+                * insert into tail page
+                */
+               if (!(record->xl_info & XLR_BKP_BLOCK_1))
+               {
+                       Buffer  buffer = XLogReadBuffer(data->node, data->metadata.tail, false);
+
+                       /* a non-init read can fail if the page no longer exists */
+                       if (BufferIsValid(buffer))
+                       {
+                               Page    page = BufferGetPage(buffer);
+
+                               if ( !XLByteLE(lsn, PageGetLSN(page)))
+                               {
+                                       OffsetNumber l, off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+                                                       OffsetNumberNext(PageGetMaxOffsetNumber(page));
+                                       int                             i, tupsize;
+                                       IndexTuple              tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+
+                                       for(i=0; i<data->ntuples; i++)
+                                       {
+                                               tupsize = IndexTupleSize(tuples);
+
+                                               l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+                                               if (l == InvalidOffsetNumber)
+                                                       elog(ERROR, "failed to add item to index page");
+
+                                               tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+
+                                               /*
+                                                * Advance the target slot; re-using the same offset
+                                                * would insert each tuple in front of the previous
+                                                * one and reverse their order on the page.
+                                                */
+                                               off++;
+                                       }
+
+                                       /*
+                                        * Increase counter of heap tuples
+                                        */
+                                       GinPageGetOpaque(page)->maxoff++;
+
+                                       PageSetLSN(page, lsn);
+                                       PageSetTLI(page, ThisTimeLineID);
+                                       MarkBufferDirty(buffer);
+                               }
+                               UnlockReleaseBuffer(buffer);
+                       }
+               }
+       }
+       else if ( data->prevTail != InvalidBlockNumber )
+       {
+               /*
+                * New tail
+                */
+
+               Buffer  buffer = XLogReadBuffer(data->node, data->prevTail, false);
+
+               /* a non-init read can fail if the page no longer exists */
+               if (BufferIsValid(buffer))
+               {
+                       Page    page = BufferGetPage(buffer);
+
+                       if ( !XLByteLE(lsn, PageGetLSN(page)))
+                       {
+                               GinPageGetOpaque(page)->rightlink = data->newRightlink;
+
+                               PageSetLSN(page, lsn);
+                               PageSetTLI(page, ThisTimeLineID);
+                               MarkBufferDirty(buffer);
+                       }
+                       UnlockReleaseBuffer(buffer);
+               }
+       }
+
+       UnlockReleaseBuffer(metabuffer);
+}
+
+/*
+ * Replay an XLOG_GIN_INSERT_LISTPAGE record.
+ *
+ * Rebuilds one pending-list page from scratch: the page is initialized as
+ * GIN_LIST, linked to data->rightlink, and filled with the data->ntuples
+ * index tuples stored after the record header, in record order.  A page with
+ * no right sibling is the tail of its sublist and is flagged as holding a
+ * full row.  Nothing is done when the record has XLR_BKP_BLOCK_1 set.
+ */
+static void
+ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
+{
+       ginxlogInsertListPage *data = (ginxlogInsertListPage*) XLogRecGetData(record);
+       Buffer                  buffer;
+       Page                    page;
+       OffsetNumber    l, off = FirstOffsetNumber;
+       int                             i, tupsize;
+       IndexTuple      tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
+
+       if (record->xl_info & XLR_BKP_BLOCK_1)
+               return;
+
+       buffer = XLogReadBuffer(data->node, data->blkno, true);
+       Assert(BufferIsValid(buffer));
+       page = BufferGetPage(buffer);
+
+       GinInitBuffer(buffer, GIN_LIST);
+       GinPageGetOpaque(page)->rightlink = data->rightlink;
+       if ( data->rightlink == InvalidBlockNumber )
+       {
+               /* tail of sublist */
+               GinPageSetFullRow(page);
+               GinPageGetOpaque(page)->maxoff = 1;
+       }
+       else
+       {
+               GinPageGetOpaque(page)->maxoff = 0;
+       }
+
+       for(i=0; i<data->ntuples; i++)
+       {
+               tupsize = IndexTupleSize(tuples);
+
+               l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+               if (l == InvalidOffsetNumber)
+                       elog(ERROR, "failed to add item to index page");
+
+               tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+
+               /*
+                * Advance the target slot; inserting every tuple at
+                * FirstOffsetNumber would push the earlier ones down and leave
+                * the page in reverse order.
+                */
+               off++;
+       }
+
+       PageSetLSN(page, lsn);
+       PageSetTLI(page, ThisTimeLineID);
+       MarkBufferDirty(buffer);
+
+       UnlockReleaseBuffer(buffer);
+}
+
+/*
+ * Replay an XLOG_GIN_DELETE_LISTPAGE record.
+ *
+ * Installs the metapage contents carried by the record and then sets the
+ * flags of each of the data->ndeleted pages listed in data->toDelete[] to
+ * GIN_DELETED.  Each page is modified only if its LSN shows that this record
+ * has not been applied to it yet.
+ */
+static void
+ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
+{
+       ginxlogDeleteListPages *data = (ginxlogDeleteListPages*) XLogRecGetData(record);
+       Buffer          metabuffer;
+       Page            metapage;
+       int             i;
+
+       metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+       if (!BufferIsValid(metabuffer))
+               return;                 /* assume index was deleted, nothing to do */
+       metapage = BufferGetPage(metabuffer);
+
+       if (!XLByteLE(lsn, PageGetLSN(metapage)))
+       {
+               memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+               PageSetLSN(metapage, lsn);
+               PageSetTLI(metapage, ThisTimeLineID);
+               MarkBufferDirty(metabuffer);
+       }
+
+       for(i=0; i<data->ndeleted; i++)
+       {
+               Buffer  buffer = XLogReadBuffer(data->node,data->toDelete[i],false);
+
+               /* a non-init read can fail if the page no longer exists; skip it */
+               if (BufferIsValid(buffer))
+               {
+                       Page    page = BufferGetPage(buffer);
+
+                       if ( !XLByteLE(lsn, PageGetLSN(page)))
+                       {
+                               /* overwrite (not OR in) the flags: the page is now only "deleted" */
+                               GinPageGetOpaque(page)->flags = GIN_DELETED;
+
+                               PageSetLSN(page, lsn);
+                               PageSetTLI(page, ThisTimeLineID);
+                               MarkBufferDirty(buffer);
+                       }
+
+                       UnlockReleaseBuffer(buffer);
+               }
+       }
+       UnlockReleaseBuffer(metabuffer);
+}
+
 void
 gin_redo(XLogRecPtr lsn, XLogRecord *record)
 {
@@ -461,6 +639,15 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
                case XLOG_GIN_DELETE_PAGE:
                        ginRedoDeletePage(lsn, record);
                        break;
+               case XLOG_GIN_UPDATE_META_PAGE:
+                       ginRedoUpdateMetapage(lsn, record);
+                       break;
+               case XLOG_GIN_INSERT_LISTPAGE:
+                       ginRedoInsertListPage(lsn, record);
+                       break;
+               case XLOG_GIN_DELETE_LISTPAGE: 
+                       ginRedoDeleteListPages(lsn, record);
+                       break;
                default:
                        elog(PANIC, "gin_redo: unknown op code %u", info);
        }
@@ -516,6 +703,18 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
                        appendStringInfo(buf, "Delete page, ");
                        desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
                        break;
+               case XLOG_GIN_UPDATE_META_PAGE:
+                       appendStringInfo(buf, "Update metapage, ");
+                       desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, ((ginxlogUpdateMeta *) rec)->metadata.tail); 
+                       break;
+               case XLOG_GIN_INSERT_LISTPAGE:
+                       appendStringInfo(buf, "Insert new list page, ");
+                       desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno); 
+                       break;
+               case XLOG_GIN_DELETE_LISTPAGE:
+                       appendStringInfo(buf, "Delete list pages (%d), ", ((ginxlogDeleteListPages *) rec)->ndeleted);
+                       desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, ((ginxlogDeleteListPages *) rec)->metadata.head); 
+                       break;
                default:
                        elog(PANIC, "gin_desc: unknown op code %u", info);
        }
index fcf471cf2e9f2f88244b8d6f6931c0748ddb9ee9..01b8512d070b80f892e21da976021d19138fa87f 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.42 2009/01/01 17:23:35 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.43 2009/03/24 20:17:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -515,6 +515,10 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
                                lastFilledBlock = GIST_ROOT_BLKNO;
        bool            needLock;
 
+       /* No-op in ANALYZE ONLY mode */
+       if (info->analyze_only)
+               PG_RETURN_POINTER(stats);
+
        /* Set up all-zero stats if gistbulkdelete wasn't called */
        if (stats == NULL)
        {
index ab2f67c6385dc10084db29f9d291b8c6ababa727..42fe9554f0f5fc075164f177ae98a8c1ae72a9e1 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.108 2009/01/01 17:23:35 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.109 2009/03/24 20:17:11 tgl Exp $
  *
  * NOTES
  *       This file contains only the public interface routines.
@@ -647,6 +647,7 @@ hashvacuumcleanup(PG_FUNCTION_ARGS)
        BlockNumber num_pages;
 
        /* If hashbulkdelete wasn't called, return NULL signifying no change */
+       /* Note: this covers the analyze_only case too */
        if (stats == NULL)
                PG_RETURN_POINTER(NULL);
 
index a03b4595ba1e9fd13e92920961877f10a2036890..197fa3b041d3f66b658419ae83db6c2635edfc0b 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.112 2009/01/01 17:23:35 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.113 2009/03/24 20:17:12 tgl Exp $
  *
  * INTERFACE ROUTINES
  *             index_open              - open an index relation by relation OID
@@ -647,7 +647,8 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
  * item slot could have been replaced by a newer tuple by the time we get
  * to it.
  *
- * Returns the number of matching tuples found.
+ * Returns the number of matching tuples found.  (Note: this might be only
+ * approximate, so it should only be used for statistical purposes.)
  * ----------------
  */
 int64
index 84f409e1aca1c332c4d508ca342b81c94590b9f9..b8bb1ad4906d5fb1dfe504f9e345fa53fe9b5061 100644 (file)
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.167 2009/01/01 17:23:35 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.168 2009/03/24 20:17:12 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -557,6 +557,10 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
        IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
        IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
 
+       /* No-op in ANALYZE ONLY mode */
+       if (info->analyze_only)
+               PG_RETURN_POINTER(stats);
+
        /*
         * If btbulkdelete was called, we need not do anything, just return the
         * stats from the latest btbulkdelete call.  If it wasn't called, we must
index e53f4f52dcf7e4f7b4d4d5e473331af68ce749f7..d1889e16c2c593184ad19bd06540eae2e039619a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.312 2009/01/22 20:16:01 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.313 2009/03/24 20:17:12 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1938,6 +1938,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
         */
        ivinfo.index = indexRelation;
        ivinfo.vacuum_full = false;
+       ivinfo.analyze_only = false;
        ivinfo.message_level = DEBUG2;
        ivinfo.num_heap_tuples = -1;
        ivinfo.strategy = NULL;
index 33447b671f114c75e68396f368edc5779a284949..176ebde0efdb8a1e60e91d1db9b777ad657f34f4 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.133 2009/01/22 20:16:01 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.134 2009/03/24 20:17:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -496,6 +496,28 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
        /* We skip to here if there were no analyzable columns */
 cleanup:
 
+       /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
+       if (!vacstmt->vacuum)
+       {
+               for (ind = 0; ind < nindexes; ind++)
+               {
+                       IndexBulkDeleteResult *stats;
+                       IndexVacuumInfo ivinfo;
+
+                       ivinfo.index = Irel[ind];
+                       ivinfo.vacuum_full = false;
+                       ivinfo.analyze_only = true;
+                       ivinfo.message_level = elevel;
+                       ivinfo.num_heap_tuples = -1; /* not known for sure */
+                       ivinfo.strategy = vac_strategy;
+
+                       stats = index_vacuum_cleanup(&ivinfo, NULL);
+
+                       if (stats)
+                               pfree(stats);
+               }
+       }
+
        /* Done with indexes */
        vac_close_indexes(nindexes, Irel, NoLock);
 
index 4020bf1b29448cc58290fc844de7352f8daa5aef..78b179827ea7395a11e38b1ab38f74977a36021c 100644 (file)
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.385 2009/01/16 13:27:23 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.386 2009/03/24 20:17:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3388,6 +3388,7 @@ scan_index(Relation indrel, double num_tuples)
 
        ivinfo.index = indrel;
        ivinfo.vacuum_full = true;
+       ivinfo.analyze_only = false;
        ivinfo.message_level = elevel;
        ivinfo.num_heap_tuples = num_tuples;
        ivinfo.strategy = vac_strategy;
@@ -3454,6 +3455,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
 
        ivinfo.index = indrel;
        ivinfo.vacuum_full = true;
+       ivinfo.analyze_only = false;
        ivinfo.message_level = elevel;
        ivinfo.num_heap_tuples = num_tuples + keep_tuples;
        ivinfo.strategy = vac_strategy;
index 4e4624cb132fa1f92b5080012cc6060800d2013a..cb73cfa87a741def95ad43e3f325b8e09f47c9a3 100644 (file)
@@ -29,7 +29,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.118 2009/01/22 19:25:00 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.119 2009/03/24 20:17:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -875,6 +875,7 @@ lazy_vacuum_index(Relation indrel,
 
        ivinfo.index = indrel;
        ivinfo.vacuum_full = false;
+       ivinfo.analyze_only = false;
        ivinfo.message_level = elevel;
        /* We don't yet know rel_tuples, so pass -1 */
        ivinfo.num_heap_tuples = -1;
@@ -906,6 +907,7 @@ lazy_cleanup_index(Relation indrel,
 
        ivinfo.index = indrel;
        ivinfo.vacuum_full = false;
+       ivinfo.analyze_only = false;
        ivinfo.message_level = elevel;
        ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
        ivinfo.strategy = vac_strategy;
index e214bbb763403b7d65ab5286871c99686b08191c..e56b4696b4f9d5c481a9dcf894317174470a622f 100644 (file)
@@ -32,7 +32,7 @@
  * Copyright (c) 2003-2009, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.17 2009/01/10 21:08:36 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.18 2009/03/24 20:17:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -309,6 +309,22 @@ tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids,
        }
 }
 
+/*
+ * tbm_add_page - enter an entire page into a TIDBitmap
+ *
+ * The page is recorded as a whole rather than tuple by tuple, so a scan of
+ * the TIDBitmap reports the complete page (with the recheck flag).
+ */
+void
+tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
+{
+       /* Add the page, or turn an already-present entry for it lossy */
+       tbm_mark_page_lossy(tbm, pageno);
+
+       /* Still within the memory limit?  Then there is nothing more to do */
+       if (tbm->nentries <= tbm->maxentries)
+               return;
+
+       /* Over the limit: lossify some more pages */
+       tbm_lossify(tbm);
+}
+
 /*
  * tbm_union - set union
  *
@@ -496,7 +512,7 @@ tbm_intersect_page(TIDBitmap *a, PagetableEntry *apage, const TIDBitmap *b)
        {
                /*
                 * Some of the tuples in 'a' might not satisfy the quals for 'b',
-                * but because the page 'b' is lossy, we don't know which ones. 
+                * but because the page 'b' is lossy, we don't know which ones.
                 * Therefore we mark 'a' as requiring rechecks, to indicate that
                 * at most those tuples set in 'a' are matches.
                 */
index b1048504c2ca6c5e073fe6bec293a91fb7619359..65fd7f73310c4e88b6fe4aa310eef916b9180005 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.75 2009/01/01 17:23:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.76 2009/03/24 20:17:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -41,6 +41,7 @@ typedef struct IndexVacuumInfo
 {
        Relation        index;                  /* the index being vacuumed */
        bool            vacuum_full;    /* VACUUM FULL (we have exclusive lock) */
+       bool            analyze_only;   /* ANALYZE (without any actual vacuum) */
        int                     message_level;  /* ereport level for progress messages */
        double          num_heap_tuples;        /* tuples remaining in heap */
        BufferAccessStrategy strategy;          /* access strategy for reads */
index 1425333221d487ab4a0b58f60c173dcc4dde53ee..f0f45bc5e8ad118d9811e7359d5d8abb1b98384d 100644 (file)
@@ -4,11 +4,9 @@
  *
  *     Copyright (c) 2006-2009, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.28 2009/01/10 21:08:36 tgl Exp $
+ *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.29 2009/03/24 20:17:14 tgl Exp $
  *--------------------------------------------------------------------------
  */
-
-
 #ifndef GIN_H
 #define GIN_H
 
 #include "access/itup.h"
 #include "access/xlog.h"
 #include "fmgr.h"
-#include "nodes/tidbitmap.h"
-#include "storage/block.h"
-#include "storage/buf.h"
-#include "storage/off.h"
-#include "storage/relfilenode.h"
 
 
 /*
 typedef struct GinPageOpaqueData
 {
        BlockNumber rightlink;          /* next page if any */
-       OffsetNumber maxoff;            /* number entries on GIN_DATA page: number of
+       OffsetNumber maxoff;            /* number entries on GIN_DATA page; number of
                                                                 * heap ItemPointer on GIN_DATA|GIN_LEAF page
                                                                 * and number of records on GIN_DATA &
-                                                                * ~GIN_LEAF page */
+                                                                * ~GIN_LEAF page. On GIN_LIST page, number of
+                                                                * heap tuples. */
        uint16          flags;                  /* see bit definitions below */
 } GinPageOpaqueData;
 
 typedef GinPageOpaqueData *GinPageOpaque;
 
-#define GIN_ROOT_BLKNO (0)
-
 #define GIN_DATA                 (1 << 0)
 #define GIN_LEAF                 (1 << 1)
 #define GIN_DELETED              (1 << 2)
+#define GIN_META                 (1 << 3)
+#define GIN_LIST                 (1 << 4)
+#define GIN_LIST_FULLROW  (1 << 5)   /* makes sense only on GIN_LIST page */
+
+/* Page numbers of fixed-location pages */
+#define GIN_METAPAGE_BLKNO     (0)
+#define GIN_ROOT_BLKNO         (1)
+
+/*
+ * Contents of the GIN metapage.  GinPageGetMeta() below locates this struct
+ * at the start of the page's contents area.  The fields describe the pending
+ * list of fast-inserted entries.
+ */
+typedef struct GinMetaPageData
+{
+       /*
+        * Pointers to head and tail of pending list, which consists of GIN_LIST
+        * pages.  These store fast-inserted entries that haven't yet been moved
+        * into the regular GIN structure.
+        */
+       BlockNumber                     head;
+       BlockNumber         tail;
+
+       /*
+        * Free space in bytes in the pending list's tail page.
+        */
+       uint32                          tailFreeSize;
+
+       /*
+        * We store both number of pages and number of heap tuples
+        * that are in the pending list.
+        */
+       BlockNumber                     nPendingPages;
+       int64                           nPendingHeapTuples;
+} GinMetaPageData;
+
+/* Return a pointer to the GinMetaPageData stored in page "p" */
+#define GinPageGetMeta(p) \
+       ((GinMetaPageData *) PageGetContents(p))
 
 /*
  * Works on page
@@ -68,6 +93,8 @@ typedef GinPageOpaqueData *GinPageOpaque;
 #define GinPageSetNonLeaf(page)    ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
 #define GinPageIsData(page)    ( GinPageGetOpaque(page)->flags & GIN_DATA )
 #define GinPageSetData(page)   ( GinPageGetOpaque(page)->flags |= GIN_DATA )
+#define GinPageHasFullRow(page)    ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+#define GinPageSetFullRow(page)   ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
 
 #define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
 #define GinPageSetDeleted(page)    ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
@@ -76,8 +103,8 @@ typedef GinPageOpaqueData *GinPageOpaque;
 #define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
 
 /*
- * Define our ItemPointerGet(BlockNumber|GetOffsetNumber)
- * to prevent asserts
+ * We use our own ItemPointerGet(BlockNumber|GetOffsetNumber)
+ * to avoid Asserts, since sometimes the ip_posid isn't "valid"
  */
 
 #define GinItemPointerGetBlockNumber(pointer) \
@@ -86,6 +113,22 @@ typedef GinPageOpaqueData *GinPageOpaque;
 #define GinItemPointerGetOffsetNumber(pointer) \
        ((pointer)->ip_posid)
 
+/*
+ * Special item pointer values: "min" is (block 0, offset 0), "max" is
+ * (InvalidBlockNumber, offset 0xffff), and a "lossy page" pointer is
+ * (some valid block, offset 0xffff).
+ *
+ * These use ip_posid values that are not "valid" offsets, so the tests must
+ * read the fields through the GinItemPointerGet* accessors above, which do
+ * not Assert on the pointer's validity.
+ */
+#define ItemPointerSetMin(p)  \
+       ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
+#define ItemPointerIsMin(p)  \
+       (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
+        GinItemPointerGetBlockNumber(p) == (BlockNumber)0)
+#define ItemPointerSetMax(p)  \
+       ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
+#define ItemPointerIsMax(p)  \
+       (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
+        GinItemPointerGetBlockNumber(p) == InvalidBlockNumber)
+#define ItemPointerSetLossyPage(p, b)  \
+       ItemPointerSet((p), (b), (OffsetNumber)0xffff)
+#define ItemPointerIsLossyPage(p)  \
+       (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
+        GinItemPointerGetBlockNumber(p) != InvalidBlockNumber)
+
+
 typedef struct
 {
        BlockIdData child_blkno;        /* use it instead of BlockNumber to save space
@@ -135,6 +178,26 @@ typedef struct
         - GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) \
         - MAXALIGN(sizeof(GinPageOpaqueData)))
 
+/*
+ * List pages
+ *
+ * GinListPageSize is the block size minus the page header and the
+ * MAXALIGN'd GinPageOpaqueData.
+ */
+#define GinListPageSize  \
+    ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
+
+/*
+ * Storage type for GIN's reloptions
+ */
+typedef struct GinOptions
+{
+       int32       vl_len_;        /* varlena header (do not touch directly!) */
+       bool            useFastUpdate;  /* use fast updates? */
+} GinOptions;
+
+/* Value reported by GinGetUseFastUpdate when the relation has no rd_options */
+#define GIN_DEFAULT_USE_FASTUPDATE  true
+/*
+ * Fetch the useFastUpdate setting of a relation: taken from its rd_options
+ * (interpreted as GinOptions) if set, else the default above.
+ */
+#define GinGetUseFastUpdate(relation) \
+       ((relation)->rd_options ? \
+        ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
+
 
 #define GIN_UNLOCK     BUFFER_LOCK_UNLOCK
 #define GIN_SHARE      BUFFER_LOCK_SHARE
@@ -234,14 +297,52 @@ typedef struct ginxlogDeletePage
        BlockNumber rightLink;
 } ginxlogDeletePage;
 
+#define XLOG_GIN_UPDATE_META_PAGE 0x60
+
+/*
+ * WAL record for XLOG_GIN_UPDATE_META_PAGE (replayed by
+ * ginRedoUpdateMetapage): new metapage contents plus either tuples appended
+ * to the pending-list tail page or a rightlink change on the previous tail.
+ */
+typedef struct ginxlogUpdateMeta
+{
+       /* relation the record applies to */
+       RelFileNode     node;
+       /* complete metapage contents to install */
+       GinMetaPageData metadata;
+       /* when ntuples is not > 0: page whose rightlink becomes newRightlink, */
+       /* or InvalidBlockNumber if there is no such page to update */
+       BlockNumber     prevTail;
+       BlockNumber     newRightlink;
+       int32           ntuples; /* if ntuples > 0 then metadata.tail was updated
+                                                         * with that many tuples; else new sub list was
+                                                         * inserted */
+       /* array of inserted tuples follows */
+} ginxlogUpdateMeta;
+
+#define XLOG_GIN_INSERT_LISTPAGE  0x70
+
+/*
+ * WAL record for XLOG_GIN_INSERT_LISTPAGE (replayed by
+ * ginRedoInsertListPage): the full contents of one newly built GIN_LIST page.
+ */
+typedef struct ginxlogInsertListPage
+{
+       /* relation the record applies to */
+       RelFileNode     node;
+       /* block number of the list page being (re)initialized */
+       BlockNumber     blkno;
+       /* page's right sibling; InvalidBlockNumber marks the tail of a sublist */
+       BlockNumber     rightlink;
+       /* number of index tuples stored after this header */
+       int32           ntuples;
+       /* array of inserted tuples follows */
+} ginxlogInsertListPage;
+
+#define XLOG_GIN_DELETE_LISTPAGE  0x80
+
+/* Capacity of the toDelete[] array in one ginxlogDeleteListPages record */
+#define GIN_NDELETE_AT_ONCE 16
+/*
+ * WAL record for XLOG_GIN_DELETE_LISTPAGE (replayed by
+ * ginRedoDeleteListPages): new metapage contents plus the list pages to be
+ * marked GIN_DELETED.
+ */
+typedef struct ginxlogDeleteListPages
+{
+       /* relation the record applies to */
+       RelFileNode     node;
+       /* complete metapage contents to install */
+       GinMetaPageData metadata;
+       /* number of entries of toDelete[] that are in use */
+       int32           ndeleted;
+       /* block numbers of the pages to mark deleted */
+       BlockNumber     toDelete[GIN_NDELETE_AT_ONCE];
+} ginxlogDeleteListPages;
+
+
 /* ginutil.c */
 extern Datum ginoptions(PG_FUNCTION_ARGS);
 extern void initGinState(GinState *state, Relation index);
 extern Buffer GinNewBuffer(Relation index);
 extern void GinInitBuffer(Buffer b, uint32 f);
 extern void GinInitPage(Page page, uint32 f, Size pageSize);
+extern void GinInitMetabuffer(Buffer b);
 extern int     compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b);
-extern int     compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a, 
+extern int     compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a,
                                                                                                  OffsetNumber attnum_b, Datum b);
 extern Datum *extractEntriesS(GinState *ginstate, OffsetNumber attnum, Datum value,
                                int32 *nentries, bool *needUnique);
@@ -249,9 +350,14 @@ extern Datum *extractEntriesSU(GinState *ginstate, OffsetNumber attnum, Datum va
 
 extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple);
 extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
+
 /* gininsert.c */
 extern Datum ginbuild(PG_FUNCTION_ARGS);
 extern Datum gininsert(PG_FUNCTION_ARGS);
+extern void ginEntryInsert(Relation index, GinState *ginstate,
+                                                  OffsetNumber attnum, Datum value,
+                                                  ItemPointerData *items, uint32 nitem,
+                                                  bool isBuild);
 
 /* ginxlog.c */
 extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
@@ -319,7 +425,7 @@ extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack);
 extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno);
 
 /* ginentrypage.c */
-extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, 
+extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key,
                                                                                ItemPointerData *ipd, uint32 nipd);
 extern void prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum,
                                                                Datum value, GinState *ginstate);
@@ -440,13 +546,7 @@ extern void newScanKey(IndexScanDesc scan);
 /* ginget.c */
 extern PGDLLIMPORT int GinFuzzySearchLimit;
 
-#define ItemPointerSetMax(p)   ItemPointerSet( (p), (BlockNumber)0xffffffff, (OffsetNumber)0xffff )
-#define ItemPointerIsMax(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0xffffffff && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff )
-#define ItemPointerSetMin(p)   ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0)
-#define ItemPointerIsMin(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0 && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0 )
-
 extern Datum gingetbitmap(PG_FUNCTION_ARGS);
-extern Datum gingettuple(PG_FUNCTION_ARGS);
 
 /* ginvacuum.c */
 extern Datum ginbulkdelete(PG_FUNCTION_ARGS);
@@ -485,8 +585,26 @@ typedef struct
 
 extern void ginInitBA(BuildAccumulator *accum);
 extern void ginInsertRecordBA(BuildAccumulator *accum,
-                                 ItemPointer heapptr, 
+                                 ItemPointer heapptr,
                                  OffsetNumber attnum, Datum *entries, int32 nentry);
 extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n);
 
-#endif
+/* ginfast.c */
+
+/*
+ * Working state passed to ginHeapTupleFastCollect() and
+ * ginHeapTupleFastInsert().
+ *
+ * NOTE(review): the field meanings below are inferred from the names only;
+ * confirm against ginfast.c.
+ */
+typedef struct GinTupleCollector
+{
+       IndexTuple      *tuples;        /* presumably the array of collected tuples */
+       uint32           ntuples;       /* presumably the number of tuples collected */
+       uint32           lentuples;     /* presumably the allocated length of tuples[] */
+       uint32           sumsize;       /* presumably the total size of the tuples */
+} GinTupleCollector;
+
+extern void ginHeapTupleFastInsert(Relation index, GinState *ginstate,
+                                                                  GinTupleCollector *collector);
+extern uint32 ginHeapTupleFastCollect(Relation index, GinState *ginstate,
+                                       GinTupleCollector *collector,
+                                       OffsetNumber attnum, Datum value, ItemPointer item);
+extern void ginInsertCleanup(Relation index, GinState *ginstate,
+                                                        bool vac_delay, IndexBulkDeleteResult *stats);
+
+#endif /* GIN_H */
index 784ba6889193b76345a649674027797967df791d..3d4fdc33bd3f8627dc0174b790c92225e43c0157 100644 (file)
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.524 2009/02/24 10:06:34 petere Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.525 2009/03/24 20:17:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200902242
+#define CATALOG_VERSION_NO     200903241
 
 #endif
index 7736cb6e58a114bb37d621b9c1304d8a05f03ee0..a92c1f49971061d19afd8f508df99b33766b1cda 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.61 2009/03/05 23:06:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.62 2009/03/24 20:17:15 tgl Exp $
  *
  * NOTES
  *             the genbki.sh script reads this file and generates .bki
@@ -118,7 +118,7 @@ DESCR("hash index access method");
 DATA(insert OID = 783 (  gist  0 7 f f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
 DESCR("GiST index access method");
 #define GIST_AM_OID 783
-DATA(insert OID = 2742 (  gin  0 5 f f f t t f f t f 0 gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
+DATA(insert OID = 2742 (  gin  0 5 f f f t t f f t f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
 DESCR("GIN index access method");
 #define GIN_AM_OID 2742
 
index b0c5be4323f3893a75ba220a5e5da787c259e8e3..2f0dbeb2656bdde06a3fddb5fc71e1353a736354 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.537 2009/02/24 10:06:34 petere Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.538 2009/03/24 20:17:15 tgl Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -4184,8 +4184,6 @@ DATA(insert OID = 2592 (  gist_circle_compress    PGNSP PGUID 12 1 0 0 f f f t f i
 DESCR("GiST support");
 
 /* GIN */
-DATA(insert OID = 2730 (  gingettuple     PGNSP PGUID 12 1 0 0 f f f t f v 2 0 16 "2281 2281" _null_ _null_ _null_ _null_  gingettuple _null_ _null_ _null_ ));
-DESCR("gin(internal)");
 DATA(insert OID = 2731 (  gingetbitmap    PGNSP PGUID 12 1 0 0 f f f t f v 2 0 20 "2281 2281" _null_ _null_ _null_ _null_  gingetbitmap _null_ _null_ _null_ ));
 DESCR("gin(internal)");
 DATA(insert OID = 2732 (  gininsert               PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_  gininsert _null_ _null_ _null_ ));
index 93658543e4287b315bcd3e166b17326dc7476b4c..97e1d4c9c40da192b3b13706fa6aa10c1a8c556f 100644 (file)
@@ -15,7 +15,7 @@
  *
  * Copyright (c) 2003-2009, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.9 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.10 2009/03/24 20:17:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -52,6 +52,7 @@ extern void tbm_free(TIDBitmap *tbm);
 extern void tbm_add_tuples(TIDBitmap *tbm,
                                                   const ItemPointer tids, int ntids,
                                                   bool recheck);
+extern void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno);
 
 extern void tbm_union(TIDBitmap *a, const TIDBitmap *b);
 extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b);