]> granicus.if.org Git - postgresql/commitdiff
Re-think predicate locking on GIN indexes.
author Teodor Sigaev <teodor@sigaev.ru>
Fri, 4 May 2018 08:27:50 +0000 (11:27 +0300)
committer Teodor Sigaev <teodor@sigaev.ru>
Fri, 4 May 2018 08:27:50 +0000 (11:27 +0300)
The principle behind the locking was not very well thought-out, and not
documented. Add a section in the README to explain how it's supposed to
work, and change the code so that it actually works that way.

This fixes two bugs:

1. If fast update was turned on concurrently, subsequent inserts to the
   pending list would not conflict with predicate locks that were acquired
   earlier, on entry pages. The included 'predicate-gin-fastupdate' test
   demonstrates that. To fix, make all scans acquire a predicate lock on
   the metapage. That lock represents a scan of the pending list, whether
   or not there is a pending list at the moment. Forget about the
   optimization to skip locking/checking for locks, when fastupdate=off.
2. If a scan finds no match, it still needs to lock the entry page. The
   point of predicate locks is to lock the gaps between values, whether
   or not there is a match. The included 'predicate-gin-nomatch' test
   tests that case.

In addition to those two bug fixes, this removes some unnecessary locking,
following the principle laid out in the README. Because all items in
a posting tree have the same key value, a lock on the posting tree root is
enough to cover all the items. (With a very large posting tree, it would
possibly be better to lock the posting tree leaf pages instead, so that a
"skip scan" with a query like "A & B" could avoid an unnecessary conflict
if a new tuple is inserted with A but !B. But let's keep this simple.)

Also, some spelling fixes.

Author: Heikki Linnakangas with some editorization by me
Review: Andrey Borodin, Alexander Korotkov
Discussion: https://www.postgresql.org/message-id/0b3ad2c2-2692-62a9-3a04-5724f2af9114@iki.fi

18 files changed:
src/backend/access/gin/README
src/backend/access/gin/ginbtree.c
src/backend/access/gin/gindatapage.c
src/backend/access/gin/ginfast.c
src/backend/access/gin/ginget.c
src/backend/access/gin/gininsert.c
src/backend/access/gin/ginutil.c
src/backend/access/gin/ginvacuum.c
src/backend/access/gist/gist.c
src/backend/storage/lmgr/README-SSI
src/include/access/gin_private.h
src/test/isolation/expected/predicate-gin-fastupdate.out [new file with mode: 0644]
src/test/isolation/expected/predicate-gin-nomatch.out [new file with mode: 0644]
src/test/isolation/expected/predicate-gin.out
src/test/isolation/isolation_schedule
src/test/isolation/specs/predicate-gin-fastupdate.spec [new file with mode: 0644]
src/test/isolation/specs/predicate-gin-nomatch.spec [new file with mode: 0644]
src/test/isolation/specs/predicate-gin.spec

index 990b5ffa5811798da5e629372a71aec7d67cba8c..cc434b1feb7b26b212b37d110fa2259ba6871a25 100644 (file)
@@ -331,6 +331,40 @@ page-deletions safe; it stamps the deleted pages with an XID and keeps the
 deleted pages around with the right-link intact until all concurrent scans
 have finished.)
 
+Predicate Locking
+-----------------
+
+GIN supports predicate locking, for serializable snapshot isolation.
+Predicate locks represent that a scan has scanned a range of values.  They
+are not concerned with physical pages as such, but with the logical key values.
+A predicate lock on a page covers the key range that would belong on that
+page, whether or not there are any matching tuples there currently.  In other
+words, a predicate lock on an index page covers the "gaps" between the index
+tuples.  To minimize false positives, predicate locks are acquired at the
+finest level possible.
+
+* Like in the B-tree index, it is enough to lock only leaf pages, because all
+  insertions happen at the leaf level.
+
+* In an equality search (i.e. not a partial match search), if a key entry has
+  a posting tree, we lock the posting tree root page, to represent a lock on
+  just that key entry.  Otherwise, we lock the entry tree page.  We also lock
+  the entry tree page if no match is found, to lock the "gap" where the entry
+  would've been, had there been one.
+
+* In a partial match search, we lock all the entry leaf pages that we scan,
+  in addition to locks on posting tree roots, to represent the "gaps" between
+  values.
+
+* In addition to the locks on entry leaf pages and posting tree roots, all
+  scans grab a lock on the metapage.  This is to interlock with insertions to
+  the fast update pending list.  An insertion to the pending list can really
+  belong anywhere in the tree, and the lock on the metapage represents that.
+
+The interlock for fastupdate pending lists means that with fastupdate=on,
+we effectively always grab a full-index lock, so you could get a lot of false
+positives.
+
 Compatibility
 -------------
 
index 828c7074b7064030f165b73e40fd4502d2362930..030d0f44183534b9f5be5326996f570611013a30 100644 (file)
@@ -84,6 +84,9 @@ ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot)
        stack->parent = NULL;
        stack->predictNumber = 1;
 
+       if (!searchMode)
+               CheckForSerializableConflictIn(btree->index, NULL, stack->buffer);
+
        for (;;)
        {
                Page            page;
index 59bf21744f5e8efb8687538fd8782a68c137e918..aeaf8adab0920b298fdd3be4d75ac199dcb8278d 100644 (file)
@@ -1812,8 +1812,8 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
        blkno = BufferGetBlockNumber(buffer);
 
        /*
-        * Copy a predicate lock from entry tree leaf (containing posting list) to
-        * posting tree.
+        * Copy any predicate locks from the entry tree leaf (containing posting
+        * list) to the posting tree.
         */
        PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
 
@@ -1864,7 +1864,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
        return blkno;
 }
 
-void
+static void
 ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
 {
        memset(btree, 0, sizeof(GinBtreeData));
@@ -1911,7 +1911,6 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
                btree.itemptr = insertdata.items[insertdata.curitem];
                stack = ginFindLeafPage(&btree, false, NULL);
 
-               GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
                ginInsertValue(&btree, stack, &insertdata, buildStats);
        }
 }
index 615730b8e55d886912509a0efe29c10c792b0a6b..5f624cf6facb12902d4919ac78f268b40c796dc6 100644 (file)
@@ -31,6 +31,7 @@
 #include "postmaster/autovacuum.h"
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
+#include "storage/predicate.h"
 #include "utils/builtins.h"
 
 /* GUC parameter */
@@ -245,6 +246,13 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
        metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
        metapage = BufferGetPage(metabuffer);
 
+       /*
+        * An insertion to the pending list could logically belong anywhere in
+        * the tree, so it conflicts with all serializable scans.  All scans
+        * acquire a predicate lock on the metabuffer to represent that.
+        */
+       CheckForSerializableConflictIn(index, NULL, metabuffer);
+
        if (collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GinListPageSize)
        {
                /*
index f3db7cc6405ce1866ef17783fea60324c42a1ef0..ef3cd7dbe2ab32d5e4b1f2cd4f4eda03861abb46 100644 (file)
@@ -35,20 +35,6 @@ typedef struct pendingPosition
 } pendingPosition;
 
 
-/*
- * Place predicate lock on GIN page if needed.
- */
-static void
-GinPredicateLockPage(Relation index, BlockNumber blkno, Snapshot snapshot)
-{
-       /*
-        * When fast update is on then no need in locking pages, because we anyway
-        * need to lock the whole index.
-        */
-       if (!GinGetUseFastUpdate(index))
-               PredicateLockPage(index, blkno, snapshot);
-}
-
 /*
  * Goes to the next page if current offset is outside of bounds
  */
@@ -68,7 +54,7 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot
                stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
                stack->blkno = BufferGetBlockNumber(stack->buffer);
                stack->off = FirstOffsetNumber;
-               GinPredicateLockPage(btree->index, stack->blkno, snapshot);
+               PredicateLockPage(btree->index, stack->blkno, snapshot);
        }
 
        return true;
@@ -100,11 +86,6 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
         */
        for (;;)
        {
-               /*
-                * Predicate lock each leaf page in posting tree
-                */
-               GinPredicateLockPage(index, BufferGetBlockNumber(buffer), snapshot);
-
                page = BufferGetPage(buffer);
                if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
                {
@@ -158,7 +139,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
         * Predicate lock entry leaf page, following pages will be locked by
         * moveRightIfItNeeded()
         */
-       GinPredicateLockPage(btree->index, stack->buffer, snapshot);
+       PredicateLockPage(btree->index, stack->buffer, snapshot);
 
        for (;;)
        {
@@ -253,6 +234,13 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
 
                        LockBuffer(stack->buffer, GIN_UNLOCK);
 
+                       /*
+                        * Acquire predicate lock on the posting tree.  We already hold
+                        * a lock on the entry page, but insertions to the posting tree
+                        * don't check for conflicts on that level.
+                        */
+                       PredicateLockPage(btree->index, rootPostingTree, snapshot);
+
                        /* Collect all the TIDs in this entry's posting tree */
                        scanPostingTree(btree->index, scanEntry, rootPostingTree,
                                                        snapshot);
@@ -400,10 +388,6 @@ restartScanEntry:
        {
                IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
 
-               /* Predicate lock visited entry leaf page */
-               GinPredicateLockPage(ginstate->index,
-                                                        BufferGetBlockNumber(stackEntry->buffer), snapshot);
-
                if (GinIsPostingTree(itup))
                {
                        BlockNumber rootPostingTree = GinGetPostingTree(itup);
@@ -411,6 +395,13 @@ restartScanEntry:
                        Page            page;
                        ItemPointerData minItem;
 
+                       /*
+                        * This is an equality scan, so lock the root of the posting tree.
+                        * It represents a lock on the exact key value, and covers all the
+                        * items in the posting tree.
+                        */
+                       PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
+
                        /*
                         * We should unlock entry page before touching posting tree to
                         * prevent deadlocks with vacuum processes. Because entry is never
@@ -425,12 +416,6 @@ restartScanEntry:
                                                                                        rootPostingTree, snapshot);
                        entry->buffer = stack->buffer;
 
-                       /*
-                        * Predicate lock visited posting tree page, following pages will
-                        * be locked by moveRightIfItNeeded or entryLoadMoreItems
-                        */
-                       GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(entry->buffer), snapshot);
-
                        /*
                         * We keep buffer pinned because we need to prevent deletion of
                         * page during scan. See GIN's vacuum implementation. RefCount is
@@ -452,15 +437,38 @@ restartScanEntry:
                        freeGinBtreeStack(stack);
                        entry->isFinished = false;
                }
-               else if (GinGetNPosting(itup) > 0)
+               else
                {
-                       entry->list = ginReadTuple(ginstate, entry->attnum, itup,
-                                                                          &entry->nlist);
-                       entry->predictNumberResult = entry->nlist;
+                       /*
+                        * Lock the entry leaf page.  This is more coarse-grained than
+                        * necessary, because it will conflict with any insertions that
+                        * land on the same leaf page, not only the exact key we searched
+                        * for.  But locking an individual tuple would require updating
+                        * that lock whenever it moves because of insertions or vacuums,
+                        * which seems too complicated.
+                        */
+                       PredicateLockPage(ginstate->index,
+                                                         BufferGetBlockNumber(stackEntry->buffer),
+                                                         snapshot);
+                       if (GinGetNPosting(itup) > 0)
+                       {
+                               entry->list = ginReadTuple(ginstate, entry->attnum, itup,
+                                                                                  &entry->nlist);
+                               entry->predictNumberResult = entry->nlist;
 
-                       entry->isFinished = false;
+                               entry->isFinished = false;
+                       }
                }
        }
+       else
+       {
+               /*
+                * No entry found.  Predicate lock the leaf page, to lock the place
+                * where the entry would've been, had there been one.
+                */
+               PredicateLockPage(ginstate->index,
+                                                 BufferGetBlockNumber(stackEntry->buffer), snapshot);
+       }
 
        if (needUnlock)
                LockBuffer(stackEntry->buffer, GIN_UNLOCK);
@@ -533,7 +541,7 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
 
                for (i = 0; i < key->nentries - 1; i++)
                {
-                       /* Pass all entries <= i as false, and the rest as MAYBE */
+                       /* Pass all entries <= i as FALSE, and the rest as MAYBE */
                        for (j = 0; j <= i; j++)
                                key->entryRes[entryIndexes[j]] = GIN_FALSE;
                        for (j = i + 1; j < key->nentries; j++)
@@ -673,8 +681,6 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
                entry->btree.fullScan = false;
                stack = ginFindLeafPage(&entry->btree, true, snapshot);
 
-               GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(stack->buffer), snapshot);
-
                /* we don't need the stack, just the buffer. */
                entry->buffer = stack->buffer;
                IncrBufferRefCount(entry->buffer);
@@ -719,10 +725,6 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
                        entry->buffer = ginStepRight(entry->buffer,
                                                                                 ginstate->index,
                                                                                 GIN_SHARE);
-
-                       GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(entry->buffer), snapshot);
-
-
                        page = BufferGetPage(entry->buffer);
                }
                stepright = true;
@@ -1084,8 +1086,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
         * lossy page even when none of the other entries match.
         *
         * Our strategy is to call the tri-state consistent function, with the
-        * lossy-page entries set to MAYBE, and all the other entries false. If it
-        * returns false, none of the lossy items alone are enough for a match, so
+        * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
+        * returns FALSE, none of the lossy items alone are enough for a match, so
         * we don't need to return a lossy-page pointer. Otherwise, return a
         * lossy-page pointer to indicate that the whole heap page must be
         * checked.  (On subsequent calls, we'll do nothing until minItem is past
@@ -1746,8 +1748,7 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
 }
 
 /*
- * Collect all matched rows from pending list into bitmap. Also function
- * takes PendingLockRelation if it's needed.
+ * Collect all matched rows from pending list into bitmap.
  */
 static void
 scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
@@ -1764,6 +1765,12 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
 
        *ntids = 0;
 
+       /*
+        * Acquire predicate lock on the metapage, to conflict with any
+        * fastupdate insertions.
+        */
+       PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
+
        LockBuffer(metabuffer, GIN_SHARE);
        page = BufferGetPage(metabuffer);
        TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
@@ -1777,24 +1784,9 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
        {
                /* No pending list, so proceed with normal scan */
                UnlockReleaseBuffer(metabuffer);
-
-               /*
-                * If fast update is enabled, we acquire a predicate lock on the
-                * entire relation as fast update postpones the insertion of tuples
-                * into index structure due to which we can't detect rw conflicts.
-                */
-               if (GinGetUseFastUpdate(scan->indexRelation))
-                       PredicateLockRelation(scan->indexRelation, scan->xs_snapshot);
-
                return;
        }
 
-       /*
-        * Pending list is not empty, we need to lock the index doesn't despite on
-        * fastupdate state
-        */
-       PredicateLockRelation(scan->indexRelation, scan->xs_snapshot);
-
        pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
        LockBuffer(pos.pendingBuffer, GIN_SHARE);
        pos.firstOffset = FirstOffsetNumber;
index cf218dd75d4eaa6339a53d7d0fc8e58e7bc19433..5281eb682382ff170fadb0f91abde8a5842974b4 100644 (file)
@@ -219,7 +219,7 @@ ginEntryInsert(GinState *ginstate,
                        return;
                }
 
-               GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
+               CheckForSerializableConflictIn(ginstate->index, NULL, stack->buffer);
                /* modify an existing leaf entry */
                itup = addItemPointersToLeafTuple(ginstate, itup,
                                                                                  items, nitem, buildStats, stack->buffer);
@@ -228,7 +228,7 @@ ginEntryInsert(GinState *ginstate,
        }
        else
        {
-               GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
+               CheckForSerializableConflictIn(ginstate->index, NULL, stack->buffer);
                /* no match, so construct a new leaf entry */
                itup = buildFreshLeafTuple(ginstate, attnum, key, category,
                                                                   items, nitem, buildStats, stack->buffer);
@@ -517,18 +517,6 @@ gininsert(Relation index, Datum *values, bool *isnull,
 
                memset(&collector, 0, sizeof(GinTupleCollector));
 
-               /*
-                * With fastupdate on each scan and each insert begin with access to
-                * pending list, so it effectively lock entire index. In this case we
-                * aquire predicate lock and check for conflicts over index relation,
-                * and hope that it will reduce locking overhead.
-                *
-                * Do not use GinCheckForSerializableConflictIn() here, because it
-                * will do nothing (it does actual work only with fastupdate off).
-                * Check for conflicts for entire index.
-                */
-               CheckForSerializableConflictIn(index, NULL, InvalidBuffer);
-
                for (i = 0; i < ginstate->origTupdesc->natts; i++)
                        ginHeapTupleFastCollect(ginstate, &collector,
                                                                        (OffsetNumber) (i + 1),
@@ -539,16 +527,6 @@ gininsert(Relation index, Datum *values, bool *isnull,
        }
        else
        {
-               GinStatsData stats;
-
-               /*
-                * Fastupdate is off but if pending list isn't empty then we need to
-                * check conflicts with PredicateLockRelation in scanPendingInsert().
-                */
-               ginGetStats(index, &stats);
-               if (stats.nPendingPages > 0)
-                       CheckForSerializableConflictIn(index, NULL, InvalidBuffer);
-
                for (i = 0; i < ginstate->origTupdesc->natts; i++)
                        ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
                                                           values[i], isnull[i],
index 4367523dd98d7bc818edeacd1359814edf226068..0a32182dd7fbafbedb4548a458f37b7286be9a37 100644 (file)
@@ -718,10 +718,3 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
 
        END_CRIT_SECTION();
 }
-
-void
-GinCheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer)
-{
-       if (!GinGetUseFastUpdate(relation))
-               CheckForSerializableConflictIn(relation, tuple, buffer);
-}
index dd8e31b8721100dba529a41885402c2d8b60da37..3104bc12b63122dce9dab179097624e793b68cf1 100644 (file)
@@ -166,7 +166,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
        START_CRIT_SECTION();
 
        /* Unlink the page by changing left sibling's rightlink */
-
        page = BufferGetPage(lBuffer);
        GinPageGetOpaque(page)->rightlink = rightlink;
 
index f7a9168925669273c18364ef04d203b6178445e0..8a42effdf7a49e61eb3aa44b7fe687cd9f5d8e5c 100644 (file)
@@ -1220,7 +1220,7 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
        bool            is_split;
 
        /*
-        * Check for any rw conflicts (in serialisation isolation level) just
+        * Check for any rw conflicts (in serializable isolation level) just
         * before we intend to modify the page
         */
        CheckForSerializableConflictIn(state->r, NULL, stack->buffer);
index f2b099d1c9ed2ab913d363d01e23b09f33368091..50d2ecca9d746375ae671404dcae74a592741b47 100644 (file)
@@ -373,21 +373,22 @@ index *leaf* pages needed to lock the appropriate index range. If,
 however, a search discovers that no root page has yet been created, a
 predicate lock on the index relation is required.
 
+    * Like a B-tree, GIN searches acquire predicate locks only on the
+leaf pages of the entry tree. When performing an equality scan, and an
+entry has a posting tree, the posting tree root is locked instead, to
+lock only that key value. However, fastupdate=on postpones the
+insertion of tuples into the index structure by temporarily storing them
+in the pending list. That makes us unable to detect r-w conflicts using
+page-level locks. To cope with that, insertions to the pending list
+conflict with all scans.
+
     * GiST searches can determine that there are no matches at any
 level of the index, so we acquire predicate lock at each index
 level during a GiST search. An index insert at the leaf level can
 then be trusted to ripple up to all levels and locations where
 conflicting predicate locks may exist. In case there is a page split,
-we need to copy predicate lock from an original page to all new pages.
-
-    * GIN searches acquire predicate locks only on the leaf pages
-of entry tree and posting tree. During a page split, a predicate locks are
-copied from the original page to the new page. In the same way predicate locks
-are copied from entry tree leaf page to freshly created posting tree root.
-However, when fast update is enabled, a predicate lock on the whole index
-relation is required. Fast update postpones the insertion of tuples into index
-structure by temporarily storing them into pending list. That makes us unable
-to detect r-w conflicts using page-level locks.
+we need to copy predicate lock from the original page to all the new
+pages.
 
     * Hash index searches acquire predicate locks on the primary
 page of a bucket. It acquires a lock on both the old and new buckets
@@ -395,7 +396,6 @@ for scans that happen concurrently with page splits. During a bucket
 split, a predicate lock is copied from the primary page of an old
 bucket to the primary page of a new bucket.
 
-
     * The effects of page splits, overflows, consolidations, and
 removals must be carefully reviewed to ensure that predicate locks
 aren't "lost" during those operations, or kept with pages which could
index c013d60371a055222b83dc7f863ce0fa5195de57..f0baac65869f034bcbff6683eafac2cc4e43909f 100644 (file)
@@ -103,8 +103,6 @@ extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
 extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
                                 GinNullCategory *category);
-extern void GinCheckForSerializableConflictIn(Relation relation,
-                                                                 HeapTuple tuple, Buffer buffer);
 
 /* gininsert.c */
 extern IndexBuildResult *ginbuild(Relation heap, Relation index,
@@ -227,7 +225,6 @@ extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
                                          GinStatsData *buildStats);
 extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot);
 extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
-extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
 
 /*
  * This is declared in ginvacuum.c, but is passed between ginVacuumItemPointers
diff --git a/src/test/isolation/expected/predicate-gin-fastupdate.out b/src/test/isolation/expected/predicate-gin-fastupdate.out
new file mode 100644 (file)
index 0000000..7d4fa8e
--- /dev/null
@@ -0,0 +1,30 @@
+Parsed test spec with 3 sessions
+
+starting permutation: r1 r2 w1 c1 w2 c2
+step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[1000];
+count          
+
+2              
+step r2: SELECT * FROM other_tbl;
+id             
+
+step w1: INSERT INTO other_tbl VALUES (42);
+step c1: COMMIT;
+step w2: INSERT INTO gin_tbl SELECT array[1000,19001];
+ERROR:  could not serialize access due to read/write dependencies among transactions
+step c2: COMMIT;
+
+starting permutation: r1 r2 w1 c1 fastupdate_on w2 c2
+step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[1000];
+count          
+
+2              
+step r2: SELECT * FROM other_tbl;
+id             
+
+step w1: INSERT INTO other_tbl VALUES (42);
+step c1: COMMIT;
+step fastupdate_on: ALTER INDEX ginidx SET (fastupdate = on);
+step w2: INSERT INTO gin_tbl SELECT array[1000,19001];
+ERROR:  could not serialize access due to read/write dependencies among transactions
+step c2: COMMIT;
diff --git a/src/test/isolation/expected/predicate-gin-nomatch.out b/src/test/isolation/expected/predicate-gin-nomatch.out
new file mode 100644 (file)
index 0000000..5e73326
--- /dev/null
@@ -0,0 +1,15 @@
+Parsed test spec with 2 sessions
+
+starting permutation: r1 r2 w1 c1 w2 c2
+step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[-1];
+count          
+
+0              
+step r2: SELECT * FROM other_tbl;
+id             
+
+step w1: INSERT INTO other_tbl VALUES (42);
+step c1: COMMIT;
+step w2: INSERT INTO gin_tbl SELECT array[-1];
+ERROR:  could not serialize access due to read/write dependencies among transactions
+step c2: COMMIT;
index 4f5501f6f01b5e8daad455ecb586e9161d886391..bdf8911923bd5c762fcdb013d3857ac26a4ec55d 100644 (file)
@@ -737,8 +737,8 @@ step c2: commit;
 starting permutation: fu1 rxy1 rxy2fu wx1 c1 wy2fu c2
 step fu1: alter index ginidx set (fastupdate = on);
                          commit;
-                         begin isolation level serializable; 
-                         set enable_seqscan=off;
+                         begin isolation level serializable;
+                         set enable_seqscan=off;
 step rxy1: select count(*) from gin_tbl where p @> array[4,5];
 count          
 
index b3a34a8688dfe07d95cdd0f590f364d822dad0e4..b650e467a6372d8341c3d8c5e038bd3aaf03f91e 100644 (file)
@@ -69,6 +69,8 @@ test: vacuum-concurrent-drop
 test: predicate-hash
 test: predicate-gist
 test: predicate-gin
+test: predicate-gin-fastupdate
+test: predicate-gin-nomatch
 test: partition-key-update-1
 test: partition-key-update-2
 test: partition-key-update-3
diff --git a/src/test/isolation/specs/predicate-gin-fastupdate.spec b/src/test/isolation/specs/predicate-gin-fastupdate.spec
new file mode 100644 (file)
index 0000000..04b8036
--- /dev/null
@@ -0,0 +1,49 @@
+#
+# Test that predicate locking on a GIN index works correctly, even if
+# fastupdate is turned on concurrently.
+#
+# 0. fastupdate is off
+# 1. Session 's1' acquires predicate lock on page X
+# 2. fastupdate is turned on
+# 3. Session 's2' inserts a new tuple to the pending list
+#
+# This test checks that if the insertion in step 3 conflicts with the lock
+# acquired in step 1, we detect that conflict correctly, even if fastupdate
+# was turned on in-between.
+#
+setup
+{
+  create table gin_tbl(p int4[]);
+  insert into gin_tbl select array[g, g*2,g*3] from generate_series(1, 10000) g;
+  insert into gin_tbl select array[4,5,6] from generate_series(10001, 20000) g;
+  create index ginidx on gin_tbl using gin(p) with (fastupdate = off);
+
+  create table other_tbl (id int4);
+}
+
+teardown
+{
+  drop table gin_tbl;
+  drop table other_tbl;
+}
+
+session "s1"
+setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
+step "r1" { SELECT count(*) FROM gin_tbl WHERE p @> array[1000]; }
+step "w1" { INSERT INTO other_tbl VALUES (42); }
+step "c1" { COMMIT; }
+
+session "s2"
+setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
+step "r2" { SELECT * FROM other_tbl; }
+step "w2" { INSERT INTO gin_tbl SELECT array[1000,19001]; }
+step "c2" { COMMIT; }
+
+session "s3"
+step "fastupdate_on" { ALTER INDEX ginidx SET (fastupdate = on); }
+
+# This correctly throws serialization failure.
+permutation "r1" "r2" "w1" "c1" "w2" "c2"
+
+# Turning fastupdate on in the middle must not make us miss the conflict.
+permutation "r1" "r2" "w1" "c1" "fastupdate_on" "w2" "c2"
diff --git a/src/test/isolation/specs/predicate-gin-nomatch.spec b/src/test/isolation/specs/predicate-gin-nomatch.spec
new file mode 100644 (file)
index 0000000..0ad456c
--- /dev/null
@@ -0,0 +1,35 @@
+#
+# Check that GIN index grabs an appropriate lock, even if there is no match.
+#
+setup
+{
+  create table gin_tbl(p int4[]);
+  insert into gin_tbl select array[g, g*2,g*3] from generate_series(1, 10000) g;
+  insert into gin_tbl select array[4,5,6] from generate_series(10001, 20000) g;
+  create index ginidx on gin_tbl using gin(p) with (fastupdate = off);
+
+  create table other_tbl (id int4);
+}
+
+teardown
+{
+  drop table gin_tbl;
+  drop table other_tbl;
+}
+
+session "s1"
+setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
+# Scan with no match.
+step "r1" { SELECT count(*) FROM gin_tbl WHERE p @> array[-1]; }
+step "w1" { INSERT INTO other_tbl VALUES (42); }
+step "c1" { COMMIT; }
+
+session "s2"
+setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
+step "r2" { SELECT * FROM other_tbl; }
+# Insert row that would've matched in step "r1"
+step "w2" { INSERT INTO gin_tbl SELECT array[-1]; }
+step "c2" { COMMIT; }
+
+# This should throw serialization failure.
+permutation "r1" "r2" "w1" "c1" "w2" "c2"
index 9f0cda805734278c1974ca03d55fb783cf6a0e59..a967695867a2bb930342f53637aaae702aeeaaa2 100644 (file)
@@ -32,8 +32,8 @@ setup
 # enable pending list for a small subset of tests
 step "fu1"     { alter index ginidx set (fastupdate = on);
                          commit;
-                         begin isolation level serializable; 
-                         set enable_seqscan=off; }
+                         begin isolation level serializable;
+                         set enable_seqscan=off; }
 
 step "rxy1"    { select count(*) from gin_tbl where p @> array[4,5]; }
 step "wx1"     { insert into gin_tbl select g, array[5,6] from generate_series