]> granicus.if.org Git - postgresql/blobdiff - src/backend/access/hash/hashsearch.c
Modify BufferGetPage() to prepare for "snapshot too old" feature
[postgresql] / src / backend / access / hash / hashsearch.c
index 76ad5d318497152d60a52d682d6338ff772625c9..dd1f464e53addfa00e02a00615f2cfa3c07bc898 100644 (file)
@@ -3,25 +3,28 @@
  * hashsearch.c
  *       search code for postgres hash tables
  *
- * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.36 2004/08/29 04:12:18 momjian Exp $
+ *       src/backend/access/hash/hashsearch.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include "access/hash.h"
-#include "storage/lmgr.h"
+#include "access/relscan.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "utils/rel.h"
 
 
 /*
  *     _hash_next() -- Get the next item in a scan.
  *
- *             On entry, we have a valid currentItemData in the scan, and a
+ *             On entry, we have a valid hashso_curpos in the scan, and a
  *             pin and read lock on the page that contains that item.
  *             We find the next item in the scan, if any.
  *             On success exit, we have the page containing the next item
@@ -36,7 +39,6 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
        Page            page;
        OffsetNumber offnum;
        ItemPointer current;
-       HashItem        hitem;
        IndexTuple      itup;
 
        /* we still have the buffer pinned and read-locked */
@@ -50,13 +52,12 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
                return false;
 
        /* if we're here, _hash_step found a valid tuple */
-       current = &(scan->currentItemData);
+       current = &(so->hashso_curpos);
        offnum = ItemPointerGetOffsetNumber(current);
-       page = BufferGetPage(buf);
-       _hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
-       hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
-       itup = &hitem->hash_itup;
-       scan->xs_ctup.t_self = itup->t_tid;
+       _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+       page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
+       itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+       so->hashso_heappos = itup->t_tid;
 
        return true;
 }
@@ -73,11 +74,12 @@ _hash_readnext(Relation rel,
        blkno = (*opaquep)->hasho_nextblkno;
        _hash_relbuf(rel, *bufp);
        *bufp = InvalidBuffer;
+       /* check for interrupts while we're not holding any buffer lock */
+       CHECK_FOR_INTERRUPTS();
        if (BlockNumberIsValid(blkno))
        {
-               *bufp = _hash_getbuf(rel, blkno, HASH_READ);
-               *pagep = BufferGetPage(*bufp);
-               _hash_checkpage(rel, *pagep, LH_OVERFLOW_PAGE);
+               *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
+               *pagep = BufferGetPage(*bufp, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
                *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
        }
 }
@@ -94,11 +96,13 @@ _hash_readprev(Relation rel,
        blkno = (*opaquep)->hasho_prevblkno;
        _hash_relbuf(rel, *bufp);
        *bufp = InvalidBuffer;
+       /* check for interrupts while we're not holding any buffer lock */
+       CHECK_FOR_INTERRUPTS();
        if (BlockNumberIsValid(blkno))
        {
-               *bufp = _hash_getbuf(rel, blkno, HASH_READ);
-               *pagep = BufferGetPage(*bufp);
-               _hash_checkpage(rel, *pagep, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+               *bufp = _hash_getbuf(rel, blkno, HASH_READ,
+                                                        LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+               *pagep = BufferGetPage(*bufp, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
                *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
        }
 }
@@ -117,76 +121,119 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 {
        Relation        rel = scan->indexRelation;
        HashScanOpaque so = (HashScanOpaque) scan->opaque;
+       ScanKey         cur;
        uint32          hashkey;
        Bucket          bucket;
        BlockNumber blkno;
+       BlockNumber oldblkno = InvalidBuffer;
+       bool            retry = false;
        Buffer          buf;
        Buffer          metabuf;
        Page            page;
        HashPageOpaque opaque;
        HashMetaPage metap;
-       HashItem        hitem;
        IndexTuple      itup;
        ItemPointer current;
        OffsetNumber offnum;
 
-       current = &(scan->currentItemData);
+       pgstat_count_index_scan(rel);
+
+       current = &(so->hashso_curpos);
        ItemPointerSetInvalid(current);
 
        /*
-        * We do not support hash scans with no index qualification, because
-        * we would have to read the whole index rather than just one bucket.
-        * That creates a whole raft of problems, since we haven't got a
-        * practical way to lock all the buckets against splits or compactions.
+        * We do not support hash scans with no index qualification, because we
+        * would have to read the whole index rather than just one bucket. That
+        * creates a whole raft of problems, since we haven't got a practical way
+        * to lock all the buckets against splits or compactions.
         */
        if (scan->numberOfKeys < 1)
                ereport(ERROR,
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                 errmsg("hash indexes do not support whole-index scans")));
 
+       /* There may be more than one index qual, but we hash only the first */
+       cur = &scan->keyData[0];
+
+       /* We support only single-column hash indexes */
+       Assert(cur->sk_attno == 1);
+       /* And there's only one operator strategy, too */
+       Assert(cur->sk_strategy == HTEqualStrategyNumber);
+
        /*
-        * If the constant in the index qual is NULL, assume it cannot match
-        * any items in the index.
+        * If the constant in the index qual is NULL, assume it cannot match any
+        * items in the index.
         */
-       if (scan->keyData[0].sk_flags & SK_ISNULL)
+       if (cur->sk_flags & SK_ISNULL)
                return false;
 
        /*
-        * Okay to compute the hash key.  We want to do this before acquiring
-        * any locks, in case a user-defined hash function happens to be slow.
+        * Okay to compute the hash key.  We want to do this before acquiring any
+        * locks, in case a user-defined hash function happens to be slow.
+        *
+        * If scankey operator is not a cross-type comparison, we can use the
+        * cached hash function; otherwise gotta look it up in the catalogs.
+        *
+        * We support the convention that sk_subtype == InvalidOid means the
+        * opclass input type; this is a hack to simplify life for ScanKeyInit().
         */
-       hashkey = _hash_datum2hashkey(rel, scan->keyData[0].sk_argument);
+       if (cur->sk_subtype == rel->rd_opcintype[0] ||
+               cur->sk_subtype == InvalidOid)
+               hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
+       else
+               hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
+                                                                                  cur->sk_subtype);
 
-       /*
-        * Acquire shared split lock so we can compute the target bucket
-        * safely (see README).
-        */
-       _hash_getlock(rel, 0, HASH_SHARE);
+       so->hashso_sk_hash = hashkey;
 
        /* Read the metapage */
-       metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
-       metap = (HashMetaPage) BufferGetPage(metabuf);
-       _hash_checkpage(rel, (Page) metap, LH_META_PAGE);
+       metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+       page = BufferGetPage(metabuf, NULL, NULL,
+                                                BGP_NO_SNAPSHOT_TEST);
+       metap = HashPageGetMeta(page);
 
        /*
-        * Compute the target bucket number, and convert to block number.
+        * Loop until we get a lock on the correct target bucket.
         */
-       bucket = _hash_hashkey2bucket(hashkey,
-                                                                 metap->hashm_maxbucket,
-                                                                 metap->hashm_highmask,
-                                                                 metap->hashm_lowmask);
-
-       blkno = BUCKET_TO_BLKNO(metap, bucket);
+       for (;;)
+       {
+               /*
+                * Compute the target bucket number, and convert to block number.
+                */
+               bucket = _hash_hashkey2bucket(hashkey,
+                                                                         metap->hashm_maxbucket,
+                                                                         metap->hashm_highmask,
+                                                                         metap->hashm_lowmask);
+
+               blkno = BUCKET_TO_BLKNO(metap, bucket);
+
+               /* Release metapage lock, but keep pin. */
+               _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
+
+               /*
+                * If the previous iteration of this loop locked what is still the
+                * correct target bucket, we are done.  Otherwise, drop any old lock
+                * and lock what now appears to be the correct bucket.
+                */
+               if (retry)
+               {
+                       if (oldblkno == blkno)
+                               break;
+                       _hash_droplock(rel, oldblkno, HASH_SHARE);
+               }
+               _hash_getlock(rel, blkno, HASH_SHARE);
+
+               /*
+                * Reacquire metapage lock and check that no bucket split has taken
+                * place while we were awaiting the bucket lock.
+                */
+               _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ);
+               oldblkno = blkno;
+               retry = true;
+       }
 
        /* done with the metapage */
-       _hash_relbuf(rel, metabuf);
-
-       /*
-        * Acquire share lock on target bucket; then we can release split lock.
-        */
-       _hash_getlock(rel, blkno, HASH_SHARE);
-
-       _hash_droplock(rel, 0, HASH_SHARE);
+       _hash_dropbuf(rel, metabuf);
 
        /* Update scan opaque state to show we have lock on the bucket */
        so->hashso_bucket = bucket;
@@ -194,9 +241,9 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
        so->hashso_bucket_blkno = blkno;
 
        /* Fetch the primary bucket page for the bucket */
-       buf = _hash_getbuf(rel, blkno, HASH_READ);
-       page = BufferGetPage(buf);
-       _hash_checkpage(rel, page, LH_BUCKET_PAGE);
+       buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
+       page = BufferGetPage(buf, NULL, NULL,
+                                                BGP_NO_SNAPSHOT_TEST);
        opaque = (HashPageOpaque) PageGetSpecialPointer(page);
        Assert(opaque->hasho_bucket == bucket);
 
@@ -213,11 +260,10 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 
        /* if we're here, _hash_step found a valid tuple */
        offnum = ItemPointerGetOffsetNumber(current);
-       page = BufferGetPage(buf);
-       _hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
-       hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
-       itup = &hitem->hash_itup;
-       scan->xs_ctup.t_self = itup->t_tid;
+       _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+       page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
+       itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+       so->hashso_heappos = itup->t_tid;
 
        return true;
 }
@@ -226,7 +272,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
  *     _hash_step() -- step to the next valid item in a scan in the bucket.
  *
  *             If no valid record exists in the requested direction, return
- *             false.  Else, return true and set the CurrentItemData for the
+ *             false.  Else, return true and set the hashso_curpos for the
  *             scan to the right thing.
  *
  *             'bufP' points to the current buffer, which is pinned and read-locked.
@@ -244,23 +290,20 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
        HashPageOpaque opaque;
        OffsetNumber maxoff;
        OffsetNumber offnum;
-       Bucket          bucket;
        BlockNumber blkno;
-       HashItem        hitem;
        IndexTuple      itup;
 
-       current = &(scan->currentItemData);
+       current = &(so->hashso_curpos);
 
        buf = *bufP;
-       page = BufferGetPage(buf);
-       _hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+       _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+       page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
        opaque = (HashPageOpaque) PageGetSpecialPointer(page);
-       bucket = opaque->hasho_bucket;
 
        /*
-        * If _hash_step is called from _hash_first, current will not be
-        * valid, so we can't dereference it.  However, in that case, we
-        * presumably want to start at the beginning/end of the page...
+        * If _hash_step is called from _hash_first, current will not be valid, so
+        * we can't dereference it.  However, in that case, we presumably want to
+        * start at the beginning/end of the page...
         */
        maxoff = PageGetMaxOffsetNumber(page);
        if (ItemPointerIsValid(current))
@@ -269,7 +312,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
                offnum = InvalidOffsetNumber;
 
        /*
-        * 'offnum' now points to the last tuple we have seen (if any).
+        * 'offnum' now points to the last tuple we examined (if any).
         *
         * continue to step through tuples until: 1) we get to the end of the
         * bucket chain or 2) we find a valid tuple.
@@ -282,26 +325,39 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
                                if (offnum != InvalidOffsetNumber)
                                        offnum = OffsetNumberNext(offnum);      /* move forward */
                                else
-                                       offnum = FirstOffsetNumber; /* new page */
+                               {
+                                       /* new page, locate starting position by binary search */
+                                       offnum = _hash_binsearch(page, so->hashso_sk_hash);
+                               }
 
-                               while (offnum > maxoff)
+                               for (;;)
                                {
                                        /*
-                                        * either this page is empty
-                                        * (maxoff == InvalidOffsetNumber)
-                                        * or we ran off the end.
+                                        * check if we're still in the range of items with the
+                                        * target hash key
+                                        */
+                                       if (offnum <= maxoff)
+                                       {
+                                               Assert(offnum >= FirstOffsetNumber);
+                                               itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+                                               if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
+                                                       break;          /* yes, so exit for-loop */
+                                       }
+
+                                       /*
+                                        * ran off the end of this page, try the next
                                         */
                                        _hash_readnext(rel, &buf, &page, &opaque);
                                        if (BufferIsValid(buf))
                                        {
                                                maxoff = PageGetMaxOffsetNumber(page);
-                                               offnum = FirstOffsetNumber;
+                                               offnum = _hash_binsearch(page, so->hashso_sk_hash);
                                        }
                                        else
                                        {
                                                /* end of bucket */
-                                               maxoff = offnum = InvalidOffsetNumber;
-                                               break;  /* exit while */
+                                               itup = NULL;
+                                               break;  /* exit for-loop */
                                        }
                                }
                                break;
@@ -310,25 +366,39 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
                                if (offnum != InvalidOffsetNumber)
                                        offnum = OffsetNumberPrev(offnum);      /* move back */
                                else
-                                       offnum = maxoff;        /* new page */
+                               {
+                                       /* new page, locate starting position by binary search */
+                                       offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
+                               }
 
-                               while (offnum < FirstOffsetNumber)
+                               for (;;)
                                {
                                        /*
-                                        * either this page is empty
-                                        * (offnum == InvalidOffsetNumber)
-                                        * or we ran off the end.
+                                        * check if we're still in the range of items with the
+                                        * target hash key
+                                        */
+                                       if (offnum >= FirstOffsetNumber)
+                                       {
+                                               Assert(offnum <= maxoff);
+                                               itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+                                               if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
+                                                       break;          /* yes, so exit for-loop */
+                                       }
+
+                                       /*
+                                        * ran off the end of this page, try the next
                                         */
                                        _hash_readprev(rel, &buf, &page, &opaque);
                                        if (BufferIsValid(buf))
                                        {
-                                               maxoff = offnum = PageGetMaxOffsetNumber(page);
+                                               maxoff = PageGetMaxOffsetNumber(page);
+                                               offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
                                        }
                                        else
                                        {
                                                /* end of bucket */
-                                               maxoff = offnum = InvalidOffsetNumber;
-                                               break;  /* exit while */
+                                               itup = NULL;
+                                               break;  /* exit for-loop */
                                        }
                                }
                                break;
@@ -336,20 +406,19 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
                        default:
                                /* NoMovementScanDirection */
                                /* this should not be reached */
+                               itup = NULL;
                                break;
                }
 
-               /* we ran off the end of the world without finding a match */
-               if (offnum == InvalidOffsetNumber)
+               if (itup == NULL)
                {
+                       /* we ran off the end of the bucket without finding a match */
                        *bufP = so->hashso_curbuf = InvalidBuffer;
                        ItemPointerSetInvalid(current);
                        return false;
                }
 
-               /* get ready to check this tuple */
-               hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
-               itup = &hitem->hash_itup;
+               /* check the tuple quals, loop around if not met */
        } while (!_hash_checkqual(scan, itup));
 
        /* if we made it to here, we've found a valid tuple */