* hashsearch.c
* search code for postgres hash tables
*
- * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.36 2004/08/29 04:12:18 momjian Exp $
+ * src/backend/access/hash/hashsearch.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/hash.h"
-#include "storage/lmgr.h"
+#include "access/relscan.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "utils/rel.h"
/*
* _hash_next() -- Get the next item in a scan.
*
- * On entry, we have a valid currentItemData in the scan, and a
+ * On entry, we have a valid hashso_curpos in the scan, and a
* pin and read lock on the page that contains that item.
* We find the next item in the scan, if any.
* On success exit, we have the page containing the next item
Page page;
OffsetNumber offnum;
ItemPointer current;
- HashItem hitem;
IndexTuple itup;
/* we still have the buffer pinned and read-locked */
return false;
/* if we're here, _hash_step found a valid tuple */
- current = &(scan->currentItemData);
+ current = &(so->hashso_curpos);
offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- _hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
- hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &hitem->hash_itup;
- scan->xs_ctup.t_self = itup->t_tid;
+ _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+ so->hashso_heappos = itup->t_tid;
return true;
}
blkno = (*opaquep)->hasho_nextblkno;
_hash_relbuf(rel, *bufp);
*bufp = InvalidBuffer;
+ /* check for interrupts while we're not holding any buffer lock */
+ CHECK_FOR_INTERRUPTS();
if (BlockNumberIsValid(blkno))
{
- *bufp = _hash_getbuf(rel, blkno, HASH_READ);
- *pagep = BufferGetPage(*bufp);
- _hash_checkpage(rel, *pagep, LH_OVERFLOW_PAGE);
+ *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
+ *pagep = BufferGetPage(*bufp, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
}
}
blkno = (*opaquep)->hasho_prevblkno;
_hash_relbuf(rel, *bufp);
*bufp = InvalidBuffer;
+ /* check for interrupts while we're not holding any buffer lock */
+ CHECK_FOR_INTERRUPTS();
if (BlockNumberIsValid(blkno))
{
- *bufp = _hash_getbuf(rel, blkno, HASH_READ);
- *pagep = BufferGetPage(*bufp);
- _hash_checkpage(rel, *pagep, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ *bufp = _hash_getbuf(rel, blkno, HASH_READ,
+ LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ *pagep = BufferGetPage(*bufp, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
}
}
{
Relation rel = scan->indexRelation;
HashScanOpaque so = (HashScanOpaque) scan->opaque;
+ ScanKey cur;
uint32 hashkey;
Bucket bucket;
BlockNumber blkno;
+ BlockNumber oldblkno = InvalidBuffer;
+ bool retry = false;
Buffer buf;
Buffer metabuf;
Page page;
HashPageOpaque opaque;
HashMetaPage metap;
- HashItem hitem;
IndexTuple itup;
ItemPointer current;
OffsetNumber offnum;
- current = &(scan->currentItemData);
+ pgstat_count_index_scan(rel);
+
+ current = &(so->hashso_curpos);
ItemPointerSetInvalid(current);
/*
- * We do not support hash scans with no index qualification, because
- * we would have to read the whole index rather than just one bucket.
- * That creates a whole raft of problems, since we haven't got a
- * practical way to lock all the buckets against splits or compactions.
+ * We do not support hash scans with no index qualification, because we
+ * would have to read the whole index rather than just one bucket. That
+ * creates a whole raft of problems, since we haven't got a practical way
+ * to lock all the buckets against splits or compactions.
*/
if (scan->numberOfKeys < 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("hash indexes do not support whole-index scans")));
+ /* There may be more than one index qual, but we hash only the first */
+ cur = &scan->keyData[0];
+
+ /* We support only single-column hash indexes */
+ Assert(cur->sk_attno == 1);
+ /* And there's only one operator strategy, too */
+ Assert(cur->sk_strategy == HTEqualStrategyNumber);
+
/*
- * If the constant in the index qual is NULL, assume it cannot match
- * any items in the index.
+ * If the constant in the index qual is NULL, assume it cannot match any
+ * items in the index.
*/
- if (scan->keyData[0].sk_flags & SK_ISNULL)
+ if (cur->sk_flags & SK_ISNULL)
return false;
/*
- * Okay to compute the hash key. We want to do this before acquiring
- * any locks, in case a user-defined hash function happens to be slow.
+ * Okay to compute the hash key. We want to do this before acquiring any
+ * locks, in case a user-defined hash function happens to be slow.
+ *
+ * If scankey operator is not a cross-type comparison, we can use the
+ * cached hash function; otherwise gotta look it up in the catalogs.
+ *
+ * We support the convention that sk_subtype == InvalidOid means the
+ * opclass input type; this is a hack to simplify life for ScanKeyInit().
*/
- hashkey = _hash_datum2hashkey(rel, scan->keyData[0].sk_argument);
+ if (cur->sk_subtype == rel->rd_opcintype[0] ||
+ cur->sk_subtype == InvalidOid)
+ hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
+ else
+ hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
+ cur->sk_subtype);
- /*
- * Acquire shared split lock so we can compute the target bucket
- * safely (see README).
- */
- _hash_getlock(rel, 0, HASH_SHARE);
+ so->hashso_sk_hash = hashkey;
/* Read the metapage */
- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage(rel, (Page) metap, LH_META_PAGE);
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+ page = BufferGetPage(metabuf, NULL, NULL,
+ BGP_NO_SNAPSHOT_TEST);
+ metap = HashPageGetMeta(page);
/*
- * Compute the target bucket number, and convert to block number.
+ * Loop until we get a lock on the correct target bucket.
*/
- bucket = _hash_hashkey2bucket(hashkey,
- metap->hashm_maxbucket,
- metap->hashm_highmask,
- metap->hashm_lowmask);
-
- blkno = BUCKET_TO_BLKNO(metap, bucket);
+ for (;;)
+ {
+ /*
+ * Compute the target bucket number, and convert to block number.
+ */
+ bucket = _hash_hashkey2bucket(hashkey,
+ metap->hashm_maxbucket,
+ metap->hashm_highmask,
+ metap->hashm_lowmask);
+
+ blkno = BUCKET_TO_BLKNO(metap, bucket);
+
+ /* Release metapage lock, but keep pin. */
+ _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
+
+ /*
+ * If the previous iteration of this loop locked what is still the
+ * correct target bucket, we are done. Otherwise, drop any old lock
+ * and lock what now appears to be the correct bucket.
+ */
+ if (retry)
+ {
+ if (oldblkno == blkno)
+ break;
+ _hash_droplock(rel, oldblkno, HASH_SHARE);
+ }
+ _hash_getlock(rel, blkno, HASH_SHARE);
+
+ /*
+ * Reacquire metapage lock and check that no bucket split has taken
+ * place while we were awaiting the bucket lock.
+ */
+ _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ);
+ oldblkno = blkno;
+ retry = true;
+ }
/* done with the metapage */
- _hash_relbuf(rel, metabuf);
-
- /*
- * Acquire share lock on target bucket; then we can release split lock.
- */
- _hash_getlock(rel, blkno, HASH_SHARE);
-
- _hash_droplock(rel, 0, HASH_SHARE);
+ _hash_dropbuf(rel, metabuf);
/* Update scan opaque state to show we have lock on the bucket */
so->hashso_bucket = bucket;
so->hashso_bucket_blkno = blkno;
/* Fetch the primary bucket page for the bucket */
- buf = _hash_getbuf(rel, blkno, HASH_READ);
- page = BufferGetPage(buf);
- _hash_checkpage(rel, page, LH_BUCKET_PAGE);
+ buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
+ page = BufferGetPage(buf, NULL, NULL,
+ BGP_NO_SNAPSHOT_TEST);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(opaque->hasho_bucket == bucket);
/* if we're here, _hash_step found a valid tuple */
offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- _hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
- hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &hitem->hash_itup;
- scan->xs_ctup.t_self = itup->t_tid;
+ _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+ so->hashso_heappos = itup->t_tid;
return true;
}
* _hash_step() -- step to the next valid item in a scan in the bucket.
*
* If no valid record exists in the requested direction, return
- * false. Else, return true and set the CurrentItemData for the
+ * false. Else, return true and set the hashso_curpos for the
* scan to the right thing.
*
* 'bufP' points to the current buffer, which is pinned and read-locked.
HashPageOpaque opaque;
OffsetNumber maxoff;
OffsetNumber offnum;
- Bucket bucket;
BlockNumber blkno;
- HashItem hitem;
IndexTuple itup;
- current = &(scan->currentItemData);
+ current = &(so->hashso_curpos);
buf = *bufP;
- page = BufferGetPage(buf);
- _hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
- bucket = opaque->hasho_bucket;
/*
- * If _hash_step is called from _hash_first, current will not be
- * valid, so we can't dereference it. However, in that case, we
- * presumably want to start at the beginning/end of the page...
+ * If _hash_step is called from _hash_first, current will not be valid, so
+ * we can't dereference it. However, in that case, we presumably want to
+ * start at the beginning/end of the page...
*/
maxoff = PageGetMaxOffsetNumber(page);
if (ItemPointerIsValid(current))
offnum = InvalidOffsetNumber;
/*
- * 'offnum' now points to the last tuple we have seen (if any).
+ * 'offnum' now points to the last tuple we examined (if any).
*
* continue to step through tuples until: 1) we get to the end of the
* bucket chain or 2) we find a valid tuple.
if (offnum != InvalidOffsetNumber)
offnum = OffsetNumberNext(offnum); /* move forward */
else
- offnum = FirstOffsetNumber; /* new page */
+ {
+ /* new page, locate starting position by binary search */
+ offnum = _hash_binsearch(page, so->hashso_sk_hash);
+ }
- while (offnum > maxoff)
+ for (;;)
{
/*
- * either this page is empty
- * (maxoff == InvalidOffsetNumber)
- * or we ran off the end.
+ * check if we're still in the range of items with the
+ * target hash key
+ */
+ if (offnum <= maxoff)
+ {
+ Assert(offnum >= FirstOffsetNumber);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+ if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
+ break; /* yes, so exit for-loop */
+ }
+
+ /*
+ * ran off the end of this page, try the next
*/
_hash_readnext(rel, &buf, &page, &opaque);
if (BufferIsValid(buf))
{
maxoff = PageGetMaxOffsetNumber(page);
- offnum = FirstOffsetNumber;
+ offnum = _hash_binsearch(page, so->hashso_sk_hash);
}
else
{
/* end of bucket */
- maxoff = offnum = InvalidOffsetNumber;
- break; /* exit while */
+ itup = NULL;
+ break; /* exit for-loop */
}
}
break;
if (offnum != InvalidOffsetNumber)
offnum = OffsetNumberPrev(offnum); /* move back */
else
- offnum = maxoff; /* new page */
+ {
+ /* new page, locate starting position by binary search */
+ offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
+ }
- while (offnum < FirstOffsetNumber)
+ for (;;)
{
/*
- * either this page is empty
- * (offnum == InvalidOffsetNumber)
- * or we ran off the end.
+ * check if we're still in the range of items with the
+ * target hash key
+ */
+ if (offnum >= FirstOffsetNumber)
+ {
+ Assert(offnum <= maxoff);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+ if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
+ break; /* yes, so exit for-loop */
+ }
+
+ /*
+ * ran off the start of this page, try the previous
*/
_hash_readprev(rel, &buf, &page, &opaque);
if (BufferIsValid(buf))
{
- maxoff = offnum = PageGetMaxOffsetNumber(page);
+ maxoff = PageGetMaxOffsetNumber(page);
+ offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
}
else
{
/* end of bucket */
- maxoff = offnum = InvalidOffsetNumber;
- break; /* exit while */
+ itup = NULL;
+ break; /* exit for-loop */
}
}
break;
default:
/* NoMovementScanDirection */
/* this should not be reached */
+ itup = NULL;
break;
}
- /* we ran off the end of the world without finding a match */
- if (offnum == InvalidOffsetNumber)
+ if (itup == NULL)
{
+ /* we ran off the end of the bucket without finding a match */
*bufP = so->hashso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(current);
return false;
}
- /* get ready to check this tuple */
- hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &hitem->hash_itup;
+ /* check the tuple quals, loop around if not met */
} while (!_hash_checkqual(scan, itup));
/* if we made it to here, we've found a valid tuple */