1 /*-------------------------------------------------------------------------
4 * search code for postgres hash tables
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/hash/hashsearch.c
13 *-------------------------------------------------------------------------
17 #include "access/hash.h"
18 #include "access/relscan.h"
19 #include "miscadmin.h"
21 #include "utils/rel.h"
25 * _hash_next() -- Get the next item in a scan.
27 * On entry, we have a valid hashso_curpos in the scan, and a
28 * pin and read lock on the page that contains that item.
29 * We find the next item in the scan, if any.
30 * On success exit, we have the page containing the next item
/*
 * NOTE(review): this excerpt appears truncated — the function's storage
 * class/return type line, local variable declarations, braces, and the
 * failure-path statements after the _hash_step() test are not visible here.
 */
_hash_next(IndexScanDesc scan, ScanDirection dir)
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;

    /* we still have the buffer pinned and read-locked */
    buf = so->hashso_curbuf;
    Assert(BufferIsValid(buf));

    /*
     * step to next valid tuple.
     */
    if (!_hash_step(scan, &buf, dir))

    /* if we're here, _hash_step found a valid tuple */
    current = &(so->hashso_curpos);
    offnum = ItemPointerGetOffsetNumber(current);
    /* sanity-check the page _hash_step left us on before reading it */
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);
    itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
    /* record the heap TID of the matching index tuple for the caller */
    so->hashso_heappos = itup->t_tid;
66 * Advance to next page in a bucket, if any.
/*
 * NOTE(review): excerpt truncated — the storage class/return type line, the
 * local BlockNumber declaration, and braces are not visible here.
 *
 * Releases the caller's page before (possibly) acquiring the next one, so
 * there is a window where no buffer lock is held; on exit *bufp is
 * InvalidBuffer if there is no next page in the chain.
 */
_hash_readnext(Relation rel,
               Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
    /* follow the overflow-chain link of the page we are leaving */
    blkno = (*opaquep)->hasho_nextblkno;
    _hash_relbuf(rel, *bufp);
    *bufp = InvalidBuffer;
    /* check for interrupts while we're not holding any buffer lock */
    CHECK_FOR_INTERRUPTS();
    if (BlockNumberIsValid(blkno))
        /* next page in a chain is always an overflow page */
        *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
        *pagep = BufferGetPage(*bufp);
        *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
88 * Advance to previous page in a bucket, if any.
/*
 * NOTE(review): excerpt truncated — the storage class/return type line, the
 * local BlockNumber declaration, and braces are not visible here.
 *
 * Mirror of _hash_readnext: releases the caller's page first, so no buffer
 * lock is held across the interrupt check; on exit *bufp is InvalidBuffer
 * if there is no previous page.
 */
_hash_readprev(Relation rel,
               Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
    /* follow the back-link of the page we are leaving */
    blkno = (*opaquep)->hasho_prevblkno;
    _hash_relbuf(rel, *bufp);
    *bufp = InvalidBuffer;
    /* check for interrupts while we're not holding any buffer lock */
    CHECK_FOR_INTERRUPTS();
    if (BlockNumberIsValid(blkno))
        /* previous page may be the primary bucket page or an overflow page */
        *bufp = _hash_getbuf(rel, blkno, HASH_READ,
                             LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
        *pagep = BufferGetPage(*bufp);
        *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
111 * _hash_first() -- Find the first item in a scan.
113 * Find the first item in the index that
114 * satisfies the qualification associated with the scan descriptor. On
115 * success, the page containing the current index tuple is read locked
116 * and pinned, and the scan's opaque data entry is updated to
117 * include the buffer.
120 _hash_first(IndexScanDesc scan, ScanDirection dir)
122 Relation rel = scan->indexRelation;
123 HashScanOpaque so = (HashScanOpaque) scan->opaque;
128 BlockNumber oldblkno = InvalidBuffer;
133 HashPageOpaque opaque;
139 pgstat_count_index_scan(rel);
141 current = &(so->hashso_curpos);
142 ItemPointerSetInvalid(current);
145 * We do not support hash scans with no index qualification, because we
146 * would have to read the whole index rather than just one bucket. That
147 * creates a whole raft of problems, since we haven't got a practical way
148 * to lock all the buckets against splits or compactions.
150 if (scan->numberOfKeys < 1)
152 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
153 errmsg("hash indexes do not support whole-index scans")));
155 /* There may be more than one index qual, but we hash only the first */
156 cur = &scan->keyData[0];
158 /* We support only single-column hash indexes */
159 Assert(cur->sk_attno == 1);
160 /* And there's only one operator strategy, too */
161 Assert(cur->sk_strategy == HTEqualStrategyNumber);
164 * If the constant in the index qual is NULL, assume it cannot match any
165 * items in the index.
167 if (cur->sk_flags & SK_ISNULL)
171 * Okay to compute the hash key. We want to do this before acquiring any
172 * locks, in case a user-defined hash function happens to be slow.
174 * If scankey operator is not a cross-type comparison, we can use the
175 * cached hash function; otherwise gotta look it up in the catalogs.
177 * We support the convention that sk_subtype == InvalidOid means the
178 * opclass input type; this is a hack to simplify life for ScanKeyInit().
180 if (cur->sk_subtype == rel->rd_opcintype[0] ||
181 cur->sk_subtype == InvalidOid)
182 hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
184 hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
187 so->hashso_sk_hash = hashkey;
189 /* Read the metapage */
190 metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
191 metap = HashPageGetMeta(BufferGetPage(metabuf));
194 * Loop until we get a lock on the correct target bucket.
199 * Compute the target bucket number, and convert to block number.
201 bucket = _hash_hashkey2bucket(hashkey,
202 metap->hashm_maxbucket,
203 metap->hashm_highmask,
204 metap->hashm_lowmask);
206 blkno = BUCKET_TO_BLKNO(metap, bucket);
208 /* Release metapage lock, but keep pin. */
209 _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
212 * If the previous iteration of this loop locked what is still the
213 * correct target bucket, we are done. Otherwise, drop any old lock
214 * and lock what now appears to be the correct bucket.
218 if (oldblkno == blkno)
220 _hash_droplock(rel, oldblkno, HASH_SHARE);
222 _hash_getlock(rel, blkno, HASH_SHARE);
225 * Reacquire metapage lock and check that no bucket split has taken
226 * place while we were awaiting the bucket lock.
228 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ);
233 /* done with the metapage */
234 _hash_dropbuf(rel, metabuf);
236 /* Update scan opaque state to show we have lock on the bucket */
237 so->hashso_bucket = bucket;
238 so->hashso_bucket_valid = true;
239 so->hashso_bucket_blkno = blkno;
241 /* Fetch the primary bucket page for the bucket */
242 buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
243 page = BufferGetPage(buf);
244 opaque = (HashPageOpaque) PageGetSpecialPointer(page);
245 Assert(opaque->hasho_bucket == bucket);
247 /* If a backwards scan is requested, move to the end of the chain */
248 if (ScanDirectionIsBackward(dir))
250 while (BlockNumberIsValid(opaque->hasho_nextblkno))
251 _hash_readnext(rel, &buf, &page, &opaque);
254 /* Now find the first tuple satisfying the qualification */
255 if (!_hash_step(scan, &buf, dir))
258 /* if we're here, _hash_step found a valid tuple */
259 offnum = ItemPointerGetOffsetNumber(current);
260 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
261 page = BufferGetPage(buf);
262 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
263 so->hashso_heappos = itup->t_tid;
269 * _hash_step() -- step to the next valid item in a scan in the bucket.
271 * If no valid record exists in the requested direction, return
272 * false. Else, return true and set the hashso_curpos for the
273 * scan to the right thing.
275 * 'bufP' points to the current buffer, which is pinned and read-locked.
276 * On success exit, we have pin and read-lock on whichever page
277 * contains the right item; on failure, we have released all buffers.
/*
 * NOTE(review): this excerpt appears truncated — the storage class/return
 * type line, several local declarations, the `else`, `for (;;)`, `switch`,
 * and `do` headers, braces, and the `return` statements are not visible.
 */
_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    HashPageOpaque opaque;

    current = &(so->hashso_curpos);

    /* verify we were handed a bucket or overflow page before touching it */
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);

    /*
     * If _hash_step is called from _hash_first, current will not be valid, so
     * we can't dereference it. However, in that case, we presumably want to
     * start at the beginning/end of the page...
     */
    maxoff = PageGetMaxOffsetNumber(page);
    if (ItemPointerIsValid(current))
        offnum = ItemPointerGetOffsetNumber(current);
        /* no prior position: the binary search below picks the start */
        offnum = InvalidOffsetNumber;

    /*
     * 'offnum' now points to the last tuple we examined (if any).
     *
     * continue to step through tuples until: 1) we get to the end of the
     * bucket chain or 2) we find a valid tuple.
     */
            case ForwardScanDirection:
                if (offnum != InvalidOffsetNumber)
                    offnum = OffsetNumberNext(offnum);  /* move forward */
                    /* new page, locate starting position by binary search */
                    offnum = _hash_binsearch(page, so->hashso_sk_hash);

                    /*
                     * check if we're still in the range of items with the
                     */
                    if (offnum <= maxoff)
                        Assert(offnum >= FirstOffsetNumber);
                        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
                        /* hash-code match: candidate for the full qual check below */
                        if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
                            break;  /* yes, so exit for-loop */

                    /*
                     * ran off the end of this page, try the next
                     */
                    _hash_readnext(rel, &buf, &page, &opaque);
                    if (BufferIsValid(buf))
                        maxoff = PageGetMaxOffsetNumber(page);
                        offnum = _hash_binsearch(page, so->hashso_sk_hash);
                        break;  /* exit for-loop */

            case BackwardScanDirection:
                if (offnum != InvalidOffsetNumber)
                    offnum = OffsetNumberPrev(offnum);  /* move back */
                    /* new page, locate starting position by binary search */
                    offnum = _hash_binsearch_last(page, so->hashso_sk_hash);

                    /*
                     * check if we're still in the range of items with the
                     */
                    if (offnum >= FirstOffsetNumber)
                        Assert(offnum <= maxoff);
                        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
                        /* hash-code match: candidate for the full qual check below */
                        if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
                            break;  /* yes, so exit for-loop */

                    /*
                     * ran off the end of this page, try the next
                     */
                    _hash_readprev(rel, &buf, &page, &opaque);
                    if (BufferIsValid(buf))
                        maxoff = PageGetMaxOffsetNumber(page);
                        offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
                        break;  /* exit for-loop */

                /* NoMovementScanDirection */
                /* this should not be reached */

        /* we ran off the end of the bucket without finding a match */
        *bufP = so->hashso_curbuf = InvalidBuffer;
        ItemPointerSetInvalid(current);

        /* check the tuple quals, loop around if not met */
    } while (!_hash_checkqual(scan, itup));

    /* if we made it to here, we've found a valid tuple */
    blkno = BufferGetBlockNumber(buf);
    /* publish the match: remember buffer and set scan position to it */
    *bufP = so->hashso_curbuf = buf;
    ItemPointerSet(current, blkno, offnum);