1 /*-------------------------------------------------------------------------
4 * search code for postgres hash tables
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.52 2008/05/12 00:00:44 alvherre Exp $
13 *-------------------------------------------------------------------------
17 #include "access/hash.h"
19 #include "storage/bufmgr.h"
23 * _hash_next() -- Get the next item in a scan.
25 * On entry, we have a valid hashso_curpos in the scan, and a
26 * pin and read lock on the page that contains that item.
27 * We find the next item in the scan, if any.
28 * On success exit, we have the page containing the next item
32 _hash_next(IndexScanDesc scan, ScanDirection dir)
34 Relation rel = scan->indexRelation;
35 HashScanOpaque so = (HashScanOpaque) scan->opaque;
42 /* we still have the buffer pinned and read-locked */
43 buf = so->hashso_curbuf;
44 Assert(BufferIsValid(buf));
47 * step to next valid tuple.
49 if (!_hash_step(scan, &buf, dir))
52 /* if we're here, _hash_step found a valid tuple */
53 current = &(so->hashso_curpos);
54 offnum = ItemPointerGetOffsetNumber(current);
55 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
56 page = BufferGetPage(buf);
57 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
58 scan->xs_ctup.t_self = itup->t_tid;
64 * Advance to next page in a bucket, if any.
67 _hash_readnext(Relation rel,
68 Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
72 blkno = (*opaquep)->hasho_nextblkno;
73 _hash_relbuf(rel, *bufp);
74 *bufp = InvalidBuffer;
75 if (BlockNumberIsValid(blkno))
77 *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
78 *pagep = BufferGetPage(*bufp);
79 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
84 * Advance to previous page in a bucket, if any.
87 _hash_readprev(Relation rel,
88 Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
92 blkno = (*opaquep)->hasho_prevblkno;
93 _hash_relbuf(rel, *bufp);
94 *bufp = InvalidBuffer;
95 if (BlockNumberIsValid(blkno))
97 *bufp = _hash_getbuf(rel, blkno, HASH_READ,
98 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
99 *pagep = BufferGetPage(*bufp);
100 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
105 * _hash_first() -- Find the first item in a scan.
107 * Find the first item in the index that
108 * satisfies the qualification associated with the scan descriptor. On
109 * success, the page containing the current index tuple is read locked
110 * and pinned, and the scan's opaque data entry is updated to
111 * include the buffer.
114 _hash_first(IndexScanDesc scan, ScanDirection dir)
116 Relation rel = scan->indexRelation;
117 HashScanOpaque so = (HashScanOpaque) scan->opaque;
125 HashPageOpaque opaque;
131 pgstat_count_index_scan(rel);
133 current = &(so->hashso_curpos);
134 ItemPointerSetInvalid(current);
137 * We do not support hash scans with no index qualification, because we
138 * would have to read the whole index rather than just one bucket. That
139 * creates a whole raft of problems, since we haven't got a practical way
140 * to lock all the buckets against splits or compactions.
142 if (scan->numberOfKeys < 1)
144 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
145 errmsg("hash indexes do not support whole-index scans")));
147 /* There may be more than one index qual, but we hash only the first */
148 cur = &scan->keyData[0];
150 /* We support only single-column hash indexes */
151 Assert(cur->sk_attno == 1);
152 /* And there's only one operator strategy, too */
153 Assert(cur->sk_strategy == HTEqualStrategyNumber);
156 * If the constant in the index qual is NULL, assume it cannot match any
157 * items in the index.
159 if (cur->sk_flags & SK_ISNULL)
163 * Okay to compute the hash key. We want to do this before acquiring any
164 * locks, in case a user-defined hash function happens to be slow.
166 * If scankey operator is not a cross-type comparison, we can use the
167 * cached hash function; otherwise gotta look it up in the catalogs.
169 * We support the convention that sk_subtype == InvalidOid means the
170 * opclass input type; this is a hack to simplify life for ScanKeyInit().
172 if (cur->sk_subtype == rel->rd_opcintype[0] ||
173 cur->sk_subtype == InvalidOid)
174 hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
176 hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
180 * Acquire shared split lock so we can compute the target bucket safely
183 _hash_getlock(rel, 0, HASH_SHARE);
185 /* Read the metapage */
186 metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
187 metap = (HashMetaPage) BufferGetPage(metabuf);
190 * Compute the target bucket number, and convert to block number.
192 bucket = _hash_hashkey2bucket(hashkey,
193 metap->hashm_maxbucket,
194 metap->hashm_highmask,
195 metap->hashm_lowmask);
197 blkno = BUCKET_TO_BLKNO(metap, bucket);
199 /* done with the metapage */
200 _hash_relbuf(rel, metabuf);
203 * Acquire share lock on target bucket; then we can release split lock.
205 _hash_getlock(rel, blkno, HASH_SHARE);
207 _hash_droplock(rel, 0, HASH_SHARE);
209 /* Update scan opaque state to show we have lock on the bucket */
210 so->hashso_bucket = bucket;
211 so->hashso_bucket_valid = true;
212 so->hashso_bucket_blkno = blkno;
214 /* Fetch the primary bucket page for the bucket */
215 buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
216 page = BufferGetPage(buf);
217 opaque = (HashPageOpaque) PageGetSpecialPointer(page);
218 Assert(opaque->hasho_bucket == bucket);
220 /* If a backwards scan is requested, move to the end of the chain */
221 if (ScanDirectionIsBackward(dir))
223 while (BlockNumberIsValid(opaque->hasho_nextblkno))
224 _hash_readnext(rel, &buf, &page, &opaque);
227 /* Now find the first tuple satisfying the qualification */
228 if (!_hash_step(scan, &buf, dir))
231 /* if we're here, _hash_step found a valid tuple */
232 offnum = ItemPointerGetOffsetNumber(current);
233 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
234 page = BufferGetPage(buf);
235 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
236 scan->xs_ctup.t_self = itup->t_tid;
242 * _hash_step() -- step to the next valid item in a scan in the bucket.
244 * If no valid record exists in the requested direction, return
245 * false. Else, return true and set the hashso_curpos for the
246 * scan to the right thing.
248 * 'bufP' points to the current buffer, which is pinned and read-locked.
249 * On success exit, we have pin and read-lock on whichever page
250 * contains the right item; on failure, we have released all buffers.
253 _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
255 Relation rel = scan->indexRelation;
256 HashScanOpaque so = (HashScanOpaque) scan->opaque;
260 HashPageOpaque opaque;
266 current = &(so->hashso_curpos);
269 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
270 page = BufferGetPage(buf);
271 opaque = (HashPageOpaque) PageGetSpecialPointer(page);
274 * If _hash_step is called from _hash_first, current will not be valid, so
275 * we can't dereference it. However, in that case, we presumably want to
276 * start at the beginning/end of the page...
278 maxoff = PageGetMaxOffsetNumber(page);
279 if (ItemPointerIsValid(current))
280 offnum = ItemPointerGetOffsetNumber(current);
282 offnum = InvalidOffsetNumber;
285 * 'offnum' now points to the last tuple we have seen (if any).
287 * continue to step through tuples until: 1) we get to the end of the
288 * bucket chain or 2) we find a valid tuple.
294 case ForwardScanDirection:
295 if (offnum != InvalidOffsetNumber)
296 offnum = OffsetNumberNext(offnum); /* move forward */
298 offnum = FirstOffsetNumber; /* new page */
300 while (offnum > maxoff)
303 * either this page is empty (maxoff ==
304 * InvalidOffsetNumber) or we ran off the end.
306 _hash_readnext(rel, &buf, &page, &opaque);
307 if (BufferIsValid(buf))
309 maxoff = PageGetMaxOffsetNumber(page);
310 offnum = FirstOffsetNumber;
315 maxoff = offnum = InvalidOffsetNumber;
316 break; /* exit while */
321 case BackwardScanDirection:
322 if (offnum != InvalidOffsetNumber)
323 offnum = OffsetNumberPrev(offnum); /* move back */
325 offnum = maxoff; /* new page */
327 while (offnum < FirstOffsetNumber)
330 * either this page is empty (offnum ==
331 * InvalidOffsetNumber) or we ran off the end.
333 _hash_readprev(rel, &buf, &page, &opaque);
334 if (BufferIsValid(buf))
335 maxoff = offnum = PageGetMaxOffsetNumber(page);
339 maxoff = offnum = InvalidOffsetNumber;
340 break; /* exit while */
346 /* NoMovementScanDirection */
347 /* this should not be reached */
351 /* we ran off the end of the world without finding a match */
352 if (offnum == InvalidOffsetNumber)
354 *bufP = so->hashso_curbuf = InvalidBuffer;
355 ItemPointerSetInvalid(current);
359 /* get ready to check this tuple */
360 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
361 } while (!_hash_checkqual(scan, itup));
363 /* if we made it to here, we've found a valid tuple */
364 blkno = BufferGetBlockNumber(buf);
365 *bufP = so->hashso_curbuf = buf;
366 ItemPointerSet(current, blkno, offnum);