1 /*-------------------------------------------------------------------------
4 * search code for postgres hash tables
6 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.50 2007/05/27 03:50:38 tgl Exp $
13 *-------------------------------------------------------------------------
17 #include "access/hash.h"
22 * _hash_next() -- Get the next item in a scan.
24 * On entry, we have a valid hashso_curpos in the scan, and a
25 * pin and read lock on the page that contains that item.
26 * We find the next item in the scan, if any.
27 * On success exit, we have the page containing the next item
31 _hash_next(IndexScanDesc scan, ScanDirection dir)
33 Relation rel = scan->indexRelation;
34 HashScanOpaque so = (HashScanOpaque) scan->opaque;
41 /* we still have the buffer pinned and read-locked */
42 buf = so->hashso_curbuf;
43 Assert(BufferIsValid(buf));
46 * step to next valid tuple.
48 if (!_hash_step(scan, &buf, dir))
51 /* if we're here, _hash_step found a valid tuple */
52 current = &(so->hashso_curpos);
53 offnum = ItemPointerGetOffsetNumber(current);
54 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
55 page = BufferGetPage(buf);
56 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
57 scan->xs_ctup.t_self = itup->t_tid;
63 * Advance to next page in a bucket, if any.
66 _hash_readnext(Relation rel,
67 Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
71 blkno = (*opaquep)->hasho_nextblkno;
72 _hash_relbuf(rel, *bufp);
73 *bufp = InvalidBuffer;
74 if (BlockNumberIsValid(blkno))
76 *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
77 *pagep = BufferGetPage(*bufp);
78 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
83 * Advance to previous page in a bucket, if any.
86 _hash_readprev(Relation rel,
87 Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
91 blkno = (*opaquep)->hasho_prevblkno;
92 _hash_relbuf(rel, *bufp);
93 *bufp = InvalidBuffer;
94 if (BlockNumberIsValid(blkno))
96 *bufp = _hash_getbuf(rel, blkno, HASH_READ,
97 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
98 *pagep = BufferGetPage(*bufp);
99 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
104 * _hash_first() -- Find the first item in a scan.
106 * Find the first item in the index that
107 * satisfies the qualification associated with the scan descriptor. On
108 * success, the page containing the current index tuple is read locked
109 * and pinned, and the scan's opaque data entry is updated to
110 * include the buffer.
113 _hash_first(IndexScanDesc scan, ScanDirection dir)
115 Relation rel = scan->indexRelation;
116 HashScanOpaque so = (HashScanOpaque) scan->opaque;
124 HashPageOpaque opaque;
130 pgstat_count_index_scan(rel);
132 current = &(so->hashso_curpos);
133 ItemPointerSetInvalid(current);
136 * We do not support hash scans with no index qualification, because we
137 * would have to read the whole index rather than just one bucket. That
138 * creates a whole raft of problems, since we haven't got a practical way
139 * to lock all the buckets against splits or compactions.
141 if (scan->numberOfKeys < 1)
143 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
144 errmsg("hash indexes do not support whole-index scans")));
146 /* There may be more than one index qual, but we hash only the first */
147 cur = &scan->keyData[0];
149 /* We support only single-column hash indexes */
150 Assert(cur->sk_attno == 1);
151 /* And there's only one operator strategy, too */
152 Assert(cur->sk_strategy == HTEqualStrategyNumber);
155 * If the constant in the index qual is NULL, assume it cannot match any
156 * items in the index.
158 if (cur->sk_flags & SK_ISNULL)
162 * Okay to compute the hash key. We want to do this before acquiring any
163 * locks, in case a user-defined hash function happens to be slow.
165 * If scankey operator is not a cross-type comparison, we can use the
166 * cached hash function; otherwise gotta look it up in the catalogs.
168 * We support the convention that sk_subtype == InvalidOid means the
169 * opclass input type; this is a hack to simplify life for ScanKeyInit().
171 if (cur->sk_subtype == rel->rd_opcintype[0] ||
172 cur->sk_subtype == InvalidOid)
173 hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
175 hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
179 * Acquire shared split lock so we can compute the target bucket safely
182 _hash_getlock(rel, 0, HASH_SHARE);
184 /* Read the metapage */
185 metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
186 metap = (HashMetaPage) BufferGetPage(metabuf);
189 * Compute the target bucket number, and convert to block number.
191 bucket = _hash_hashkey2bucket(hashkey,
192 metap->hashm_maxbucket,
193 metap->hashm_highmask,
194 metap->hashm_lowmask);
196 blkno = BUCKET_TO_BLKNO(metap, bucket);
198 /* done with the metapage */
199 _hash_relbuf(rel, metabuf);
202 * Acquire share lock on target bucket; then we can release split lock.
204 _hash_getlock(rel, blkno, HASH_SHARE);
206 _hash_droplock(rel, 0, HASH_SHARE);
208 /* Update scan opaque state to show we have lock on the bucket */
209 so->hashso_bucket = bucket;
210 so->hashso_bucket_valid = true;
211 so->hashso_bucket_blkno = blkno;
213 /* Fetch the primary bucket page for the bucket */
214 buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
215 page = BufferGetPage(buf);
216 opaque = (HashPageOpaque) PageGetSpecialPointer(page);
217 Assert(opaque->hasho_bucket == bucket);
219 /* If a backwards scan is requested, move to the end of the chain */
220 if (ScanDirectionIsBackward(dir))
222 while (BlockNumberIsValid(opaque->hasho_nextblkno))
223 _hash_readnext(rel, &buf, &page, &opaque);
226 /* Now find the first tuple satisfying the qualification */
227 if (!_hash_step(scan, &buf, dir))
230 /* if we're here, _hash_step found a valid tuple */
231 offnum = ItemPointerGetOffsetNumber(current);
232 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
233 page = BufferGetPage(buf);
234 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
235 scan->xs_ctup.t_self = itup->t_tid;
241 * _hash_step() -- step to the next valid item in a scan in the bucket.
243 * If no valid record exists in the requested direction, return
244 * false. Else, return true and set the hashso_curpos for the
245 * scan to the right thing.
247 * 'bufP' points to the current buffer, which is pinned and read-locked.
248 * On success exit, we have pin and read-lock on whichever page
249 * contains the right item; on failure, we have released all buffers.
252 _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
254 Relation rel = scan->indexRelation;
255 HashScanOpaque so = (HashScanOpaque) scan->opaque;
259 HashPageOpaque opaque;
265 current = &(so->hashso_curpos);
268 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
269 page = BufferGetPage(buf);
270 opaque = (HashPageOpaque) PageGetSpecialPointer(page);
273 * If _hash_step is called from _hash_first, current will not be valid, so
274 * we can't dereference it. However, in that case, we presumably want to
275 * start at the beginning/end of the page...
277 maxoff = PageGetMaxOffsetNumber(page);
278 if (ItemPointerIsValid(current))
279 offnum = ItemPointerGetOffsetNumber(current);
281 offnum = InvalidOffsetNumber;
284 * 'offnum' now points to the last tuple we have seen (if any).
286 * continue to step through tuples until: 1) we get to the end of the
287 * bucket chain or 2) we find a valid tuple.
293 case ForwardScanDirection:
294 if (offnum != InvalidOffsetNumber)
295 offnum = OffsetNumberNext(offnum); /* move forward */
297 offnum = FirstOffsetNumber; /* new page */
299 while (offnum > maxoff)
302 * either this page is empty (maxoff ==
303 * InvalidOffsetNumber) or we ran off the end.
305 _hash_readnext(rel, &buf, &page, &opaque);
306 if (BufferIsValid(buf))
308 maxoff = PageGetMaxOffsetNumber(page);
309 offnum = FirstOffsetNumber;
314 maxoff = offnum = InvalidOffsetNumber;
315 break; /* exit while */
320 case BackwardScanDirection:
321 if (offnum != InvalidOffsetNumber)
322 offnum = OffsetNumberPrev(offnum); /* move back */
324 offnum = maxoff; /* new page */
326 while (offnum < FirstOffsetNumber)
329 * either this page is empty (offnum ==
330 * InvalidOffsetNumber) or we ran off the end.
332 _hash_readprev(rel, &buf, &page, &opaque);
333 if (BufferIsValid(buf))
334 maxoff = offnum = PageGetMaxOffsetNumber(page);
338 maxoff = offnum = InvalidOffsetNumber;
339 break; /* exit while */
345 /* NoMovementScanDirection */
346 /* this should not be reached */
350 /* we ran off the end of the world without finding a match */
351 if (offnum == InvalidOffsetNumber)
353 *bufP = so->hashso_curbuf = InvalidBuffer;
354 ItemPointerSetInvalid(current);
358 /* get ready to check this tuple */
359 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
360 } while (!_hash_checkqual(scan, itup));
362 /* if we made it to here, we've found a valid tuple */
363 blkno = BufferGetBlockNumber(buf);
364 *bufP = so->hashso_curbuf = buf;
365 ItemPointerSet(current, blkno, offnum);