]> granicus.if.org Git - postgresql/blob - src/backend/access/hash/hashsearch.c
Restructure some header files a bit, in particular heapam.h, by removing some
[postgresql] / src / backend / access / hash / hashsearch.c
1 /*-------------------------------------------------------------------------
2  *
3  * hashsearch.c
4  *        search code for postgres hash tables
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.52 2008/05/12 00:00:44 alvherre Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/hash.h"
18 #include "pgstat.h"
19 #include "storage/bufmgr.h"
20
21
22 /*
23  *      _hash_next() -- Get the next item in a scan.
24  *
25  *              On entry, we have a valid hashso_curpos in the scan, and a
26  *              pin and read lock on the page that contains that item.
27  *              We find the next item in the scan, if any.
28  *              On success exit, we have the page containing the next item
29  *              pinned and locked.
30  */
31 bool
32 _hash_next(IndexScanDesc scan, ScanDirection dir)
33 {
34         Relation        rel = scan->indexRelation;
35         HashScanOpaque so = (HashScanOpaque) scan->opaque;
36         Buffer          buf;
37         Page            page;
38         OffsetNumber offnum;
39         ItemPointer current;
40         IndexTuple      itup;
41
42         /* we still have the buffer pinned and read-locked */
43         buf = so->hashso_curbuf;
44         Assert(BufferIsValid(buf));
45
46         /*
47          * step to next valid tuple.
48          */
49         if (!_hash_step(scan, &buf, dir))
50                 return false;
51
52         /* if we're here, _hash_step found a valid tuple */
53         current = &(so->hashso_curpos);
54         offnum = ItemPointerGetOffsetNumber(current);
55         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
56         page = BufferGetPage(buf);
57         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
58         scan->xs_ctup.t_self = itup->t_tid;
59
60         return true;
61 }
62
63 /*
64  * Advance to next page in a bucket, if any.
65  */
66 static void
67 _hash_readnext(Relation rel,
68                            Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
69 {
70         BlockNumber blkno;
71
72         blkno = (*opaquep)->hasho_nextblkno;
73         _hash_relbuf(rel, *bufp);
74         *bufp = InvalidBuffer;
75         if (BlockNumberIsValid(blkno))
76         {
77                 *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
78                 *pagep = BufferGetPage(*bufp);
79                 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
80         }
81 }
82
83 /*
84  * Advance to previous page in a bucket, if any.
85  */
86 static void
87 _hash_readprev(Relation rel,
88                            Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
89 {
90         BlockNumber blkno;
91
92         blkno = (*opaquep)->hasho_prevblkno;
93         _hash_relbuf(rel, *bufp);
94         *bufp = InvalidBuffer;
95         if (BlockNumberIsValid(blkno))
96         {
97                 *bufp = _hash_getbuf(rel, blkno, HASH_READ,
98                                                          LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
99                 *pagep = BufferGetPage(*bufp);
100                 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
101         }
102 }
103
104 /*
105  *      _hash_first() -- Find the first item in a scan.
106  *
107  *              Find the first item in the index that
108  *              satisfies the qualification associated with the scan descriptor. On
109  *              success, the page containing the current index tuple is read locked
110  *              and pinned, and the scan's opaque data entry is updated to
111  *              include the buffer.
112  */
113 bool
114 _hash_first(IndexScanDesc scan, ScanDirection dir)
115 {
116         Relation        rel = scan->indexRelation;
117         HashScanOpaque so = (HashScanOpaque) scan->opaque;
118         ScanKey         cur;
119         uint32          hashkey;
120         Bucket          bucket;
121         BlockNumber blkno;
122         Buffer          buf;
123         Buffer          metabuf;
124         Page            page;
125         HashPageOpaque opaque;
126         HashMetaPage metap;
127         IndexTuple      itup;
128         ItemPointer current;
129         OffsetNumber offnum;
130
131         pgstat_count_index_scan(rel);
132
133         current = &(so->hashso_curpos);
134         ItemPointerSetInvalid(current);
135
136         /*
137          * We do not support hash scans with no index qualification, because we
138          * would have to read the whole index rather than just one bucket. That
139          * creates a whole raft of problems, since we haven't got a practical way
140          * to lock all the buckets against splits or compactions.
141          */
142         if (scan->numberOfKeys < 1)
143                 ereport(ERROR,
144                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
145                                  errmsg("hash indexes do not support whole-index scans")));
146
147         /* There may be more than one index qual, but we hash only the first */
148         cur = &scan->keyData[0];
149
150         /* We support only single-column hash indexes */
151         Assert(cur->sk_attno == 1);
152         /* And there's only one operator strategy, too */
153         Assert(cur->sk_strategy == HTEqualStrategyNumber);
154
155         /*
156          * If the constant in the index qual is NULL, assume it cannot match any
157          * items in the index.
158          */
159         if (cur->sk_flags & SK_ISNULL)
160                 return false;
161
162         /*
163          * Okay to compute the hash key.  We want to do this before acquiring any
164          * locks, in case a user-defined hash function happens to be slow.
165          *
166          * If scankey operator is not a cross-type comparison, we can use the
167          * cached hash function; otherwise gotta look it up in the catalogs.
168          *
169          * We support the convention that sk_subtype == InvalidOid means the
170          * opclass input type; this is a hack to simplify life for ScanKeyInit().
171          */
172         if (cur->sk_subtype == rel->rd_opcintype[0] ||
173                 cur->sk_subtype == InvalidOid)
174                 hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
175         else
176                 hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
177                                                                                    cur->sk_subtype);
178
179         /*
180          * Acquire shared split lock so we can compute the target bucket safely
181          * (see README).
182          */
183         _hash_getlock(rel, 0, HASH_SHARE);
184
185         /* Read the metapage */
186         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
187         metap = (HashMetaPage) BufferGetPage(metabuf);
188
189         /*
190          * Compute the target bucket number, and convert to block number.
191          */
192         bucket = _hash_hashkey2bucket(hashkey,
193                                                                   metap->hashm_maxbucket,
194                                                                   metap->hashm_highmask,
195                                                                   metap->hashm_lowmask);
196
197         blkno = BUCKET_TO_BLKNO(metap, bucket);
198
199         /* done with the metapage */
200         _hash_relbuf(rel, metabuf);
201
202         /*
203          * Acquire share lock on target bucket; then we can release split lock.
204          */
205         _hash_getlock(rel, blkno, HASH_SHARE);
206
207         _hash_droplock(rel, 0, HASH_SHARE);
208
209         /* Update scan opaque state to show we have lock on the bucket */
210         so->hashso_bucket = bucket;
211         so->hashso_bucket_valid = true;
212         so->hashso_bucket_blkno = blkno;
213
214         /* Fetch the primary bucket page for the bucket */
215         buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
216         page = BufferGetPage(buf);
217         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
218         Assert(opaque->hasho_bucket == bucket);
219
220         /* If a backwards scan is requested, move to the end of the chain */
221         if (ScanDirectionIsBackward(dir))
222         {
223                 while (BlockNumberIsValid(opaque->hasho_nextblkno))
224                         _hash_readnext(rel, &buf, &page, &opaque);
225         }
226
227         /* Now find the first tuple satisfying the qualification */
228         if (!_hash_step(scan, &buf, dir))
229                 return false;
230
231         /* if we're here, _hash_step found a valid tuple */
232         offnum = ItemPointerGetOffsetNumber(current);
233         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
234         page = BufferGetPage(buf);
235         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
236         scan->xs_ctup.t_self = itup->t_tid;
237
238         return true;
239 }
240
241 /*
242  *      _hash_step() -- step to the next valid item in a scan in the bucket.
243  *
244  *              If no valid record exists in the requested direction, return
245  *              false.  Else, return true and set the hashso_curpos for the
246  *              scan to the right thing.
247  *
248  *              'bufP' points to the current buffer, which is pinned and read-locked.
249  *              On success exit, we have pin and read-lock on whichever page
250  *              contains the right item; on failure, we have released all buffers.
251  */
252 bool
253 _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
254 {
255         Relation        rel = scan->indexRelation;
256         HashScanOpaque so = (HashScanOpaque) scan->opaque;
257         ItemPointer current;
258         Buffer          buf;
259         Page            page;
260         HashPageOpaque opaque;
261         OffsetNumber maxoff;
262         OffsetNumber offnum;
263         BlockNumber blkno;
264         IndexTuple      itup;
265
266         current = &(so->hashso_curpos);
267
268         buf = *bufP;
269         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
270         page = BufferGetPage(buf);
271         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
272
273         /*
274          * If _hash_step is called from _hash_first, current will not be valid, so
275          * we can't dereference it.  However, in that case, we presumably want to
276          * start at the beginning/end of the page...
277          */
278         maxoff = PageGetMaxOffsetNumber(page);
279         if (ItemPointerIsValid(current))
280                 offnum = ItemPointerGetOffsetNumber(current);
281         else
282                 offnum = InvalidOffsetNumber;
283
284         /*
285          * 'offnum' now points to the last tuple we have seen (if any).
286          *
287          * continue to step through tuples until: 1) we get to the end of the
288          * bucket chain or 2) we find a valid tuple.
289          */
290         do
291         {
292                 switch (dir)
293                 {
294                         case ForwardScanDirection:
295                                 if (offnum != InvalidOffsetNumber)
296                                         offnum = OffsetNumberNext(offnum);      /* move forward */
297                                 else
298                                         offnum = FirstOffsetNumber; /* new page */
299
300                                 while (offnum > maxoff)
301                                 {
302                                         /*
303                                          * either this page is empty (maxoff ==
304                                          * InvalidOffsetNumber) or we ran off the end.
305                                          */
306                                         _hash_readnext(rel, &buf, &page, &opaque);
307                                         if (BufferIsValid(buf))
308                                         {
309                                                 maxoff = PageGetMaxOffsetNumber(page);
310                                                 offnum = FirstOffsetNumber;
311                                         }
312                                         else
313                                         {
314                                                 /* end of bucket */
315                                                 maxoff = offnum = InvalidOffsetNumber;
316                                                 break;  /* exit while */
317                                         }
318                                 }
319                                 break;
320
321                         case BackwardScanDirection:
322                                 if (offnum != InvalidOffsetNumber)
323                                         offnum = OffsetNumberPrev(offnum);      /* move back */
324                                 else
325                                         offnum = maxoff;        /* new page */
326
327                                 while (offnum < FirstOffsetNumber)
328                                 {
329                                         /*
330                                          * either this page is empty (offnum ==
331                                          * InvalidOffsetNumber) or we ran off the end.
332                                          */
333                                         _hash_readprev(rel, &buf, &page, &opaque);
334                                         if (BufferIsValid(buf))
335                                                 maxoff = offnum = PageGetMaxOffsetNumber(page);
336                                         else
337                                         {
338                                                 /* end of bucket */
339                                                 maxoff = offnum = InvalidOffsetNumber;
340                                                 break;  /* exit while */
341                                         }
342                                 }
343                                 break;
344
345                         default:
346                                 /* NoMovementScanDirection */
347                                 /* this should not be reached */
348                                 break;
349                 }
350
351                 /* we ran off the end of the world without finding a match */
352                 if (offnum == InvalidOffsetNumber)
353                 {
354                         *bufP = so->hashso_curbuf = InvalidBuffer;
355                         ItemPointerSetInvalid(current);
356                         return false;
357                 }
358
359                 /* get ready to check this tuple */
360                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
361         } while (!_hash_checkqual(scan, itup));
362
363         /* if we made it to here, we've found a valid tuple */
364         blkno = BufferGetBlockNumber(buf);
365         *bufP = so->hashso_curbuf = buf;
366         ItemPointerSet(current, blkno, offnum);
367         return true;
368 }