]> granicus.if.org Git - postgresql/blob - src/backend/access/hash/hashsearch.c
pgindent run for release 9.3
[postgresql] / src / backend / access / hash / hashsearch.c
1 /*-------------------------------------------------------------------------
2  *
3  * hashsearch.c
4  *        search code for postgres hash tables
5  *
6  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/access/hash/hashsearch.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/hash.h"
18 #include "access/relscan.h"
19 #include "miscadmin.h"
20 #include "pgstat.h"
21 #include "utils/rel.h"
22
23
24 /*
25  *      _hash_next() -- Get the next item in a scan.
26  *
27  *              On entry, we have a valid hashso_curpos in the scan, and a
28  *              pin and read lock on the page that contains that item.
29  *              We find the next item in the scan, if any.
30  *              On success exit, we have the page containing the next item
31  *              pinned and locked.
32  */
33 bool
34 _hash_next(IndexScanDesc scan, ScanDirection dir)
35 {
36         Relation        rel = scan->indexRelation;
37         HashScanOpaque so = (HashScanOpaque) scan->opaque;
38         Buffer          buf;
39         Page            page;
40         OffsetNumber offnum;
41         ItemPointer current;
42         IndexTuple      itup;
43
44         /* we still have the buffer pinned and read-locked */
45         buf = so->hashso_curbuf;
46         Assert(BufferIsValid(buf));
47
48         /*
49          * step to next valid tuple.
50          */
51         if (!_hash_step(scan, &buf, dir))
52                 return false;
53
54         /* if we're here, _hash_step found a valid tuple */
55         current = &(so->hashso_curpos);
56         offnum = ItemPointerGetOffsetNumber(current);
57         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
58         page = BufferGetPage(buf);
59         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
60         so->hashso_heappos = itup->t_tid;
61
62         return true;
63 }
64
65 /*
66  * Advance to next page in a bucket, if any.
67  */
68 static void
69 _hash_readnext(Relation rel,
70                            Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
71 {
72         BlockNumber blkno;
73
74         blkno = (*opaquep)->hasho_nextblkno;
75         _hash_relbuf(rel, *bufp);
76         *bufp = InvalidBuffer;
77         /* check for interrupts while we're not holding any buffer lock */
78         CHECK_FOR_INTERRUPTS();
79         if (BlockNumberIsValid(blkno))
80         {
81                 *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
82                 *pagep = BufferGetPage(*bufp);
83                 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
84         }
85 }
86
87 /*
88  * Advance to previous page in a bucket, if any.
89  */
90 static void
91 _hash_readprev(Relation rel,
92                            Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
93 {
94         BlockNumber blkno;
95
96         blkno = (*opaquep)->hasho_prevblkno;
97         _hash_relbuf(rel, *bufp);
98         *bufp = InvalidBuffer;
99         /* check for interrupts while we're not holding any buffer lock */
100         CHECK_FOR_INTERRUPTS();
101         if (BlockNumberIsValid(blkno))
102         {
103                 *bufp = _hash_getbuf(rel, blkno, HASH_READ,
104                                                          LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
105                 *pagep = BufferGetPage(*bufp);
106                 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
107         }
108 }
109
110 /*
111  *      _hash_first() -- Find the first item in a scan.
112  *
113  *              Find the first item in the index that
114  *              satisfies the qualification associated with the scan descriptor. On
115  *              success, the page containing the current index tuple is read locked
116  *              and pinned, and the scan's opaque data entry is updated to
117  *              include the buffer.
118  */
119 bool
120 _hash_first(IndexScanDesc scan, ScanDirection dir)
121 {
122         Relation        rel = scan->indexRelation;
123         HashScanOpaque so = (HashScanOpaque) scan->opaque;
124         ScanKey         cur;
125         uint32          hashkey;
126         Bucket          bucket;
127         BlockNumber blkno;
128         BlockNumber oldblkno = InvalidBuffer;
129         bool            retry = false;
130         Buffer          buf;
131         Buffer          metabuf;
132         Page            page;
133         HashPageOpaque opaque;
134         HashMetaPage metap;
135         IndexTuple      itup;
136         ItemPointer current;
137         OffsetNumber offnum;
138
139         pgstat_count_index_scan(rel);
140
141         current = &(so->hashso_curpos);
142         ItemPointerSetInvalid(current);
143
144         /*
145          * We do not support hash scans with no index qualification, because we
146          * would have to read the whole index rather than just one bucket. That
147          * creates a whole raft of problems, since we haven't got a practical way
148          * to lock all the buckets against splits or compactions.
149          */
150         if (scan->numberOfKeys < 1)
151                 ereport(ERROR,
152                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
153                                  errmsg("hash indexes do not support whole-index scans")));
154
155         /* There may be more than one index qual, but we hash only the first */
156         cur = &scan->keyData[0];
157
158         /* We support only single-column hash indexes */
159         Assert(cur->sk_attno == 1);
160         /* And there's only one operator strategy, too */
161         Assert(cur->sk_strategy == HTEqualStrategyNumber);
162
163         /*
164          * If the constant in the index qual is NULL, assume it cannot match any
165          * items in the index.
166          */
167         if (cur->sk_flags & SK_ISNULL)
168                 return false;
169
170         /*
171          * Okay to compute the hash key.  We want to do this before acquiring any
172          * locks, in case a user-defined hash function happens to be slow.
173          *
174          * If scankey operator is not a cross-type comparison, we can use the
175          * cached hash function; otherwise gotta look it up in the catalogs.
176          *
177          * We support the convention that sk_subtype == InvalidOid means the
178          * opclass input type; this is a hack to simplify life for ScanKeyInit().
179          */
180         if (cur->sk_subtype == rel->rd_opcintype[0] ||
181                 cur->sk_subtype == InvalidOid)
182                 hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
183         else
184                 hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
185                                                                                    cur->sk_subtype);
186
187         so->hashso_sk_hash = hashkey;
188
189         /* Read the metapage */
190         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
191         metap = HashPageGetMeta(BufferGetPage(metabuf));
192
193         /*
194          * Loop until we get a lock on the correct target bucket.
195          */
196         for (;;)
197         {
198                 /*
199                  * Compute the target bucket number, and convert to block number.
200                  */
201                 bucket = _hash_hashkey2bucket(hashkey,
202                                                                           metap->hashm_maxbucket,
203                                                                           metap->hashm_highmask,
204                                                                           metap->hashm_lowmask);
205
206                 blkno = BUCKET_TO_BLKNO(metap, bucket);
207
208                 /* Release metapage lock, but keep pin. */
209                 _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
210
211                 /*
212                  * If the previous iteration of this loop locked what is still the
213                  * correct target bucket, we are done.  Otherwise, drop any old lock
214                  * and lock what now appears to be the correct bucket.
215                  */
216                 if (retry)
217                 {
218                         if (oldblkno == blkno)
219                                 break;
220                         _hash_droplock(rel, oldblkno, HASH_SHARE);
221                 }
222                 _hash_getlock(rel, blkno, HASH_SHARE);
223
224                 /*
225                  * Reacquire metapage lock and check that no bucket split has taken
226                  * place while we were awaiting the bucket lock.
227                  */
228                 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ);
229                 oldblkno = blkno;
230                 retry = true;
231         }
232
233         /* done with the metapage */
234         _hash_dropbuf(rel, metabuf);
235
236         /* Update scan opaque state to show we have lock on the bucket */
237         so->hashso_bucket = bucket;
238         so->hashso_bucket_valid = true;
239         so->hashso_bucket_blkno = blkno;
240
241         /* Fetch the primary bucket page for the bucket */
242         buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
243         page = BufferGetPage(buf);
244         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
245         Assert(opaque->hasho_bucket == bucket);
246
247         /* If a backwards scan is requested, move to the end of the chain */
248         if (ScanDirectionIsBackward(dir))
249         {
250                 while (BlockNumberIsValid(opaque->hasho_nextblkno))
251                         _hash_readnext(rel, &buf, &page, &opaque);
252         }
253
254         /* Now find the first tuple satisfying the qualification */
255         if (!_hash_step(scan, &buf, dir))
256                 return false;
257
258         /* if we're here, _hash_step found a valid tuple */
259         offnum = ItemPointerGetOffsetNumber(current);
260         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
261         page = BufferGetPage(buf);
262         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
263         so->hashso_heappos = itup->t_tid;
264
265         return true;
266 }
267
268 /*
269  *      _hash_step() -- step to the next valid item in a scan in the bucket.
270  *
271  *              If no valid record exists in the requested direction, return
272  *              false.  Else, return true and set the hashso_curpos for the
273  *              scan to the right thing.
274  *
275  *              'bufP' points to the current buffer, which is pinned and read-locked.
276  *              On success exit, we have pin and read-lock on whichever page
277  *              contains the right item; on failure, we have released all buffers.
278  */
279 bool
280 _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
281 {
282         Relation        rel = scan->indexRelation;
283         HashScanOpaque so = (HashScanOpaque) scan->opaque;
284         ItemPointer current;
285         Buffer          buf;
286         Page            page;
287         HashPageOpaque opaque;
288         OffsetNumber maxoff;
289         OffsetNumber offnum;
290         BlockNumber blkno;
291         IndexTuple      itup;
292
293         current = &(so->hashso_curpos);
294
295         buf = *bufP;
296         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
297         page = BufferGetPage(buf);
298         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
299
300         /*
301          * If _hash_step is called from _hash_first, current will not be valid, so
302          * we can't dereference it.  However, in that case, we presumably want to
303          * start at the beginning/end of the page...
304          */
305         maxoff = PageGetMaxOffsetNumber(page);
306         if (ItemPointerIsValid(current))
307                 offnum = ItemPointerGetOffsetNumber(current);
308         else
309                 offnum = InvalidOffsetNumber;
310
311         /*
312          * 'offnum' now points to the last tuple we examined (if any).
313          *
314          * continue to step through tuples until: 1) we get to the end of the
315          * bucket chain or 2) we find a valid tuple.
316          */
317         do
318         {
319                 switch (dir)
320                 {
321                         case ForwardScanDirection:
322                                 if (offnum != InvalidOffsetNumber)
323                                         offnum = OffsetNumberNext(offnum);      /* move forward */
324                                 else
325                                 {
326                                         /* new page, locate starting position by binary search */
327                                         offnum = _hash_binsearch(page, so->hashso_sk_hash);
328                                 }
329
330                                 for (;;)
331                                 {
332                                         /*
333                                          * check if we're still in the range of items with the
334                                          * target hash key
335                                          */
336                                         if (offnum <= maxoff)
337                                         {
338                                                 Assert(offnum >= FirstOffsetNumber);
339                                                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
340                                                 if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
341                                                         break;          /* yes, so exit for-loop */
342                                         }
343
344                                         /*
345                                          * ran off the end of this page, try the next
346                                          */
347                                         _hash_readnext(rel, &buf, &page, &opaque);
348                                         if (BufferIsValid(buf))
349                                         {
350                                                 maxoff = PageGetMaxOffsetNumber(page);
351                                                 offnum = _hash_binsearch(page, so->hashso_sk_hash);
352                                         }
353                                         else
354                                         {
355                                                 /* end of bucket */
356                                                 itup = NULL;
357                                                 break;  /* exit for-loop */
358                                         }
359                                 }
360                                 break;
361
362                         case BackwardScanDirection:
363                                 if (offnum != InvalidOffsetNumber)
364                                         offnum = OffsetNumberPrev(offnum);      /* move back */
365                                 else
366                                 {
367                                         /* new page, locate starting position by binary search */
368                                         offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
369                                 }
370
371                                 for (;;)
372                                 {
373                                         /*
374                                          * check if we're still in the range of items with the
375                                          * target hash key
376                                          */
377                                         if (offnum >= FirstOffsetNumber)
378                                         {
379                                                 Assert(offnum <= maxoff);
380                                                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
381                                                 if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
382                                                         break;          /* yes, so exit for-loop */
383                                         }
384
385                                         /*
386                                          * ran off the end of this page, try the next
387                                          */
388                                         _hash_readprev(rel, &buf, &page, &opaque);
389                                         if (BufferIsValid(buf))
390                                         {
391                                                 maxoff = PageGetMaxOffsetNumber(page);
392                                                 offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
393                                         }
394                                         else
395                                         {
396                                                 /* end of bucket */
397                                                 itup = NULL;
398                                                 break;  /* exit for-loop */
399                                         }
400                                 }
401                                 break;
402
403                         default:
404                                 /* NoMovementScanDirection */
405                                 /* this should not be reached */
406                                 itup = NULL;
407                                 break;
408                 }
409
410                 if (itup == NULL)
411                 {
412                         /* we ran off the end of the bucket without finding a match */
413                         *bufP = so->hashso_curbuf = InvalidBuffer;
414                         ItemPointerSetInvalid(current);
415                         return false;
416                 }
417
418                 /* check the tuple quals, loop around if not met */
419         } while (!_hash_checkqual(scan, itup));
420
421         /* if we made it to here, we've found a valid tuple */
422         blkno = BufferGetBlockNumber(buf);
423         *bufP = so->hashso_curbuf = buf;
424         ItemPointerSet(current, blkno, offnum);
425         return true;
426 }