]> granicus.if.org Git - postgresql/blob - src/backend/access/hash/hashsearch.c
Fix up pgstats counting of live and dead tuples to recognize that committed
[postgresql] / src / backend / access / hash / hashsearch.c
1 /*-------------------------------------------------------------------------
2  *
3  * hashsearch.c
4  *        search code for postgres hash tables
5  *
6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.50 2007/05/27 03:50:38 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/hash.h"
18 #include "pgstat.h"
19
20
21 /*
22  *      _hash_next() -- Get the next item in a scan.
23  *
24  *              On entry, we have a valid hashso_curpos in the scan, and a
25  *              pin and read lock on the page that contains that item.
26  *              We find the next item in the scan, if any.
27  *              On success exit, we have the page containing the next item
28  *              pinned and locked.
29  */
30 bool
31 _hash_next(IndexScanDesc scan, ScanDirection dir)
32 {
33         Relation        rel = scan->indexRelation;
34         HashScanOpaque so = (HashScanOpaque) scan->opaque;
35         Buffer          buf;
36         Page            page;
37         OffsetNumber offnum;
38         ItemPointer current;
39         IndexTuple      itup;
40
41         /* we still have the buffer pinned and read-locked */
42         buf = so->hashso_curbuf;
43         Assert(BufferIsValid(buf));
44
45         /*
46          * step to next valid tuple.
47          */
48         if (!_hash_step(scan, &buf, dir))
49                 return false;
50
51         /* if we're here, _hash_step found a valid tuple */
52         current = &(so->hashso_curpos);
53         offnum = ItemPointerGetOffsetNumber(current);
54         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
55         page = BufferGetPage(buf);
56         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
57         scan->xs_ctup.t_self = itup->t_tid;
58
59         return true;
60 }
61
62 /*
63  * Advance to next page in a bucket, if any.
64  */
65 static void
66 _hash_readnext(Relation rel,
67                            Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
68 {
69         BlockNumber blkno;
70
71         blkno = (*opaquep)->hasho_nextblkno;
72         _hash_relbuf(rel, *bufp);
73         *bufp = InvalidBuffer;
74         if (BlockNumberIsValid(blkno))
75         {
76                 *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
77                 *pagep = BufferGetPage(*bufp);
78                 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
79         }
80 }
81
82 /*
83  * Advance to previous page in a bucket, if any.
84  */
85 static void
86 _hash_readprev(Relation rel,
87                            Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
88 {
89         BlockNumber blkno;
90
91         blkno = (*opaquep)->hasho_prevblkno;
92         _hash_relbuf(rel, *bufp);
93         *bufp = InvalidBuffer;
94         if (BlockNumberIsValid(blkno))
95         {
96                 *bufp = _hash_getbuf(rel, blkno, HASH_READ,
97                                                          LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
98                 *pagep = BufferGetPage(*bufp);
99                 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
100         }
101 }
102
103 /*
104  *      _hash_first() -- Find the first item in a scan.
105  *
106  *              Find the first item in the index that
107  *              satisfies the qualification associated with the scan descriptor. On
108  *              success, the page containing the current index tuple is read locked
109  *              and pinned, and the scan's opaque data entry is updated to
110  *              include the buffer.
111  */
112 bool
113 _hash_first(IndexScanDesc scan, ScanDirection dir)
114 {
115         Relation        rel = scan->indexRelation;
116         HashScanOpaque so = (HashScanOpaque) scan->opaque;
117         ScanKey         cur;
118         uint32          hashkey;
119         Bucket          bucket;
120         BlockNumber blkno;
121         Buffer          buf;
122         Buffer          metabuf;
123         Page            page;
124         HashPageOpaque opaque;
125         HashMetaPage metap;
126         IndexTuple      itup;
127         ItemPointer current;
128         OffsetNumber offnum;
129
130         pgstat_count_index_scan(rel);
131
132         current = &(so->hashso_curpos);
133         ItemPointerSetInvalid(current);
134
135         /*
136          * We do not support hash scans with no index qualification, because we
137          * would have to read the whole index rather than just one bucket. That
138          * creates a whole raft of problems, since we haven't got a practical way
139          * to lock all the buckets against splits or compactions.
140          */
141         if (scan->numberOfKeys < 1)
142                 ereport(ERROR,
143                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
144                                  errmsg("hash indexes do not support whole-index scans")));
145
146         /* There may be more than one index qual, but we hash only the first */
147         cur = &scan->keyData[0];
148
149         /* We support only single-column hash indexes */
150         Assert(cur->sk_attno == 1);
151         /* And there's only one operator strategy, too */
152         Assert(cur->sk_strategy == HTEqualStrategyNumber);
153
154         /*
155          * If the constant in the index qual is NULL, assume it cannot match any
156          * items in the index.
157          */
158         if (cur->sk_flags & SK_ISNULL)
159                 return false;
160
161         /*
162          * Okay to compute the hash key.  We want to do this before acquiring any
163          * locks, in case a user-defined hash function happens to be slow.
164          *
165          * If scankey operator is not a cross-type comparison, we can use the
166          * cached hash function; otherwise gotta look it up in the catalogs.
167          *
168          * We support the convention that sk_subtype == InvalidOid means the
169          * opclass input type; this is a hack to simplify life for ScanKeyInit().
170          */
171         if (cur->sk_subtype == rel->rd_opcintype[0] ||
172                 cur->sk_subtype == InvalidOid)
173                 hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
174         else
175                 hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
176                                                                                    cur->sk_subtype);
177
178         /*
179          * Acquire shared split lock so we can compute the target bucket safely
180          * (see README).
181          */
182         _hash_getlock(rel, 0, HASH_SHARE);
183
184         /* Read the metapage */
185         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
186         metap = (HashMetaPage) BufferGetPage(metabuf);
187
188         /*
189          * Compute the target bucket number, and convert to block number.
190          */
191         bucket = _hash_hashkey2bucket(hashkey,
192                                                                   metap->hashm_maxbucket,
193                                                                   metap->hashm_highmask,
194                                                                   metap->hashm_lowmask);
195
196         blkno = BUCKET_TO_BLKNO(metap, bucket);
197
198         /* done with the metapage */
199         _hash_relbuf(rel, metabuf);
200
201         /*
202          * Acquire share lock on target bucket; then we can release split lock.
203          */
204         _hash_getlock(rel, blkno, HASH_SHARE);
205
206         _hash_droplock(rel, 0, HASH_SHARE);
207
208         /* Update scan opaque state to show we have lock on the bucket */
209         so->hashso_bucket = bucket;
210         so->hashso_bucket_valid = true;
211         so->hashso_bucket_blkno = blkno;
212
213         /* Fetch the primary bucket page for the bucket */
214         buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
215         page = BufferGetPage(buf);
216         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
217         Assert(opaque->hasho_bucket == bucket);
218
219         /* If a backwards scan is requested, move to the end of the chain */
220         if (ScanDirectionIsBackward(dir))
221         {
222                 while (BlockNumberIsValid(opaque->hasho_nextblkno))
223                         _hash_readnext(rel, &buf, &page, &opaque);
224         }
225
226         /* Now find the first tuple satisfying the qualification */
227         if (!_hash_step(scan, &buf, dir))
228                 return false;
229
230         /* if we're here, _hash_step found a valid tuple */
231         offnum = ItemPointerGetOffsetNumber(current);
232         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
233         page = BufferGetPage(buf);
234         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
235         scan->xs_ctup.t_self = itup->t_tid;
236
237         return true;
238 }
239
240 /*
241  *      _hash_step() -- step to the next valid item in a scan in the bucket.
242  *
243  *              If no valid record exists in the requested direction, return
244  *              false.  Else, return true and set the hashso_curpos for the
245  *              scan to the right thing.
246  *
247  *              'bufP' points to the current buffer, which is pinned and read-locked.
248  *              On success exit, we have pin and read-lock on whichever page
249  *              contains the right item; on failure, we have released all buffers.
250  */
251 bool
252 _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
253 {
254         Relation        rel = scan->indexRelation;
255         HashScanOpaque so = (HashScanOpaque) scan->opaque;
256         ItemPointer current;
257         Buffer          buf;
258         Page            page;
259         HashPageOpaque opaque;
260         OffsetNumber maxoff;
261         OffsetNumber offnum;
262         BlockNumber blkno;
263         IndexTuple      itup;
264
265         current = &(so->hashso_curpos);
266
267         buf = *bufP;
268         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
269         page = BufferGetPage(buf);
270         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
271
272         /*
273          * If _hash_step is called from _hash_first, current will not be valid, so
274          * we can't dereference it.  However, in that case, we presumably want to
275          * start at the beginning/end of the page...
276          */
277         maxoff = PageGetMaxOffsetNumber(page);
278         if (ItemPointerIsValid(current))
279                 offnum = ItemPointerGetOffsetNumber(current);
280         else
281                 offnum = InvalidOffsetNumber;
282
283         /*
284          * 'offnum' now points to the last tuple we have seen (if any).
285          *
286          * continue to step through tuples until: 1) we get to the end of the
287          * bucket chain or 2) we find a valid tuple.
288          */
289         do
290         {
291                 switch (dir)
292                 {
293                         case ForwardScanDirection:
294                                 if (offnum != InvalidOffsetNumber)
295                                         offnum = OffsetNumberNext(offnum);      /* move forward */
296                                 else
297                                         offnum = FirstOffsetNumber; /* new page */
298
299                                 while (offnum > maxoff)
300                                 {
301                                         /*
302                                          * either this page is empty (maxoff ==
303                                          * InvalidOffsetNumber) or we ran off the end.
304                                          */
305                                         _hash_readnext(rel, &buf, &page, &opaque);
306                                         if (BufferIsValid(buf))
307                                         {
308                                                 maxoff = PageGetMaxOffsetNumber(page);
309                                                 offnum = FirstOffsetNumber;
310                                         }
311                                         else
312                                         {
313                                                 /* end of bucket */
314                                                 maxoff = offnum = InvalidOffsetNumber;
315                                                 break;  /* exit while */
316                                         }
317                                 }
318                                 break;
319
320                         case BackwardScanDirection:
321                                 if (offnum != InvalidOffsetNumber)
322                                         offnum = OffsetNumberPrev(offnum);      /* move back */
323                                 else
324                                         offnum = maxoff;        /* new page */
325
326                                 while (offnum < FirstOffsetNumber)
327                                 {
328                                         /*
329                                          * either this page is empty (offnum ==
330                                          * InvalidOffsetNumber) or we ran off the end.
331                                          */
332                                         _hash_readprev(rel, &buf, &page, &opaque);
333                                         if (BufferIsValid(buf))
334                                                 maxoff = offnum = PageGetMaxOffsetNumber(page);
335                                         else
336                                         {
337                                                 /* end of bucket */
338                                                 maxoff = offnum = InvalidOffsetNumber;
339                                                 break;  /* exit while */
340                                         }
341                                 }
342                                 break;
343
344                         default:
345                                 /* NoMovementScanDirection */
346                                 /* this should not be reached */
347                                 break;
348                 }
349
350                 /* we ran off the end of the world without finding a match */
351                 if (offnum == InvalidOffsetNumber)
352                 {
353                         *bufP = so->hashso_curbuf = InvalidBuffer;
354                         ItemPointerSetInvalid(current);
355                         return false;
356                 }
357
358                 /* get ready to check this tuple */
359                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
360         } while (!_hash_checkqual(scan, itup));
361
362         /* if we made it to here, we've found a valid tuple */
363         blkno = BufferGetBlockNumber(buf);
364         *bufP = so->hashso_curbuf = buf;
365         ItemPointerSet(current, blkno, offnum);
366         return true;
367 }