]> granicus.if.org Git - postgresql/blob - src/backend/access/hash/hashutil.c
Reduce pinning and buffer content locking for btree scans.
[postgresql] / src / backend / access / hash / hashutil.c
1 /*-------------------------------------------------------------------------
2  *
3  * hashutil.c
4  *        Utility code for Postgres hash implementation.
5  *
6  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/access/hash/hashutil.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/hash.h"
18 #include "access/reloptions.h"
19 #include "access/relscan.h"
20 #include "utils/lsyscache.h"
21 #include "utils/rel.h"
22
23
24 /*
25  * _hash_checkqual -- does the index tuple satisfy the scan conditions?
26  */
27 bool
28 _hash_checkqual(IndexScanDesc scan, IndexTuple itup)
29 {
30         /*
31          * Currently, we can't check any of the scan conditions since we do not
32          * have the original index entry value to supply to the sk_func. Always
33          * return true; we expect that hashgettuple already set the recheck flag
34          * to make the main indexscan code do it.
35          */
36 #ifdef NOT_USED
37         TupleDesc       tupdesc = RelationGetDescr(scan->indexRelation);
38         ScanKey         key = scan->keyData;
39         int                     scanKeySize = scan->numberOfKeys;
40
41         while (scanKeySize > 0)
42         {
43                 Datum           datum;
44                 bool            isNull;
45                 Datum           test;
46
47                 datum = index_getattr(itup,
48                                                           key->sk_attno,
49                                                           tupdesc,
50                                                           &isNull);
51
52                 /* assume sk_func is strict */
53                 if (isNull)
54                         return false;
55                 if (key->sk_flags & SK_ISNULL)
56                         return false;
57
58                 test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
59                                                                  datum, key->sk_argument);
60
61                 if (!DatumGetBool(test))
62                         return false;
63
64                 key++;
65                 scanKeySize--;
66         }
67 #endif
68
69         return true;
70 }
71
72 /*
73  * _hash_datum2hashkey -- given a Datum, call the index's hash procedure
74  *
75  * The Datum is assumed to be of the index's column type, so we can use the
76  * "primary" hash procedure that's tracked for us by the generic index code.
77  */
78 uint32
79 _hash_datum2hashkey(Relation rel, Datum key)
80 {
81         FmgrInfo   *procinfo;
82         Oid                     collation;
83
84         /* XXX assumes index has only one attribute */
85         procinfo = index_getprocinfo(rel, 1, HASHPROC);
86         collation = rel->rd_indcollation[0];
87
88         return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key));
89 }
90
91 /*
92  * _hash_datum2hashkey_type -- given a Datum of a specified type,
93  *                      hash it in a fashion compatible with this index
94  *
95  * This is much more expensive than _hash_datum2hashkey, so use it only in
96  * cross-type situations.
97  */
98 uint32
99 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype)
100 {
101         RegProcedure hash_proc;
102         Oid                     collation;
103
104         /* XXX assumes index has only one attribute */
105         hash_proc = get_opfamily_proc(rel->rd_opfamily[0],
106                                                                   keytype,
107                                                                   keytype,
108                                                                   HASHPROC);
109         if (!RegProcedureIsValid(hash_proc))
110                 elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
111                          HASHPROC, keytype, keytype,
112                          RelationGetRelationName(rel));
113         collation = rel->rd_indcollation[0];
114
115         return DatumGetUInt32(OidFunctionCall1Coll(hash_proc, collation, key));
116 }
117
118 /*
119  * _hash_hashkey2bucket -- determine which bucket the hashkey maps to.
120  */
121 Bucket
122 _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
123                                          uint32 highmask, uint32 lowmask)
124 {
125         Bucket          bucket;
126
127         bucket = hashkey & highmask;
128         if (bucket > maxbucket)
129                 bucket = bucket & lowmask;
130
131         return bucket;
132 }
133
134 /*
135  * _hash_log2 -- returns ceil(lg2(num))
136  */
137 uint32
138 _hash_log2(uint32 num)
139 {
140         uint32          i,
141                                 limit;
142
143         limit = 1;
144         for (i = 0; limit < num; limit <<= 1, i++)
145                 ;
146         return i;
147 }
148
149 /*
150  * _hash_checkpage -- sanity checks on the format of all hash pages
151  *
152  * If flags is not zero, it is a bitwise OR of the acceptable values of
153  * hasho_flag.
154  */
155 void
156 _hash_checkpage(Relation rel, Buffer buf, int flags)
157 {
158         Page            page = BufferGetPage(buf);
159
160         /*
161          * ReadBuffer verifies that every newly-read page passes
162          * PageHeaderIsValid, which means it either contains a reasonably sane
163          * page header or is all-zero.  We have to defend against the all-zero
164          * case, however.
165          */
166         if (PageIsNew(page))
167                 ereport(ERROR,
168                                 (errcode(ERRCODE_INDEX_CORRUPTED),
169                          errmsg("index \"%s\" contains unexpected zero page at block %u",
170                                         RelationGetRelationName(rel),
171                                         BufferGetBlockNumber(buf)),
172                                  errhint("Please REINDEX it.")));
173
174         /*
175          * Additionally check that the special area looks sane.
176          */
177         if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
178                 ereport(ERROR,
179                                 (errcode(ERRCODE_INDEX_CORRUPTED),
180                                  errmsg("index \"%s\" contains corrupted page at block %u",
181                                                 RelationGetRelationName(rel),
182                                                 BufferGetBlockNumber(buf)),
183                                  errhint("Please REINDEX it.")));
184
185         if (flags)
186         {
187                 HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
188
189                 if ((opaque->hasho_flag & flags) == 0)
190                         ereport(ERROR,
191                                         (errcode(ERRCODE_INDEX_CORRUPTED),
192                                    errmsg("index \"%s\" contains corrupted page at block %u",
193                                                   RelationGetRelationName(rel),
194                                                   BufferGetBlockNumber(buf)),
195                                          errhint("Please REINDEX it.")));
196         }
197
198         /*
199          * When checking the metapage, also verify magic number and version.
200          */
201         if (flags == LH_META_PAGE)
202         {
203                 HashMetaPage metap = HashPageGetMeta(page);
204
205                 if (metap->hashm_magic != HASH_MAGIC)
206                         ereport(ERROR,
207                                         (errcode(ERRCODE_INDEX_CORRUPTED),
208                                          errmsg("index \"%s\" is not a hash index",
209                                                         RelationGetRelationName(rel))));
210
211                 if (metap->hashm_version != HASH_VERSION)
212                         ereport(ERROR,
213                                         (errcode(ERRCODE_INDEX_CORRUPTED),
214                                          errmsg("index \"%s\" has wrong hash version",
215                                                         RelationGetRelationName(rel)),
216                                          errhint("Please REINDEX it.")));
217         }
218 }
219
220 Datum
221 hashoptions(PG_FUNCTION_ARGS)
222 {
223         Datum           reloptions = PG_GETARG_DATUM(0);
224         bool            validate = PG_GETARG_BOOL(1);
225         bytea      *result;
226
227         result = default_reloptions(reloptions, validate, RELOPT_KIND_HASH);
228
229         if (result)
230                 PG_RETURN_BYTEA_P(result);
231         PG_RETURN_NULL();
232 }
233
234 /*
235  * _hash_get_indextuple_hashkey - get the hash index tuple's hash key value
236  */
237 uint32
238 _hash_get_indextuple_hashkey(IndexTuple itup)
239 {
240         char       *attp;
241
242         /*
243          * We assume the hash key is the first attribute and can't be null, so
244          * this can be done crudely but very very cheaply ...
245          */
246         attp = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
247         return *((uint32 *) attp);
248 }
249
250 /*
251  * _hash_form_tuple - form an index tuple containing hash code only
252  */
253 IndexTuple
254 _hash_form_tuple(Relation index, Datum *values, bool *isnull)
255 {
256         IndexTuple      itup;
257         uint32          hashkey;
258         Datum           hashkeydatum;
259         TupleDesc       hashdesc;
260
261         if (isnull[0])
262                 hashkeydatum = (Datum) 0;
263         else
264         {
265                 hashkey = _hash_datum2hashkey(index, values[0]);
266                 hashkeydatum = UInt32GetDatum(hashkey);
267         }
268         hashdesc = RelationGetDescr(index);
269         Assert(hashdesc->natts == 1);
270         itup = index_form_tuple(hashdesc, &hashkeydatum, isnull);
271         return itup;
272 }
273
274 /*
275  * _hash_binsearch - Return the offset number in the page where the
276  *                                       specified hash value should be sought or inserted.
277  *
278  * We use binary search, relying on the assumption that the existing entries
279  * are ordered by hash key.
280  *
281  * Returns the offset of the first index entry having hashkey >= hash_value,
282  * or the page's max offset plus one if hash_value is greater than all
283  * existing hash keys in the page.  This is the appropriate place to start
284  * a search, or to insert a new item.
285  */
286 OffsetNumber
287 _hash_binsearch(Page page, uint32 hash_value)
288 {
289         OffsetNumber upper;
290         OffsetNumber lower;
291
292         /* Loop invariant: lower <= desired place <= upper */
293         upper = PageGetMaxOffsetNumber(page) + 1;
294         lower = FirstOffsetNumber;
295
296         while (upper > lower)
297         {
298                 OffsetNumber off;
299                 IndexTuple      itup;
300                 uint32          hashkey;
301
302                 off = (upper + lower) / 2;
303                 Assert(OffsetNumberIsValid(off));
304
305                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
306                 hashkey = _hash_get_indextuple_hashkey(itup);
307                 if (hashkey < hash_value)
308                         lower = off + 1;
309                 else
310                         upper = off;
311         }
312
313         return lower;
314 }
315
316 /*
317  * _hash_binsearch_last
318  *
319  * Same as above, except that if there are multiple matching items in the
320  * page, we return the offset of the last one instead of the first one,
321  * and the possible range of outputs is 0..maxoffset not 1..maxoffset+1.
322  * This is handy for starting a new page in a backwards scan.
323  */
324 OffsetNumber
325 _hash_binsearch_last(Page page, uint32 hash_value)
326 {
327         OffsetNumber upper;
328         OffsetNumber lower;
329
330         /* Loop invariant: lower <= desired place <= upper */
331         upper = PageGetMaxOffsetNumber(page);
332         lower = FirstOffsetNumber - 1;
333
334         while (upper > lower)
335         {
336                 IndexTuple      itup;
337                 OffsetNumber off;
338                 uint32          hashkey;
339
340                 off = (upper + lower + 1) / 2;
341                 Assert(OffsetNumberIsValid(off));
342
343                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
344                 hashkey = _hash_get_indextuple_hashkey(itup);
345                 if (hashkey > hash_value)
346                         upper = off - 1;
347                 else
348                         lower = off;
349         }
350
351         return lower;
352 }