granicus.if.org Git - postgresql/blob - contrib/pageinspect/btreefuncs.c
Modify BufferGetPage() to prepare for "snapshot too old" feature
[postgresql] / contrib / pageinspect / btreefuncs.c
1 /*
2  * contrib/pageinspect/btreefuncs.c
3  *
4  *
5  * btreefuncs.c
6  *
7  * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
8  *
9  * Permission to use, copy, modify, and distribute this software and
10  * its documentation for any purpose, without fee, and without a
11  * written agreement is hereby granted, provided that the above
12  * copyright notice and this paragraph and the following two
13  * paragraphs appear in all copies.
14  *
15  * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16  * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18  * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19  * OF THE POSSIBILITY OF SUCH DAMAGE.
20  *
21  * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24  * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
26  */
27
28 #include "postgres.h"
29
30 #include "access/nbtree.h"
31 #include "catalog/namespace.h"
32 #include "catalog/pg_am.h"
33 #include "funcapi.h"
34 #include "miscadmin.h"
35 #include "utils/builtins.h"
36 #include "utils/rel.h"
37
38
PG_FUNCTION_INFO_V1(bt_metap);
PG_FUNCTION_INFO_V1(bt_page_items);
PG_FUNCTION_INFO_V1(bt_page_stats);

/* Predicates on an open relation: is it an index, and is it a btree? */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)

/*
 * Raise an error unless offnum lies in [FirstOffsetNumber, max offset of pg].
 *
 * The body is wrapped in do { ... } while (0) so that the macro followed by
 * a semicolon forms exactly one statement; a bare { ... } block would break
 * an enclosing unbraced if/else.
 */
#define CHECK_PAGE_OFFSET_RANGE(pg, offnum) \
	do { \
		if (!(FirstOffsetNumber <= (offnum) && \
			  (offnum) <= PageGetMaxOffsetNumber(pg))) \
			elog(ERROR, "page offset number out of range"); \
	} while (0)

/*
 * Raise an error unless blkno is an existing block of rel.
 *
 * note: BlockNumber is unsigned, hence can't be negative
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
	do { \
		if (RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno)) \
			elog(ERROR, "block number out of range"); \
	} while (0)
55
56 /* ------------------------------------------------
57  * structure for single btree page statistics
58  * ------------------------------------------------
59  */
60 typedef struct BTPageStat
61 {
62         uint32          blkno;
63         uint32          live_items;
64         uint32          dead_items;
65         uint32          page_size;
66         uint32          max_avail;
67         uint32          free_size;
68         uint32          avg_item_size;
69         char            type;
70
71         /* opaque data */
72         BlockNumber btpo_prev;
73         BlockNumber btpo_next;
74         union
75         {
76                 uint32          level;
77                 TransactionId xact;
78         }                       btpo;
79         uint16          btpo_flags;
80         BTCycleId       btpo_cycleid;
81 } BTPageStat;
82
83
84 /* -------------------------------------------------
85  * GetBTPageStatistics()
86  *
87  * Collect statistics of single b-tree page
88  * -------------------------------------------------
89  */
90 static void
91 GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
92 {
93         Page            page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
94         PageHeader      phdr = (PageHeader) page;
95         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
96         BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
97         int                     item_size = 0;
98         int                     off;
99
100         stat->blkno = blkno;
101
102         stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
103
104         stat->dead_items = stat->live_items = 0;
105
106         stat->page_size = PageGetPageSize(page);
107
108         /* page type (flags) */
109         if (P_ISDELETED(opaque))
110         {
111                 stat->type = 'd';
112                 stat->btpo.xact = opaque->btpo.xact;
113                 return;
114         }
115         else if (P_IGNORE(opaque))
116                 stat->type = 'e';
117         else if (P_ISLEAF(opaque))
118                 stat->type = 'l';
119         else if (P_ISROOT(opaque))
120                 stat->type = 'r';
121         else
122                 stat->type = 'i';
123
124         /* btpage opaque data */
125         stat->btpo_prev = opaque->btpo_prev;
126         stat->btpo_next = opaque->btpo_next;
127         stat->btpo.level = opaque->btpo.level;
128         stat->btpo_flags = opaque->btpo_flags;
129         stat->btpo_cycleid = opaque->btpo_cycleid;
130
131         /* count live and dead tuples, and free space */
132         for (off = FirstOffsetNumber; off <= maxoff; off++)
133         {
134                 IndexTuple      itup;
135
136                 ItemId          id = PageGetItemId(page, off);
137
138                 itup = (IndexTuple) PageGetItem(page, id);
139
140                 item_size += IndexTupleSize(itup);
141
142                 if (!ItemIdIsDead(id))
143                         stat->live_items++;
144                 else
145                         stat->dead_items++;
146         }
147         stat->free_size = PageGetFreeSpace(page);
148
149         if ((stat->live_items + stat->dead_items) > 0)
150                 stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
151         else
152                 stat->avg_item_size = 0;
153 }
154
155 /* -----------------------------------------------
156  * bt_page_stats()
157  *
158  * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
159  * -----------------------------------------------
160  */
161 Datum
162 bt_page_stats(PG_FUNCTION_ARGS)
163 {
164         text       *relname = PG_GETARG_TEXT_P(0);
165         uint32          blkno = PG_GETARG_UINT32(1);
166         Buffer          buffer;
167         Relation        rel;
168         RangeVar   *relrv;
169         Datum           result;
170         HeapTuple       tuple;
171         TupleDesc       tupleDesc;
172         int                     j;
173         char       *values[11];
174         BTPageStat      stat;
175
176         if (!superuser())
177                 ereport(ERROR,
178                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
179                                  (errmsg("must be superuser to use pageinspect functions"))));
180
181         relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
182         rel = relation_openrv(relrv, AccessShareLock);
183
184         if (!IS_INDEX(rel) || !IS_BTREE(rel))
185                 elog(ERROR, "relation \"%s\" is not a btree index",
186                          RelationGetRelationName(rel));
187
188         /*
189          * Reject attempts to read non-local temporary relations; we would be
190          * likely to get wrong data since we have no visibility into the owning
191          * session's local buffers.
192          */
193         if (RELATION_IS_OTHER_TEMP(rel))
194                 ereport(ERROR,
195                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
196                                  errmsg("cannot access temporary tables of other sessions")));
197
198         if (blkno == 0)
199                 elog(ERROR, "block 0 is a meta page");
200
201         CHECK_RELATION_BLOCK_RANGE(rel, blkno);
202
203         buffer = ReadBuffer(rel, blkno);
204         LockBuffer(buffer, BUFFER_LOCK_SHARE);
205
206         /* keep compiler quiet */
207         stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
208         stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
209
210         GetBTPageStatistics(blkno, buffer, &stat);
211
212         UnlockReleaseBuffer(buffer);
213         relation_close(rel, AccessShareLock);
214
215         /* Build a tuple descriptor for our result type */
216         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
217                 elog(ERROR, "return type must be a row type");
218
219         j = 0;
220         values[j++] = psprintf("%d", stat.blkno);
221         values[j++] = psprintf("%c", stat.type);
222         values[j++] = psprintf("%d", stat.live_items);
223         values[j++] = psprintf("%d", stat.dead_items);
224         values[j++] = psprintf("%d", stat.avg_item_size);
225         values[j++] = psprintf("%d", stat.page_size);
226         values[j++] = psprintf("%d", stat.free_size);
227         values[j++] = psprintf("%d", stat.btpo_prev);
228         values[j++] = psprintf("%d", stat.btpo_next);
229         values[j++] = psprintf("%d", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level);
230         values[j++] = psprintf("%d", stat.btpo_flags);
231
232         tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
233                                                                    values);
234
235         result = HeapTupleGetDatum(tuple);
236
237         PG_RETURN_DATUM(result);
238 }
239
240 /*-------------------------------------------------------
241  * bt_page_items()
242  *
243  * Get IndexTupleData set in a btree page
244  *
245  * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
246  *-------------------------------------------------------
247  */
248
249 /*
250  * cross-call data structure for SRF
251  */
252 struct user_args
253 {
254         Page            page;
255         OffsetNumber offset;
256 };
257
258 Datum
259 bt_page_items(PG_FUNCTION_ARGS)
260 {
261         text       *relname = PG_GETARG_TEXT_P(0);
262         uint32          blkno = PG_GETARG_UINT32(1);
263         Datum           result;
264         char       *values[6];
265         HeapTuple       tuple;
266         FuncCallContext *fctx;
267         MemoryContext mctx;
268         struct user_args *uargs;
269
270         if (!superuser())
271                 ereport(ERROR,
272                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
273                                  (errmsg("must be superuser to use pageinspect functions"))));
274
275         if (SRF_IS_FIRSTCALL())
276         {
277                 RangeVar   *relrv;
278                 Relation        rel;
279                 Buffer          buffer;
280                 BTPageOpaque opaque;
281                 TupleDesc       tupleDesc;
282
283                 fctx = SRF_FIRSTCALL_INIT();
284
285                 relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
286                 rel = relation_openrv(relrv, AccessShareLock);
287
288                 if (!IS_INDEX(rel) || !IS_BTREE(rel))
289                         elog(ERROR, "relation \"%s\" is not a btree index",
290                                  RelationGetRelationName(rel));
291
292                 /*
293                  * Reject attempts to read non-local temporary relations; we would be
294                  * likely to get wrong data since we have no visibility into the
295                  * owning session's local buffers.
296                  */
297                 if (RELATION_IS_OTHER_TEMP(rel))
298                         ereport(ERROR,
299                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
300                                 errmsg("cannot access temporary tables of other sessions")));
301
302                 if (blkno == 0)
303                         elog(ERROR, "block 0 is a meta page");
304
305                 CHECK_RELATION_BLOCK_RANGE(rel, blkno);
306
307                 buffer = ReadBuffer(rel, blkno);
308                 LockBuffer(buffer, BUFFER_LOCK_SHARE);
309
310                 /*
311                  * We copy the page into local storage to avoid holding pin on the
312                  * buffer longer than we must, and possibly failing to release it at
313                  * all if the calling query doesn't fetch all rows.
314                  */
315                 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
316
317                 uargs = palloc(sizeof(struct user_args));
318
319                 uargs->page = palloc(BLCKSZ);
320                 memcpy(uargs->page,
321                            BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST),
322                            BLCKSZ);
323
324                 UnlockReleaseBuffer(buffer);
325                 relation_close(rel, AccessShareLock);
326
327                 uargs->offset = FirstOffsetNumber;
328
329                 opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
330
331                 if (P_ISDELETED(opaque))
332                         elog(NOTICE, "page is deleted");
333
334                 fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
335
336                 /* Build a tuple descriptor for our result type */
337                 if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
338                         elog(ERROR, "return type must be a row type");
339
340                 fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
341
342                 fctx->user_fctx = uargs;
343
344                 MemoryContextSwitchTo(mctx);
345         }
346
347         fctx = SRF_PERCALL_SETUP();
348         uargs = fctx->user_fctx;
349
350         if (fctx->call_cntr < fctx->max_calls)
351         {
352                 ItemId          id;
353                 IndexTuple      itup;
354                 int                     j;
355                 int                     off;
356                 int                     dlen;
357                 char       *dump;
358                 char       *ptr;
359
360                 id = PageGetItemId(uargs->page, uargs->offset);
361
362                 if (!ItemIdIsValid(id))
363                         elog(ERROR, "invalid ItemId");
364
365                 itup = (IndexTuple) PageGetItem(uargs->page, id);
366
367                 j = 0;
368                 values[j++] = psprintf("%d", uargs->offset);
369                 values[j++] = psprintf("(%u,%u)",
370                                                            BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
371                                                            itup->t_tid.ip_posid);
372                 values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
373                 values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
374                 values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
375
376                 ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
377                 dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
378                 dump = palloc0(dlen * 3 + 1);
379                 values[j] = dump;
380                 for (off = 0; off < dlen; off++)
381                 {
382                         if (off > 0)
383                                 *dump++ = ' ';
384                         sprintf(dump, "%02x", *(ptr + off) & 0xff);
385                         dump += 2;
386                 }
387
388                 tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
389                 result = HeapTupleGetDatum(tuple);
390
391                 uargs->offset = uargs->offset + 1;
392
393                 SRF_RETURN_NEXT(fctx, result);
394         }
395         else
396         {
397                 pfree(uargs->page);
398                 pfree(uargs);
399                 SRF_RETURN_DONE(fctx);
400         }
401 }
402
403
404 /* ------------------------------------------------
405  * bt_metap()
406  *
407  * Get a btree's meta-page information
408  *
409  * Usage: SELECT * FROM bt_metap('t1_pkey')
410  * ------------------------------------------------
411  */
412 Datum
413 bt_metap(PG_FUNCTION_ARGS)
414 {
415         text       *relname = PG_GETARG_TEXT_P(0);
416         Datum           result;
417         Relation        rel;
418         RangeVar   *relrv;
419         BTMetaPageData *metad;
420         TupleDesc       tupleDesc;
421         int                     j;
422         char       *values[6];
423         Buffer          buffer;
424         Page            page;
425         HeapTuple       tuple;
426
427         if (!superuser())
428                 ereport(ERROR,
429                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
430                                  (errmsg("must be superuser to use pageinspect functions"))));
431
432         relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
433         rel = relation_openrv(relrv, AccessShareLock);
434
435         if (!IS_INDEX(rel) || !IS_BTREE(rel))
436                 elog(ERROR, "relation \"%s\" is not a btree index",
437                          RelationGetRelationName(rel));
438
439         /*
440          * Reject attempts to read non-local temporary relations; we would be
441          * likely to get wrong data since we have no visibility into the owning
442          * session's local buffers.
443          */
444         if (RELATION_IS_OTHER_TEMP(rel))
445                 ereport(ERROR,
446                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
447                                  errmsg("cannot access temporary tables of other sessions")));
448
449         buffer = ReadBuffer(rel, 0);
450         LockBuffer(buffer, BUFFER_LOCK_SHARE);
451
452         page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
453         metad = BTPageGetMeta(page);
454
455         /* Build a tuple descriptor for our result type */
456         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
457                 elog(ERROR, "return type must be a row type");
458
459         j = 0;
460         values[j++] = psprintf("%d", metad->btm_magic);
461         values[j++] = psprintf("%d", metad->btm_version);
462         values[j++] = psprintf("%d", metad->btm_root);
463         values[j++] = psprintf("%d", metad->btm_level);
464         values[j++] = psprintf("%d", metad->btm_fastroot);
465         values[j++] = psprintf("%d", metad->btm_fastlevel);
466
467         tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
468                                                                    values);
469
470         result = HeapTupleGetDatum(tuple);
471
472         UnlockReleaseBuffer(buffer);
473         relation_close(rel, AccessShareLock);
474
475         PG_RETURN_DATUM(result);
476 }