2 * contrib/pageinspect/btreefuncs.c
7 * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
9 * Permission to use, copy, modify, and distribute this software and
10 * its documentation for any purpose, without fee, and without a
11 * written agreement is hereby granted, provided that the above
12 * copyright notice and this paragraph and the following two
13 * paragraphs appear in all copies.
15 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17 * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18 * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19 * OF THE POSSIBILITY OF SUCH DAMAGE.
21 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24 * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
30 #include "access/nbtree.h"
31 #include "catalog/namespace.h"
32 #include "catalog/pg_am.h"
34 #include "miscadmin.h"
35 #include "utils/builtins.h"
36 #include "utils/rel.h"
39 PG_FUNCTION_INFO_V1(bt_metap);
40 PG_FUNCTION_INFO_V1(bt_page_items);
41 PG_FUNCTION_INFO_V1(bt_page_stats);
/* True when the relation's relkind is RELKIND_INDEX. */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
/* True when the relation's access method OID is BTREE_AM_OID. */
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)

/*
 * Report an error unless offnum lies within
 * [FirstOffsetNumber, PageGetMaxOffsetNumber(pg)].
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement; a bare { ... } block would
 * leave a stray empty statement behind and break an unbraced if/else around
 * the call.  Note that offnum is evaluated twice.
 */
#define CHECK_PAGE_OFFSET_RANGE(pg, offnum) \
	do { \
		if (!(FirstOffsetNumber <= (offnum) && \
			  (offnum) <= PageGetMaxOffsetNumber(pg))) \
			elog(ERROR, "page offset number out of range"); \
	} while (0)

/*
 * Report an error unless blkno is smaller than the relation's current
 * number of blocks.  Same single-statement wrapping as above.
 *
 * note: BlockNumber is unsigned, hence can't be negative
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
	do { \
		if (RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno)) \
			elog(ERROR, "block number out of range"); \
	} while (0)
56 /* ------------------------------------------------
57 * structure for single btree page statistics
58 * ------------------------------------------------
60 typedef struct BTPageStat
72 BlockNumber btpo_prev;
73 BlockNumber btpo_next;
80 BTCycleId btpo_cycleid;
84 /* -------------------------------------------------
85 * GetBTPageStatistics()
87 * Collect statistics for a single b-tree page
88 * -------------------------------------------------
91 GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
93 Page page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
94 PageHeader phdr = (PageHeader) page;
95 OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
96 BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
102 stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
104 stat->dead_items = stat->live_items = 0;
106 stat->page_size = PageGetPageSize(page);
108 /* page type (flags) */
109 if (P_ISDELETED(opaque))
112 stat->btpo.xact = opaque->btpo.xact;
115 else if (P_IGNORE(opaque))
117 else if (P_ISLEAF(opaque))
119 else if (P_ISROOT(opaque))
124 /* btpage opaque data */
125 stat->btpo_prev = opaque->btpo_prev;
126 stat->btpo_next = opaque->btpo_next;
127 stat->btpo.level = opaque->btpo.level;
128 stat->btpo_flags = opaque->btpo_flags;
129 stat->btpo_cycleid = opaque->btpo_cycleid;
131 /* count live and dead tuples, and free space */
132 for (off = FirstOffsetNumber; off <= maxoff; off++)
136 ItemId id = PageGetItemId(page, off);
138 itup = (IndexTuple) PageGetItem(page, id);
140 item_size += IndexTupleSize(itup);
142 if (!ItemIdIsDead(id))
147 stat->free_size = PageGetFreeSpace(page);
149 if ((stat->live_items + stat->dead_items) > 0)
150 stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
152 stat->avg_item_size = 0;
155 /* -----------------------------------------------
158 * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
159 * -----------------------------------------------
162 bt_page_stats(PG_FUNCTION_ARGS)
164 text *relname = PG_GETARG_TEXT_P(0);
165 uint32 blkno = PG_GETARG_UINT32(1);
178 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
179 (errmsg("must be superuser to use pageinspect functions"))));
181 relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
182 rel = relation_openrv(relrv, AccessShareLock);
184 if (!IS_INDEX(rel) || !IS_BTREE(rel))
185 elog(ERROR, "relation \"%s\" is not a btree index",
186 RelationGetRelationName(rel));
189 * Reject attempts to read non-local temporary relations; we would be
190 * likely to get wrong data since we have no visibility into the owning
191 * session's local buffers.
193 if (RELATION_IS_OTHER_TEMP(rel))
195 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
196 errmsg("cannot access temporary tables of other sessions")));
199 elog(ERROR, "block 0 is a meta page");
201 CHECK_RELATION_BLOCK_RANGE(rel, blkno);
203 buffer = ReadBuffer(rel, blkno);
204 LockBuffer(buffer, BUFFER_LOCK_SHARE);
206 /* keep compiler quiet */
207 stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
208 stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
210 GetBTPageStatistics(blkno, buffer, &stat);
212 UnlockReleaseBuffer(buffer);
213 relation_close(rel, AccessShareLock);
215 /* Build a tuple descriptor for our result type */
216 if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
217 elog(ERROR, "return type must be a row type");
220 values[j++] = psprintf("%d", stat.blkno);
221 values[j++] = psprintf("%c", stat.type);
222 values[j++] = psprintf("%d", stat.live_items);
223 values[j++] = psprintf("%d", stat.dead_items);
224 values[j++] = psprintf("%d", stat.avg_item_size);
225 values[j++] = psprintf("%d", stat.page_size);
226 values[j++] = psprintf("%d", stat.free_size);
227 values[j++] = psprintf("%d", stat.btpo_prev);
228 values[j++] = psprintf("%d", stat.btpo_next);
229 values[j++] = psprintf("%d", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level);
230 values[j++] = psprintf("%d", stat.btpo_flags);
232 tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
235 result = HeapTupleGetDatum(tuple);
237 PG_RETURN_DATUM(result);
240 /*-------------------------------------------------------
243 * Get IndexTupleData set in a btree page
245 * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
246 *-------------------------------------------------------
250 * cross-call data structure for SRF
259 bt_page_items(PG_FUNCTION_ARGS)
261 text *relname = PG_GETARG_TEXT_P(0);
262 uint32 blkno = PG_GETARG_UINT32(1);
266 FuncCallContext *fctx;
268 struct user_args *uargs;
272 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
273 (errmsg("must be superuser to use pageinspect functions"))));
275 if (SRF_IS_FIRSTCALL())
283 fctx = SRF_FIRSTCALL_INIT();
285 relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
286 rel = relation_openrv(relrv, AccessShareLock);
288 if (!IS_INDEX(rel) || !IS_BTREE(rel))
289 elog(ERROR, "relation \"%s\" is not a btree index",
290 RelationGetRelationName(rel));
293 * Reject attempts to read non-local temporary relations; we would be
294 * likely to get wrong data since we have no visibility into the
295 * owning session's local buffers.
297 if (RELATION_IS_OTHER_TEMP(rel))
299 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
300 errmsg("cannot access temporary tables of other sessions")));
303 elog(ERROR, "block 0 is a meta page");
305 CHECK_RELATION_BLOCK_RANGE(rel, blkno);
307 buffer = ReadBuffer(rel, blkno);
308 LockBuffer(buffer, BUFFER_LOCK_SHARE);
311 * We copy the page into local storage to avoid holding a pin on the
312 * buffer longer than we must, and possibly failing to release it at
313 * all if the calling query doesn't fetch all rows.
315 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
317 uargs = palloc(sizeof(struct user_args));
319 uargs->page = palloc(BLCKSZ);
321 BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST),
324 UnlockReleaseBuffer(buffer);
325 relation_close(rel, AccessShareLock);
327 uargs->offset = FirstOffsetNumber;
329 opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
331 if (P_ISDELETED(opaque))
332 elog(NOTICE, "page is deleted");
334 fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
336 /* Build a tuple descriptor for our result type */
337 if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
338 elog(ERROR, "return type must be a row type");
340 fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
342 fctx->user_fctx = uargs;
344 MemoryContextSwitchTo(mctx);
347 fctx = SRF_PERCALL_SETUP();
348 uargs = fctx->user_fctx;
350 if (fctx->call_cntr < fctx->max_calls)
360 id = PageGetItemId(uargs->page, uargs->offset);
362 if (!ItemIdIsValid(id))
363 elog(ERROR, "invalid ItemId");
365 itup = (IndexTuple) PageGetItem(uargs->page, id);
368 values[j++] = psprintf("%d", uargs->offset);
369 values[j++] = psprintf("(%u,%u)",
370 BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
371 itup->t_tid.ip_posid);
372 values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
373 values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
374 values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
376 ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
377 dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
378 dump = palloc0(dlen * 3 + 1);
380 for (off = 0; off < dlen; off++)
384 sprintf(dump, "%02x", *(ptr + off) & 0xff);
388 tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
389 result = HeapTupleGetDatum(tuple);
391 uargs->offset = uargs->offset + 1;
393 SRF_RETURN_NEXT(fctx, result);
399 SRF_RETURN_DONE(fctx);
404 /* ------------------------------------------------
407 * Get a btree's meta-page information
409 * Usage: SELECT * FROM bt_metap('t1_pkey')
410 * ------------------------------------------------
413 bt_metap(PG_FUNCTION_ARGS)
415 text *relname = PG_GETARG_TEXT_P(0);
419 BTMetaPageData *metad;
429 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
430 (errmsg("must be superuser to use pageinspect functions"))));
432 relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
433 rel = relation_openrv(relrv, AccessShareLock);
435 if (!IS_INDEX(rel) || !IS_BTREE(rel))
436 elog(ERROR, "relation \"%s\" is not a btree index",
437 RelationGetRelationName(rel));
440 * Reject attempts to read non-local temporary relations; we would be
441 * likely to get wrong data since we have no visibility into the owning
442 * session's local buffers.
444 if (RELATION_IS_OTHER_TEMP(rel))
446 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
447 errmsg("cannot access temporary tables of other sessions")));
449 buffer = ReadBuffer(rel, 0);
450 LockBuffer(buffer, BUFFER_LOCK_SHARE);
452 page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
453 metad = BTPageGetMeta(page);
455 /* Build a tuple descriptor for our result type */
456 if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
457 elog(ERROR, "return type must be a row type");
460 values[j++] = psprintf("%d", metad->btm_magic);
461 values[j++] = psprintf("%d", metad->btm_version);
462 values[j++] = psprintf("%d", metad->btm_root);
463 values[j++] = psprintf("%d", metad->btm_level);
464 values[j++] = psprintf("%d", metad->btm_fastroot);
465 values[j++] = psprintf("%d", metad->btm_fastlevel);
467 tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
470 result = HeapTupleGetDatum(tuple);
472 UnlockReleaseBuffer(buffer);
473 relation_close(rel, AccessShareLock);
475 PG_RETURN_DATUM(result);