2 * contrib/pgstattuple/pgstattuple.c
4 * Copyright (c) 2001,2002 Tatsuo Ishii
6 * Permission to use, copy, modify, and distribute this software and
7 * its documentation for any purpose, without fee, and without a
8 * written agreement is hereby granted, provided that the above
9 * copyright notice and this paragraph and the following two
10 * paragraphs appear in all copies.
12 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
13 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
14 * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
15 * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
16 * OF THE POSSIBILITY OF SUCH DAMAGE.
18 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
21 * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
22 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
27 #include "access/gist_private.h"
28 #include "access/hash.h"
29 #include "access/nbtree.h"
30 #include "access/relscan.h"
31 #include "catalog/namespace.h"
33 #include "miscadmin.h"
34 #include "storage/bufmgr.h"
35 #include "storage/lmgr.h"
36 #include "utils/builtins.h"
37 #include "utils/tqual.h"
/*
 * Register the two SQL-callable entry points defined in this file with
 * PG_FUNCTION_INFO_V1: pgstattuple() reads its first argument as text (a
 * relation name) and pgstattuplebyid() reads it as an OID.
 */
41 PG_FUNCTION_INFO_V1(pgstattuple);
42 PG_FUNCTION_INFO_V1(pgstattuplebyid);
/*
 * Accumulator for the raw statistics gathered while scanning a relation.
 * The scanning routines in this file add to these members and
 * build_pgstattuple_type() turns them into the SQL result row.
 *
 * NOTE(review): the embedded source numbering jumps from 50 to 55, and other
 * code in this file reads stat->table_len, stat->tuple_count and
 * stat->tuple_len, so those members are presumably declared on lines that
 * are not visible in this extract -- confirm against the complete file.
 */
45 * struct pgstattuple_type
47 * tuple_percent, dead_tuple_percent and free_percent are computable,
48 * so not defined here.
50 typedef struct pgstattuple_type
/* Number of items/tuples classified as dead, and their summed lengths. */
55 uint64 dead_tuple_count;
56 uint64 dead_tuple_len;
57 uint64 free_space; /* free/reusable space in bytes */
/*
 * Signature of a per-page callback used when scanning an index.  It receives
 * the statistics accumulator, the relation, the block number to examine and
 * the buffer access strategy to read that block with; it returns nothing.
 * pgstat_index() calls such a callback once for every block it visits.
 */
60 typedef void (*pgstat_page) (pgstattuple_type *, Relation, BlockNumber,
61 BufferAccessStrategy);
/*
 * Forward declarations of the file-local helpers.
 *
 * build_pgstattuple_type  - format accumulated counters as the result datum
 * pgstat_relation         - choose the scanner matching the relation kind
 * pgstat_heap             - scan a heap-like relation
 * pgstat_btree_page,
 * pgstat_hash_page,
 * pgstat_gist_page        - per-page callbacks matching pgstat_page
 * pgstat_index            - drive a per-page callback over an index
 * pgstat_index_page       - account for the items of one index page
 */
63 static Datum build_pgstattuple_type(pgstattuple_type *stat,
64 FunctionCallInfo fcinfo);
65 static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo);
66 static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo);
67 static void pgstat_btree_page(pgstattuple_type *stat,
68 Relation rel, BlockNumber blkno,
69 BufferAccessStrategy bstrategy);
70 static void pgstat_hash_page(pgstattuple_type *stat,
71 Relation rel, BlockNumber blkno,
72 BufferAccessStrategy bstrategy);
73 static void pgstat_gist_page(pgstattuple_type *stat,
74 Relation rel, BlockNumber blkno,
75 BufferAccessStrategy bstrategy);
76 static Datum pgstat_index(Relation rel, BlockNumber start,
77 pgstat_page pagefn, FunctionCallInfo fcinfo);
78 static void pgstat_index_page(pgstattuple_type *stat, Page page,
79 OffsetNumber minoff, OffsetNumber maxoff);
/*
 * build_pgstattuple_type
 *
 * Turn the statistics accumulated in *stat into the composite value that is
 * returned to SQL.
 *
 * stat:   counters gathered by the caller; only read here.
 * fcinfo: call context, used to look up the function's declared result type.
 *
 * Returns HeapTupleGetDatum() of a tuple assembled by
 * BuildTupleFromCStrings().  Raises elog(ERROR) when the declared result
 * type is not a composite (row) type.
 *
 * NOTE(review): the embedded source numbering in this extract has gaps
 * (for example 85 -> 91 and 113 -> 118), so some declarations and
 * statements of this function are not visible here.
 */
82 * build_pgstattuple_type -- build a pgstattuple_type tuple
85 build_pgstattuple_type(pgstattuple_type *stat, FunctionCallInfo fcinfo)
/* One string pointer per output column, plus the buffers they point into. */
91 char *values[NCOLUMNS];
92 char values_buf[NCOLUMNS][NCHARS];
95 double dead_tuple_percent;
96 double free_percent; /* free/reusable space in % */
98 AttInMetadata *attinmeta;
100 /* Build a tuple descriptor for our result type */
101 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
102 elog(ERROR, "return type must be a row type");
105 * Generate attribute metadata needed later to produce tuples from raw C
108 attinmeta = TupleDescGetAttInMetadata(tupdesc);
/*
 * The percentages below divide by table_len, so a zero-length table is
 * handled separately; the one assignment visible in that branch sets
 * dead_tuple_percent to 0.0.
 */
110 if (stat->table_len == 0)
113 dead_tuple_percent = 0.0;
/* Express each byte total as a percentage of the table length. */
118 tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
119 dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
120 free_percent = 100.0 * stat->free_space / stat->table_len;
124 * Prepare a values array for constructing the tuple. This should be an
125 * array of C strings which will be processed later by the appropriate
/* Point every values[] entry at its own backing buffer. */
128 for (i = 0; i < NCOLUMNS; i++)
129 values[i] = values_buf[i];
/*
 * Render the columns as text, in order: table_len, tuple_count, tuple_len,
 * tuple_percent, dead_tuple_count, dead_tuple_len, dead_tuple_percent,
 * free_space, free_percent.  Counters use INT64_FORMAT and percentages are
 * printed with two decimal places; each write is bounded by NCHARS.
 *
 * NOTE(review): after the loop above i equals NCOLUMNS, so i must be reset
 * to 0 before the first snprintf; that statement is not visible in this
 * extract (the embedded numbering skips 130) -- confirm it is present.
 *
 * NOTE(review): the visible counters are declared uint64 but are formatted
 * with INT64_FORMAT; values beyond the signed 64-bit range would presumably
 * print incorrectly -- confirm whether an unsigned format is intended.
 */
131 snprintf(values[i++], NCHARS, INT64_FORMAT, stat->table_len);
132 snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_count);
133 snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_len);
134 snprintf(values[i++], NCHARS, "%.2f", tuple_percent);
135 snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_count);
136 snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_len);
137 snprintf(values[i++], NCHARS, "%.2f", dead_tuple_percent);
138 snprintf(values[i++], NCHARS, INT64_FORMAT, stat->free_space);
139 snprintf(values[i++], NCHARS, "%.2f", free_percent);
/* Convert the strings into a tuple using the attribute metadata. */
142 tuple = BuildTupleFromCStrings(attinmeta, values);
144 /* make the tuple into a datum */
145 return HeapTupleGetDatum(tuple);
/*
 * pgstattuple -- SQL entry point taking the relation name as text.
 *
 * The text argument is split into a qualified name list, converted to a
 * RangeVar and opened with AccessShareLock; the statistics are then produced
 * by pgstat_relation() and returned as a datum.
 *
 * The relation is not closed here: the visible scanners reached through
 * pgstat_relation() (pgstat_heap() and pgstat_index()) call relation_close()
 * before returning.
 */
150 * returns live/dead tuples info
152 * C FUNCTION definition
153 * pgstattuple(text) returns pgstattuple_type
154 * see pgstattuple.sql for pgstattuple_type
159 pgstattuple(PG_FUNCTION_ARGS)
161 text *relname = PG_GETARG_TEXT_P(0);
/*
 * Error report for insufficient privilege.
 * NOTE(review): the condition guarding this report is on lines not visible
 * in this extract; presumably a superuser check, given the message --
 * confirm.
 */
167 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
168 (errmsg("must be superuser to use pgstattuple functions"))));
/* Resolve the (possibly schema-qualified) name and open the relation. */
171 relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
172 rel = relation_openrv(relrv, AccessShareLock);
174 PG_RETURN_DATUM(pgstat_relation(rel, fcinfo));
/*
 * pgstattuplebyid -- SQL entry point taking the relation OID.
 *
 * Same as pgstattuple() except that the relation is identified by OID and
 * opened directly with relation_open() under AccessShareLock.  The
 * statistics come from pgstat_relation().
 */
178 pgstattuplebyid(PG_FUNCTION_ARGS)
180 Oid relid = PG_GETARG_OID(0);
/*
 * Error report for insufficient privilege.
 * NOTE(review): the guarding condition is not visible in this extract;
 * presumably a superuser check, given the message -- confirm.
 */
185 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
186 (errmsg("must be superuser to use pgstattuple functions"))));
189 rel = relation_open(relid, AccessShareLock);
191 PG_RETURN_DATUM(pgstat_relation(rel, fcinfo));
/*
 * pgstat_relation -- pick the scanner that matches the kind of relation.
 *
 * rel:    an already opened relation.
 * fcinfo: call context, passed through so the result tuple can be built.
 *
 * Heap-like kinds (plain relation, materialized view, TOAST table, sequence)
 * are handed to pgstat_heap().  For indexes the access method (relam)
 * selects a per-page callback for pgstat_index().  Every other kind stores a
 * description in err and ends in a "not supported" error.  Temporary
 * relations belonging to other sessions are rejected up front.
 *
 * NOTE(review): several case labels and break statements of both switches
 * are not visible in this extract (the embedded numbering has gaps).
 */
198 pgstat_relation(Relation rel, FunctionCallInfo fcinfo)
203 * Reject attempts to read non-local temporary relations; we would be
204 * likely to get wrong data since we have no visibility into the owning
205 * session's local buffers.
207 if (RELATION_IS_OTHER_TEMP(rel))
209 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
210 errmsg("cannot access temporary tables of other sessions")));
212 switch (rel->rd_rel->relkind)
214 case RELKIND_RELATION:
215 case RELKIND_MATVIEW:
216 case RELKIND_TOASTVALUE:
217 case RELKIND_SEQUENCE:
218 return pgstat_heap(rel, fcinfo);
/*
 * Index dispatch on the access method.  Each supported method starts its
 * scan one block past a method-specific constant (BTREE_METAPAGE,
 * HASH_METAPAGE, GIST_ROOT_BLKNO), so that block itself is not passed to
 * the callback.  The case labels are not visible here; the callback names
 * indicate btree, hash and gist respectively.
 */
220 switch (rel->rd_rel->relam)
223 return pgstat_index(rel, BTREE_METAPAGE + 1,
224 pgstat_btree_page, fcinfo);
226 return pgstat_index(rel, HASH_METAPAGE + 1,
227 pgstat_hash_page, fcinfo);
229 return pgstat_index(rel, GIST_ROOT_BLKNO + 1,
230 pgstat_gist_page, fcinfo);
/* Unsupported cases only record a description used in the error below. */
235 err = "spgist index";
238 err = "unknown index";
245 case RELKIND_COMPOSITE_TYPE:
246 err = "composite type";
248 case RELKIND_FOREIGN_TABLE:
249 err = "foreign table";
/* Report the relation name together with the recorded description. */
257 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
258 errmsg("\"%s\" (%s) is not supported",
259 RelationGetRelationName(rel), err)));
260 return 0; /* should not happen */
/*
 * pgstat_heap -- gather statistics for a heap-like relation.
 *
 * rel:    an already opened relation; closed here with relation_close()
 *         before returning.
 * fcinfo: call context, passed to build_pgstattuple_type().
 *
 * The relation is scanned with SnapshotAny, and each returned tuple is
 * classified with a dirty snapshot.  Alongside the tuple scan every block is
 * read once to add up its free space.  table_len is the scan's block count
 * multiplied by BLCKSZ.
 *
 * NOTE(review): some statements are not visible in this extract (gaps in the
 * embedded numbering), including the else between the two accounting
 * branches, the increments of block and any scan teardown -- confirm against
 * the complete file.
 */
264 * pgstat_heap -- returns live/dead tuples info in a heap
267 pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
272 BlockNumber block = 0; /* next block to count free space in */
/* Block number of the tuple most recently returned by the scan. */
273 BlockNumber tupblock;
275 pgstattuple_type stat = {0};
276 SnapshotData SnapshotDirty;
278 /* Disable syncscan because we assume we scan from block zero upwards */
279 scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false);
280 InitDirtySnapshot(SnapshotDirty);
282 nblocks = scan->rs_nblocks; /* # blocks to be scanned */
284 /* scan the relation */
285 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
287 CHECK_FOR_INTERRUPTS();
289 /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
290 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
/*
 * Tuples that pass the dirty-snapshot test add to tuple_len; the
 * remaining statements add to the dead-tuple length and count.
 */
292 if (HeapTupleSatisfiesVisibility(tuple, &SnapshotDirty, scan->rs_cbuf))
294 stat.tuple_len += tuple->t_len;
299 stat.dead_tuple_len += tuple->t_len;
300 stat.dead_tuple_count++;
303 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
306 * To avoid physically reading the table twice, try to do the
307 * free-space scan in parallel with the heap scan. However,
308 * heap_getnext may find no tuples on a given page, so we cannot
309 * simply examine the pages returned by the heap scan.
311 tupblock = BlockIdGetBlockNumber(&tuple->t_self.ip_blkid);
/*
 * Catch the free-space accounting up to the block holding the current
 * tuple: read each block with the scan's strategy, take a share lock,
 * add its heap free space, then unlock and release it.
 */
313 while (block <= tupblock)
315 CHECK_FOR_INTERRUPTS();
317 buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block,
318 RBM_NORMAL, scan->rs_strategy);
319 LockBuffer(buffer, BUFFER_LOCK_SHARE);
320 stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
321 UnlockReleaseBuffer(buffer);
/*
 * Account for the free space of any blocks not reached by the loop
 * above, up to the block count captured at the start of the scan.
 */
326 while (block < nblocks)
328 CHECK_FOR_INTERRUPTS();
330 buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block,
331 RBM_NORMAL, scan->rs_strategy);
332 LockBuffer(buffer, BUFFER_LOCK_SHARE);
333 stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
334 UnlockReleaseBuffer(buffer);
339 relation_close(rel, AccessShareLock);
/* Cast first so the multiplication is carried out in 64 bits. */
341 stat.table_len = (uint64) nblocks *BLCKSZ;
343 return build_pgstattuple_type(&stat, fcinfo);
/*
 * pgstat_btree_page -- per-page callback for btree indexes.
 *
 * stat:      accumulator updated in place.
 * rel:       the index being scanned.
 * blkno:     block to examine.
 * bstrategy: buffer access strategy used for the read.
 *
 * The block is read and locked with BT_READ, classified, and released with
 * _bt_relbuf().  A fully empty page, or one flagged deleted or half-dead,
 * counts as BLCKSZ bytes of free space.  A leaf page has its items accounted
 * for by pgstat_index_page(), starting at P_FIRSTDATAKEY(opaque).
 *
 * NOTE(review): the test that identifies a "fully empty page" and the
 * handling of non-leaf pages are on lines not visible in this extract.
 */
347 * pgstat_btree_page -- check tuples in a btree page
350 pgstat_btree_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno,
351 BufferAccessStrategy bstrategy)
356 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
357 LockBuffer(buf, BT_READ);
358 page = BufferGetPage(buf);
360 /* Page is valid, see what to do with it */
363 /* fully empty page */
364 stat->free_space += BLCKSZ;
/* Otherwise inspect the btree-specific data in the page's special area. */
370 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
371 if (opaque->btpo_flags & (BTP_DELETED | BTP_HALF_DEAD))
373 /* recyclable page */
374 stat->free_space += BLCKSZ;
376 else if (P_ISLEAF(opaque))
378 pgstat_index_page(stat, page, P_FIRSTDATAKEY(opaque),
379 PageGetMaxOffsetNumber(page));
387 _bt_relbuf(rel, buf);
/*
 * pgstat_hash_page -- per-page callback for hash indexes.
 *
 * stat:      accumulator updated in place.
 * rel:       the index being scanned.
 * blkno:     block to examine.
 * bstrategy: buffer access strategy used for the read.
 *
 * A HASH_SHARE lock on the block is taken for the duration of the call and
 * dropped at the end, after the buffer has been released.  The page is only
 * interpreted when its special area has the size expected for
 * HashPageOpaqueData; anything else is treated as possibly corrupted.
 *
 * NOTE(review): most case labels of the switch are not visible in this
 * extract, so which page types map to which action cannot be fully
 * determined here.
 */
391 * pgstat_hash_page -- check tuples in a hash page
394 pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno,
395 BufferAccessStrategy bstrategy)
400 _hash_getlock(rel, blkno, HASH_SHARE);
401 buf = _hash_getbuf_with_strategy(rel, blkno, HASH_READ, 0, bstrategy);
402 page = BufferGetPage(buf);
/* Sanity check on the special area before trusting its contents. */
404 if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HashPageOpaqueData)))
406 HashPageOpaque opaque;
408 opaque = (HashPageOpaque) PageGetSpecialPointer(page);
409 switch (opaque->hasho_flag)
/* One page type counts the whole block as free space. */
412 stat->free_space += BLCKSZ;
/* Pages carrying index items are walked from the first offset. */
415 case LH_OVERFLOW_PAGE:
416 pgstat_index_page(stat, page, FirstOffsetNumber,
417 PageGetMaxOffsetNumber(page));
427 /* maybe corrupted */
/* Release in reverse order of acquisition: buffer first, then the lock. */
430 _hash_relbuf(rel, buf);
431 _hash_droplock(rel, blkno, HASH_SHARE);
/*
 * pgstat_gist_page -- per-page callback for GiST indexes.
 *
 * stat:      accumulator updated in place.
 * rel:       the index being scanned.
 * blkno:     block to examine.
 * bstrategy: buffer access strategy used for the read.
 *
 * The block is read, locked with GIST_SHARE and passed to gistcheckpage()
 * before use.  Leaf pages have their items accounted for by
 * pgstat_index_page(); the buffer is unlocked and released at the end.
 *
 * NOTE(review): the handling of non-leaf pages is on lines not visible in
 * this extract.
 */
435 * pgstat_gist_page -- check tuples in a gist page
438 pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno,
439 BufferAccessStrategy bstrategy)
444 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
445 LockBuffer(buf, GIST_SHARE);
446 gistcheckpage(rel, buf);
447 page = BufferGetPage(buf);
449 if (GistPageIsLeaf(page))
451 pgstat_index_page(stat, page, FirstOffsetNumber,
452 PageGetMaxOffsetNumber(page));
459 UnlockReleaseBuffer(buf);
/*
 * pgstat_index -- drive a per-page callback over the blocks of an index.
 *
 * rel:    an already opened index; closed here with relation_close() before
 *         returning.
 * start:  first block number of interest, supplied by the caller.
 * pagefn: callback invoked once for each visited block.
 * fcinfo: call context, passed to build_pgstattuple_type().
 *
 * The relation length is read while holding the relation extension lock.
 * When no unvisited blocks remain, table_len is set to the block count times
 * BLCKSZ; otherwise the blocks up to that length are handed to pagefn.
 *
 * NOTE(review): the initialisation of blkno (presumably from start) and the
 * enclosing loop that repeats the length check are on lines not visible in
 * this extract -- confirm against the complete file.
 */
463 * pgstat_index -- returns live/dead tuples info in a generic index
466 pgstat_index(Relation rel, BlockNumber start, pgstat_page pagefn,
467 FunctionCallInfo fcinfo)
471 BufferAccessStrategy bstrategy;
472 pgstattuple_type stat = {0};
474 /* prepare access strategy for this index */
475 bstrategy = GetAccessStrategy(BAS_BULKREAD);
480 /* Get the current relation length */
481 LockRelationForExtension(rel, ExclusiveLock);
482 nblocks = RelationGetNumberOfBlocks(rel);
483 UnlockRelationForExtension(rel, ExclusiveLock);
485 /* Quit if we've scanned the whole relation */
486 if (blkno >= nblocks)
/* Cast first so the multiplication is carried out in 64 bits. */
488 stat.table_len = (uint64) nblocks *BLCKSZ;
/* Visit every block up to the length just observed. */
493 for (; blkno < nblocks; blkno++)
495 CHECK_FOR_INTERRUPTS();
497 pagefn(&stat, rel, blkno, bstrategy);
501 relation_close(rel, AccessShareLock);
503 return build_pgstattuple_type(&stat, fcinfo);
/*
 * pgstat_index_page -- account for the items of a single index page.
 *
 * stat:   accumulator updated in place.
 * page:   the page to inspect; the visible callers hold a lock on its
 *         buffer while calling.
 * minoff: first item offset to consider.
 * maxoff: last item offset to consider (inclusive).
 *
 * Adds the page's free space to stat->free_space, then walks the item
 * identifiers in [minoff, maxoff].  Items flagged dead add to the dead count
 * and dead length; the length of the others is added to tuple_len.
 *
 * NOTE(review): this extract ends inside the function, and the else between
 * the two branches is not visible.  A matching increment of
 * stat->tuple_count is presumably on a line not shown -- confirm.
 */
507 * pgstat_index_page -- for generic index page
510 pgstat_index_page(pgstattuple_type *stat, Page page,
511 OffsetNumber minoff, OffsetNumber maxoff)
515 stat->free_space += PageGetFreeSpace(page);
517 for (i = minoff; i <= maxoff; i = OffsetNumberNext(i))
519 ItemId itemid = PageGetItemId(page, i);
521 if (ItemIdIsDead(itemid))
523 stat->dead_tuple_count++;
524 stat->dead_tuple_len += ItemIdGetLength(itemid);
529 stat->tuple_len += ItemIdGetLength(itemid);