1 /*-------------------------------------------------------------------------
4 * Functions to investigate heap pages
6 * We check the input to these functions for corrupt pointers etc. that
7 * might cause crashes, but at the same time we try to print out as much
8 * information as possible, even if it's nonsense. That's because if a
9 * page is corrupt, we don't know why and how exactly it is corrupt, so we
10 * let the user judge it.
12 * These functions are restricted to superusers for the fear of introducing
13 * security holes if the input checking isn't as water-tight as it should be.
14 * You'd need to be superuser to obtain a raw page image anyway, so
15 * there's hardly any use case for using these without superuser-rights
18 * Copyright (c) 2007-2019, PostgreSQL Global Development Group
21 * contrib/pageinspect/heapfuncs.c
23 *-------------------------------------------------------------------------
28 #include "pageinspect.h"
30 #include "access/htup_details.h"
31 #include "access/relation.h"
33 #include "catalog/pg_type.h"
34 #include "miscadmin.h"
35 #include "utils/array.h"
36 #include "utils/builtins.h"
37 #include "utils/rel.h"
/*
 * Tuples can no longer be created with OIDs, but a cluster that was brought
 * forward with pg_upgrade from an older release may still contain tuples
 * that carry one, and it seems worthwhile to display it.
 *
 * When HEAP_HASOID_OLD is set in t_infomask, the OID is read from the
 * sizeof(Oid) bytes that end at offset t_hoff from the start of the header.
 * Otherwise the macro evaluates to InvalidOid.
 *
 * Note that 'tup' is evaluated more than once, so it must not have side
 * effects.
 */
#define HeapTupleHeaderGetOidOld(tup) \
	( \
		(((tup)->t_infomask & HEAP_HASOID_OLD) != 0) ? \
			*((Oid *) ((char *) (tup) + (tup)->t_hoff - sizeof(Oid))) \
		: \
			InvalidOid \
	)
56 * Converts a bits8-array of 'len' bits to a human-readable
57 * c-string representation.
60 bits_to_text(bits8 *bits, int len)
65 str = palloc(len + 1);
67 for (i = 0; i < len; i++)
68 str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
79 * Converts a c-string representation of bits into a bits8-array. This is
80 * the reverse operation of previous routine.
83 text_to_bits(char *str, int len)
89 bits = palloc(len + 1);
96 if ((str[off] == '0') || (str[off] == '1'))
97 byte = byte | ((str[off] - '0') << off % 8);
100 (errcode(ERRCODE_DATA_CORRUPTED),
101 errmsg("illegal character '%c' in t_bits string", str[off])));
104 bits[off / 8] = byte;
115 * Allows inspection of line pointers and tuple headers of a heap page.
117 PG_FUNCTION_INFO_V1(heap_page_items);
119 typedef struct heap_page_items_state
124 } heap_page_items_state;
127 heap_page_items(PG_FUNCTION_ARGS)
129 bytea *raw_page = PG_GETARG_BYTEA_P(0);
130 heap_page_items_state *inter_call_data = NULL;
131 FuncCallContext *fctx;
136 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
137 (errmsg("must be superuser to use raw page functions"))));
139 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
141 if (SRF_IS_FIRSTCALL())
146 if (raw_page_size < SizeOfPageHeaderData)
148 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
149 errmsg("input page too small (%d bytes)", raw_page_size)));
151 fctx = SRF_FIRSTCALL_INIT();
152 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
154 inter_call_data = palloc(sizeof(heap_page_items_state));
156 /* Build a tuple descriptor for our result type */
157 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
158 elog(ERROR, "return type must be a row type");
160 inter_call_data->tupd = tupdesc;
162 inter_call_data->offset = FirstOffsetNumber;
163 inter_call_data->page = VARDATA(raw_page);
165 fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
166 fctx->user_fctx = inter_call_data;
168 MemoryContextSwitchTo(mctx);
171 fctx = SRF_PERCALL_SETUP();
172 inter_call_data = fctx->user_fctx;
174 if (fctx->call_cntr < fctx->max_calls)
176 Page page = inter_call_data->page;
177 HeapTuple resultTuple;
186 memset(nulls, 0, sizeof(nulls));
188 /* Extract information from the line pointer */
190 id = PageGetItemId(page, inter_call_data->offset);
192 lp_offset = ItemIdGetOffset(id);
193 lp_flags = ItemIdGetFlags(id);
194 lp_len = ItemIdGetLength(id);
196 values[0] = UInt16GetDatum(inter_call_data->offset);
197 values[1] = UInt16GetDatum(lp_offset);
198 values[2] = UInt16GetDatum(lp_flags);
199 values[3] = UInt16GetDatum(lp_len);
202 * We do just enough validity checking to make sure we don't reference
203 * data outside the page passed to us. The page could be corrupt in
204 * many other ways, but at least we won't crash.
206 if (ItemIdHasStorage(id) &&
207 lp_len >= MinHeapTupleSize &&
208 lp_offset == MAXALIGN(lp_offset) &&
209 lp_offset + lp_len <= raw_page_size)
211 HeapTupleHeader tuphdr;
212 bytea *tuple_data_bytea;
215 /* Extract information from the tuple header */
217 tuphdr = (HeapTupleHeader) PageGetItem(page, id);
219 values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
220 values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
221 /* shared with xvac */
222 values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
223 values[7] = PointerGetDatum(&tuphdr->t_ctid);
224 values[8] = UInt32GetDatum(tuphdr->t_infomask2);
225 values[9] = UInt32GetDatum(tuphdr->t_infomask);
226 values[10] = UInt8GetDatum(tuphdr->t_hoff);
228 /* Copy raw tuple data into bytea attribute */
229 tuple_data_len = lp_len - tuphdr->t_hoff;
230 tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
231 SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
232 memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
234 values[13] = PointerGetDatum(tuple_data_bytea);
237 * We already checked that the item is completely within the raw
238 * page passed to us, with the length given in the line pointer.
239 * Let's check that t_hoff doesn't point over lp_len, before using
240 * it to access t_bits and oid.
242 if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
243 tuphdr->t_hoff <= lp_len &&
244 tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
246 if (tuphdr->t_infomask & HEAP_HASNULL)
251 BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
252 values[11] = CStringGetTextDatum(
253 bits_to_text(tuphdr->t_bits, bits_len));
258 if (tuphdr->t_infomask & HEAP_HASOID_OLD)
259 values[12] = HeapTupleHeaderGetOidOld(tuphdr);
272 * The line pointer is not used, or it's invalid. Set the rest of
277 for (i = 4; i <= 13; i++)
281 /* Build and return the result tuple. */
282 resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
283 result = HeapTupleGetDatum(resultTuple);
285 inter_call_data->offset++;
287 SRF_RETURN_NEXT(fctx, result);
290 SRF_RETURN_DONE(fctx);
294 * tuple_data_split_internal
296 * Split raw tuple data taken directly from a page into an array of bytea
297 * elements. This routine does a lookup on NULL values and creates array
298 * elements accordingly. This is a reimplementation of nocachegetattr()
299 * in heaptuple.c simplified for educational purposes.
302 tuple_data_split_internal(Oid relid, char *tupdata,
303 uint16 tupdata_len, uint16 t_infomask,
304 uint16 t_infomask2, bits8 *t_bits,
307 ArrayBuildState *raw_attrs;
314 /* Get tuple descriptor from relation OID */
315 rel = relation_open(relid, AccessShareLock);
316 tupdesc = RelationGetDescr(rel);
318 raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
319 nattrs = tupdesc->natts;
321 if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
323 (errcode(ERRCODE_DATA_CORRUPTED),
324 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
326 for (i = 0; i < nattrs; i++)
328 Form_pg_attribute attr;
330 bytea *attr_data = NULL;
332 attr = TupleDescAttr(tupdesc, i);
335 * Tuple header can specify less attributes than tuple descriptor as
336 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
337 * change tuples in pages, so attributes with numbers greater than
338 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
340 if (i >= (t_infomask2 & HEAP_NATTS_MASK))
343 is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
349 if (attr->attlen == -1)
351 off = att_align_pointer(off, attr->attalign, -1,
355 * As VARSIZE_ANY throws an exception if it can't properly
356 * detect the type of external storage in macros VARTAG_SIZE,
357 * this check is repeated to have a nicer error handling.
359 if (VARATT_IS_EXTERNAL(tupdata + off) &&
360 !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
361 !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
363 (errcode(ERRCODE_DATA_CORRUPTED),
364 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
366 len = VARSIZE_ANY(tupdata + off);
370 off = att_align_nominal(off, attr->attalign);
374 if (tupdata_len < off + len)
376 (errcode(ERRCODE_DATA_CORRUPTED),
377 errmsg("unexpected end of tuple data")));
379 if (attr->attlen == -1 && do_detoast)
380 attr_data = DatumGetByteaPCopy(tupdata + off);
383 attr_data = (bytea *) palloc(len + VARHDRSZ);
384 SET_VARSIZE(attr_data, len + VARHDRSZ);
385 memcpy(VARDATA(attr_data), tupdata + off, len);
388 off = att_addlength_pointer(off, attr->attlen,
392 raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
393 is_null, BYTEAOID, CurrentMemoryContext);
398 if (tupdata_len != off)
400 (errcode(ERRCODE_DATA_CORRUPTED),
401 errmsg("end of tuple reached without looking at all its data")));
403 relation_close(rel, AccessShareLock);
405 return makeArrayResult(raw_attrs, CurrentMemoryContext);
411 * Split raw tuple data taken directly from page into distinct elements
412 * taking into account null values.
414 PG_FUNCTION_INFO_V1(tuple_data_split);
417 tuple_data_split(PG_FUNCTION_ARGS)
424 bool do_detoast = false;
425 bits8 *t_bits = NULL;
428 relid = PG_GETARG_OID(0);
429 raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
430 t_infomask = PG_GETARG_INT16(2);
431 t_infomask2 = PG_GETARG_INT16(3);
432 t_bits_str = PG_ARGISNULL(4) ? NULL :
433 text_to_cstring(PG_GETARG_TEXT_PP(4));
436 do_detoast = PG_GETARG_BOOL(5);
440 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
441 errmsg("must be superuser to use raw page functions")));
447 * Convert t_bits string back to the bits8 array as represented in the
450 if (t_infomask & HEAP_HASNULL)
455 bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
458 (errcode(ERRCODE_DATA_CORRUPTED),
459 errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
462 bits_str_len = strlen(t_bits_str);
463 if (bits_len != bits_str_len)
465 (errcode(ERRCODE_DATA_CORRUPTED),
466 errmsg("unexpected length of t_bits %u, expected %d",
467 bits_str_len, bits_len)));
469 /* do the conversion */
470 t_bits = text_to_bits(t_bits_str, bits_str_len);
476 (errcode(ERRCODE_DATA_CORRUPTED),
477 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
478 strlen(t_bits_str))));
481 /* Split tuple data */
482 res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
483 VARSIZE(raw_data) - VARHDRSZ,
484 t_infomask, t_infomask2, t_bits,
490 PG_RETURN_ARRAYTYPE_P(res);