]> granicus.if.org Git - postgresql/blob - contrib/pageinspect/heapfuncs.c
Improve pageinspect module
[postgresql] / contrib / pageinspect / heapfuncs.c
1 /*-------------------------------------------------------------------------
2  *
3  * heapfuncs.c
4  *        Functions to investigate heap pages
5  *
6  * We check the input to these functions for corrupt pointers etc. that
7  * might cause crashes, but at the same time we try to print out as much
8  * information as possible, even if it's nonsense. That's because if a
9  * page is corrupt, we don't know why and how exactly it is corrupt, so we
10  * let the user judge it.
11  *
12  * These functions are restricted to superusers for the fear of introducing
13  * security holes if the input checking isn't as water-tight as it should be.
14  * You'd need to be superuser to obtain a raw page image anyway, so
15  * there's hardly any use case for using these without superuser-rights
16  * anyway.
17  *
18  * Copyright (c) 2007-2015, PostgreSQL Global Development Group
19  *
20  * IDENTIFICATION
21  *        contrib/pageinspect/heapfuncs.c
22  *
23  *-------------------------------------------------------------------------
24  */
25
26 #include "postgres.h"
27
28 #include "access/htup_details.h"
29 #include "funcapi.h"
30 #include "catalog/pg_type.h"
31 #include "miscadmin.h"
32 #include "utils/array.h"
33 #include "utils/builtins.h"
34 #include "utils/rel.h"
35
36
37 /*
38  * bits_to_text
39  *
40  * Converts a bits8-array of 'len' bits to a human-readable
41  * c-string representation.
42  */
43 static char *
44 bits_to_text(bits8 *bits, int len)
45 {
46         int                     i;
47         char       *str;
48
49         str = palloc(len + 1);
50
51         for (i = 0; i < len; i++)
52                 str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
53
54         str[i] = '\0';
55
56         return str;
57 }
58
59
60 /*
61  * text_to_bits
62  *
63  * Converts a c-string representation of bits into a bits8-array. This is
64  * the reverse operation of previous routine.
65  */
66 static bits8 *
67 text_to_bits(char *str, int len)
68 {
69         bits8      *bits;
70         int                     off = 0;
71         char            byte = 0;
72
73         bits = palloc(len + 1);
74
75         while (off < len)
76         {
77                 if (off % 8 == 0)
78                         byte = 0;
79
80                 if ((str[off] == '0') || (str[off] == '1'))
81                         byte = byte | ((str[off] - '0') << off % 8);
82                 else
83                         ereport(ERROR,
84                                         (errcode(ERRCODE_DATA_CORRUPTED),
85                                          errmsg("illegal character '%c' in t_bits string", str[off])));
86
87                 if (off % 8 == 7)
88                         bits[off / 8] = byte;
89
90                 off++;
91         }
92
93         return bits;
94 }
95
96 /*
97  * heap_page_items
98  *
99  * Allows inspection of line pointers and tuple headers of a heap page.
100  */
101 PG_FUNCTION_INFO_V1(heap_page_items);
102
103 typedef struct heap_page_items_state
104 {
105         TupleDesc       tupd;
106         Page            page;
107         uint16          offset;
108 } heap_page_items_state;
109
110 Datum
111 heap_page_items(PG_FUNCTION_ARGS)
112 {
113         bytea      *raw_page = PG_GETARG_BYTEA_P(0);
114         heap_page_items_state *inter_call_data = NULL;
115         FuncCallContext *fctx;
116         int                     raw_page_size;
117
118         if (!superuser())
119                 ereport(ERROR,
120                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
121                                  (errmsg("must be superuser to use raw page functions"))));
122
123         raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
124
125         if (SRF_IS_FIRSTCALL())
126         {
127                 TupleDesc       tupdesc;
128                 MemoryContext mctx;
129
130                 if (raw_page_size < SizeOfPageHeaderData)
131                         ereport(ERROR,
132                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
133                                   errmsg("input page too small (%d bytes)", raw_page_size)));
134
135                 fctx = SRF_FIRSTCALL_INIT();
136                 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
137
138                 inter_call_data = palloc(sizeof(heap_page_items_state));
139
140                 /* Build a tuple descriptor for our result type */
141                 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
142                         elog(ERROR, "return type must be a row type");
143
144                 inter_call_data->tupd = tupdesc;
145
146                 inter_call_data->offset = FirstOffsetNumber;
147                 inter_call_data->page = VARDATA(raw_page);
148
149                 fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
150                 fctx->user_fctx = inter_call_data;
151
152                 MemoryContextSwitchTo(mctx);
153         }
154
155         fctx = SRF_PERCALL_SETUP();
156         inter_call_data = fctx->user_fctx;
157
158         if (fctx->call_cntr < fctx->max_calls)
159         {
160                 Page            page = inter_call_data->page;
161                 HeapTuple       resultTuple;
162                 Datum           result;
163                 ItemId          id;
164                 Datum           values[14];
165                 bool            nulls[14];
166                 uint16          lp_offset;
167                 uint16          lp_flags;
168                 uint16          lp_len;
169
170                 memset(nulls, 0, sizeof(nulls));
171
172                 /* Extract information from the line pointer */
173
174                 id = PageGetItemId(page, inter_call_data->offset);
175
176                 lp_offset = ItemIdGetOffset(id);
177                 lp_flags = ItemIdGetFlags(id);
178                 lp_len = ItemIdGetLength(id);
179
180                 values[0] = UInt16GetDatum(inter_call_data->offset);
181                 values[1] = UInt16GetDatum(lp_offset);
182                 values[2] = UInt16GetDatum(lp_flags);
183                 values[3] = UInt16GetDatum(lp_len);
184
185                 /*
186                  * We do just enough validity checking to make sure we don't reference
187                  * data outside the page passed to us. The page could be corrupt in
188                  * many other ways, but at least we won't crash.
189                  */
190                 if (ItemIdHasStorage(id) &&
191                         lp_len >= MinHeapTupleSize &&
192                         lp_offset == MAXALIGN(lp_offset) &&
193                         lp_offset + lp_len <= raw_page_size)
194                 {
195                         HeapTupleHeader         tuphdr;
196                         bytea                      *tuple_data_bytea;
197                         int                                     tuple_data_len;
198
199                         /* Extract information from the tuple header */
200
201                         tuphdr = (HeapTupleHeader) PageGetItem(page, id);
202
203                         values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
204                         values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
205                         /* shared with xvac */
206                         values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
207                         values[7] = PointerGetDatum(&tuphdr->t_ctid);
208                         values[8] = UInt32GetDatum(tuphdr->t_infomask2);
209                         values[9] = UInt32GetDatum(tuphdr->t_infomask);
210                         values[10] = UInt8GetDatum(tuphdr->t_hoff);
211
212                         /* Copy raw tuple data into bytea attribute */
213                         tuple_data_len = lp_len - tuphdr->t_hoff;
214                         tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
215                         SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
216                         memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
217                                          tuple_data_len);
218                         values[13] = PointerGetDatum(tuple_data_bytea);
219
220                         /*
221                          * We already checked that the item is completely within the raw
222                          * page passed to us, with the length given in the line pointer.
223                          * Let's check that t_hoff doesn't point over lp_len, before using
224                          * it to access t_bits and oid.
225                          */
226                         if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
227                                 tuphdr->t_hoff <= lp_len &&
228                                 tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
229                         {
230                                 if (tuphdr->t_infomask & HEAP_HASNULL)
231                                 {
232                                         int     bits_len =
233                                                 ((tuphdr->t_infomask2 & HEAP_NATTS_MASK) / 8 + 1) * 8;
234
235                                         values[11] = CStringGetTextDatum(
236                                                                  bits_to_text(tuphdr->t_bits, bits_len));
237                                 }
238                                 else
239                                         nulls[11] = true;
240
241                                 if (tuphdr->t_infomask & HEAP_HASOID)
242                                         values[12] = HeapTupleHeaderGetOid(tuphdr);
243                                 else
244                                         nulls[12] = true;
245                         }
246                         else
247                         {
248                                 nulls[11] = true;
249                                 nulls[12] = true;
250                         }
251                 }
252                 else
253                 {
254                         /*
255                          * The line pointer is not used, or it's invalid. Set the rest of
256                          * the fields to NULL
257                          */
258                         int                     i;
259
260                         for (i = 4; i <= 13; i++)
261                                 nulls[i] = true;
262                 }
263
264                 /* Build and return the result tuple. */
265                 resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
266                 result = HeapTupleGetDatum(resultTuple);
267
268                 inter_call_data->offset++;
269
270                 SRF_RETURN_NEXT(fctx, result);
271         }
272         else
273                 SRF_RETURN_DONE(fctx);
274 }
275
276 /*
277  * tuple_data_split_internal
278  *
279  * Split raw tuple data taken directly from a page into an array of bytea
280  * elements. This routine does a lookup on NULL values and creates array
281  * elements accordindly. This is a reimplementation of nocachegetattr()
282  * in heaptuple.c simplified for educational purposes.
283  */
284 static Datum
285 tuple_data_split_internal(Oid relid, char *tupdata,
286                                  uint16 tupdata_len, uint16 t_infomask,
287                                  uint16 t_infomask2, bits8 *t_bits,
288                                  bool do_detoast)
289 {
290         ArrayBuildState    *raw_attrs;
291         int                             nattrs;
292         int                                     i;
293         int                                     off = 0;
294         Relation                        rel;
295         TupleDesc                       tupdesc;
296
297         /* Get tuple descriptor from relation OID */
298         rel = relation_open(relid, NoLock);
299         tupdesc = CreateTupleDescCopyConstr(rel->rd_att);
300         relation_close(rel, NoLock);
301
302         raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
303         nattrs = tupdesc->natts;
304
305         if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
306                 ereport(ERROR,
307                                 (errcode(ERRCODE_DATA_CORRUPTED),
308                                  errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
309
310         for (i = 0; i < nattrs; i++)
311         {
312                 Form_pg_attribute       attr;
313                 bool                            is_null;
314                 bytea                      *attr_data = NULL;
315
316                 attr = tupdesc->attrs[i];
317                 is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
318
319                 /*
320                  * Tuple header can specify less attributes than tuple descriptor
321                  * as ALTER TABLE ADD COLUMN without DEFAULT keyword does not
322                  * actually change tuples in pages, so attributes with numbers greater
323                  * than (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
324                  */
325                 if (i >= (t_infomask2 & HEAP_NATTS_MASK))
326                         is_null = true;
327
328                 if (!is_null)
329                 {
330                         int             len;
331
332                         if (attr->attlen == -1)
333                         {
334                                 off = att_align_pointer(off, tupdesc->attrs[i]->attalign, -1,
335                                                                                 tupdata + off);
336                                 /*
337                                  * As VARSIZE_ANY throws an exception if it can't properly
338                                  * detect the type of external storage in macros VARTAG_SIZE,
339                                  * this check is repeated to have a nicer error handling.
340                                  */
341                                 if (VARATT_IS_EXTERNAL(tupdata + off) &&
342                                         !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
343                                         !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
344                                         ereport(ERROR,
345                                                 (errcode(ERRCODE_DATA_CORRUPTED),
346                                                  errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
347
348                                 len = VARSIZE_ANY(tupdata + off);
349                         }
350                         else
351                         {
352                                 off = att_align_nominal(off, tupdesc->attrs[i]->attalign);
353                                 len = attr->attlen;
354                         }
355
356                         if (tupdata_len < off + len)
357                                 ereport(ERROR,
358                                                 (errcode(ERRCODE_DATA_CORRUPTED),
359                                                  errmsg("unexpected end of tuple data")));
360
361                         if (attr->attlen == -1 && do_detoast)
362                                 attr_data = DatumGetByteaPCopy(tupdata + off);
363                         else
364                         {
365                                 attr_data = (bytea *) palloc(len + VARHDRSZ);
366                                 SET_VARSIZE(attr_data, len + VARHDRSZ);
367                                 memcpy(VARDATA(attr_data), tupdata + off, len);
368                         }
369
370                         off = att_addlength_pointer(off, tupdesc->attrs[i]->attlen,
371                                                                                 tupdata + off);
372                 }
373
374                 raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
375                                                                          is_null, BYTEAOID, CurrentMemoryContext);
376                 if (attr_data)
377                         pfree(attr_data);
378         }
379
380         if (tupdata_len != off)
381                 ereport(ERROR,
382                                 (errcode(ERRCODE_DATA_CORRUPTED),
383                                  errmsg("end of tuple reached without looking at all its data")));
384
385         return makeArrayResult(raw_attrs, CurrentMemoryContext);
386 }
387
388 /*
389  * tuple_data_split
390  *
391  * Split raw tuple data taken directly from page into distinct elements
392  * taking into account null values.
393  */
394 PG_FUNCTION_INFO_V1(tuple_data_split);
395
396 Datum
397 tuple_data_split(PG_FUNCTION_ARGS)
398 {
399         Oid                             relid;
400         bytea              *raw_data;
401         uint16                  t_infomask;
402         uint16                  t_infomask2;
403         char               *t_bits_str;
404         bool                    do_detoast = false;
405         bits8              *t_bits = NULL;
406         Datum                   res;
407
408         relid = PG_GETARG_OID(0);
409         raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
410         t_infomask = PG_GETARG_INT16(2);
411         t_infomask2 = PG_GETARG_INT16(3);
412         t_bits_str = PG_ARGISNULL(4) ? NULL :
413                 text_to_cstring(PG_GETARG_TEXT_PP(4));
414
415         if (PG_NARGS() >= 6)
416                 do_detoast = PG_GETARG_BOOL(5);
417
418         if (!superuser())
419                 ereport(ERROR,
420                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
421                                  errmsg("must be superuser to use raw page functions")));
422
423         if (!raw_data)
424                 PG_RETURN_NULL();
425
426         /*
427          * Convert t_bits string back to the bits8 array as represented in the
428          * tuple header.
429          */
430         if (t_infomask & HEAP_HASNULL)
431         {
432                 int             bits_str_len;
433                 int             bits_len;
434
435                 bits_len = (t_infomask2 & HEAP_NATTS_MASK) / 8 + 1;
436                 if (!t_bits_str)
437                         ereport(ERROR,
438                                         (errcode(ERRCODE_DATA_CORRUPTED),
439                                          errmsg("argument of t_bits is null, but it is expected to be null and %i character long",
440                                                         bits_len * 8)));
441
442                 bits_str_len = strlen(t_bits_str);
443                 if ((bits_str_len % 8) != 0)
444                         ereport(ERROR,
445                                         (errcode(ERRCODE_DATA_CORRUPTED),
446                                          errmsg("length of t_bits is not a multiple of eight")));
447
448                 if (bits_len * 8 != bits_str_len)
449                         ereport(ERROR,
450                                         (errcode(ERRCODE_DATA_CORRUPTED),
451                                          errmsg("unexpected length of t_bits %u, expected %i",
452                                                         bits_str_len, bits_len * 8)));
453
454                 /* do the conversion */
455                 t_bits = text_to_bits(t_bits_str, bits_str_len);
456         }
457         else
458         {
459                 if (t_bits_str)
460                         ereport(ERROR,
461                                         (errcode(ERRCODE_DATA_CORRUPTED),
462                                          errmsg("t_bits string is expected to be NULL, but instead it is %lu bytes length",
463                                                         strlen(t_bits_str))));
464         }
465
466         /* Split tuple data */
467         res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
468                                                                         VARSIZE(raw_data) - VARHDRSZ,
469                                                                         t_infomask, t_infomask2, t_bits,
470                                                                         do_detoast);
471
472         if (t_bits)
473                 pfree(t_bits);
474
475         PG_RETURN_ARRAYTYPE_P(res);
476 }