1 /*-------------------------------------------------------------------------
4 * Support routines for external and compressed storage of
5 * variable size attributes.
7 * Copyright (c) 2000-2015, PostgreSQL Global Development Group
11 * src/backend/access/heap/tuptoaster.c
15 * toast_insert_or_update -
16 * Try to make a given tuple fit into one page by compressing
17 * or moving off attributes
20 * Reclaim toast storage when a tuple is deleted
22 * heap_tuple_untoast_attr -
23 * Fetch back a given value from the "secondary" relation
25 *-------------------------------------------------------------------------
33 #include "access/genam.h"
34 #include "access/heapam.h"
35 #include "access/tuptoaster.h"
36 #include "access/xact.h"
37 #include "catalog/catalog.h"
38 #include "common/pg_lzcompress.h"
39 #include "miscadmin.h"
40 #include "utils/fmgroids.h"
41 #include "utils/rel.h"
42 #include "utils/typcache.h"
43 #include "utils/tqual.h"
49 * The information at the start of the compressed toast data.
51 typedef struct toast_compress_header
53 int32 vl_len_; /* varlena header (do not touch directly!) */
55 } toast_compress_header;
58 * Utilities for manipulation of header information for compressed
61 #define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header))
62 #define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
63 #define TOAST_COMPRESS_RAWDATA(ptr) \
64 (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
65 #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
66 (((toast_compress_header *) (ptr))->rawsize = (len))
68 static void toast_delete_datum(Relation rel, Datum value);
69 static Datum toast_save_datum(Relation rel, Datum value,
70 struct varlena * oldexternal, int options);
71 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
72 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
73 static struct varlena *toast_fetch_datum(struct varlena * attr);
74 static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
75 int32 sliceoffset, int32 length);
76 static struct varlena *toast_decompress_datum(struct varlena * attr);
77 static int toast_open_indexes(Relation toastrel,
81 static void toast_close_indexes(Relation *toastidxs, int num_indexes,
86 * heap_tuple_fetch_attr -
88 * Public entry point to get back a toasted value from
89 * external source (possibly still in compressed format).
91 * This will return a datum that contains all the data internally, ie, not
92 * relying on external storage or memory, but it can still be compressed or
93 * have a short header. Note some callers assume that if the input is an
94 * EXTERNAL datum, the result will be a pfree'able chunk.
98 heap_tuple_fetch_attr(struct varlena * attr)
100 struct varlena *result;
102 if (VARATT_IS_EXTERNAL_ONDISK(attr))
105 * This is an external stored plain value
107 result = toast_fetch_datum(attr);
109 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
112 * This is an indirect pointer --- dereference it
114 struct varatt_indirect redirect;
116 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
117 attr = (struct varlena *) redirect.pointer;
119 /* nested indirect Datums aren't allowed */
120 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
122 /* recurse if value is still external in some other way */
123 if (VARATT_IS_EXTERNAL(attr))
124 return heap_tuple_fetch_attr(attr);
127 * Copy into the caller's memory context, in case caller tries to
130 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
131 memcpy(result, attr, VARSIZE_ANY(attr));
136 * This is a plain value inside of the main tuple - why am I called?
146 * heap_tuple_untoast_attr -
148 * Public entry point to get back a toasted value from compression
149 * or external storage. The result is always non-extended varlena form.
151 * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
152 * datum, the result will be a pfree'able chunk.
156 heap_tuple_untoast_attr(struct varlena * attr)
158 if (VARATT_IS_EXTERNAL_ONDISK(attr))
161 * This is an externally stored datum --- fetch it back from there
163 attr = toast_fetch_datum(attr);
164 /* If it's compressed, decompress it */
165 if (VARATT_IS_COMPRESSED(attr))
167 struct varlena *tmp = attr;
169 attr = toast_decompress_datum(tmp);
173 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
176 * This is an indirect pointer --- dereference it
178 struct varatt_indirect redirect;
180 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
181 attr = (struct varlena *) redirect.pointer;
183 /* nested indirect Datums aren't allowed */
184 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
186 /* recurse in case value is still extended in some other way */
187 attr = heap_tuple_untoast_attr(attr);
189 /* if it isn't, we'd better copy it */
190 if (attr == (struct varlena *) redirect.pointer)
192 struct varlena *result;
194 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
195 memcpy(result, attr, VARSIZE_ANY(attr));
199 else if (VARATT_IS_COMPRESSED(attr))
202 * This is a compressed value inside of the main tuple
204 attr = toast_decompress_datum(attr);
206 else if (VARATT_IS_SHORT(attr))
209 * This is a short-header varlena --- convert to 4-byte header format
211 Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
212 Size new_size = data_size + VARHDRSZ;
213 struct varlena *new_attr;
215 new_attr = (struct varlena *) palloc(new_size);
216 SET_VARSIZE(new_attr, new_size);
217 memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
226 * heap_tuple_untoast_attr_slice -
228 * Public entry point to get back part of a toasted value
229 * from compression or external storage.
233 heap_tuple_untoast_attr_slice(struct varlena * attr,
234 int32 sliceoffset, int32 slicelength)
236 struct varlena *preslice;
237 struct varlena *result;
241 if (VARATT_IS_EXTERNAL_ONDISK(attr))
243 struct varatt_external toast_pointer;
245 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
247 /* fast path for non-compressed external datums */
248 if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
249 return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
251 /* fetch it back (compressed marker will get set automatically) */
252 preslice = toast_fetch_datum(attr);
254 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
256 struct varatt_indirect redirect;
258 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
260 /* nested indirect Datums aren't allowed */
261 Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
263 return heap_tuple_untoast_attr_slice(redirect.pointer,
264 sliceoffset, slicelength);
269 Assert(!VARATT_IS_EXTERNAL(preslice));
271 if (VARATT_IS_COMPRESSED(preslice))
273 struct varlena *tmp = preslice;
275 preslice = toast_decompress_datum(tmp);
281 if (VARATT_IS_SHORT(preslice))
283 attrdata = VARDATA_SHORT(preslice);
284 attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
288 attrdata = VARDATA(preslice);
289 attrsize = VARSIZE(preslice) - VARHDRSZ;
292 /* slicing of datum for compressed cases and plain value */
294 if (sliceoffset >= attrsize)
300 if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
301 slicelength = attrsize - sliceoffset;
303 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
304 SET_VARSIZE(result, slicelength + VARHDRSZ);
306 memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
308 if (preslice != attr)
316 * toast_raw_datum_size -
318 * Return the raw (detoasted) size of a varlena datum
319 * (including the VARHDRSZ header)
323 toast_raw_datum_size(Datum value)
325 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
328 if (VARATT_IS_EXTERNAL_ONDISK(attr))
330 /* va_rawsize is the size of the original datum -- including header */
331 struct varatt_external toast_pointer;
333 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
334 result = toast_pointer.va_rawsize;
336 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
338 struct varatt_indirect toast_pointer;
340 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
342 /* nested indirect Datums aren't allowed */
343 Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
345 return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
347 else if (VARATT_IS_COMPRESSED(attr))
349 /* here, va_rawsize is just the payload size */
350 result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
352 else if (VARATT_IS_SHORT(attr))
355 * we have to normalize the header length to VARHDRSZ or else the
356 * callers of this function will be confused.
358 result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
362 /* plain untoasted datum */
363 result = VARSIZE(attr);
371 * Return the physical storage size (possibly compressed) of a varlena datum
375 toast_datum_size(Datum value)
377 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
380 if (VARATT_IS_EXTERNAL_ONDISK(attr))
383 * Attribute is stored externally - return the extsize whether
384 * compressed or not. We do not count the size of the toast pointer
387 struct varatt_external toast_pointer;
389 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
390 result = toast_pointer.va_extsize;
392 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
394 struct varatt_indirect toast_pointer;
396 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
398 /* nested indirect Datums aren't allowed */
399 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
401 return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
403 else if (VARATT_IS_SHORT(attr))
405 result = VARSIZE_SHORT(attr);
410 * Attribute is stored inline either compressed or not, just calculate
411 * the size of the datum in either case.
413 result = VARSIZE(attr);
422 * Cascaded delete toast-entries on DELETE
426 toast_delete(Relation rel, HeapTuple oldtup)
429 Form_pg_attribute *att;
432 Datum toast_values[MaxHeapAttributeNumber];
433 bool toast_isnull[MaxHeapAttributeNumber];
436 * We should only ever be called for tuples of plain relations or
437 * materialized views --- recursing on a toast rel is bad news.
439 Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
440 rel->rd_rel->relkind == RELKIND_MATVIEW);
443 * Get the tuple descriptor and break down the tuple into fields.
445 * NOTE: it's debatable whether to use heap_deform_tuple() here or just
446 * heap_getattr() only the varlena columns. The latter could win if there
447 * are few varlena columns and many non-varlena ones. However,
448 * heap_deform_tuple costs only O(N) while the heap_getattr way would cost
449 * O(N^2) if there are many varlena columns, so it seems better to err on
450 * the side of linear cost. (We won't even be here unless there's at
451 * least one varlena column, by the way.)
453 tupleDesc = rel->rd_att;
454 att = tupleDesc->attrs;
455 numAttrs = tupleDesc->natts;
457 Assert(numAttrs <= MaxHeapAttributeNumber);
458 heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull);
461 * Check for external stored attributes and delete them from the secondary
464 for (i = 0; i < numAttrs; i++)
466 if (att[i]->attlen == -1)
468 Datum value = toast_values[i];
472 else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
473 toast_delete_datum(rel, value);
480 * toast_insert_or_update -
482 * Delete no-longer-used toast-entries and create new ones to
483 * make the new tuple fit on INSERT or UPDATE
486 * newtup: the candidate new tuple to be inserted
487 * oldtup: the old row version for UPDATE, or NULL for INSERT
488 * options: options to be passed to heap_insert() for toast rows
490 * either newtup if no toasting is needed, or a palloc'd modified tuple
491 * that is what should actually get stored
493 * NOTE: neither newtup nor oldtup will be modified. This is a change
494 * from the pre-8.1 API of this routine.
498 toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
501 HeapTuple result_tuple;
503 Form_pg_attribute *att;
507 bool need_change = false;
508 bool need_free = false;
509 bool need_delold = false;
510 bool has_nulls = false;
515 char toast_action[MaxHeapAttributeNumber];
516 bool toast_isnull[MaxHeapAttributeNumber];
517 bool toast_oldisnull[MaxHeapAttributeNumber];
518 Datum toast_values[MaxHeapAttributeNumber];
519 Datum toast_oldvalues[MaxHeapAttributeNumber];
520 struct varlena *toast_oldexternal[MaxHeapAttributeNumber];
521 int32 toast_sizes[MaxHeapAttributeNumber];
522 bool toast_free[MaxHeapAttributeNumber];
523 bool toast_delold[MaxHeapAttributeNumber];
526 * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super
527 * deletions just normally insert/delete the toast values. It seems
528 * easiest to deal with that here, instead on, potentially, multiple
531 options &= ~HEAP_INSERT_SPECULATIVE;
534 * We should only ever be called for tuples of plain relations or
535 * materialized views --- recursing on a toast rel is bad news.
537 Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
538 rel->rd_rel->relkind == RELKIND_MATVIEW);
541 * Get the tuple descriptor and break down the tuple(s) into fields.
543 tupleDesc = rel->rd_att;
544 att = tupleDesc->attrs;
545 numAttrs = tupleDesc->natts;
547 Assert(numAttrs <= MaxHeapAttributeNumber);
548 heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
550 heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);
553 * Then collect information about the values given
555 * NOTE: toast_action[i] can have these values:
556 * ' ' default handling
557 * 'p' already processed --- don't touch it
558 * 'x' incompressible, but OK to move off
560 * NOTE: toast_sizes[i] is only made valid for varlena attributes with
561 * toast_action[i] different from 'p'.
564 memset(toast_action, ' ', numAttrs * sizeof(char));
565 memset(toast_oldexternal, 0, numAttrs * sizeof(struct varlena *));
566 memset(toast_free, 0, numAttrs * sizeof(bool));
567 memset(toast_delold, 0, numAttrs * sizeof(bool));
569 for (i = 0; i < numAttrs; i++)
571 struct varlena *old_value;
572 struct varlena *new_value;
577 * For UPDATE get the old and new values of this attribute
579 old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]);
580 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
583 * If the old value is stored on disk, check if it has changed so
584 * we have to delete it later.
586 if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
587 VARATT_IS_EXTERNAL_ONDISK(old_value))
589 if (toast_isnull[i] || !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
590 memcmp((char *) old_value, (char *) new_value,
591 VARSIZE_EXTERNAL(old_value)) != 0)
594 * The old external stored value isn't needed any more
597 toast_delold[i] = true;
603 * This attribute isn't changed by this update so we reuse
604 * the original reference to the old value in the new
607 toast_action[i] = 'p';
615 * For INSERT simply get the new value
617 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
621 * Handle NULL attributes
625 toast_action[i] = 'p';
631 * Now look at varlena attributes
633 if (att[i]->attlen == -1)
636 * If the table's attribute says PLAIN always, force it so.
638 if (att[i]->attstorage == 'p')
639 toast_action[i] = 'p';
642 * We took care of UPDATE above, so any external value we find
643 * still in the tuple must be someone else's that we cannot reuse
644 * (this includes the case of an out-of-line in-memory datum).
645 * Fetch it back (without decompression, unless we are forcing
646 * PLAIN storage). If necessary, we'll push it out as a new
647 * external value below.
649 if (VARATT_IS_EXTERNAL(new_value))
651 toast_oldexternal[i] = new_value;
652 if (att[i]->attstorage == 'p')
653 new_value = heap_tuple_untoast_attr(new_value);
655 new_value = heap_tuple_fetch_attr(new_value);
656 toast_values[i] = PointerGetDatum(new_value);
657 toast_free[i] = true;
663 * Remember the size of this attribute
665 toast_sizes[i] = VARSIZE_ANY(new_value);
670 * Not a varlena attribute, plain storage always
672 toast_action[i] = 'p';
677 * Compress and/or save external until data fits into target length
679 * 1: Inline compress attributes with attstorage 'x', and store very
680 * large attributes with attstorage 'x' or 'e' external immediately
681 * 2: Store attributes with attstorage 'x' or 'e' external
682 * 3: Inline compress attributes with attstorage 'm'
683 * 4: Store attributes with attstorage 'm' external
687 /* compute header overhead --- this should match heap_form_tuple() */
688 hoff = SizeofHeapTupleHeader;
690 hoff += BITMAPLEN(numAttrs);
691 if (newtup->t_data->t_infomask & HEAP_HASOID)
693 hoff = MAXALIGN(hoff);
694 /* now convert to a limit on the tuple data size */
695 maxDataLen = TOAST_TUPLE_TARGET - hoff;
698 * Look for attributes with attstorage 'x' to compress. Also find large
699 * attributes with attstorage 'x' or 'e', and store them external.
701 while (heap_compute_data_size(tupleDesc,
702 toast_values, toast_isnull) > maxDataLen)
704 int biggest_attno = -1;
705 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
710 * Search for the biggest yet unprocessed internal attribute
712 for (i = 0; i < numAttrs; i++)
714 if (toast_action[i] != ' ')
716 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
717 continue; /* can't happen, toast_action would be 'p' */
718 if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
720 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
722 if (toast_sizes[i] > biggest_size)
725 biggest_size = toast_sizes[i];
729 if (biggest_attno < 0)
733 * Attempt to compress it inline, if it has attstorage 'x'
736 if (att[i]->attstorage == 'x')
738 old_value = toast_values[i];
739 new_value = toast_compress_datum(old_value);
741 if (DatumGetPointer(new_value) != NULL)
743 /* successful compression */
745 pfree(DatumGetPointer(old_value));
746 toast_values[i] = new_value;
747 toast_free[i] = true;
748 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
754 /* incompressible, ignore on subsequent compression passes */
755 toast_action[i] = 'x';
760 /* has attstorage 'e', ignore on subsequent compression passes */
761 toast_action[i] = 'x';
765 * If this value is by itself more than maxDataLen (after compression
766 * if any), push it out to the toast table immediately, if possible.
767 * This avoids uselessly compressing other fields in the common case
768 * where we have one long field and several short ones.
770 * XXX maybe the threshold should be less than maxDataLen?
772 if (toast_sizes[i] > maxDataLen &&
773 rel->rd_rel->reltoastrelid != InvalidOid)
775 old_value = toast_values[i];
776 toast_action[i] = 'p';
777 toast_values[i] = toast_save_datum(rel, toast_values[i],
778 toast_oldexternal[i], options);
780 pfree(DatumGetPointer(old_value));
781 toast_free[i] = true;
788 * Second we look for attributes of attstorage 'x' or 'e' that are still
789 * inline. But skip this if there's no toast table to push them to.
791 while (heap_compute_data_size(tupleDesc,
792 toast_values, toast_isnull) > maxDataLen &&
793 rel->rd_rel->reltoastrelid != InvalidOid)
795 int biggest_attno = -1;
796 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
800 * Search for the biggest yet inlined attribute with
801 * attstorage equals 'x' or 'e'
804 for (i = 0; i < numAttrs; i++)
806 if (toast_action[i] == 'p')
808 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
809 continue; /* can't happen, toast_action would be 'p' */
810 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
812 if (toast_sizes[i] > biggest_size)
815 biggest_size = toast_sizes[i];
819 if (biggest_attno < 0)
823 * Store this external
826 old_value = toast_values[i];
827 toast_action[i] = 'p';
828 toast_values[i] = toast_save_datum(rel, toast_values[i],
829 toast_oldexternal[i], options);
831 pfree(DatumGetPointer(old_value));
832 toast_free[i] = true;
839 * Round 3 - this time we take attributes with storage 'm' into
842 while (heap_compute_data_size(tupleDesc,
843 toast_values, toast_isnull) > maxDataLen)
845 int biggest_attno = -1;
846 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
851 * Search for the biggest yet uncompressed internal attribute
853 for (i = 0; i < numAttrs; i++)
855 if (toast_action[i] != ' ')
857 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
858 continue; /* can't happen, toast_action would be 'p' */
859 if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
861 if (att[i]->attstorage != 'm')
863 if (toast_sizes[i] > biggest_size)
866 biggest_size = toast_sizes[i];
870 if (biggest_attno < 0)
874 * Attempt to compress it inline
877 old_value = toast_values[i];
878 new_value = toast_compress_datum(old_value);
880 if (DatumGetPointer(new_value) != NULL)
882 /* successful compression */
884 pfree(DatumGetPointer(old_value));
885 toast_values[i] = new_value;
886 toast_free[i] = true;
887 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
893 /* incompressible, ignore on subsequent compression passes */
894 toast_action[i] = 'x';
899 * Finally we store attributes of type 'm' externally. At this point we
900 * increase the target tuple size, so that 'm' attributes aren't stored
901 * externally unless really necessary.
903 maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;
905 while (heap_compute_data_size(tupleDesc,
906 toast_values, toast_isnull) > maxDataLen &&
907 rel->rd_rel->reltoastrelid != InvalidOid)
909 int biggest_attno = -1;
910 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
914 * Search for the biggest yet inlined attribute with
918 for (i = 0; i < numAttrs; i++)
920 if (toast_action[i] == 'p')
922 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
923 continue; /* can't happen, toast_action would be 'p' */
924 if (att[i]->attstorage != 'm')
926 if (toast_sizes[i] > biggest_size)
929 biggest_size = toast_sizes[i];
933 if (biggest_attno < 0)
937 * Store this external
940 old_value = toast_values[i];
941 toast_action[i] = 'p';
942 toast_values[i] = toast_save_datum(rel, toast_values[i],
943 toast_oldexternal[i], options);
945 pfree(DatumGetPointer(old_value));
946 toast_free[i] = true;
953 * In the case we toasted any values, we need to build a new heap tuple
954 * with the changed values.
958 HeapTupleHeader olddata = newtup->t_data;
959 HeapTupleHeader new_data;
960 int32 new_header_len;
965 * Calculate the new size of the tuple.
967 * Note: we used to assume here that the old tuple's t_hoff must equal
968 * the new_header_len value, but that was incorrect. The old tuple
969 * might have a smaller-than-current natts, if there's been an ALTER
970 * TABLE ADD COLUMN since it was stored; and that would lead to a
971 * different conclusion about the size of the null bitmap, or even
972 * whether there needs to be one at all.
974 new_header_len = SizeofHeapTupleHeader;
976 new_header_len += BITMAPLEN(numAttrs);
977 if (olddata->t_infomask & HEAP_HASOID)
978 new_header_len += sizeof(Oid);
979 new_header_len = MAXALIGN(new_header_len);
980 new_data_len = heap_compute_data_size(tupleDesc,
981 toast_values, toast_isnull);
982 new_tuple_len = new_header_len + new_data_len;
985 * Allocate and zero the space needed, and fill HeapTupleData fields.
987 result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_tuple_len);
988 result_tuple->t_len = new_tuple_len;
989 result_tuple->t_self = newtup->t_self;
990 result_tuple->t_tableOid = newtup->t_tableOid;
991 new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
992 result_tuple->t_data = new_data;
995 * Copy the existing tuple header, but adjust natts and t_hoff.
997 memcpy(new_data, olddata, SizeofHeapTupleHeader);
998 HeapTupleHeaderSetNatts(new_data, numAttrs);
999 new_data->t_hoff = new_header_len;
1000 if (olddata->t_infomask & HEAP_HASOID)
1001 HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(olddata));
1003 /* Copy over the data, and fill the null bitmap if needed */
1004 heap_fill_tuple(tupleDesc,
1007 (char *) new_data + new_header_len,
1009 &(new_data->t_infomask),
1010 has_nulls ? new_data->t_bits : NULL);
1013 result_tuple = newtup;
1016 * Free allocated temp values
1019 for (i = 0; i < numAttrs; i++)
1021 pfree(DatumGetPointer(toast_values[i]));
1024 * Delete external values from the old tuple
1027 for (i = 0; i < numAttrs; i++)
1028 if (toast_delold[i])
1029 toast_delete_datum(rel, toast_oldvalues[i]);
1031 return result_tuple;
1036 * toast_flatten_tuple -
1038 * "Flatten" a tuple to contain no out-of-line toasted fields.
1039 * (This does not eliminate compressed or short-header datums.)
1041 * Note: we expect the caller already checked HeapTupleHasExternal(tup),
1042 * so there is no need for a short-circuit path.
1046 toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
1048 HeapTuple new_tuple;
1049 Form_pg_attribute *att = tupleDesc->attrs;
1050 int numAttrs = tupleDesc->natts;
1052 Datum toast_values[MaxTupleAttributeNumber];
1053 bool toast_isnull[MaxTupleAttributeNumber];
1054 bool toast_free[MaxTupleAttributeNumber];
1057 * Break down the tuple into fields.
1059 Assert(numAttrs <= MaxTupleAttributeNumber);
1060 heap_deform_tuple(tup, tupleDesc, toast_values, toast_isnull);
1062 memset(toast_free, 0, numAttrs * sizeof(bool));
1064 for (i = 0; i < numAttrs; i++)
1067 * Look at non-null varlena attributes
1069 if (!toast_isnull[i] && att[i]->attlen == -1)
1071 struct varlena *new_value;
1073 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1074 if (VARATT_IS_EXTERNAL(new_value))
1076 new_value = heap_tuple_fetch_attr(new_value);
1077 toast_values[i] = PointerGetDatum(new_value);
1078 toast_free[i] = true;
1084 * Form the reconfigured tuple.
1086 new_tuple = heap_form_tuple(tupleDesc, toast_values, toast_isnull);
1089 * Be sure to copy the tuple's OID and identity fields. We also make a
1090 * point of copying visibility info, just in case anybody looks at those
1091 * fields in a syscache entry.
1093 if (tupleDesc->tdhasoid)
1094 HeapTupleSetOid(new_tuple, HeapTupleGetOid(tup));
1096 new_tuple->t_self = tup->t_self;
1097 new_tuple->t_tableOid = tup->t_tableOid;
1099 new_tuple->t_data->t_choice = tup->t_data->t_choice;
1100 new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
1101 new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
1102 new_tuple->t_data->t_infomask |=
1103 tup->t_data->t_infomask & HEAP_XACT_MASK;
1104 new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
1105 new_tuple->t_data->t_infomask2 |=
1106 tup->t_data->t_infomask2 & HEAP2_XACT_MASK;
1109 * Free allocated temp values
1111 for (i = 0; i < numAttrs; i++)
1113 pfree(DatumGetPointer(toast_values[i]));
1120 * toast_flatten_tuple_to_datum -
1122 * "Flatten" a tuple containing out-of-line toasted fields into a Datum.
1123 * The result is always palloc'd in the current memory context.
1125 * We have a general rule that Datums of container types (rows, arrays,
1126 * ranges, etc) must not contain any external TOAST pointers. Without
1127 * this rule, we'd have to look inside each Datum when preparing a tuple
1128 * for storage, which would be expensive and would fail to extend cleanly
1129 * to new sorts of container types.
1131 * However, we don't want to say that tuples represented as HeapTuples
1132 * can't contain toasted fields, so instead this routine should be called
1133 * when such a HeapTuple is being converted into a Datum.
1135 * While we're at it, we decompress any compressed fields too. This is not
1136 * necessary for correctness, but reflects an expectation that compression
1137 * will be more effective if applied to the whole tuple not individual
1138 * fields. We are not so concerned about that that we want to deconstruct
1139 * and reconstruct tuples just to get rid of compressed fields, however.
1140 * So callers typically won't call this unless they see that the tuple has
1141 * at least one external field.
1143 * On the other hand, in-line short-header varlena fields are left alone.
1144 * If we "untoasted" them here, they'd just get changed back to short-header
1145 * format anyway within heap_fill_tuple.
1149 toast_flatten_tuple_to_datum(HeapTupleHeader tup,
1151 TupleDesc tupleDesc)
1153 HeapTupleHeader new_data;
1154 int32 new_header_len;
1156 int32 new_tuple_len;
1157 HeapTupleData tmptup;
1158 Form_pg_attribute *att = tupleDesc->attrs;
1159 int numAttrs = tupleDesc->natts;
1161 bool has_nulls = false;
1162 Datum toast_values[MaxTupleAttributeNumber];
1163 bool toast_isnull[MaxTupleAttributeNumber];
1164 bool toast_free[MaxTupleAttributeNumber];
1166 /* Build a temporary HeapTuple control structure */
1167 tmptup.t_len = tup_len;
1168 ItemPointerSetInvalid(&(tmptup.t_self));
1169 tmptup.t_tableOid = InvalidOid;
1170 tmptup.t_data = tup;
1173 * Break down the tuple into fields.
1175 Assert(numAttrs <= MaxTupleAttributeNumber);
1176 heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
1178 memset(toast_free, 0, numAttrs * sizeof(bool));
1180 for (i = 0; i < numAttrs; i++)
1183 * Look at non-null varlena attributes
1185 if (toast_isnull[i])
1187 else if (att[i]->attlen == -1)
1189 struct varlena *new_value;
1191 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1192 if (VARATT_IS_EXTERNAL(new_value) ||
1193 VARATT_IS_COMPRESSED(new_value))
1195 new_value = heap_tuple_untoast_attr(new_value);
1196 toast_values[i] = PointerGetDatum(new_value);
1197 toast_free[i] = true;
1203 * Calculate the new size of the tuple.
1205 * This should match the reconstruction code in toast_insert_or_update.
1207 new_header_len = SizeofHeapTupleHeader;
1209 new_header_len += BITMAPLEN(numAttrs);
1210 if (tup->t_infomask & HEAP_HASOID)
1211 new_header_len += sizeof(Oid);
1212 new_header_len = MAXALIGN(new_header_len);
1213 new_data_len = heap_compute_data_size(tupleDesc,
1214 toast_values, toast_isnull);
1215 new_tuple_len = new_header_len + new_data_len;
1217 new_data = (HeapTupleHeader) palloc0(new_tuple_len);
1220 * Copy the existing tuple header, but adjust natts and t_hoff.
1222 memcpy(new_data, tup, SizeofHeapTupleHeader);
1223 HeapTupleHeaderSetNatts(new_data, numAttrs);
1224 new_data->t_hoff = new_header_len;
1225 if (tup->t_infomask & HEAP_HASOID)
1226 HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(tup));
1228 /* Set the composite-Datum header fields correctly */
1229 HeapTupleHeaderSetDatumLength(new_data, new_tuple_len);
1230 HeapTupleHeaderSetTypeId(new_data, tupleDesc->tdtypeid);
1231 HeapTupleHeaderSetTypMod(new_data, tupleDesc->tdtypmod);
1233 /* Copy over the data, and fill the null bitmap if needed */
1234 heap_fill_tuple(tupleDesc,
1237 (char *) new_data + new_header_len,
1239 &(new_data->t_infomask),
1240 has_nulls ? new_data->t_bits : NULL);
1243 * Free allocated temp values
1245 for (i = 0; i < numAttrs; i++)
1247 pfree(DatumGetPointer(toast_values[i]));
1249 return PointerGetDatum(new_data);
1254 * toast_compress_datum -
1256 * Create a compressed version of a varlena datum
1258 * If we fail (ie, compressed result is actually bigger than original)
1259 * then return NULL. We must not use compressed data if it'd expand
1262 * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
1263 * copying them. But we can't handle external or compressed datums.
1267 toast_compress_datum(Datum value)
1269 struct varlena *tmp;
1270 int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
1273 Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
1274 Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
1277 * No point in wasting a palloc cycle if value size is out of the allowed
1278 * range for compression
1280 if (valsize < PGLZ_strategy_default->min_input_size ||
1281 valsize > PGLZ_strategy_default->max_input_size)
1282 return PointerGetDatum(NULL);
1284 tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
1285 TOAST_COMPRESS_HDRSZ);
1288 * We recheck the actual size even if pglz_compress() reports success,
1289 * because it might be satisfied with having saved as little as one byte
1290 * in the compressed data --- which could turn into a net loss once you
1291 * consider header and alignment padding. Worst case, the compressed
1292 * format might require three padding bytes (plus header, which is
1293 * included in VARSIZE(tmp)), whereas the uncompressed format would take
1294 * only one header byte and no padding if the value is short enough. So
1295 * we insist on a savings of more than 2 bytes to ensure we have a gain.
1297 len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)),
1299 TOAST_COMPRESS_RAWDATA(tmp),
1300 PGLZ_strategy_default);
1302 len + TOAST_COMPRESS_HDRSZ < valsize - 2)
1304 TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize);
1305 SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ);
1306 /* successful compression */
1307 return PointerGetDatum(tmp);
1311 /* incompressible data */
1313 return PointerGetDatum(NULL);
1319 * toast_get_valid_index
1321 * Get OID of valid index associated to given toast relation. A toast
1322 * relation can have only one valid index at the same time.
1325 toast_get_valid_index(Oid toastoid, LOCKMODE lock)
1330 Relation *toastidxs;
1333 /* Open the toast relation */
1334 toastrel = heap_open(toastoid, lock);
1336 /* Look for the valid index of the toast relation */
1337 validIndex = toast_open_indexes(toastrel,
1341 validIndexOid = RelationGetRelid(toastidxs[validIndex]);
1343 /* Close the toast relation and all its indexes */
1344 toast_close_indexes(toastidxs, num_indexes, lock);
1345 heap_close(toastrel, lock);
1347 return validIndexOid;
1352 * toast_save_datum -
1354 * Save one single datum into the secondary relation and return
1355 * a Datum reference for it.
1357 * rel: the main relation we're working with (not the toast rel!)
1358 * value: datum to be pushed to toast storage
1359 * oldexternal: if not NULL, toast pointer previously representing the datum
1360 * options: options to be passed to heap_insert() for toast rows
1364 toast_save_datum(Relation rel, Datum value,
1365 struct varlena * oldexternal, int options)
1368 Relation *toastidxs;
1370 TupleDesc toasttupDesc;
1373 CommandId mycid = GetCurrentCommandId(true);
1374 struct varlena *result;
1375 struct varatt_external toast_pointer;
1379 /* this is to make the union big enough for a chunk: */
1380 char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
1381 /* ensure union is aligned well enough: */
1385 int32 chunk_seq = 0;
1388 Pointer dval = DatumGetPointer(value);
1392 Assert(!VARATT_IS_EXTERNAL(value));
1395 * Open the toast relation and its indexes. We can use the index to check
1396 * uniqueness of the OID we assign to the toasted item, even though it has
1397 * additional columns besides OID.
1399 toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
1400 toasttupDesc = toastrel->rd_att;
1402 /* Open all the toast indexes and look for the valid one */
1403 validIndex = toast_open_indexes(toastrel,
1409 * Get the data pointer and length, and compute va_rawsize and va_extsize.
1411 * va_rawsize is the size of the equivalent fully uncompressed datum, so
1412 * we have to adjust for short headers.
1414 * va_extsize is the actual size of the data payload in the toast records.
1416 if (VARATT_IS_SHORT(dval))
1418 data_p = VARDATA_SHORT(dval);
1419 data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
1420 toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
1421 toast_pointer.va_extsize = data_todo;
1423 else if (VARATT_IS_COMPRESSED(dval))
1425 data_p = VARDATA(dval);
1426 data_todo = VARSIZE(dval) - VARHDRSZ;
1427 /* rawsize in a compressed datum is just the size of the payload */
1428 toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
1429 toast_pointer.va_extsize = data_todo;
1430 /* Assert that the numbers look like it's compressed */
1431 Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1435 data_p = VARDATA(dval);
1436 data_todo = VARSIZE(dval) - VARHDRSZ;
1437 toast_pointer.va_rawsize = VARSIZE(dval);
1438 toast_pointer.va_extsize = data_todo;
1442 * Insert the correct table OID into the result TOAST pointer.
1444 * Normally this is the actual OID of the target toast table, but during
1445 * table-rewriting operations such as CLUSTER, we have to insert the OID
1446 * of the table's real permanent toast table instead. rd_toastoid is set
1447 * if we have to substitute such an OID.
1449 if (OidIsValid(rel->rd_toastoid))
1450 toast_pointer.va_toastrelid = rel->rd_toastoid;
1452 toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
1455 * Choose an OID to use as the value ID for this toast value.
1457 * Normally we just choose an unused OID within the toast table. But
1458 * during table-rewriting operations where we are preserving an existing
1459 * toast table OID, we want to preserve toast value OIDs too. So, if
1460 * rd_toastoid is set and we had a prior external value from that same
1461 * toast table, re-use its value ID. If we didn't have a prior external
1462 * value (which is a corner case, but possible if the table's attstorage
1463 * options have been changed), we have to pick a value ID that doesn't
1464 * conflict with either new or existing toast value OIDs.
1466 if (!OidIsValid(rel->rd_toastoid))
1468 /* normal case: just choose an unused OID */
1469 toast_pointer.va_valueid =
1470 GetNewOidWithIndex(toastrel,
1471 RelationGetRelid(toastidxs[validIndex]),
1476 /* rewrite case: check to see if value was in old toast table */
1477 toast_pointer.va_valueid = InvalidOid;
1478 if (oldexternal != NULL)
1480 struct varatt_external old_toast_pointer;
1482 Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
1483 /* Must copy to access aligned fields */
1484 VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
1485 if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
1487 /* This value came from the old toast table; reuse its OID */
1488 toast_pointer.va_valueid = old_toast_pointer.va_valueid;
1491 * There is a corner case here: the table rewrite might have
1492 * to copy both live and recently-dead versions of a row, and
1493 * those versions could easily reference the same toast value.
1494 * When we copy the second or later version of such a row,
1495 * reusing the OID will mean we select an OID that's already
1496 * in the new toast table. Check for that, and if so, just
1497 * fall through without writing the data again.
1499 * While annoying and ugly-looking, this is a good thing
1500 * because it ensures that we wind up with only one copy of
1501 * the toast value when there is only one copy in the old
1502 * toast table. Before we detected this case, we'd have made
1503 * multiple copies, wasting space; and what's worse, the
1504 * copies belonging to already-deleted heap tuples would not
1505 * be reclaimed by VACUUM.
1507 if (toastrel_valueid_exists(toastrel,
1508 toast_pointer.va_valueid))
1510 /* Match, so short-circuit the data storage loop below */
1515 if (toast_pointer.va_valueid == InvalidOid)
1518 * new value; must choose an OID that doesn't conflict in either
1519 * old or new toast table
1523 toast_pointer.va_valueid =
1524 GetNewOidWithIndex(toastrel,
1525 RelationGetRelid(toastidxs[validIndex]),
1527 } while (toastid_valueid_exists(rel->rd_toastoid,
1528 toast_pointer.va_valueid));
1533 * Initialize constant parts of the tuple data
1535 t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
1536 t_values[2] = PointerGetDatum(&chunk_data);
1537 t_isnull[0] = false;
1538 t_isnull[1] = false;
1539 t_isnull[2] = false;
1542 * Split up the item into chunks
1544 while (data_todo > 0)
1548 CHECK_FOR_INTERRUPTS();
1551 * Calculate the size of this chunk
1553 chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
1556 * Build a tuple and store it
1558 t_values[1] = Int32GetDatum(chunk_seq++);
1559 SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
1560 memcpy(VARDATA(&chunk_data), data_p, chunk_size);
1561 toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
1563 heap_insert(toastrel, toasttup, mycid, options, NULL);
1566 * Create the index entry. We cheat a little here by not using
1567 * FormIndexDatum: this relies on the knowledge that the index columns
1568 * are the same as the initial columns of the table for all the
1571 * Note also that there had better not be any user-created index on
1572 * the TOAST table, since we don't bother to update anything else.
1574 for (i = 0; i < num_indexes; i++)
1576 /* Only index relations marked as ready can be updated */
1577 if (IndexIsReady(toastidxs[i]->rd_index))
1578 index_insert(toastidxs[i], t_values, t_isnull,
1579 &(toasttup->t_self),
1581 toastidxs[i]->rd_index->indisunique ?
1582 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
1588 heap_freetuple(toasttup);
1591 * Move on to next chunk
1593 data_todo -= chunk_size;
1594 data_p += chunk_size;
1598 * Done - close toast relation and its indexes
1600 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1601 heap_close(toastrel, RowExclusiveLock);
1604 * Create the TOAST pointer value that we'll return
1606 result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
1607 SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
1608 memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
1610 return PointerGetDatum(result);
1615 * toast_delete_datum -
1617 * Delete a single external stored value.
1621 toast_delete_datum(Relation rel, Datum value)
1623 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
1624 struct varatt_external toast_pointer;
1626 Relation *toastidxs;
1627 ScanKeyData toastkey;
1628 SysScanDesc toastscan;
1633 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1636 /* Must copy to access aligned fields */
1637 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1640 * Open the toast relation and its indexes
1642 toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock);
1644 /* Fetch valid relation used for process */
1645 validIndex = toast_open_indexes(toastrel,
1651 * Setup a scan key to find chunks with matching va_valueid
1653 ScanKeyInit(&toastkey,
1655 BTEqualStrategyNumber, F_OIDEQ,
1656 ObjectIdGetDatum(toast_pointer.va_valueid));
1659 * Find all the chunks. (We don't actually care whether we see them in
1660 * sequence or not, but since we've already locked the index we might as
1661 * well use systable_beginscan_ordered.)
1663 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
1664 SnapshotToast, 1, &toastkey);
1665 while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1668 * Have a chunk, delete it
1670 simple_heap_delete(toastrel, &toasttup->t_self);
1674 * End scan and close relations
1676 systable_endscan_ordered(toastscan);
1677 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1678 heap_close(toastrel, RowExclusiveLock);
1683 * toastrel_valueid_exists -
1685 * Test whether a toast value with the given ID exists in the toast relation
1689 toastrel_valueid_exists(Relation toastrel, Oid valueid)
1691 bool result = false;
1692 ScanKeyData toastkey;
1693 SysScanDesc toastscan;
1696 Relation *toastidxs;
1698 /* Fetch a valid index relation */
1699 validIndex = toast_open_indexes(toastrel,
1705 * Setup a scan key to find chunks with matching va_valueid
1707 ScanKeyInit(&toastkey,
1709 BTEqualStrategyNumber, F_OIDEQ,
1710 ObjectIdGetDatum(valueid));
1713 * Is there any such chunk?
1715 toastscan = systable_beginscan(toastrel,
1716 RelationGetRelid(toastidxs[validIndex]),
1717 true, SnapshotToast, 1, &toastkey);
1719 if (systable_getnext(toastscan) != NULL)
1722 systable_endscan(toastscan);
1725 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1731 * toastid_valueid_exists -
1733 * As above, but work from toast rel's OID not an open relation
1737 toastid_valueid_exists(Oid toastrelid, Oid valueid)
1742 toastrel = heap_open(toastrelid, AccessShareLock);
1744 result = toastrel_valueid_exists(toastrel, valueid);
1746 heap_close(toastrel, AccessShareLock);
1753 * toast_fetch_datum -
1755 * Reconstruct an in memory Datum from the chunks saved
1756 * in the toast relation
1759 static struct varlena *
1760 toast_fetch_datum(struct varlena * attr)
1763 Relation *toastidxs;
1764 ScanKeyData toastkey;
1765 SysScanDesc toastscan;
1767 TupleDesc toasttupDesc;
1768 struct varlena *result;
1769 struct varatt_external toast_pointer;
1781 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1782 elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
1784 /* Must copy to access aligned fields */
1785 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1787 ressize = toast_pointer.va_extsize;
1788 numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1790 result = (struct varlena *) palloc(ressize + VARHDRSZ);
1792 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1793 SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
1795 SET_VARSIZE(result, ressize + VARHDRSZ);
1798 * Open the toast relation and its indexes
1800 toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
1801 toasttupDesc = toastrel->rd_att;
1803 /* Look for the valid index of the toast relation */
1804 validIndex = toast_open_indexes(toastrel,
1810 * Setup a scan key to fetch from the index by va_valueid
1812 ScanKeyInit(&toastkey,
1814 BTEqualStrategyNumber, F_OIDEQ,
1815 ObjectIdGetDatum(toast_pointer.va_valueid));
1818 * Read the chunks by index
1820 * Note that because the index is actually on (valueid, chunkidx) we will
1821 * see the chunks in chunkidx order, even though we didn't explicitly ask
1826 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
1827 SnapshotToast, 1, &toastkey);
1828 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1831 * Have a chunk, extract the sequence number and the data
1833 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
1835 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
1837 if (!VARATT_IS_EXTENDED(chunk))
1839 chunksize = VARSIZE(chunk) - VARHDRSZ;
1840 chunkdata = VARDATA(chunk);
1842 else if (VARATT_IS_SHORT(chunk))
1844 /* could happen due to heap_form_tuple doing its thing */
1845 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1846 chunkdata = VARDATA_SHORT(chunk);
1850 /* should never happen */
1851 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
1852 toast_pointer.va_valueid,
1853 RelationGetRelationName(toastrel));
1854 chunksize = 0; /* keep compiler quiet */
1859 * Some checks on the data we've found
1861 if (residx != nextidx)
1862 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
1864 toast_pointer.va_valueid,
1865 RelationGetRelationName(toastrel));
1866 if (residx < numchunks - 1)
1868 if (chunksize != TOAST_MAX_CHUNK_SIZE)
1869 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
1870 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
1872 toast_pointer.va_valueid,
1873 RelationGetRelationName(toastrel));
1875 else if (residx == numchunks - 1)
1877 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
1878 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
1880 (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
1882 toast_pointer.va_valueid,
1883 RelationGetRelationName(toastrel));
1886 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
1889 toast_pointer.va_valueid,
1890 RelationGetRelationName(toastrel));
1893 * Copy the data into proper place in our result
1895 memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
1903 * Final checks that we successfully fetched the datum
1905 if (nextidx != numchunks)
1906 elog(ERROR, "missing chunk number %d for toast value %u in %s",
1908 toast_pointer.va_valueid,
1909 RelationGetRelationName(toastrel));
1912 * End scan and close relations
1914 systable_endscan_ordered(toastscan);
1915 toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
1916 heap_close(toastrel, AccessShareLock);
1922 * toast_fetch_datum_slice -
1924 * Reconstruct a segment of a Datum from the chunks saved
1925 * in the toast relation
1928 static struct varlena *
1929 toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
1932 Relation *toastidxs;
1933 ScanKeyData toastkey[3];
1935 SysScanDesc toastscan;
1937 TupleDesc toasttupDesc;
1938 struct varlena *result;
1939 struct varatt_external toast_pointer;
1958 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1959 elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
1961 /* Must copy to access aligned fields */
1962 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1965 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
1966 * we can't return a compressed datum which is meaningful to toast later
1968 Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1970 attrsize = toast_pointer.va_extsize;
1971 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1973 if (sliceoffset >= attrsize)
1979 if (((sliceoffset + length) > attrsize) || length < 0)
1980 length = attrsize - sliceoffset;
1982 result = (struct varlena *) palloc(length + VARHDRSZ);
1984 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1985 SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
1987 SET_VARSIZE(result, length + VARHDRSZ);
1990 return result; /* Can save a lot of work at this point! */
1992 startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
1993 endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
1994 numchunks = (endchunk - startchunk) + 1;
1996 startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
1997 endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
2000 * Open the toast relation and its indexes
2002 toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
2003 toasttupDesc = toastrel->rd_att;
2005 /* Look for the valid index of toast relation */
2006 validIndex = toast_open_indexes(toastrel,
2012 * Setup a scan key to fetch from the index. This is either two keys or
2013 * three depending on the number of chunks.
2015 ScanKeyInit(&toastkey[0],
2017 BTEqualStrategyNumber, F_OIDEQ,
2018 ObjectIdGetDatum(toast_pointer.va_valueid));
2021 * Use equality condition for one chunk, a range condition otherwise:
2025 ScanKeyInit(&toastkey[1],
2027 BTEqualStrategyNumber, F_INT4EQ,
2028 Int32GetDatum(startchunk));
2033 ScanKeyInit(&toastkey[1],
2035 BTGreaterEqualStrategyNumber, F_INT4GE,
2036 Int32GetDatum(startchunk));
2037 ScanKeyInit(&toastkey[2],
2039 BTLessEqualStrategyNumber, F_INT4LE,
2040 Int32GetDatum(endchunk));
2045 * Read the chunks by index
2047 * The index is on (valueid, chunkidx) so they will come in order
2049 nextidx = startchunk;
2050 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
2051 SnapshotToast, nscankeys, toastkey);
2052 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
2055 * Have a chunk, extract the sequence number and the data
2057 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
2059 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
2061 if (!VARATT_IS_EXTENDED(chunk))
2063 chunksize = VARSIZE(chunk) - VARHDRSZ;
2064 chunkdata = VARDATA(chunk);
2066 else if (VARATT_IS_SHORT(chunk))
2068 /* could happen due to heap_form_tuple doing its thing */
2069 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
2070 chunkdata = VARDATA_SHORT(chunk);
2074 /* should never happen */
2075 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
2076 toast_pointer.va_valueid,
2077 RelationGetRelationName(toastrel));
2078 chunksize = 0; /* keep compiler quiet */
2083 * Some checks on the data we've found
2085 if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
2086 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
2088 toast_pointer.va_valueid,
2089 RelationGetRelationName(toastrel));
2090 if (residx < totalchunks - 1)
2092 if (chunksize != TOAST_MAX_CHUNK_SIZE)
2093 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
2094 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
2095 residx, totalchunks,
2096 toast_pointer.va_valueid,
2097 RelationGetRelationName(toastrel));
2099 else if (residx == totalchunks - 1)
2101 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
2102 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
2104 (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
2106 toast_pointer.va_valueid,
2107 RelationGetRelationName(toastrel));
2110 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
2113 toast_pointer.va_valueid,
2114 RelationGetRelationName(toastrel));
2117 * Copy the data into proper place in our result
2120 chcpyend = chunksize - 1;
2121 if (residx == startchunk)
2122 chcpystrt = startoffset;
2123 if (residx == endchunk)
2124 chcpyend = endoffset;
2126 memcpy(VARDATA(result) +
2127 (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
2128 chunkdata + chcpystrt,
2129 (chcpyend - chcpystrt) + 1);
2135 * Final checks that we successfully fetched the datum
2137 if (nextidx != (endchunk + 1))
2138 elog(ERROR, "missing chunk number %d for toast value %u in %s",
2140 toast_pointer.va_valueid,
2141 RelationGetRelationName(toastrel));
2144 * End scan and close relations
2146 systable_endscan_ordered(toastscan);
2147 toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
2148 heap_close(toastrel, AccessShareLock);
2154 * toast_decompress_datum -
2156 * Decompress a compressed version of a varlena datum
2158 static struct varlena *
2159 toast_decompress_datum(struct varlena * attr)
2161 struct varlena *result;
2163 Assert(VARATT_IS_COMPRESSED(attr));
2165 result = (struct varlena *)
2166 palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
2167 SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
2169 if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
2170 VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
2172 TOAST_COMPRESS_RAWSIZE(attr)) < 0)
2173 elog(ERROR, "compressed data is corrupted");
2180 * toast_open_indexes
2182 * Get an array of the indexes associated to the given toast relation
2183 * and return as well the position of the valid index used by the toast
2184 * relation in this array. It is the responsibility of the caller of this
2185 * function to close the indexes as well as free them.
2188 toast_open_indexes(Relation toastrel,
2190 Relation **toastidxs,
2199 /* Get index list of the toast relation */
2200 indexlist = RelationGetIndexList(toastrel);
2201 Assert(indexlist != NIL);
2203 *num_indexes = list_length(indexlist);
2205 /* Open all the index relations */
2206 *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
2207 foreach(lc, indexlist)
2208 (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
2210 /* Fetch the first valid index in list */
2211 for (i = 0; i < *num_indexes; i++)
2213 Relation toastidx = (*toastidxs)[i];
2215 if (toastidx->rd_index->indisvalid)
2224 * Free index list, not necessary anymore as relations are opened and a
2225 * valid index has been found.
2227 list_free(indexlist);
2230 * The toast relation should have one valid index, so something is going
2231 * wrong if there is nothing.
2234 elog(ERROR, "no valid index found for toast relation with Oid %u",
2235 RelationGetRelid(toastrel));
2241 * toast_close_indexes
2243 * Close an array of indexes for a toast relation and free it. This should
2244 * be called for a set of indexes opened previously with toast_open_indexes.
2247 toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
2251 /* Close relations and clean up things */
2252 for (i = 0; i < num_indexes; i++)
2253 index_close(toastidxs[i], lock);