/*-------------------------------------------------------------------------
 *
 * tuptoaster.c
 *	  Support routines for external and compressed storage of
 *	  variable size attributes.
 *
 * Copyright (c) 2000-2013, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/access/heap/tuptoaster.c
 *
 * INTERFACE ROUTINES
 *		toast_insert_or_update -
 *			Try to make a given tuple fit into one page by compressing
 *			or moving off attributes
 *
 *		toast_delete -
 *			Reclaim toast storage when a tuple is deleted
 *
 *		heap_tuple_untoast_attr -
 *			Fetch back a given value from the "secondary" relation
 *
 *-------------------------------------------------------------------------
 */
33 #include "access/genam.h"
34 #include "access/heapam.h"
35 #include "access/tuptoaster.h"
36 #include "access/xact.h"
37 #include "catalog/catalog.h"
38 #include "utils/fmgroids.h"
39 #include "utils/pg_lzcompress.h"
40 #include "utils/rel.h"
41 #include "utils/typcache.h"
42 #include "utils/tqual.h"
/*
 * Deciding whether an externally stored value is compressed means comparing
 * two sizes kept in the toast pointer: va_extsize, the number of bytes
 * actually stored out of line (no header counted), and va_rawsize, the size
 * of the original uncompressed datum (VARHDRSZ counted).  Compression is
 * only kept when it really saves space, so once the header is subtracted
 * the stored size is either equal to the raw size or smaller than it.
 */
#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
	(((toast_pointer).va_rawsize - VARHDRSZ) > (toast_pointer).va_extsize)
/*
 * Copy the (possibly unaligned) payload of an EXTERNAL datum into a local
 * toast-pointer struct supplied by the caller.
 *
 * Conceptually this is nothing more than a memcpy.  Some gcc versions have
 * been seen to emit broken code that assumes the datum is aligned; going
 * through an explicit intermediate "varattrib_1b_e *" variable appears to
 * avoid that, so do not "simplify" the cast away.
 *
 * The size assertion ties the struct type of toast_pointer to the kind of
 * external datum being read.
 */
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
do { \
	varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
	Assert(VARATT_IS_EXTERNAL(attre)); \
	Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \
	memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
} while (0)
73 static void toast_delete_datum(Relation rel, Datum value);
74 static Datum toast_save_datum(Relation rel, Datum value,
75 struct varlena * oldexternal, int options);
76 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
77 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
78 static struct varlena *toast_fetch_datum(struct varlena * attr);
79 static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
80 int32 sliceoffset, int32 length);
84 * heap_tuple_fetch_attr -
86 * Public entry point to get back a toasted value from
87 * external source (possibly still in compressed format).
89 * This will return a datum that contains all the data internally, ie, not
90 * relying on external storage or memory, but it can still be compressed or
91 * have a short header.
95 heap_tuple_fetch_attr(struct varlena * attr)
97 struct varlena *result;
99 if (VARATT_IS_EXTERNAL_ONDISK(attr))
102 * This is an external stored plain value
104 result = toast_fetch_datum(attr);
106 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
109 * copy into the caller's memory context. That's not required in all
110 * cases but sufficient for now since this is mainly used when we need
111 * to persist a Datum for unusually long time, like in a HOLD cursor.
113 struct varatt_indirect redirect;
114 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
115 attr = (struct varlena *)redirect.pointer;
117 /* nested indirect Datums aren't allowed */
118 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
120 /* doesn't make much sense, but better handle it */
121 if (VARATT_IS_EXTERNAL_ONDISK(attr))
122 return heap_tuple_fetch_attr(attr);
124 /* copy datum verbatim */
125 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
126 memcpy(result, attr, VARSIZE_ANY(attr));
131 * This is a plain value inside of the main tuple - why am I called?
141 * heap_tuple_untoast_attr -
143 * Public entry point to get back a toasted value from compression
144 * or external storage.
148 heap_tuple_untoast_attr(struct varlena * attr)
150 if (VARATT_IS_EXTERNAL_ONDISK(attr))
153 * This is an externally stored datum --- fetch it back from there
155 attr = toast_fetch_datum(attr);
156 /* If it's compressed, decompress it */
157 if (VARATT_IS_COMPRESSED(attr))
159 PGLZ_Header *tmp = (PGLZ_Header *) attr;
161 attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
162 SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
163 pglz_decompress(tmp, VARDATA(attr));
167 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
169 struct varatt_indirect redirect;
170 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
171 attr = (struct varlena *)redirect.pointer;
173 /* nested indirect Datums aren't allowed */
174 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
176 attr = heap_tuple_untoast_attr(attr);
178 else if (VARATT_IS_COMPRESSED(attr))
181 * This is a compressed value inside of the main tuple
183 PGLZ_Header *tmp = (PGLZ_Header *) attr;
185 attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
186 SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
187 pglz_decompress(tmp, VARDATA(attr));
189 else if (VARATT_IS_SHORT(attr))
192 * This is a short-header varlena --- convert to 4-byte header format
194 Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
195 Size new_size = data_size + VARHDRSZ;
196 struct varlena *new_attr;
198 new_attr = (struct varlena *) palloc(new_size);
199 SET_VARSIZE(new_attr, new_size);
200 memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
209 * heap_tuple_untoast_attr_slice -
211 * Public entry point to get back part of a toasted value
212 * from compression or external storage.
216 heap_tuple_untoast_attr_slice(struct varlena * attr,
217 int32 sliceoffset, int32 slicelength)
219 struct varlena *preslice;
220 struct varlena *result;
224 if (VARATT_IS_EXTERNAL_ONDISK(attr))
226 struct varatt_external toast_pointer;
228 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
230 /* fast path for non-compressed external datums */
231 if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
232 return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
234 /* fetch it back (compressed marker will get set automatically) */
235 preslice = toast_fetch_datum(attr);
237 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
239 struct varatt_indirect redirect;
240 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
242 /* nested indirect Datums aren't allowed */
243 Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
245 return heap_tuple_untoast_attr_slice(redirect.pointer,
246 sliceoffset, slicelength);
251 if (VARATT_IS_COMPRESSED(preslice))
253 PGLZ_Header *tmp = (PGLZ_Header *) preslice;
254 Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ;
256 preslice = (struct varlena *) palloc(size);
257 SET_VARSIZE(preslice, size);
258 pglz_decompress(tmp, VARDATA(preslice));
260 if (tmp != (PGLZ_Header *) attr)
264 if (VARATT_IS_SHORT(preslice))
266 attrdata = VARDATA_SHORT(preslice);
267 attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
271 attrdata = VARDATA(preslice);
272 attrsize = VARSIZE(preslice) - VARHDRSZ;
275 /* slicing of datum for compressed cases and plain value */
277 if (sliceoffset >= attrsize)
283 if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
284 slicelength = attrsize - sliceoffset;
286 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
287 SET_VARSIZE(result, slicelength + VARHDRSZ);
289 memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
291 if (preslice != attr)
299 * toast_raw_datum_size -
301 * Return the raw (detoasted) size of a varlena datum
302 * (including the VARHDRSZ header)
306 toast_raw_datum_size(Datum value)
308 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
311 if (VARATT_IS_EXTERNAL_ONDISK(attr))
313 /* va_rawsize is the size of the original datum -- including header */
314 struct varatt_external toast_pointer;
316 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
317 result = toast_pointer.va_rawsize;
319 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
321 struct varatt_indirect toast_pointer;
322 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
324 /* nested indirect Datums aren't allowed */
325 Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
327 return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
329 else if (VARATT_IS_COMPRESSED(attr))
331 /* here, va_rawsize is just the payload size */
332 result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
334 else if (VARATT_IS_SHORT(attr))
337 * we have to normalize the header length to VARHDRSZ or else the
338 * callers of this function will be confused.
340 result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
344 /* plain untoasted datum */
345 result = VARSIZE(attr);
353 * Return the physical storage size (possibly compressed) of a varlena datum
357 toast_datum_size(Datum value)
359 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
362 if (VARATT_IS_EXTERNAL_ONDISK(attr))
365 * Attribute is stored externally - return the extsize whether
366 * compressed or not. We do not count the size of the toast pointer
369 struct varatt_external toast_pointer;
371 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
372 result = toast_pointer.va_extsize;
374 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
376 struct varatt_indirect toast_pointer;
377 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
379 /* nested indirect Datums aren't allowed */
380 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
382 return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
384 else if (VARATT_IS_SHORT(attr))
386 result = VARSIZE_SHORT(attr);
391 * Attribute is stored inline either compressed or not, just calculate
392 * the size of the datum in either case.
394 result = VARSIZE(attr);
403 * Cascaded delete toast-entries on DELETE
407 toast_delete(Relation rel, HeapTuple oldtup)
410 Form_pg_attribute *att;
413 Datum toast_values[MaxHeapAttributeNumber];
414 bool toast_isnull[MaxHeapAttributeNumber];
417 * We should only ever be called for tuples of plain relations or
418 * materialized views --- recursing on a toast rel is bad news.
420 Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
421 rel->rd_rel->relkind == RELKIND_MATVIEW);
424 * Get the tuple descriptor and break down the tuple into fields.
426 * NOTE: it's debatable whether to use heap_deform_tuple() here or just
427 * heap_getattr() only the varlena columns. The latter could win if there
428 * are few varlena columns and many non-varlena ones. However,
429 * heap_deform_tuple costs only O(N) while the heap_getattr way would cost
430 * O(N^2) if there are many varlena columns, so it seems better to err on
431 * the side of linear cost. (We won't even be here unless there's at
432 * least one varlena column, by the way.)
434 tupleDesc = rel->rd_att;
435 att = tupleDesc->attrs;
436 numAttrs = tupleDesc->natts;
438 Assert(numAttrs <= MaxHeapAttributeNumber);
439 heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull);
442 * Check for external stored attributes and delete them from the secondary
445 for (i = 0; i < numAttrs; i++)
447 if (att[i]->attlen == -1)
449 Datum value = toast_values[i];
453 else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
454 toast_delete_datum(rel, value);
455 else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value)))
456 elog(ERROR, "attempt to delete tuple containing indirect datums");
463 * toast_insert_or_update -
465 * Delete no-longer-used toast-entries and create new ones to
466 * make the new tuple fit on INSERT or UPDATE
469 * newtup: the candidate new tuple to be inserted
470 * oldtup: the old row version for UPDATE, or NULL for INSERT
471 * options: options to be passed to heap_insert() for toast rows
473 * either newtup if no toasting is needed, or a palloc'd modified tuple
474 * that is what should actually get stored
476 * NOTE: neither newtup nor oldtup will be modified. This is a change
477 * from the pre-8.1 API of this routine.
481 toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
484 HeapTuple result_tuple;
486 Form_pg_attribute *att;
490 bool need_change = false;
491 bool need_free = false;
492 bool need_delold = false;
493 bool has_nulls = false;
498 char toast_action[MaxHeapAttributeNumber];
499 bool toast_isnull[MaxHeapAttributeNumber];
500 bool toast_oldisnull[MaxHeapAttributeNumber];
501 Datum toast_values[MaxHeapAttributeNumber];
502 Datum toast_oldvalues[MaxHeapAttributeNumber];
503 struct varlena *toast_oldexternal[MaxHeapAttributeNumber];
504 int32 toast_sizes[MaxHeapAttributeNumber];
505 bool toast_free[MaxHeapAttributeNumber];
506 bool toast_delold[MaxHeapAttributeNumber];
509 * We should only ever be called for tuples of plain relations ---
510 * recursing on a toast rel is bad news.
512 Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
513 rel->rd_rel->relkind == RELKIND_MATVIEW);
516 * Get the tuple descriptor and break down the tuple(s) into fields.
518 tupleDesc = rel->rd_att;
519 att = tupleDesc->attrs;
520 numAttrs = tupleDesc->natts;
522 Assert(numAttrs <= MaxHeapAttributeNumber);
523 heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
525 heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);
528 * Then collect information about the values given
530 * NOTE: toast_action[i] can have these values:
531 * ' ' default handling
532 * 'p' already processed --- don't touch it
533 * 'x' incompressible, but OK to move off
535 * NOTE: toast_sizes[i] is only made valid for varlena attributes with
536 * toast_action[i] different from 'p'.
539 memset(toast_action, ' ', numAttrs * sizeof(char));
540 memset(toast_oldexternal, 0, numAttrs * sizeof(struct varlena *));
541 memset(toast_free, 0, numAttrs * sizeof(bool));
542 memset(toast_delold, 0, numAttrs * sizeof(bool));
544 for (i = 0; i < numAttrs; i++)
546 struct varlena *old_value;
547 struct varlena *new_value;
552 * For UPDATE get the old and new values of this attribute
554 old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]);
555 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
558 * If the old value is stored on disk, check if it has changed so
559 * we have to delete it later.
561 if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
562 VARATT_IS_EXTERNAL_ONDISK(old_value))
564 if (toast_isnull[i] || !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
565 memcmp((char *) old_value, (char *) new_value,
566 VARSIZE_EXTERNAL(old_value)) != 0)
569 * The old external stored value isn't needed any more
572 toast_delold[i] = true;
578 * This attribute isn't changed by this update so we reuse
579 * the original reference to the old value in the new
582 toast_action[i] = 'p';
590 * For INSERT simply get the new value
592 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
596 * Handle NULL attributes
600 toast_action[i] = 'p';
606 * Now look at varlena attributes
608 if (att[i]->attlen == -1)
611 * If the table's attribute says PLAIN always, force it so.
613 if (att[i]->attstorage == 'p')
614 toast_action[i] = 'p';
617 * We took care of UPDATE above, so any external value we find
618 * still in the tuple must be someone else's we cannot reuse.
619 * Fetch it back (without decompression, unless we are forcing
620 * PLAIN storage). If necessary, we'll push it out as a new
621 * external value below.
623 if (VARATT_IS_EXTERNAL(new_value))
625 toast_oldexternal[i] = new_value;
626 if (att[i]->attstorage == 'p')
627 new_value = heap_tuple_untoast_attr(new_value);
629 new_value = heap_tuple_fetch_attr(new_value);
630 toast_values[i] = PointerGetDatum(new_value);
631 toast_free[i] = true;
637 * Remember the size of this attribute
639 toast_sizes[i] = VARSIZE_ANY(new_value);
644 * Not a varlena attribute, plain storage always
646 toast_action[i] = 'p';
651 * Compress and/or save external until data fits into target length
653 * 1: Inline compress attributes with attstorage 'x', and store very
654 * large attributes with attstorage 'x' or 'e' external immediately
655 * 2: Store attributes with attstorage 'x' or 'e' external
656 * 3: Inline compress attributes with attstorage 'm'
657 * 4: Store attributes with attstorage 'm' external
661 /* compute header overhead --- this should match heap_form_tuple() */
662 hoff = offsetof(HeapTupleHeaderData, t_bits);
664 hoff += BITMAPLEN(numAttrs);
665 if (newtup->t_data->t_infomask & HEAP_HASOID)
667 hoff = MAXALIGN(hoff);
668 /* now convert to a limit on the tuple data size */
669 maxDataLen = TOAST_TUPLE_TARGET - hoff;
672 * Look for attributes with attstorage 'x' to compress. Also find large
673 * attributes with attstorage 'x' or 'e', and store them external.
675 while (heap_compute_data_size(tupleDesc,
676 toast_values, toast_isnull) > maxDataLen)
678 int biggest_attno = -1;
679 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
684 * Search for the biggest yet unprocessed internal attribute
686 for (i = 0; i < numAttrs; i++)
688 if (toast_action[i] != ' ')
690 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
691 continue; /* can't happen, toast_action would be 'p' */
692 if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
694 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
696 if (toast_sizes[i] > biggest_size)
699 biggest_size = toast_sizes[i];
703 if (biggest_attno < 0)
707 * Attempt to compress it inline, if it has attstorage 'x'
710 if (att[i]->attstorage == 'x')
712 old_value = toast_values[i];
713 new_value = toast_compress_datum(old_value);
715 if (DatumGetPointer(new_value) != NULL)
717 /* successful compression */
719 pfree(DatumGetPointer(old_value));
720 toast_values[i] = new_value;
721 toast_free[i] = true;
722 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
728 /* incompressible, ignore on subsequent compression passes */
729 toast_action[i] = 'x';
734 /* has attstorage 'e', ignore on subsequent compression passes */
735 toast_action[i] = 'x';
739 * If this value is by itself more than maxDataLen (after compression
740 * if any), push it out to the toast table immediately, if possible.
741 * This avoids uselessly compressing other fields in the common case
742 * where we have one long field and several short ones.
744 * XXX maybe the threshold should be less than maxDataLen?
746 if (toast_sizes[i] > maxDataLen &&
747 rel->rd_rel->reltoastrelid != InvalidOid)
749 old_value = toast_values[i];
750 toast_action[i] = 'p';
751 toast_values[i] = toast_save_datum(rel, toast_values[i],
752 toast_oldexternal[i], options);
754 pfree(DatumGetPointer(old_value));
755 toast_free[i] = true;
762 * Second we look for attributes of attstorage 'x' or 'e' that are still
763 * inline. But skip this if there's no toast table to push them to.
765 while (heap_compute_data_size(tupleDesc,
766 toast_values, toast_isnull) > maxDataLen &&
767 rel->rd_rel->reltoastrelid != InvalidOid)
769 int biggest_attno = -1;
770 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
774 * Search for the biggest yet inlined attribute with
775 * attstorage equals 'x' or 'e'
778 for (i = 0; i < numAttrs; i++)
780 if (toast_action[i] == 'p')
782 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
783 continue; /* can't happen, toast_action would be 'p' */
784 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
786 if (toast_sizes[i] > biggest_size)
789 biggest_size = toast_sizes[i];
793 if (biggest_attno < 0)
797 * Store this external
800 old_value = toast_values[i];
801 toast_action[i] = 'p';
802 toast_values[i] = toast_save_datum(rel, toast_values[i],
803 toast_oldexternal[i], options);
805 pfree(DatumGetPointer(old_value));
806 toast_free[i] = true;
813 * Round 3 - this time we take attributes with storage 'm' into
816 while (heap_compute_data_size(tupleDesc,
817 toast_values, toast_isnull) > maxDataLen)
819 int biggest_attno = -1;
820 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
825 * Search for the biggest yet uncompressed internal attribute
827 for (i = 0; i < numAttrs; i++)
829 if (toast_action[i] != ' ')
831 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
832 continue; /* can't happen, toast_action would be 'p' */
833 if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
835 if (att[i]->attstorage != 'm')
837 if (toast_sizes[i] > biggest_size)
840 biggest_size = toast_sizes[i];
844 if (biggest_attno < 0)
848 * Attempt to compress it inline
851 old_value = toast_values[i];
852 new_value = toast_compress_datum(old_value);
854 if (DatumGetPointer(new_value) != NULL)
856 /* successful compression */
858 pfree(DatumGetPointer(old_value));
859 toast_values[i] = new_value;
860 toast_free[i] = true;
861 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
867 /* incompressible, ignore on subsequent compression passes */
868 toast_action[i] = 'x';
873 * Finally we store attributes of type 'm' externally. At this point we
874 * increase the target tuple size, so that 'm' attributes aren't stored
875 * externally unless really necessary.
877 maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;
879 while (heap_compute_data_size(tupleDesc,
880 toast_values, toast_isnull) > maxDataLen &&
881 rel->rd_rel->reltoastrelid != InvalidOid)
883 int biggest_attno = -1;
884 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
888 * Search for the biggest yet inlined attribute with
892 for (i = 0; i < numAttrs; i++)
894 if (toast_action[i] == 'p')
896 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
897 continue; /* can't happen, toast_action would be 'p' */
898 if (att[i]->attstorage != 'm')
900 if (toast_sizes[i] > biggest_size)
903 biggest_size = toast_sizes[i];
907 if (biggest_attno < 0)
911 * Store this external
914 old_value = toast_values[i];
915 toast_action[i] = 'p';
916 toast_values[i] = toast_save_datum(rel, toast_values[i],
917 toast_oldexternal[i], options);
919 pfree(DatumGetPointer(old_value));
920 toast_free[i] = true;
927 * In the case we toasted any values, we need to build a new heap tuple
928 * with the changed values.
932 HeapTupleHeader olddata = newtup->t_data;
933 HeapTupleHeader new_data;
934 int32 new_header_len;
939 * Calculate the new size of the tuple.
941 * Note: we used to assume here that the old tuple's t_hoff must equal
942 * the new_header_len value, but that was incorrect. The old tuple
943 * might have a smaller-than-current natts, if there's been an ALTER
944 * TABLE ADD COLUMN since it was stored; and that would lead to a
945 * different conclusion about the size of the null bitmap, or even
946 * whether there needs to be one at all.
948 new_header_len = offsetof(HeapTupleHeaderData, t_bits);
950 new_header_len += BITMAPLEN(numAttrs);
951 if (olddata->t_infomask & HEAP_HASOID)
952 new_header_len += sizeof(Oid);
953 new_header_len = MAXALIGN(new_header_len);
954 new_data_len = heap_compute_data_size(tupleDesc,
955 toast_values, toast_isnull);
956 new_tuple_len = new_header_len + new_data_len;
959 * Allocate and zero the space needed, and fill HeapTupleData fields.
961 result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_tuple_len);
962 result_tuple->t_len = new_tuple_len;
963 result_tuple->t_self = newtup->t_self;
964 result_tuple->t_tableOid = newtup->t_tableOid;
965 new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
966 result_tuple->t_data = new_data;
969 * Copy the existing tuple header, but adjust natts and t_hoff.
971 memcpy(new_data, olddata, offsetof(HeapTupleHeaderData, t_bits));
972 HeapTupleHeaderSetNatts(new_data, numAttrs);
973 new_data->t_hoff = new_header_len;
974 if (olddata->t_infomask & HEAP_HASOID)
975 HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(olddata));
977 /* Copy over the data, and fill the null bitmap if needed */
978 heap_fill_tuple(tupleDesc,
981 (char *) new_data + new_header_len,
983 &(new_data->t_infomask),
984 has_nulls ? new_data->t_bits : NULL);
987 result_tuple = newtup;
990 * Free allocated temp values
993 for (i = 0; i < numAttrs; i++)
995 pfree(DatumGetPointer(toast_values[i]));
998 * Delete external values from the old tuple
1001 for (i = 0; i < numAttrs; i++)
1002 if (toast_delold[i])
1003 toast_delete_datum(rel, toast_oldvalues[i]);
1005 return result_tuple;
1010 * toast_flatten_tuple -
1012 * "Flatten" a tuple to contain no out-of-line toasted fields.
1013 * (This does not eliminate compressed or short-header datums.)
1017 toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
1019 HeapTuple new_tuple;
1020 Form_pg_attribute *att = tupleDesc->attrs;
1021 int numAttrs = tupleDesc->natts;
1023 Datum toast_values[MaxTupleAttributeNumber];
1024 bool toast_isnull[MaxTupleAttributeNumber];
1025 bool toast_free[MaxTupleAttributeNumber];
1028 * Break down the tuple into fields.
1030 Assert(numAttrs <= MaxTupleAttributeNumber);
1031 heap_deform_tuple(tup, tupleDesc, toast_values, toast_isnull);
1033 memset(toast_free, 0, numAttrs * sizeof(bool));
1035 for (i = 0; i < numAttrs; i++)
1038 * Look at non-null varlena attributes
1040 if (!toast_isnull[i] && att[i]->attlen == -1)
1042 struct varlena *new_value;
1044 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1045 if (VARATT_IS_EXTERNAL(new_value))
1047 new_value = toast_fetch_datum(new_value);
1048 toast_values[i] = PointerGetDatum(new_value);
1049 toast_free[i] = true;
1055 * Form the reconfigured tuple.
1057 new_tuple = heap_form_tuple(tupleDesc, toast_values, toast_isnull);
1060 * Be sure to copy the tuple's OID and identity fields. We also make a
1061 * point of copying visibility info, just in case anybody looks at those
1062 * fields in a syscache entry.
1064 if (tupleDesc->tdhasoid)
1065 HeapTupleSetOid(new_tuple, HeapTupleGetOid(tup));
1067 new_tuple->t_self = tup->t_self;
1068 new_tuple->t_tableOid = tup->t_tableOid;
1070 new_tuple->t_data->t_choice = tup->t_data->t_choice;
1071 new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
1072 new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
1073 new_tuple->t_data->t_infomask |=
1074 tup->t_data->t_infomask & HEAP_XACT_MASK;
1075 new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
1076 new_tuple->t_data->t_infomask2 |=
1077 tup->t_data->t_infomask2 & HEAP2_XACT_MASK;
1080 * Free allocated temp values
1082 for (i = 0; i < numAttrs; i++)
1084 pfree(DatumGetPointer(toast_values[i]));
1091 * toast_flatten_tuple_attribute -
1093 * If a Datum is of composite type, "flatten" it to contain no toasted fields.
1094 * This must be invoked on any potentially-composite field that is to be
1095 * inserted into a tuple. Doing this preserves the invariant that toasting
1096 * goes only one level deep in a tuple.
1098 * Note that flattening does not mean expansion of short-header varlenas,
1099 * so in one sense toasting is allowed within composite datums.
1103 toast_flatten_tuple_attribute(Datum value,
1104 Oid typeId, int32 typeMod)
1106 TupleDesc tupleDesc;
1107 HeapTupleHeader olddata;
1108 HeapTupleHeader new_data;
1109 int32 new_header_len;
1111 int32 new_tuple_len;
1112 HeapTupleData tmptup;
1113 Form_pg_attribute *att;
1116 bool need_change = false;
1117 bool has_nulls = false;
1118 Datum toast_values[MaxTupleAttributeNumber];
1119 bool toast_isnull[MaxTupleAttributeNumber];
1120 bool toast_free[MaxTupleAttributeNumber];
1123 * See if it's a composite type, and get the tupdesc if so.
1125 tupleDesc = lookup_rowtype_tupdesc_noerror(typeId, typeMod, true);
1126 if (tupleDesc == NULL)
1127 return value; /* not a composite type */
1129 att = tupleDesc->attrs;
1130 numAttrs = tupleDesc->natts;
1133 * Break down the tuple into fields.
1135 olddata = DatumGetHeapTupleHeader(value);
1136 Assert(typeId == HeapTupleHeaderGetTypeId(olddata));
1137 Assert(typeMod == HeapTupleHeaderGetTypMod(olddata));
1138 /* Build a temporary HeapTuple control structure */
1139 tmptup.t_len = HeapTupleHeaderGetDatumLength(olddata);
1140 ItemPointerSetInvalid(&(tmptup.t_self));
1141 tmptup.t_tableOid = InvalidOid;
1142 tmptup.t_data = olddata;
1144 Assert(numAttrs <= MaxTupleAttributeNumber);
1145 heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
1147 memset(toast_free, 0, numAttrs * sizeof(bool));
1149 for (i = 0; i < numAttrs; i++)
1152 * Look at non-null varlena attributes
1154 if (toast_isnull[i])
1156 else if (att[i]->attlen == -1)
1158 struct varlena *new_value;
1160 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1161 if (VARATT_IS_EXTERNAL(new_value) ||
1162 VARATT_IS_COMPRESSED(new_value))
1164 new_value = heap_tuple_untoast_attr(new_value);
1165 toast_values[i] = PointerGetDatum(new_value);
1166 toast_free[i] = true;
1173 * If nothing to untoast, just return the original tuple.
1177 ReleaseTupleDesc(tupleDesc);
1182 * Calculate the new size of the tuple.
1184 * This should match the reconstruction code in toast_insert_or_update.
1186 new_header_len = offsetof(HeapTupleHeaderData, t_bits);
1188 new_header_len += BITMAPLEN(numAttrs);
1189 if (olddata->t_infomask & HEAP_HASOID)
1190 new_header_len += sizeof(Oid);
1191 new_header_len = MAXALIGN(new_header_len);
1192 new_data_len = heap_compute_data_size(tupleDesc,
1193 toast_values, toast_isnull);
1194 new_tuple_len = new_header_len + new_data_len;
1196 new_data = (HeapTupleHeader) palloc0(new_tuple_len);
1199 * Copy the existing tuple header, but adjust natts and t_hoff.
1201 memcpy(new_data, olddata, offsetof(HeapTupleHeaderData, t_bits));
1202 HeapTupleHeaderSetNatts(new_data, numAttrs);
1203 new_data->t_hoff = new_header_len;
1204 if (olddata->t_infomask & HEAP_HASOID)
1205 HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(olddata));
1207 /* Reset the datum length field, too */
1208 HeapTupleHeaderSetDatumLength(new_data, new_tuple_len);
1210 /* Copy over the data, and fill the null bitmap if needed */
1211 heap_fill_tuple(tupleDesc,
1214 (char *) new_data + new_header_len,
1216 &(new_data->t_infomask),
1217 has_nulls ? new_data->t_bits : NULL);
1220 * Free allocated temp values
1222 for (i = 0; i < numAttrs; i++)
1224 pfree(DatumGetPointer(toast_values[i]));
1225 ReleaseTupleDesc(tupleDesc);
1227 return PointerGetDatum(new_data);
1232 * toast_compress_datum -
1234 * Create a compressed version of a varlena datum
1236 * If we fail (ie, compressed result is actually bigger than original)
1237 * then return NULL. We must not use compressed data if it'd expand
1240 * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
1241 * copying them. But we can't handle external or compressed datums.
1245 toast_compress_datum(Datum value)
1247 struct varlena *tmp;
1248 int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
1250 Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
1251 Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
1254 * No point in wasting a palloc cycle if value size is out of the allowed
1255 * range for compression
1257 if (valsize < PGLZ_strategy_default->min_input_size ||
1258 valsize > PGLZ_strategy_default->max_input_size)
1259 return PointerGetDatum(NULL);
1261 tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
1264 * We recheck the actual size even if pglz_compress() reports success,
1265 * because it might be satisfied with having saved as little as one byte
1266 * in the compressed data --- which could turn into a net loss once you
1267 * consider header and alignment padding. Worst case, the compressed
1268 * format might require three padding bytes (plus header, which is
1269 * included in VARSIZE(tmp)), whereas the uncompressed format would take
1270 * only one header byte and no padding if the value is short enough. So
1271 * we insist on a savings of more than 2 bytes to ensure we have a gain.
1273 if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize,
1274 (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
1275 VARSIZE(tmp) < valsize - 2)
1277 /* successful compression */
1278 return PointerGetDatum(tmp);
1282 /* incompressible data */
1284 return PointerGetDatum(NULL);
1290 * toast_save_datum -
1292 * Save one single datum into the secondary relation and return
1293 * a Datum reference for it.
1295 * rel: the main relation we're working with (not the toast rel!)
1296 * value: datum to be pushed to toast storage
1297 * oldexternal: if not NULL, toast pointer previously representing the datum
1298 * options: options to be passed to heap_insert() for toast rows
1302 toast_save_datum(Relation rel, Datum value,
1303 struct varlena * oldexternal, int options)
1308 TupleDesc toasttupDesc;
1311 CommandId mycid = GetCurrentCommandId(true);
1312 struct varlena *result;
1313 struct varatt_external toast_pointer;
1317 char data[TOAST_MAX_CHUNK_SIZE]; /* make struct big enough */
1318 int32 align_it; /* ensure struct is aligned well enough */
1321 int32 chunk_seq = 0;
1324 Pointer dval = DatumGetPointer(value);
1326 Assert(!VARATT_IS_EXTERNAL(value));
1329 * Open the toast relation and its index. We can use the index to check
1330 * uniqueness of the OID we assign to the toasted item, even though it has
1331 * additional columns besides OID.
1333 toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
1334 toasttupDesc = toastrel->rd_att;
1335 toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock);
1338 * Get the data pointer and length, and compute va_rawsize and va_extsize.
1340 * va_rawsize is the size of the equivalent fully uncompressed datum, so
1341 * we have to adjust for short headers.
1343 * va_extsize is the actual size of the data payload in the toast records.
1345 if (VARATT_IS_SHORT(dval))
1347 data_p = VARDATA_SHORT(dval);
1348 data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
1349 toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
1350 toast_pointer.va_extsize = data_todo;
1352 else if (VARATT_IS_COMPRESSED(dval))
1354 data_p = VARDATA(dval);
1355 data_todo = VARSIZE(dval) - VARHDRSZ;
1356 /* rawsize in a compressed datum is just the size of the payload */
1357 toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
1358 toast_pointer.va_extsize = data_todo;
1359 /* Assert that the numbers look like it's compressed */
1360 Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1364 data_p = VARDATA(dval);
1365 data_todo = VARSIZE(dval) - VARHDRSZ;
1366 toast_pointer.va_rawsize = VARSIZE(dval);
1367 toast_pointer.va_extsize = data_todo;
1371 * Insert the correct table OID into the result TOAST pointer.
1373 * Normally this is the actual OID of the target toast table, but during
1374 * table-rewriting operations such as CLUSTER, we have to insert the OID
1375 * of the table's real permanent toast table instead. rd_toastoid is set
1376 * if we have to substitute such an OID.
1378 if (OidIsValid(rel->rd_toastoid))
1379 toast_pointer.va_toastrelid = rel->rd_toastoid;
1381 toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
1384 * Choose an OID to use as the value ID for this toast value.
1386 * Normally we just choose an unused OID within the toast table. But
1387 * during table-rewriting operations where we are preserving an existing
1388 * toast table OID, we want to preserve toast value OIDs too. So, if
1389 * rd_toastoid is set and we had a prior external value from that same
1390 * toast table, re-use its value ID. If we didn't have a prior external
1391 * value (which is a corner case, but possible if the table's attstorage
1392 * options have been changed), we have to pick a value ID that doesn't
1393 * conflict with either new or existing toast value OIDs.
1395 if (!OidIsValid(rel->rd_toastoid))
1397 /* normal case: just choose an unused OID */
1398 toast_pointer.va_valueid =
1399 GetNewOidWithIndex(toastrel,
1400 RelationGetRelid(toastidx),
1405 /* rewrite case: check to see if value was in old toast table */
1406 toast_pointer.va_valueid = InvalidOid;
1407 if (oldexternal != NULL)
1409 struct varatt_external old_toast_pointer;
1411 Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
1412 /* Must copy to access aligned fields */
1413 VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
1414 if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
1416 /* This value came from the old toast table; reuse its OID */
1417 toast_pointer.va_valueid = old_toast_pointer.va_valueid;
1420 * There is a corner case here: the table rewrite might have
1421 * to copy both live and recently-dead versions of a row, and
1422 * those versions could easily reference the same toast value.
1423 * When we copy the second or later version of such a row,
1424 * reusing the OID will mean we select an OID that's already
1425 * in the new toast table. Check for that, and if so, just
1426 * fall through without writing the data again.
1428 * While annoying and ugly-looking, this is a good thing
1429 * because it ensures that we wind up with only one copy of
1430 * the toast value when there is only one copy in the old
1431 * toast table. Before we detected this case, we'd have made
1432 * multiple copies, wasting space; and what's worse, the
1433 * copies belonging to already-deleted heap tuples would not
1434 * be reclaimed by VACUUM.
1436 if (toastrel_valueid_exists(toastrel,
1437 toast_pointer.va_valueid))
1439 /* Match, so short-circuit the data storage loop below */
1444 if (toast_pointer.va_valueid == InvalidOid)
1447 * new value; must choose an OID that doesn't conflict in either
1448 * old or new toast table
1452 toast_pointer.va_valueid =
1453 GetNewOidWithIndex(toastrel,
1454 RelationGetRelid(toastidx),
1456 } while (toastid_valueid_exists(rel->rd_toastoid,
1457 toast_pointer.va_valueid));
1462 * Initialize constant parts of the tuple data
1464 t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
1465 t_values[2] = PointerGetDatum(&chunk_data);
1466 t_isnull[0] = false;
1467 t_isnull[1] = false;
1468 t_isnull[2] = false;
1471 * Split up the item into chunks
1473 while (data_todo > 0)
1476 * Calculate the size of this chunk
1478 chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
1481 * Build a tuple and store it
1483 t_values[1] = Int32GetDatum(chunk_seq++);
1484 SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
1485 memcpy(VARDATA(&chunk_data), data_p, chunk_size);
1486 toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
1488 heap_insert(toastrel, toasttup, mycid, options, NULL);
1491 * Create the index entry. We cheat a little here by not using
1492 * FormIndexDatum: this relies on the knowledge that the index columns
1493 * are the same as the initial columns of the table.
1495 * Note also that there had better not be any user-created index on
1496 * the TOAST table, since we don't bother to update anything else.
1498 index_insert(toastidx, t_values, t_isnull,
1499 &(toasttup->t_self),
1501 toastidx->rd_index->indisunique ?
1502 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
1507 heap_freetuple(toasttup);
1510 * Move on to next chunk
1512 data_todo -= chunk_size;
1513 data_p += chunk_size;
1517 * Done - close toast relation
1519 index_close(toastidx, RowExclusiveLock);
1520 heap_close(toastrel, RowExclusiveLock);
1523 * Create the TOAST pointer value that we'll return
1525 result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
1526 SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
1527 memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
1529 return PointerGetDatum(result);
1534 * toast_delete_datum -
1536 * Delete a single external stored value.
1540 toast_delete_datum(Relation rel, Datum value)
1542 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
1543 struct varatt_external toast_pointer;
1546 ScanKeyData toastkey;
1547 SysScanDesc toastscan;
1550 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1553 /* Must copy to access aligned fields */
1554 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1557 * Open the toast relation and its index
1559 toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock);
1560 toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock);
1563 * Setup a scan key to find chunks with matching va_valueid
1565 ScanKeyInit(&toastkey,
1567 BTEqualStrategyNumber, F_OIDEQ,
1568 ObjectIdGetDatum(toast_pointer.va_valueid));
1571 * Find all the chunks. (We don't actually care whether we see them in
1572 * sequence or not, but since we've already locked the index we might as
1573 * well use systable_beginscan_ordered.)
1575 toastscan = systable_beginscan_ordered(toastrel, toastidx,
1576 SnapshotToast, 1, &toastkey);
1577 while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1580 * Have a chunk, delete it
1582 simple_heap_delete(toastrel, &toasttup->t_self);
1586 * End scan and close relations
1588 systable_endscan_ordered(toastscan);
1589 index_close(toastidx, RowExclusiveLock);
1590 heap_close(toastrel, RowExclusiveLock);
1595 * toastrel_valueid_exists -
1597 * Test whether a toast value with the given ID exists in the toast relation
1601 toastrel_valueid_exists(Relation toastrel, Oid valueid)
1603 bool result = false;
1604 ScanKeyData toastkey;
1605 SysScanDesc toastscan;
1608 * Setup a scan key to find chunks with matching va_valueid
1610 ScanKeyInit(&toastkey,
1612 BTEqualStrategyNumber, F_OIDEQ,
1613 ObjectIdGetDatum(valueid));
1616 * Is there any such chunk?
1618 toastscan = systable_beginscan(toastrel, toastrel->rd_rel->reltoastidxid,
1619 true, SnapshotToast, 1, &toastkey);
1621 if (systable_getnext(toastscan) != NULL)
1624 systable_endscan(toastscan);
1630 * toastid_valueid_exists -
1632 * As above, but work from toast rel's OID not an open relation
1636 toastid_valueid_exists(Oid toastrelid, Oid valueid)
1641 toastrel = heap_open(toastrelid, AccessShareLock);
1643 result = toastrel_valueid_exists(toastrel, valueid);
1645 heap_close(toastrel, AccessShareLock);
1652 * toast_fetch_datum -
1654 * Reconstruct an in memory Datum from the chunks saved
1655 * in the toast relation
1658 static struct varlena *
1659 toast_fetch_datum(struct varlena * attr)
1663 ScanKeyData toastkey;
1664 SysScanDesc toastscan;
1666 TupleDesc toasttupDesc;
1667 struct varlena *result;
1668 struct varatt_external toast_pointer;
1678 if (VARATT_IS_EXTERNAL_INDIRECT(attr))
1679 elog(ERROR, "shouldn't be called for indirect tuples");
1681 /* Must copy to access aligned fields */
1682 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1684 ressize = toast_pointer.va_extsize;
1685 numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1687 result = (struct varlena *) palloc(ressize + VARHDRSZ);
1689 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1690 SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
1692 SET_VARSIZE(result, ressize + VARHDRSZ);
1695 * Open the toast relation and its index
1697 toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
1698 toasttupDesc = toastrel->rd_att;
1699 toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);
1702 * Setup a scan key to fetch from the index by va_valueid
1704 ScanKeyInit(&toastkey,
1706 BTEqualStrategyNumber, F_OIDEQ,
1707 ObjectIdGetDatum(toast_pointer.va_valueid));
1710 * Read the chunks by index
1712 * Note that because the index is actually on (valueid, chunkidx) we will
1713 * see the chunks in chunkidx order, even though we didn't explicitly ask
1718 toastscan = systable_beginscan_ordered(toastrel, toastidx,
1719 SnapshotToast, 1, &toastkey);
1720 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1723 * Have a chunk, extract the sequence number and the data
1725 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
1727 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
1729 if (!VARATT_IS_EXTENDED(chunk))
1731 chunksize = VARSIZE(chunk) - VARHDRSZ;
1732 chunkdata = VARDATA(chunk);
1734 else if (VARATT_IS_SHORT(chunk))
1736 /* could happen due to heap_form_tuple doing its thing */
1737 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1738 chunkdata = VARDATA_SHORT(chunk);
1742 /* should never happen */
1743 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
1744 toast_pointer.va_valueid,
1745 RelationGetRelationName(toastrel));
1746 chunksize = 0; /* keep compiler quiet */
1751 * Some checks on the data we've found
1753 if (residx != nextidx)
1754 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
1756 toast_pointer.va_valueid,
1757 RelationGetRelationName(toastrel));
1758 if (residx < numchunks - 1)
1760 if (chunksize != TOAST_MAX_CHUNK_SIZE)
1761 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
1762 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
1764 toast_pointer.va_valueid,
1765 RelationGetRelationName(toastrel));
1767 else if (residx == numchunks - 1)
1769 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
1770 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
1772 (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
1774 toast_pointer.va_valueid,
1775 RelationGetRelationName(toastrel));
1778 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
1781 toast_pointer.va_valueid,
1782 RelationGetRelationName(toastrel));
1785 * Copy the data into proper place in our result
1787 memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
1795 * Final checks that we successfully fetched the datum
1797 if (nextidx != numchunks)
1798 elog(ERROR, "missing chunk number %d for toast value %u in %s",
1800 toast_pointer.va_valueid,
1801 RelationGetRelationName(toastrel));
1804 * End scan and close relations
1806 systable_endscan_ordered(toastscan);
1807 index_close(toastidx, AccessShareLock);
1808 heap_close(toastrel, AccessShareLock);
1814 * toast_fetch_datum_slice -
1816 * Reconstruct a segment of a Datum from the chunks saved
1817 * in the toast relation
1820 static struct varlena *
1821 toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
1825 ScanKeyData toastkey[3];
1827 SysScanDesc toastscan;
1829 TupleDesc toasttupDesc;
1830 struct varlena *result;
1831 struct varatt_external toast_pointer;
1848 Assert(VARATT_IS_EXTERNAL_ONDISK(attr));
1850 /* Must copy to access aligned fields */
1851 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1854 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
1855 * we can't return a compressed datum which is meaningful to toast later
1857 Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1859 attrsize = toast_pointer.va_extsize;
1860 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1862 if (sliceoffset >= attrsize)
1868 if (((sliceoffset + length) > attrsize) || length < 0)
1869 length = attrsize - sliceoffset;
1871 result = (struct varlena *) palloc(length + VARHDRSZ);
1873 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1874 SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
1876 SET_VARSIZE(result, length + VARHDRSZ);
1879 return result; /* Can save a lot of work at this point! */
1881 startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
1882 endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
1883 numchunks = (endchunk - startchunk) + 1;
1885 startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
1886 endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
1889 * Open the toast relation and its index
1891 toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
1892 toasttupDesc = toastrel->rd_att;
1893 toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);
1896 * Setup a scan key to fetch from the index. This is either two keys or
1897 * three depending on the number of chunks.
1899 ScanKeyInit(&toastkey[0],
1901 BTEqualStrategyNumber, F_OIDEQ,
1902 ObjectIdGetDatum(toast_pointer.va_valueid));
1905 * Use equality condition for one chunk, a range condition otherwise:
1909 ScanKeyInit(&toastkey[1],
1911 BTEqualStrategyNumber, F_INT4EQ,
1912 Int32GetDatum(startchunk));
1917 ScanKeyInit(&toastkey[1],
1919 BTGreaterEqualStrategyNumber, F_INT4GE,
1920 Int32GetDatum(startchunk));
1921 ScanKeyInit(&toastkey[2],
1923 BTLessEqualStrategyNumber, F_INT4LE,
1924 Int32GetDatum(endchunk));
1929 * Read the chunks by index
1931 * The index is on (valueid, chunkidx) so they will come in order
1933 nextidx = startchunk;
1934 toastscan = systable_beginscan_ordered(toastrel, toastidx,
1935 SnapshotToast, nscankeys, toastkey);
1936 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1939 * Have a chunk, extract the sequence number and the data
1941 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
1943 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
1945 if (!VARATT_IS_EXTENDED(chunk))
1947 chunksize = VARSIZE(chunk) - VARHDRSZ;
1948 chunkdata = VARDATA(chunk);
1950 else if (VARATT_IS_SHORT(chunk))
1952 /* could happen due to heap_form_tuple doing its thing */
1953 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1954 chunkdata = VARDATA_SHORT(chunk);
1958 /* should never happen */
1959 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
1960 toast_pointer.va_valueid,
1961 RelationGetRelationName(toastrel));
1962 chunksize = 0; /* keep compiler quiet */
1967 * Some checks on the data we've found
1969 if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
1970 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
1972 toast_pointer.va_valueid,
1973 RelationGetRelationName(toastrel));
1974 if (residx < totalchunks - 1)
1976 if (chunksize != TOAST_MAX_CHUNK_SIZE)
1977 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
1978 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
1979 residx, totalchunks,
1980 toast_pointer.va_valueid,
1981 RelationGetRelationName(toastrel));
1983 else if (residx == totalchunks - 1)
1985 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
1986 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
1988 (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
1990 toast_pointer.va_valueid,
1991 RelationGetRelationName(toastrel));
1994 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
1997 toast_pointer.va_valueid,
1998 RelationGetRelationName(toastrel));
2001 * Copy the data into proper place in our result
2004 chcpyend = chunksize - 1;
2005 if (residx == startchunk)
2006 chcpystrt = startoffset;
2007 if (residx == endchunk)
2008 chcpyend = endoffset;
2010 memcpy(VARDATA(result) +
2011 (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
2012 chunkdata + chcpystrt,
2013 (chcpyend - chcpystrt) + 1);
2019 * Final checks that we successfully fetched the datum
2021 if (nextidx != (endchunk + 1))
2022 elog(ERROR, "missing chunk number %d for toast value %u in %s",
2024 toast_pointer.va_valueid,
2025 RelationGetRelationName(toastrel));
2028 * End scan and close relations
2030 systable_endscan_ordered(toastscan);
2031 index_close(toastidx, AccessShareLock);
2032 heap_close(toastrel, AccessShareLock);