1 /*-------------------------------------------------------------------------
4 * Support routines for external and compressed storage of
5 * variable size attributes.
7 * Copyright (c) 2000-2014, PostgreSQL Global Development Group
11 * src/backend/access/heap/tuptoaster.c
15 * toast_insert_or_update -
16 * Try to make a given tuple fit into one page by compressing
17 * or moving off attributes
20 * Reclaim toast storage when a tuple is deleted
22 * heap_tuple_untoast_attr -
23 * Fetch back a given value from the "secondary" relation
25 *-------------------------------------------------------------------------
33 #include "access/genam.h"
34 #include "access/heapam.h"
35 #include "access/tuptoaster.h"
36 #include "access/xact.h"
37 #include "catalog/catalog.h"
38 #include "miscadmin.h"
39 #include "utils/fmgroids.h"
40 #include "common/pg_lzcompress.h"
41 #include "utils/rel.h"
42 #include "utils/typcache.h"
43 #include "utils/tqual.h"
48 static void toast_delete_datum(Relation rel, Datum value);
49 static Datum toast_save_datum(Relation rel, Datum value,
50 struct varlena * oldexternal, int options);
51 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
52 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
53 static struct varlena *toast_fetch_datum(struct varlena * attr);
54 static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
55 int32 sliceoffset, int32 length);
56 static int toast_open_indexes(Relation toastrel,
60 static void toast_close_indexes(Relation *toastidxs, int num_indexes,
/*
 * heap_tuple_fetch_attr
 *
 * Returns a varlena for "attr" whose bytes are no longer reached through a
 * toast pointer:
 *   - on-disk external pointer: the result comes from toast_fetch_datum();
 *   - indirect pointer: the target is followed once; an on-disk target is
 *     handled by recursing, anything else is copied byte-for-byte into a
 *     fresh palloc'd buffer of VARSIZE_ANY() bytes.
 * No decompression and no short-header expansion is done here.
 *
 * NOTE(review): the branch for a plain inline value and the final return
 * are not visible in this listing.
 */
65 * heap_tuple_fetch_attr -
67 * Public entry point to get back a toasted value from
68 * external source (possibly still in compressed format).
70 * This will return a datum that contains all the data internally, ie, not
71 * relying on external storage or memory, but it can still be compressed or
72 * have a short header.
76 heap_tuple_fetch_attr(struct varlena * attr)
78 struct varlena *result;
80 if (VARATT_IS_EXTERNAL_ONDISK(attr))
83 * This is an external stored plain value
85 result = toast_fetch_datum(attr);
87 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
90 * copy into the caller's memory context. That's not required in all
91 * cases but sufficient for now since this is mainly used when we need
92 * to persist a Datum for unusually long time, like in a HOLD cursor.
94 struct varatt_indirect redirect;
/* from here on "attr" names the datum the indirect pointer refers to */
96 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
97 attr = (struct varlena *) redirect.pointer;
99 /* nested indirect Datums aren't allowed */
100 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
102 /* doesn't make much sense, but better handle it */
103 if (VARATT_IS_EXTERNAL_ONDISK(attr))
104 return heap_tuple_fetch_attr(attr);
106 /* copy datum verbatim */
107 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
108 memcpy(result, attr, VARSIZE_ANY(attr));
113 * This is a plain value inside of the main tuple - why am I called?
/*
 * heap_tuple_untoast_attr
 *
 * Dispatches on the representation of "attr":
 *   - on-disk external: fetch with toast_fetch_datum(), then decompress the
 *     fetched value if VARATT_IS_COMPRESSED() says so;
 *   - indirect: follow the pointer and recurse on the target;
 *   - inline compressed: decompress into a new palloc'd buffer of
 *     PGLZ_RAW_SIZE() + VARHDRSZ bytes;
 *   - short header: copy the payload into a new buffer that carries a full
 *     VARHDRSZ header.
 * A false result from pglz_decompress() is reported with elog(ERROR).
 *
 * NOTE(review): the return statement (and any freeing of intermediate
 * buffers) is not visible in this listing.
 */
123 * heap_tuple_untoast_attr -
125 * Public entry point to get back a toasted value from compression
126 * or external storage.
130 heap_tuple_untoast_attr(struct varlena * attr)
132 if (VARATT_IS_EXTERNAL_ONDISK(attr))
135 * This is an externally stored datum --- fetch it back from there
137 attr = toast_fetch_datum(attr);
138 /* If it's compressed, decompress it */
139 if (VARATT_IS_COMPRESSED(attr))
/* "tmp" keeps the compressed image while "attr" is re-pointed at the output */
141 PGLZ_Header *tmp = (PGLZ_Header *) attr;
143 attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
144 SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
145 if (!pglz_decompress(tmp, VARDATA(attr)))
146 elog(ERROR, "compressed data is corrupted");
150 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
152 struct varatt_indirect redirect;
154 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
155 attr = (struct varlena *) redirect.pointer;
157 /* nested indirect Datums aren't allowed */
158 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
/* the target may itself be external, compressed or short: recurse */
160 attr = heap_tuple_untoast_attr(attr);
162 else if (VARATT_IS_COMPRESSED(attr))
165 * This is a compressed value inside of the main tuple
167 PGLZ_Header *tmp = (PGLZ_Header *) attr;
169 attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
170 SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
171 if (!pglz_decompress(tmp, VARDATA(attr)))
172 elog(ERROR, "compressed data is corrupted");
174 else if (VARATT_IS_SHORT(attr))
177 * This is a short-header varlena --- convert to 4-byte header format
/* payload length stays the same; only the header grows to VARHDRSZ */
179 Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
180 Size new_size = data_size + VARHDRSZ;
181 struct varlena *new_attr;
183 new_attr = (struct varlena *) palloc(new_size);
184 SET_VARSIZE(new_attr, new_size);
185 memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
/*
 * heap_tuple_untoast_attr_slice
 *
 * Returns a new varlena holding "slicelength" payload bytes of "attr"
 * starting at byte "sliceoffset".
 *
 *   - on-disk, not compressed: delegated to toast_fetch_datum_slice();
 *   - on-disk, compressed: the whole value is fetched first;
 *   - indirect: recurse on the pointer's target;
 *   - a compressed value is decompressed in full before slicing.
 *
 * A negative slicelength, or one that would run past the end of the value,
 * is clamped to "everything from sliceoffset to the end".
 *
 * NOTE(review): several lines of this function (declarations of
 * attrdata/attrsize, the else branches, the body of the
 * "sliceoffset >= attrsize" test and the cleanup/return) are not visible
 * in this listing and are left exactly as they were.
 */
194 * heap_tuple_untoast_attr_slice -
196 * Public entry point to get back part of a toasted value
197 * from compression or external storage.
201 heap_tuple_untoast_attr_slice(struct varlena * attr,
202 int32 sliceoffset, int32 slicelength)
204 struct varlena *preslice;
205 struct varlena *result;
209 if (VARATT_IS_EXTERNAL_ONDISK(attr))
211 struct varatt_external toast_pointer;
213 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
215 /* fast path for non-compressed external datums */
216 if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
217 return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
219 /* fetch it back (compressed marker will get set automatically) */
220 preslice = toast_fetch_datum(attr);
222 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
224 struct varatt_indirect redirect;
226 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
228 /* nested indirect Datums aren't allowed */
229 Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
231 return heap_tuple_untoast_attr_slice(redirect.pointer,
232 sliceoffset, slicelength);
237 if (VARATT_IS_COMPRESSED(preslice))
/* "tmp" keeps the compressed image; "preslice" becomes the raw output */
239 PGLZ_Header *tmp = (PGLZ_Header *) preslice;
240 Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ;
242 preslice = (struct varlena *) palloc(size);
243 SET_VARSIZE(preslice, size);
244 if (!pglz_decompress(tmp, VARDATA(preslice)))
245 elog(ERROR, "compressed data is corrupted");
247 if (tmp != (PGLZ_Header *) attr)
251 if (VARATT_IS_SHORT(preslice))
253 attrdata = VARDATA_SHORT(preslice);
254 attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
258 attrdata = VARDATA(preslice);
259 attrsize = VARSIZE(preslice) - VARHDRSZ;
262 /* slicing of datum for compressed cases and plain value */
264 if (sliceoffset >= attrsize)
/*
 * Clamp the length to what is available.  The test is written as a
 * subtraction because "sliceoffset + slicelength" is a signed int32 sum
 * that can overflow for a very large requested length; with non-negative
 * sliceoffset and attrsize the difference cannot.  Checking
 * "slicelength < 0" first keeps a negative length out of the comparison.
 */
270 if (slicelength < 0 || slicelength > attrsize - sliceoffset)
271 slicelength = attrsize - sliceoffset;
273 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
274 SET_VARSIZE(result, slicelength + VARHDRSZ);
276 memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
278 if (preslice != attr)
/*
 * toast_raw_datum_size
 *
 * Computes the size the datum would have once fully detoasted, always
 * counted with a full VARHDRSZ header:
 *   - on-disk external: va_rawsize taken from the toast pointer;
 *   - indirect: recurse on the pointer's target;
 *   - inline compressed: VARRAWSIZE_4B_C() + VARHDRSZ;
 *   - short header: payload length + VARHDRSZ;
 *   - otherwise: VARSIZE().
 *
 * NOTE(review): the declaration of "result" and the final return are not
 * visible in this listing.
 */
286 * toast_raw_datum_size -
288 * Return the raw (detoasted) size of a varlena datum
289 * (including the VARHDRSZ header)
293 toast_raw_datum_size(Datum value)
295 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
298 if (VARATT_IS_EXTERNAL_ONDISK(attr))
300 /* va_rawsize is the size of the original datum -- including header */
301 struct varatt_external toast_pointer;
303 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
304 result = toast_pointer.va_rawsize;
306 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
308 struct varatt_indirect toast_pointer;
310 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
312 /* nested indirect Datums aren't allowed */
313 Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
/* size is that of whatever the indirect pointer refers to */
315 return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
317 else if (VARATT_IS_COMPRESSED(attr))
319 /* here, va_rawsize is just the payload size */
320 result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
322 else if (VARATT_IS_SHORT(attr))
325 * we have to normalize the header length to VARHDRSZ or else the
326 * callers of this function will be confused.
328 result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
332 /* plain untoasted datum */
333 result = VARSIZE(attr);
/*
 * toast_datum_size
 *
 * Computes the size the datum occupies as stored:
 *   - on-disk external: va_extsize from the toast pointer (the pointer's
 *     own size is not counted);
 *   - indirect: recurse on the pointer's target;
 *   - short header: VARSIZE_SHORT();
 *   - otherwise (inline, compressed or not): VARSIZE().
 *
 * NOTE(review): the declaration of "result" and the final return are not
 * visible in this listing.
 */
341 * Return the physical storage size (possibly compressed) of a varlena datum
345 toast_datum_size(Datum value)
347 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
350 if (VARATT_IS_EXTERNAL_ONDISK(attr))
353 * Attribute is stored externally - return the extsize whether
354 * compressed or not. We do not count the size of the toast pointer
357 struct varatt_external toast_pointer;
359 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
360 result = toast_pointer.va_extsize;
362 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
364 struct varatt_indirect toast_pointer;
366 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
 * The check must look at the pointer's target: "attr" itself is known to
 * be indirect in this branch, so asserting on it would always fail.
 */
368 /* nested indirect Datums aren't allowed */
369 Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
371 return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
373 else if (VARATT_IS_SHORT(attr))
375 result = VARSIZE_SHORT(attr);
380 * Attribute is stored inline either compressed or not, just calculate
381 * the size of the datum in either case.
383 result = VARSIZE(attr);
/*
 * toast_delete
 *
 * Deforms "oldtup" using rel's tuple descriptor and, for every varlena
 * column (attlen == -1), calls toast_delete_datum() when the value is an
 * on-disk toast pointer.  An indirect pointer is rejected with
 * elog(ERROR).
 *
 * rel     relation the tuple belongs to; asserted to be a plain relation
 *         or a materialized view
 * oldtup  the tuple being deleted
 *
 * NOTE(review): the test that precedes the "else if" chain in the loop is
 * not visible in this listing.
 */
392 * Cascaded delete toast-entries on DELETE
396 toast_delete(Relation rel, HeapTuple oldtup)
399 Form_pg_attribute *att;
402 Datum toast_values[MaxHeapAttributeNumber];
403 bool toast_isnull[MaxHeapAttributeNumber];
406 * We should only ever be called for tuples of plain relations or
407 * materialized views --- recursing on a toast rel is bad news.
409 Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
410 rel->rd_rel->relkind == RELKIND_MATVIEW);
413 * Get the tuple descriptor and break down the tuple into fields.
415 * NOTE: it's debatable whether to use heap_deform_tuple() here or just
416 * heap_getattr() only the varlena columns. The latter could win if there
417 * are few varlena columns and many non-varlena ones. However,
418 * heap_deform_tuple costs only O(N) while the heap_getattr way would cost
419 * O(N^2) if there are many varlena columns, so it seems better to err on
420 * the side of linear cost. (We won't even be here unless there's at
421 * least one varlena column, by the way.)
423 tupleDesc = rel->rd_att;
424 att = tupleDesc->attrs;
425 numAttrs = tupleDesc->natts;
/* the on-stack arrays above are sized for MaxHeapAttributeNumber entries */
427 Assert(numAttrs <= MaxHeapAttributeNumber);
428 heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull);
431 * Check for external stored attributes and delete them from the secondary
434 for (i = 0; i < numAttrs; i++)
436 if (att[i]->attlen == -1)
438 Datum value = toast_values[i];
/*
 * NOTE(review): "value" is already a Datum, yet it is wrapped in
 * PointerGetDatum() below; DatumGetPointer() looks like what was meant.
 * Confirm against the macro definitions before changing.
 */
442 else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
443 toast_delete_datum(rel, value);
444 else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value)))
445 elog(ERROR, "attempt to delete tuple containing indirect datums");
/*
 * toast_insert_or_update
 *
 * Overview of what the visible code does:
 *   1. Deform newtup (and oldtup) into per-column value/isnull arrays.
 *   2. Classify every column in toast_action[]: 'p' = leave alone,
 *      ' ' = still a candidate, 'x' = not compressible but may be moved
 *      out.  Unchanged on-disk values of an UPDATE are kept as-is; changed
 *      ones are flagged in toast_delold[].  External values that arrive in
 *      newtup are fetched back inline first.
 *   3. While heap_compute_data_size() exceeds maxDataLen, shrink the tuple
 *      in four rounds: compress 'x' columns (and push very large 'x'/'e'
 *      ones out), push remaining 'x'/'e' columns out, compress 'm'
 *      columns, and finally push 'm' columns out against the larger
 *      TOAST_TUPLE_TARGET_MAIN limit.  Rounds that store externally only
 *      run when rel has a toast relation (reltoastrelid != InvalidOid).
 *   4. Build a new tuple from the modified values, or return newtup when
 *      nothing changed; then free temporaries and delete superseded
 *      external values of the old tuple.
 *
 * Each round picks the largest remaining candidate whose size exceeds
 * MAXALIGN(TOAST_POINTER_SIZE).
 *
 * NOTE(review): many control-flow lines (braces, else, continue/break,
 * several declarations and the guards around the cleanup loops) are not
 * visible in this listing.
 */
452 * toast_insert_or_update -
454 * Delete no-longer-used toast-entries and create new ones to
455 * make the new tuple fit on INSERT or UPDATE
458 * newtup: the candidate new tuple to be inserted
459 * oldtup: the old row version for UPDATE, or NULL for INSERT
460 * options: options to be passed to heap_insert() for toast rows
462 * either newtup if no toasting is needed, or a palloc'd modified tuple
463 * that is what should actually get stored
465 * NOTE: neither newtup nor oldtup will be modified. This is a change
466 * from the pre-8.1 API of this routine.
470 toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
473 HeapTuple result_tuple;
475 Form_pg_attribute *att;
479 bool need_change = false;
480 bool need_free = false;
481 bool need_delold = false;
482 bool has_nulls = false;
/* per-column working state, indexed by attribute position */
487 char toast_action[MaxHeapAttributeNumber];
488 bool toast_isnull[MaxHeapAttributeNumber];
489 bool toast_oldisnull[MaxHeapAttributeNumber];
490 Datum toast_values[MaxHeapAttributeNumber];
491 Datum toast_oldvalues[MaxHeapAttributeNumber];
492 struct varlena *toast_oldexternal[MaxHeapAttributeNumber];
493 int32 toast_sizes[MaxHeapAttributeNumber];
494 bool toast_free[MaxHeapAttributeNumber];
495 bool toast_delold[MaxHeapAttributeNumber];
498 * We should only ever be called for tuples of plain relations or
499 * materialized views --- recursing on a toast rel is bad news.
501 Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
502 rel->rd_rel->relkind == RELKIND_MATVIEW);
505 * Get the tuple descriptor and break down the tuple(s) into fields.
507 tupleDesc = rel->rd_att;
508 att = tupleDesc->attrs;
509 numAttrs = tupleDesc->natts;
511 Assert(numAttrs <= MaxHeapAttributeNumber);
512 heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
514 heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);
517 * Then collect information about the values given
519 * NOTE: toast_action[i] can have these values:
520 * ' ' default handling
521 * 'p' already processed --- don't touch it
522 * 'x' incompressible, but OK to move off
524 * NOTE: toast_sizes[i] is only made valid for varlena attributes with
525 * toast_action[i] different from 'p'.
/* only the first numAttrs entries of each array are initialised and used */
528 memset(toast_action, ' ', numAttrs * sizeof(char));
529 memset(toast_oldexternal, 0, numAttrs * sizeof(struct varlena *));
530 memset(toast_free, 0, numAttrs * sizeof(bool));
531 memset(toast_delold, 0, numAttrs * sizeof(bool));
533 for (i = 0; i < numAttrs; i++)
535 struct varlena *old_value;
536 struct varlena *new_value;
541 * For UPDATE get the old and new values of this attribute
543 old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]);
544 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
547 * If the old value is stored on disk, check if it has changed so
548 * we have to delete it later.
550 if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
551 VARATT_IS_EXTERNAL_ONDISK(old_value))
/* "changed" means: now NULL, no longer on-disk, or a different toast pointer */
553 if (toast_isnull[i] || !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
554 memcmp((char *) old_value, (char *) new_value,
555 VARSIZE_EXTERNAL(old_value)) != 0)
558 * The old external stored value isn't needed any more
561 toast_delold[i] = true;
567 * This attribute isn't changed by this update so we reuse
568 * the original reference to the old value in the new
571 toast_action[i] = 'p';
579 * For INSERT simply get the new value
581 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
585 * Handle NULL attributes
589 toast_action[i] = 'p';
595 * Now look at varlena attributes
597 if (att[i]->attlen == -1)
600 * If the table's attribute says PLAIN always, force it so.
602 if (att[i]->attstorage == 'p')
603 toast_action[i] = 'p';
606 * We took care of UPDATE above, so any external value we find
607 * still in the tuple must be someone else's we cannot reuse.
608 * Fetch it back (without decompression, unless we are forcing
609 * PLAIN storage). If necessary, we'll push it out as a new
610 * external value below.
612 if (VARATT_IS_EXTERNAL(new_value))
/* remember the original pointer; it is handed to toast_save_datum() later */
614 toast_oldexternal[i] = new_value;
615 if (att[i]->attstorage == 'p')
616 new_value = heap_tuple_untoast_attr(new_value);
618 new_value = heap_tuple_fetch_attr(new_value);
619 toast_values[i] = PointerGetDatum(new_value);
620 toast_free[i] = true;
626 * Remember the size of this attribute
628 toast_sizes[i] = VARSIZE_ANY(new_value);
633 * Not a varlena attribute, plain storage always
635 toast_action[i] = 'p';
640 * Compress and/or save external until data fits into target length
642 * 1: Inline compress attributes with attstorage 'x', and store very
643 * large attributes with attstorage 'x' or 'e' external immediately
644 * 2: Store attributes with attstorage 'x' or 'e' external
645 * 3: Inline compress attributes with attstorage 'm'
646 * 4: Store attributes with attstorage 'm' external
650 /* compute header overhead --- this should match heap_form_tuple() */
651 hoff = offsetof(HeapTupleHeaderData, t_bits);
653 hoff += BITMAPLEN(numAttrs);
654 if (newtup->t_data->t_infomask & HEAP_HASOID)
656 hoff = MAXALIGN(hoff);
657 /* now convert to a limit on the tuple data size */
658 maxDataLen = TOAST_TUPLE_TARGET - hoff;
/* ---- round 1: compress 'x' columns, push oversized 'x'/'e' columns out ---- */
661 * Look for attributes with attstorage 'x' to compress. Also find large
662 * attributes with attstorage 'x' or 'e', and store them external.
664 while (heap_compute_data_size(tupleDesc,
665 toast_values, toast_isnull) > maxDataLen)
667 int biggest_attno = -1;
668 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
673 * Search for the biggest yet unprocessed internal attribute
675 for (i = 0; i < numAttrs; i++)
677 if (toast_action[i] != ' ')
679 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
680 continue; /* can't happen, toast_action would be 'p' */
681 if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
683 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
685 if (toast_sizes[i] > biggest_size)
688 biggest_size = toast_sizes[i];
692 if (biggest_attno < 0)
696 * Attempt to compress it inline, if it has attstorage 'x'
699 if (att[i]->attstorage == 'x')
701 old_value = toast_values[i];
702 new_value = toast_compress_datum(old_value);
/* toast_compress_datum() signals "no gain" with a NULL pointer datum */
704 if (DatumGetPointer(new_value) != NULL)
706 /* successful compression */
708 pfree(DatumGetPointer(old_value));
709 toast_values[i] = new_value;
710 toast_free[i] = true;
711 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
717 /* incompressible, ignore on subsequent compression passes */
718 toast_action[i] = 'x';
723 /* has attstorage 'e', ignore on subsequent compression passes */
724 toast_action[i] = 'x';
728 * If this value is by itself more than maxDataLen (after compression
729 * if any), push it out to the toast table immediately, if possible.
730 * This avoids uselessly compressing other fields in the common case
731 * where we have one long field and several short ones.
733 * XXX maybe the threshold should be less than maxDataLen?
735 if (toast_sizes[i] > maxDataLen &&
736 rel->rd_rel->reltoastrelid != InvalidOid)
738 old_value = toast_values[i];
739 toast_action[i] = 'p';
740 toast_values[i] = toast_save_datum(rel, toast_values[i],
741 toast_oldexternal[i], options);
743 pfree(DatumGetPointer(old_value));
744 toast_free[i] = true;
/* ---- round 2: push remaining inline 'x'/'e' columns out ---- */
751 * Second we look for attributes of attstorage 'x' or 'e' that are still
752 * inline. But skip this if there's no toast table to push them to.
754 while (heap_compute_data_size(tupleDesc,
755 toast_values, toast_isnull) > maxDataLen &&
756 rel->rd_rel->reltoastrelid != InvalidOid)
758 int biggest_attno = -1;
759 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
763 * Search for the biggest yet inlined attribute with
764 * attstorage equals 'x' or 'e'
767 for (i = 0; i < numAttrs; i++)
769 if (toast_action[i] == 'p')
771 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
772 continue; /* can't happen, toast_action would be 'p' */
773 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
775 if (toast_sizes[i] > biggest_size)
778 biggest_size = toast_sizes[i];
782 if (biggest_attno < 0)
786 * Store this external
789 old_value = toast_values[i];
790 toast_action[i] = 'p';
791 toast_values[i] = toast_save_datum(rel, toast_values[i],
792 toast_oldexternal[i], options);
794 pfree(DatumGetPointer(old_value));
795 toast_free[i] = true;
/* ---- round 3: compress 'm' columns inline ---- */
802 * Round 3 - this time we take attributes with storage 'm' into
805 while (heap_compute_data_size(tupleDesc,
806 toast_values, toast_isnull) > maxDataLen)
808 int biggest_attno = -1;
809 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
814 * Search for the biggest yet uncompressed internal attribute
816 for (i = 0; i < numAttrs; i++)
818 if (toast_action[i] != ' ')
820 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
821 continue; /* can't happen, toast_action would be 'p' */
822 if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
824 if (att[i]->attstorage != 'm')
826 if (toast_sizes[i] > biggest_size)
829 biggest_size = toast_sizes[i];
833 if (biggest_attno < 0)
837 * Attempt to compress it inline
840 old_value = toast_values[i];
841 new_value = toast_compress_datum(old_value);
843 if (DatumGetPointer(new_value) != NULL)
845 /* successful compression */
847 pfree(DatumGetPointer(old_value));
848 toast_values[i] = new_value;
849 toast_free[i] = true;
850 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
856 /* incompressible, ignore on subsequent compression passes */
857 toast_action[i] = 'x';
/* ---- round 4: push 'm' columns out, measured against the larger limit ---- */
862 * Finally we store attributes of type 'm' externally. At this point we
863 * increase the target tuple size, so that 'm' attributes aren't stored
864 * externally unless really necessary.
866 maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;
868 while (heap_compute_data_size(tupleDesc,
869 toast_values, toast_isnull) > maxDataLen &&
870 rel->rd_rel->reltoastrelid != InvalidOid)
872 int biggest_attno = -1;
873 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
877 * Search for the biggest yet inlined attribute with
881 for (i = 0; i < numAttrs; i++)
883 if (toast_action[i] == 'p')
885 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
886 continue; /* can't happen, toast_action would be 'p' */
887 if (att[i]->attstorage != 'm')
889 if (toast_sizes[i] > biggest_size)
892 biggest_size = toast_sizes[i];
896 if (biggest_attno < 0)
900 * Store this external
903 old_value = toast_values[i];
904 toast_action[i] = 'p';
905 toast_values[i] = toast_save_datum(rel, toast_values[i],
906 toast_oldexternal[i], options);
908 pfree(DatumGetPointer(old_value));
909 toast_free[i] = true;
/* ---- rebuild the tuple from the (possibly modified) column values ---- */
916 * In the case we toasted any values, we need to build a new heap tuple
917 * with the changed values.
921 HeapTupleHeader olddata = newtup->t_data;
922 HeapTupleHeader new_data;
923 int32 new_header_len;
928 * Calculate the new size of the tuple.
930 * Note: we used to assume here that the old tuple's t_hoff must equal
931 * the new_header_len value, but that was incorrect. The old tuple
932 * might have a smaller-than-current natts, if there's been an ALTER
933 * TABLE ADD COLUMN since it was stored; and that would lead to a
934 * different conclusion about the size of the null bitmap, or even
935 * whether there needs to be one at all.
937 new_header_len = offsetof(HeapTupleHeaderData, t_bits);
939 new_header_len += BITMAPLEN(numAttrs);
940 if (olddata->t_infomask & HEAP_HASOID)
941 new_header_len += sizeof(Oid);
942 new_header_len = MAXALIGN(new_header_len);
943 new_data_len = heap_compute_data_size(tupleDesc,
944 toast_values, toast_isnull);
945 new_tuple_len = new_header_len + new_data_len;
948 * Allocate and zero the space needed, and fill HeapTupleData fields.
/* one allocation: the HeapTupleData struct followed directly by the tuple body */
950 result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_tuple_len);
951 result_tuple->t_len = new_tuple_len;
952 result_tuple->t_self = newtup->t_self;
953 result_tuple->t_tableOid = newtup->t_tableOid;
954 new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
955 result_tuple->t_data = new_data;
958 * Copy the existing tuple header, but adjust natts and t_hoff.
960 memcpy(new_data, olddata, offsetof(HeapTupleHeaderData, t_bits));
961 HeapTupleHeaderSetNatts(new_data, numAttrs);
962 new_data->t_hoff = new_header_len;
963 if (olddata->t_infomask & HEAP_HASOID)
964 HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(olddata));
966 /* Copy over the data, and fill the null bitmap if needed */
967 heap_fill_tuple(tupleDesc,
970 (char *) new_data + new_header_len,
972 &(new_data->t_infomask),
973 has_nulls ? new_data->t_bits : NULL);
976 result_tuple = newtup;
979 * Free allocated temp values
982 for (i = 0; i < numAttrs; i++)
984 pfree(DatumGetPointer(toast_values[i]));
987 * Delete external values from the old tuple
990 for (i = 0; i < numAttrs; i++)
992 toast_delete_datum(rel, toast_oldvalues[i]);
/*
 * toast_flatten_tuple
 *
 * Builds a copy of "tup" in which every non-null varlena column that is
 * VARATT_IS_EXTERNAL has been replaced by an inline copy of its data.
 * Compressed and short-header values are left as they are.
 *
 * tup        tuple to flatten
 * tupleDesc  descriptor used to deform and re-form the tuple
 *
 * The new tuple is formed with heap_form_tuple(); OID (when the descriptor
 * has one), t_self, t_tableOid, t_choice, t_ctid and the HEAP_XACT_MASK /
 * HEAP2_XACT_MASK infomask bits are copied over from "tup".
 *
 * NOTE(review): the guard around the pfree() in the cleanup loop and the
 * return statement are not visible in this listing.
 */
999 * toast_flatten_tuple -
1001 * "Flatten" a tuple to contain no out-of-line toasted fields.
1002 * (This does not eliminate compressed or short-header datums.)
1004 * Note: we expect the caller already checked HeapTupleHasExternal(tup),
1005 * so there is no need for a short-circuit path.
1009 toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
1011 HeapTuple new_tuple;
1012 Form_pg_attribute *att = tupleDesc->attrs;
1013 int numAttrs = tupleDesc->natts;
1015 Datum toast_values[MaxTupleAttributeNumber];
1016 bool toast_isnull[MaxTupleAttributeNumber];
1017 bool toast_free[MaxTupleAttributeNumber];
1020 * Break down the tuple into fields.
1022 Assert(numAttrs <= MaxTupleAttributeNumber);
1023 heap_deform_tuple(tup, tupleDesc, toast_values, toast_isnull);
1025 memset(toast_free, 0, numAttrs * sizeof(bool));
1027 for (i = 0; i < numAttrs; i++)
1030 * Look at non-null varlena attributes
1032 if (!toast_isnull[i] && att[i]->attlen == -1)
1034 struct varlena *new_value;
1036 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1037 if (VARATT_IS_EXTERNAL(new_value))
/*
 * Go through heap_tuple_fetch_attr() rather than calling
 * toast_fetch_datum() directly: it dispatches on the kind of external
 * pointer (on-disk values still end up in toast_fetch_datum(), indirect
 * ones are followed and copied), and it is what toast_insert_or_update()
 * uses for the same VARATT_IS_EXTERNAL test.
 * NOTE(review): presumably VARATT_IS_EXTERNAL also matches indirect
 * pointers -- confirm against the macro definition.
 */
1039 new_value = heap_tuple_fetch_attr(new_value);
1040 toast_values[i] = PointerGetDatum(new_value);
1041 toast_free[i] = true;
1047 * Form the reconfigured tuple.
1049 new_tuple = heap_form_tuple(tupleDesc, toast_values, toast_isnull);
1052 * Be sure to copy the tuple's OID and identity fields. We also make a
1053 * point of copying visibility info, just in case anybody looks at those
1054 * fields in a syscache entry.
1056 if (tupleDesc->tdhasoid)
1057 HeapTupleSetOid(new_tuple, HeapTupleGetOid(tup));
1059 new_tuple->t_self = tup->t_self;
1060 new_tuple->t_tableOid = tup->t_tableOid;
1062 new_tuple->t_data->t_choice = tup->t_data->t_choice;
1063 new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
/* replace only the masked bits; other infomask bits set by heap_form_tuple() stay */
1064 new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
1065 new_tuple->t_data->t_infomask |=
1066 tup->t_data->t_infomask & HEAP_XACT_MASK;
1067 new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
1068 new_tuple->t_data->t_infomask2 |=
1069 tup->t_data->t_infomask2 & HEAP2_XACT_MASK;
1072 * Free allocated temp values
1074 for (i = 0; i < numAttrs; i++)
1076 pfree(DatumGetPointer(toast_values[i]));
/*
 * toast_flatten_tuple_to_datum
 *
 * Produces a palloc0'd composite Datum from the tuple header "tup":
 *   - every non-null varlena column that is external or compressed is
 *     replaced by the result of heap_tuple_untoast_attr();
 *   - the fixed part of the header is copied from "tup", then natts,
 *     t_hoff, the OID (if HEAP_HASOID is set) and the composite-Datum
 *     length / type id / typmod fields are set;
 *   - column data is written with heap_fill_tuple().
 *
 * tup        header of the tuple to flatten
 * tupleDesc  descriptor used to deform the tuple and to supply
 *            tdtypeid / tdtypmod
 *
 * NOTE(review): one parameter line (tup_len is used below but its
 * declaration is not shown), the code that sets has_nulls, and the guard
 * around the pfree() in the cleanup loop are not visible in this listing.
 */
1083 * toast_flatten_tuple_to_datum -
1085 * "Flatten" a tuple containing out-of-line toasted fields into a Datum.
1086 * The result is always palloc'd in the current memory context.
1088 * We have a general rule that Datums of container types (rows, arrays,
1089 * ranges, etc) must not contain any external TOAST pointers. Without
1090 * this rule, we'd have to look inside each Datum when preparing a tuple
1091 * for storage, which would be expensive and would fail to extend cleanly
1092 * to new sorts of container types.
1094 * However, we don't want to say that tuples represented as HeapTuples
1095 * can't contain toasted fields, so instead this routine should be called
1096 * when such a HeapTuple is being converted into a Datum.
1098 * While we're at it, we decompress any compressed fields too. This is not
1099 * necessary for correctness, but reflects an expectation that compression
1100 * will be more effective if applied to the whole tuple not individual
1101 * fields. We are not so concerned about that that we want to deconstruct
1102 * and reconstruct tuples just to get rid of compressed fields, however.
1103 * So callers typically won't call this unless they see that the tuple has
1104 * at least one external field.
1106 * On the other hand, in-line short-header varlena fields are left alone.
1107 * If we "untoasted" them here, they'd just get changed back to short-header
1108 * format anyway within heap_fill_tuple.
1112 toast_flatten_tuple_to_datum(HeapTupleHeader tup,
1114 TupleDesc tupleDesc)
1116 HeapTupleHeader new_data;
1117 int32 new_header_len;
1119 int32 new_tuple_len;
1120 HeapTupleData tmptup;
1121 Form_pg_attribute *att = tupleDesc->attrs;
1122 int numAttrs = tupleDesc->natts;
1124 bool has_nulls = false;
1125 Datum toast_values[MaxTupleAttributeNumber];
1126 bool toast_isnull[MaxTupleAttributeNumber];
1127 bool toast_free[MaxTupleAttributeNumber];
/* heap_deform_tuple() wants a HeapTuple, so wrap the bare header in one */
1129 /* Build a temporary HeapTuple control structure */
1130 tmptup.t_len = tup_len;
1131 ItemPointerSetInvalid(&(tmptup.t_self));
1132 tmptup.t_tableOid = InvalidOid;
1133 tmptup.t_data = tup;
1136 * Break down the tuple into fields.
1138 Assert(numAttrs <= MaxTupleAttributeNumber);
1139 heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
1141 memset(toast_free, 0, numAttrs * sizeof(bool));
1143 for (i = 0; i < numAttrs; i++)
1146 * Look at non-null varlena attributes
1148 if (toast_isnull[i])
1150 else if (att[i]->attlen == -1)
1152 struct varlena *new_value;
1154 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1155 if (VARATT_IS_EXTERNAL(new_value) ||
1156 VARATT_IS_COMPRESSED(new_value))
/* replacement is freshly allocated, so mark it for the cleanup loop */
1158 new_value = heap_tuple_untoast_attr(new_value);
1159 toast_values[i] = PointerGetDatum(new_value);
1160 toast_free[i] = true;
1166 * Calculate the new size of the tuple.
1168 * This should match the reconstruction code in toast_insert_or_update.
1170 new_header_len = offsetof(HeapTupleHeaderData, t_bits);
1172 new_header_len += BITMAPLEN(numAttrs);
1173 if (tup->t_infomask & HEAP_HASOID)
1174 new_header_len += sizeof(Oid);
1175 new_header_len = MAXALIGN(new_header_len);
1176 new_data_len = heap_compute_data_size(tupleDesc,
1177 toast_values, toast_isnull);
1178 new_tuple_len = new_header_len + new_data_len;
1180 new_data = (HeapTupleHeader) palloc0(new_tuple_len);
1183 * Copy the existing tuple header, but adjust natts and t_hoff.
1185 memcpy(new_data, tup, offsetof(HeapTupleHeaderData, t_bits));
1186 HeapTupleHeaderSetNatts(new_data, numAttrs);
1187 new_data->t_hoff = new_header_len;
1188 if (tup->t_infomask & HEAP_HASOID)
1189 HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(tup));
1191 /* Set the composite-Datum header fields correctly */
1192 HeapTupleHeaderSetDatumLength(new_data, new_tuple_len);
1193 HeapTupleHeaderSetTypeId(new_data, tupleDesc->tdtypeid);
1194 HeapTupleHeaderSetTypMod(new_data, tupleDesc->tdtypmod);
1196 /* Copy over the data, and fill the null bitmap if needed */
1197 heap_fill_tuple(tupleDesc,
1200 (char *) new_data + new_header_len,
1202 &(new_data->t_infomask),
1203 has_nulls ? new_data->t_bits : NULL);
1206 * Free allocated temp values
1208 for (i = 0; i < numAttrs; i++)
1210 pfree(DatumGetPointer(toast_values[i]));
1212 return PointerGetDatum(new_data);
/*
 * toast_compress_datum
 *
 * Tries to compress the payload of "value" with pglz_compress() using
 * PGLZ_strategy_default.
 *
 * value  varlena datum; asserted to be neither external nor already
 *        compressed
 *
 * Returns a pointer Datum to the newly palloc'd compressed varlena, or
 * PointerGetDatum(NULL) when the payload size is outside the strategy's
 * [min_input_size, max_input_size] range, when pglz_compress() reports
 * failure, or when the result does not save more than 2 bytes.
 *
 * NOTE(review): what happens to "tmp" on the failure path is not visible
 * in this listing.
 */
1217 * toast_compress_datum -
1219 * Create a compressed version of a varlena datum
1221 * If we fail (ie, compressed result is actually bigger than original)
1222 * then return NULL. We must not use compressed data if it'd expand
1225 * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
1226 * copying them. But we can't handle external or compressed datums.
1230 toast_compress_datum(Datum value)
1232 struct varlena *tmp;
/* payload size only: header bytes are excluded whichever header form is used */
1233 int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
1235 Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
1236 Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
1239 * No point in wasting a palloc cycle if value size is out of the allowed
1240 * range for compression
1242 if (valsize < PGLZ_strategy_default->min_input_size ||
1243 valsize > PGLZ_strategy_default->max_input_size)
1244 return PointerGetDatum(NULL);
1246 tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
1249 * We recheck the actual size even if pglz_compress() reports success,
1250 * because it might be satisfied with having saved as little as one byte
1251 * in the compressed data --- which could turn into a net loss once you
1252 * consider header and alignment padding. Worst case, the compressed
1253 * format might require three padding bytes (plus header, which is
1254 * included in VARSIZE(tmp)), whereas the uncompressed format would take
1255 * only one header byte and no padding if the value is short enough. So
1256 * we insist on a savings of more than 2 bytes to ensure we have a gain.
1258 if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize,
1259 (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
1260 VARSIZE(tmp) < valsize - 2)
1262 /* successful compression */
1263 return PointerGetDatum(tmp);
1267 /* incompressible data */
1269 return PointerGetDatum(NULL);
/*
 * toast_get_valid_index
 *
 * Opens the toast relation "toastoid" with lock mode "lock", opens its
 * indexes via toast_open_indexes(), reads the relation OID of the entry
 * that call identifies as valid, then closes the indexes and the relation
 * with the same lock mode.
 *
 * toastoid  OID of the toast relation
 * lock      lock mode passed to heap_open() / heap_close() and
 *           toast_close_indexes()
 *
 * Returns the OID of the valid index.
 *
 * NOTE(review): the remaining arguments of the toast_open_indexes() call
 * and some local declarations are not visible in this listing.
 */
1275 * toast_get_valid_index
1277 * Get OID of valid index associated to given toast relation. A toast
1278 * relation can have only one valid index at the same time.
1281 toast_get_valid_index(Oid toastoid, LOCKMODE lock)
1286 Relation *toastidxs;
1289 /* Open the toast relation */
1290 toastrel = heap_open(toastoid, lock);
1292 /* Look for the valid index of the toast relation */
1293 validIndex = toast_open_indexes(toastrel,
/* validIndex is a position within toastidxs[] */
1297 validIndexOid = RelationGetRelid(toastidxs[validIndex]);
1299 /* Close the toast relation and all its indexes */
1300 toast_close_indexes(toastidxs, num_indexes, lock);
1301 heap_close(toastrel, lock);
1303 return validIndexOid;
1308 * toast_save_datum -
1310 * Save one single datum into the secondary relation and return
1311 * a Datum reference for it.
1313 * rel: the main relation we're working with (not the toast rel!)
1314 * value: datum to be pushed to toast storage
1315 * oldexternal: if not NULL, toast pointer previously representing the datum
1316 * options: options to be passed to heap_insert() for toast rows
1320 toast_save_datum(Relation rel, Datum value,
1321 struct varlena * oldexternal, int options)
1324 Relation *toastidxs;
1326 TupleDesc toasttupDesc;
1329 CommandId mycid = GetCurrentCommandId(true);
1330 struct varlena *result;
1331 struct varatt_external toast_pointer;
1335 char data[TOAST_MAX_CHUNK_SIZE]; /* make struct big enough */
1336 int32 align_it; /* ensure struct is aligned well enough */
1339 int32 chunk_seq = 0;
1342 Pointer dval = DatumGetPointer(value);
1346 Assert(!VARATT_IS_EXTERNAL(value));
1349 * Open the toast relation and its indexes. We can use the index to check
1350 * uniqueness of the OID we assign to the toasted item, even though it has
1351 * additional columns besides OID.
1353 toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
1354 toasttupDesc = toastrel->rd_att;
1356 /* Open all the toast indexes and look for the valid one */
1357 validIndex = toast_open_indexes(toastrel,
1363 * Get the data pointer and length, and compute va_rawsize and va_extsize.
1365 * va_rawsize is the size of the equivalent fully uncompressed datum, so
1366 * we have to adjust for short headers.
1368 * va_extsize is the actual size of the data payload in the toast records.
1370 if (VARATT_IS_SHORT(dval))
1372 data_p = VARDATA_SHORT(dval);
1373 data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
1374 toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
1375 toast_pointer.va_extsize = data_todo;
1377 else if (VARATT_IS_COMPRESSED(dval))
1379 data_p = VARDATA(dval);
1380 data_todo = VARSIZE(dval) - VARHDRSZ;
1381 /* rawsize in a compressed datum is just the size of the payload */
1382 toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
1383 toast_pointer.va_extsize = data_todo;
1384 /* Assert that the numbers look like it's compressed */
1385 Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1389 data_p = VARDATA(dval);
1390 data_todo = VARSIZE(dval) - VARHDRSZ;
1391 toast_pointer.va_rawsize = VARSIZE(dval);
1392 toast_pointer.va_extsize = data_todo;
1396 * Insert the correct table OID into the result TOAST pointer.
1398 * Normally this is the actual OID of the target toast table, but during
1399 * table-rewriting operations such as CLUSTER, we have to insert the OID
1400 * of the table's real permanent toast table instead. rd_toastoid is set
1401 * if we have to substitute such an OID.
1403 if (OidIsValid(rel->rd_toastoid))
1404 toast_pointer.va_toastrelid = rel->rd_toastoid;
1406 toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
1409 * Choose an OID to use as the value ID for this toast value.
1411 * Normally we just choose an unused OID within the toast table. But
1412 * during table-rewriting operations where we are preserving an existing
1413 * toast table OID, we want to preserve toast value OIDs too. So, if
1414 * rd_toastoid is set and we had a prior external value from that same
1415 * toast table, re-use its value ID. If we didn't have a prior external
1416 * value (which is a corner case, but possible if the table's attstorage
1417 * options have been changed), we have to pick a value ID that doesn't
1418 * conflict with either new or existing toast value OIDs.
1420 if (!OidIsValid(rel->rd_toastoid))
1422 /* normal case: just choose an unused OID */
1423 toast_pointer.va_valueid =
1424 GetNewOidWithIndex(toastrel,
1425 RelationGetRelid(toastidxs[validIndex]),
1430 /* rewrite case: check to see if value was in old toast table */
1431 toast_pointer.va_valueid = InvalidOid;
1432 if (oldexternal != NULL)
1434 struct varatt_external old_toast_pointer;
1436 Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
1437 /* Must copy to access aligned fields */
1438 VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
1439 if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
1441 /* This value came from the old toast table; reuse its OID */
1442 toast_pointer.va_valueid = old_toast_pointer.va_valueid;
1445 * There is a corner case here: the table rewrite might have
1446 * to copy both live and recently-dead versions of a row, and
1447 * those versions could easily reference the same toast value.
1448 * When we copy the second or later version of such a row,
1449 * reusing the OID will mean we select an OID that's already
1450 * in the new toast table. Check for that, and if so, just
1451 * fall through without writing the data again.
1453 * While annoying and ugly-looking, this is a good thing
1454 * because it ensures that we wind up with only one copy of
1455 * the toast value when there is only one copy in the old
1456 * toast table. Before we detected this case, we'd have made
1457 * multiple copies, wasting space; and what's worse, the
1458 * copies belonging to already-deleted heap tuples would not
1459 * be reclaimed by VACUUM.
1461 if (toastrel_valueid_exists(toastrel,
1462 toast_pointer.va_valueid))
1464 /* Match, so short-circuit the data storage loop below */
1469 if (toast_pointer.va_valueid == InvalidOid)
1472 * new value; must choose an OID that doesn't conflict in either
1473 * old or new toast table
1477 toast_pointer.va_valueid =
1478 GetNewOidWithIndex(toastrel,
1479 RelationGetRelid(toastidxs[validIndex]),
1481 } while (toastid_valueid_exists(rel->rd_toastoid,
1482 toast_pointer.va_valueid));
1487 * Initialize constant parts of the tuple data
1489 t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
1490 t_values[2] = PointerGetDatum(&chunk_data);
1491 t_isnull[0] = false;
1492 t_isnull[1] = false;
1493 t_isnull[2] = false;
1496 * Split up the item into chunks
1498 while (data_todo > 0)
1502 CHECK_FOR_INTERRUPTS();
1505 * Calculate the size of this chunk
1507 chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
1510 * Build a tuple and store it
1512 t_values[1] = Int32GetDatum(chunk_seq++);
1513 SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
1514 memcpy(VARDATA(&chunk_data), data_p, chunk_size);
1515 toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
1517 heap_insert(toastrel, toasttup, mycid, options, NULL);
1520 * Create the index entry. We cheat a little here by not using
1521 * FormIndexDatum: this relies on the knowledge that the index columns
1522 * are the same as the initial columns of the table for all the
1525 * Note also that there had better not be any user-created index on
1526 * the TOAST table, since we don't bother to update anything else.
1528 for (i = 0; i < num_indexes; i++)
1530 /* Only index relations marked as ready can be updated */
1531 if (IndexIsReady(toastidxs[i]->rd_index))
1532 index_insert(toastidxs[i], t_values, t_isnull,
1533 &(toasttup->t_self),
1535 toastidxs[i]->rd_index->indisunique ?
1536 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
1542 heap_freetuple(toasttup);
1545 * Move on to next chunk
1547 data_todo -= chunk_size;
1548 data_p += chunk_size;
1552 * Done - close toast relation and its indexes
1554 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1555 heap_close(toastrel, RowExclusiveLock);
1558 * Create the TOAST pointer value that we'll return
1560 result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
1561 SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
1562 memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
1564 return PointerGetDatum(result);
1569 * toast_delete_datum -
1571 * Delete a single external stored value.
1575 toast_delete_datum(Relation rel, Datum value)
1577 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
1578 struct varatt_external toast_pointer;
1580 Relation *toastidxs;
1581 ScanKeyData toastkey;
1582 SysScanDesc toastscan;
1587 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1590 /* Must copy to access aligned fields */
1591 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1594 * Open the toast relation and its indexes
1596 toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock);
1598 /* Fetch valid relation used for process */
1599 validIndex = toast_open_indexes(toastrel,
1605 * Setup a scan key to find chunks with matching va_valueid
1607 ScanKeyInit(&toastkey,
1609 BTEqualStrategyNumber, F_OIDEQ,
1610 ObjectIdGetDatum(toast_pointer.va_valueid));
1613 * Find all the chunks. (We don't actually care whether we see them in
1614 * sequence or not, but since we've already locked the index we might as
1615 * well use systable_beginscan_ordered.)
1617 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
1618 SnapshotToast, 1, &toastkey);
1619 while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1622 * Have a chunk, delete it
1624 simple_heap_delete(toastrel, &toasttup->t_self);
1628 * End scan and close relations
1630 systable_endscan_ordered(toastscan);
1631 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1632 heap_close(toastrel, RowExclusiveLock);
1637 * toastrel_valueid_exists -
1639 * Test whether a toast value with the given ID exists in the toast relation
1643 toastrel_valueid_exists(Relation toastrel, Oid valueid)
1645 bool result = false;
1646 ScanKeyData toastkey;
1647 SysScanDesc toastscan;
1650 Relation *toastidxs;
1652 /* Fetch a valid index relation */
1653 validIndex = toast_open_indexes(toastrel,
1659 * Setup a scan key to find chunks with matching va_valueid
1661 ScanKeyInit(&toastkey,
1663 BTEqualStrategyNumber, F_OIDEQ,
1664 ObjectIdGetDatum(valueid));
1667 * Is there any such chunk?
1669 toastscan = systable_beginscan(toastrel,
1670 RelationGetRelid(toastidxs[validIndex]),
1671 true, SnapshotToast, 1, &toastkey);
1673 if (systable_getnext(toastscan) != NULL)
1676 systable_endscan(toastscan);
1679 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1685 * toastid_valueid_exists -
1687 * As above, but work from toast rel's OID not an open relation
1691 toastid_valueid_exists(Oid toastrelid, Oid valueid)
1696 toastrel = heap_open(toastrelid, AccessShareLock);
1698 result = toastrel_valueid_exists(toastrel, valueid);
1700 heap_close(toastrel, AccessShareLock);
1707 * toast_fetch_datum -
1709 * Reconstruct an in memory Datum from the chunks saved
1710 * in the toast relation
1713 static struct varlena *
1714 toast_fetch_datum(struct varlena * attr)
1717 Relation *toastidxs;
1718 ScanKeyData toastkey;
1719 SysScanDesc toastscan;
1721 TupleDesc toasttupDesc;
1722 struct varlena *result;
1723 struct varatt_external toast_pointer;
1735 if (VARATT_IS_EXTERNAL_INDIRECT(attr))
1736 elog(ERROR, "shouldn't be called for indirect tuples");
1738 /* Must copy to access aligned fields */
1739 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1741 ressize = toast_pointer.va_extsize;
1742 numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1744 result = (struct varlena *) palloc(ressize + VARHDRSZ);
1746 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1747 SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
1749 SET_VARSIZE(result, ressize + VARHDRSZ);
1752 * Open the toast relation and its indexes
1754 toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
1755 toasttupDesc = toastrel->rd_att;
1757 /* Look for the valid index of the toast relation */
1758 validIndex = toast_open_indexes(toastrel,
1764 * Setup a scan key to fetch from the index by va_valueid
1766 ScanKeyInit(&toastkey,
1768 BTEqualStrategyNumber, F_OIDEQ,
1769 ObjectIdGetDatum(toast_pointer.va_valueid));
1772 * Read the chunks by index
1774 * Note that because the index is actually on (valueid, chunkidx) we will
1775 * see the chunks in chunkidx order, even though we didn't explicitly ask
1780 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
1781 SnapshotToast, 1, &toastkey);
1782 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1785 * Have a chunk, extract the sequence number and the data
1787 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
1789 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
1791 if (!VARATT_IS_EXTENDED(chunk))
1793 chunksize = VARSIZE(chunk) - VARHDRSZ;
1794 chunkdata = VARDATA(chunk);
1796 else if (VARATT_IS_SHORT(chunk))
1798 /* could happen due to heap_form_tuple doing its thing */
1799 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1800 chunkdata = VARDATA_SHORT(chunk);
1804 /* should never happen */
1805 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
1806 toast_pointer.va_valueid,
1807 RelationGetRelationName(toastrel));
1808 chunksize = 0; /* keep compiler quiet */
1813 * Some checks on the data we've found
1815 if (residx != nextidx)
1816 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
1818 toast_pointer.va_valueid,
1819 RelationGetRelationName(toastrel));
1820 if (residx < numchunks - 1)
1822 if (chunksize != TOAST_MAX_CHUNK_SIZE)
1823 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
1824 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
1826 toast_pointer.va_valueid,
1827 RelationGetRelationName(toastrel));
1829 else if (residx == numchunks - 1)
1831 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
1832 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
1834 (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
1836 toast_pointer.va_valueid,
1837 RelationGetRelationName(toastrel));
1840 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
1843 toast_pointer.va_valueid,
1844 RelationGetRelationName(toastrel));
1847 * Copy the data into proper place in our result
1849 memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
1857 * Final checks that we successfully fetched the datum
1859 if (nextidx != numchunks)
1860 elog(ERROR, "missing chunk number %d for toast value %u in %s",
1862 toast_pointer.va_valueid,
1863 RelationGetRelationName(toastrel));
1866 * End scan and close relations
1868 systable_endscan_ordered(toastscan);
1869 toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
1870 heap_close(toastrel, AccessShareLock);
1876 * toast_fetch_datum_slice -
1878 * Reconstruct a segment of a Datum from the chunks saved
1879 * in the toast relation
1882 static struct varlena *
1883 toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
1886 Relation *toastidxs;
1887 ScanKeyData toastkey[3];
1889 SysScanDesc toastscan;
1891 TupleDesc toasttupDesc;
1892 struct varlena *result;
1893 struct varatt_external toast_pointer;
1912 Assert(VARATT_IS_EXTERNAL_ONDISK(attr));
1914 /* Must copy to access aligned fields */
1915 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1918 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
1919 * we can't return a compressed datum which is meaningful to toast later
1921 Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1923 attrsize = toast_pointer.va_extsize;
1924 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1926 if (sliceoffset >= attrsize)
1932 if (((sliceoffset + length) > attrsize) || length < 0)
1933 length = attrsize - sliceoffset;
1935 result = (struct varlena *) palloc(length + VARHDRSZ);
1937 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1938 SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
1940 SET_VARSIZE(result, length + VARHDRSZ);
1943 return result; /* Can save a lot of work at this point! */
1945 startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
1946 endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
1947 numchunks = (endchunk - startchunk) + 1;
1949 startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
1950 endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
1953 * Open the toast relation and its indexes
1955 toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
1956 toasttupDesc = toastrel->rd_att;
1958 /* Look for the valid index of toast relation */
1959 validIndex = toast_open_indexes(toastrel,
1965 * Setup a scan key to fetch from the index. This is either two keys or
1966 * three depending on the number of chunks.
1968 ScanKeyInit(&toastkey[0],
1970 BTEqualStrategyNumber, F_OIDEQ,
1971 ObjectIdGetDatum(toast_pointer.va_valueid));
1974 * Use equality condition for one chunk, a range condition otherwise:
1978 ScanKeyInit(&toastkey[1],
1980 BTEqualStrategyNumber, F_INT4EQ,
1981 Int32GetDatum(startchunk));
1986 ScanKeyInit(&toastkey[1],
1988 BTGreaterEqualStrategyNumber, F_INT4GE,
1989 Int32GetDatum(startchunk));
1990 ScanKeyInit(&toastkey[2],
1992 BTLessEqualStrategyNumber, F_INT4LE,
1993 Int32GetDatum(endchunk));
1998 * Read the chunks by index
2000 * The index is on (valueid, chunkidx) so they will come in order
2002 nextidx = startchunk;
2003 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
2004 SnapshotToast, nscankeys, toastkey);
2005 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
2008 * Have a chunk, extract the sequence number and the data
2010 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
2012 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
2014 if (!VARATT_IS_EXTENDED(chunk))
2016 chunksize = VARSIZE(chunk) - VARHDRSZ;
2017 chunkdata = VARDATA(chunk);
2019 else if (VARATT_IS_SHORT(chunk))
2021 /* could happen due to heap_form_tuple doing its thing */
2022 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
2023 chunkdata = VARDATA_SHORT(chunk);
2027 /* should never happen */
2028 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
2029 toast_pointer.va_valueid,
2030 RelationGetRelationName(toastrel));
2031 chunksize = 0; /* keep compiler quiet */
2036 * Some checks on the data we've found
2038 if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
2039 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
2041 toast_pointer.va_valueid,
2042 RelationGetRelationName(toastrel));
2043 if (residx < totalchunks - 1)
2045 if (chunksize != TOAST_MAX_CHUNK_SIZE)
2046 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
2047 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
2048 residx, totalchunks,
2049 toast_pointer.va_valueid,
2050 RelationGetRelationName(toastrel));
2052 else if (residx == totalchunks - 1)
2054 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
2055 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
2057 (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
2059 toast_pointer.va_valueid,
2060 RelationGetRelationName(toastrel));
2063 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
2066 toast_pointer.va_valueid,
2067 RelationGetRelationName(toastrel));
2070 * Copy the data into proper place in our result
2073 chcpyend = chunksize - 1;
2074 if (residx == startchunk)
2075 chcpystrt = startoffset;
2076 if (residx == endchunk)
2077 chcpyend = endoffset;
2079 memcpy(VARDATA(result) +
2080 (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
2081 chunkdata + chcpystrt,
2082 (chcpyend - chcpystrt) + 1);
2088 * Final checks that we successfully fetched the datum
2090 if (nextidx != (endchunk + 1))
2091 elog(ERROR, "missing chunk number %d for toast value %u in %s",
2093 toast_pointer.va_valueid,
2094 RelationGetRelationName(toastrel));
2097 * End scan and close relations
2099 systable_endscan_ordered(toastscan);
2100 toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
2101 heap_close(toastrel, AccessShareLock);
2107 * toast_open_indexes
2109 * Get an array of the indexes associated to the given toast relation
2110 * and return as well the position of the valid index used by the toast
2111 * relation in this array. It is the responsibility of the caller of this
2112 * function to close the indexes as well as free them.
2115 toast_open_indexes(Relation toastrel,
2117 Relation **toastidxs,
2126 /* Get index list of the toast relation */
2127 indexlist = RelationGetIndexList(toastrel);
2128 Assert(indexlist != NIL);
2130 *num_indexes = list_length(indexlist);
2132 /* Open all the index relations */
2133 *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
2134 foreach(lc, indexlist)
2135 (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
2137 /* Fetch the first valid index in list */
2138 for (i = 0; i < *num_indexes; i++)
2140 Relation toastidx = (*toastidxs)[i];
2142 if (toastidx->rd_index->indisvalid)
2151 * Free index list, not necessary anymore as relations are opened and a
2152 * valid index has been found.
2154 list_free(indexlist);
2157 * The toast relation should have one valid index, so something is going
2158 * wrong if there is nothing.
2161 elog(ERROR, "no valid index found for toast relation with Oid %u",
2162 RelationGetRelid(toastrel));
2168 * toast_close_indexes
2170 * Close an array of indexes for a toast relation and free it. This should
2171 * be called for a set of indexes opened previously with toast_open_indexes.
2174 toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
2178 /* Close relations and clean up things */
2179 for (i = 0; i < num_indexes; i++)
2180 index_close(toastidxs[i], lock);