1 /*-------------------------------------------------------------------------
4 * Support routines for external and compressed storage of
5 * variable size attributes.
7 * Copyright (c) 2000, PostgreSQL Global Development Group
11 * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.13 2000/10/23 23:42:04 tgl Exp $
15 * heap_tuple_toast_attrs -
16 * Try to make a given tuple fit into one page by compressing
17 * or moving off attributes
19 * heap_tuple_untoast_attr -
20 * Fetch back a given value from the "secondary" relation
22 *-------------------------------------------------------------------------
30 #include "access/heapam.h"
31 #include "access/genam.h"
32 #include "access/tuptoaster.h"
33 #include "catalog/catalog.h"
34 #include "utils/rel.h"
35 #include "utils/builtins.h"
36 #include "utils/fmgroids.h"
37 #include "utils/pg_lzcompress.h"
40 #ifdef TUPLE_TOASTER_ACTIVE
44 static void toast_delete(Relation rel, HeapTuple oldtup);
45 static void toast_delete_datum(Relation rel, Datum value);
46 static void toast_insert_or_update(Relation rel, HeapTuple newtup,
48 static Datum toast_compress_datum(Datum value);
49 static Datum toast_save_datum(Relation rel, Oid mainoid, int16 attno, Datum value);
50 static varattrib *toast_fetch_datum(varattrib *attr);
54 * heap_tuple_toast_attrs -
56 * This is the central public entry point for toasting from heapam.
58 * Calls the appropriate event specific action.
62 heap_tuple_toast_attrs(Relation rel, HeapTuple newtup, HeapTuple oldtup)
65 toast_delete(rel, oldtup);
67 toast_insert_or_update(rel, newtup, oldtup);
72 * heap_tuple_fetch_attr -
74 * Public entry point to get back a toasted value from
75 * external storage (possibly still in compressed format).
79 heap_tuple_fetch_attr(varattrib *attr)
83 if (VARATT_IS_EXTERNAL(attr))
86 * This is an external stored plain value
89 result = toast_fetch_datum(attr);
94 * This is a plain value inside of the main tuple - why am I called?
105 * heap_tuple_untoast_attr -
107 * Public entry point to get back a toasted value from compression
108 * or external storage.
112 heap_tuple_untoast_attr(varattrib *attr)
116 if (VARATT_IS_EXTERNAL(attr))
118 if (VARATT_IS_COMPRESSED(attr))
121 * This is an external stored compressed value
122 * Fetch it from the toast heap and decompress.
127 tmp = toast_fetch_datum(attr);
128 result = (varattrib *)palloc(attr->va_content.va_external.va_rawsize
130 VARATT_SIZEP(result) = attr->va_content.va_external.va_rawsize
132 pglz_decompress((PGLZ_Header *)tmp, VARATT_DATA(result));
139 * This is an external stored plain value
142 result = toast_fetch_datum(attr);
145 else if (VARATT_IS_COMPRESSED(attr))
148 * This is a compressed value inside of the main tuple
151 result = (varattrib *)palloc(attr->va_content.va_compressed.va_rawsize
153 VARATT_SIZEP(result) = attr->va_content.va_compressed.va_rawsize
155 pglz_decompress((PGLZ_Header *)attr, VARATT_DATA(result));
159 * This is a plain value inside of the main tuple - why am I called?
171 * Cascaded delete toast-entries on DELETE
175 toast_delete(Relation rel, HeapTuple oldtup)
178 Form_pg_attribute *att;
185 * Get the tuple descriptor, the number of attributes and the attribute
189 tupleDesc = rel->rd_att;
190 numAttrs = tupleDesc->natts;
191 att = tupleDesc->attrs;
194 * Check for external stored attributes and delete them
195 * from the secondary relation.
198 for (i = 0; i < numAttrs; i++)
200 value = heap_getattr(oldtup, i + 1, tupleDesc, &isnull);
201 if (!isnull && att[i]->attlen == -1)
202 if (VARATT_IS_EXTERNAL(value))
203 toast_delete_datum(rel, value);
209 * toast_insert_or_update -
211 * Delete no-longer-used toast-entries and create new ones to
212 * make the new tuple fit on INSERT or UPDATE
216 toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
219 Form_pg_attribute *att;
225 bool need_change = false;
226 bool need_free = false;
227 bool need_delold = false;
228 bool has_nulls = false;
232 char toast_action[MaxHeapAttributeNumber];
233 char toast_nulls[MaxHeapAttributeNumber];
234 Datum toast_values[MaxHeapAttributeNumber];
235 int32 toast_sizes[MaxHeapAttributeNumber];
236 bool toast_free[MaxHeapAttributeNumber];
237 bool toast_delold[MaxHeapAttributeNumber];
240 * Get the tuple descriptor, the number of attributes, the attribute
241 * descriptors and the location of the tuple values.
244 tupleDesc = rel->rd_att;
245 numAttrs = tupleDesc->natts;
246 att = tupleDesc->attrs;
249 * Then collect information about the values given
251 * NOTE: toast_action[i] can have these values:
252 * ' ' default handling
253 * 'p' already processed --- don't touch it
254 * 'x' incompressible, but OK to move off
257 memset(toast_action, ' ', numAttrs * sizeof(char));
258 memset(toast_nulls, ' ', numAttrs * sizeof(char));
259 memset(toast_free, 0, numAttrs * sizeof(bool));
260 memset(toast_delold, 0, numAttrs * sizeof(bool));
261 for (i = 0; i < numAttrs; i++)
263 varattrib *old_value;
264 varattrib *new_value;
269 * For UPDATE get the old and new values of this attribute
272 old_value = (varattrib *)DatumGetPointer(
273 heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull));
275 heap_getattr(newtup, i + 1, tupleDesc, &new_isnull);
276 new_value = (varattrib *)DatumGetPointer(toast_values[i]);
279 * If the old value is an external stored one, check if it
280 * has changed so we have to delete it later.
283 if (!old_isnull && att[i]->attlen == -1 &&
284 VARATT_IS_EXTERNAL(old_value))
286 if (new_isnull || !VARATT_IS_EXTERNAL(new_value) ||
287 old_value->va_content.va_external.va_rowid !=
288 new_value->va_content.va_external.va_rowid ||
289 old_value->va_content.va_external.va_attno !=
290 new_value->va_content.va_external.va_attno)
293 * The old external store value isn't needed any
294 * more after the update
297 toast_delold[i] = true;
303 * This attribute isn't changed by this update
304 * so we reuse the original reference to the old
305 * value in the new tuple.
308 toast_action[i] = 'p';
309 toast_sizes[i] = VARATT_SIZE(toast_values[i]);
317 * For INSERT simply get the new value
321 heap_getattr(newtup, i + 1, tupleDesc, &new_isnull);
325 * Handle NULL attributes
330 toast_action[i] = 'p';
331 toast_nulls[i] = 'n';
337 * Now look at varsize attributes
340 if (att[i]->attlen == -1)
343 * If the table's attribute says PLAIN always, force it so.
346 if (att[i]->attstorage == 'p')
347 toast_action[i] = 'p';
350 * We took care of UPDATE above, so any TOASTed value we find
351 * still in the tuple must be someone else's we cannot reuse.
352 * Expand it to plain (and, probably, toast it again below).
355 if (VARATT_IS_EXTENDED(DatumGetPointer(toast_values[i])))
357 toast_values[i] = PointerGetDatum(heap_tuple_untoast_attr(
358 (varattrib *)DatumGetPointer(toast_values[i])));
359 toast_free[i] = true;
365 * Remember the size of this attribute
368 toast_sizes[i] = VARATT_SIZE(DatumGetPointer(toast_values[i]));
373 * Not a variable size attribute, plain storage always
376 toast_action[i] = 'p';
377 toast_sizes[i] = att[i]->attlen;
382 * Compress and/or save external until data fits into target length
384 * 1: Inline compress attributes with attstorage 'x'
385 * 2: Store attributes with attstorage 'x' or 'e' external
386 * 3: Inline compress attributes with attstorage 'm'
387 * 4: Store attributes with attstorage 'm' external
390 maxDataLen = offsetof(HeapTupleHeaderData, t_bits);
392 maxDataLen += BITMAPLEN(numAttrs);
393 maxDataLen = TOAST_TUPLE_TARGET - MAXALIGN(maxDataLen);
396 * Look for attributes with attstorage 'x' to compress
399 while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
402 int biggest_attno = -1;
403 int32 biggest_size = MAXALIGN(sizeof(varattrib));
408 * Search for the biggest yet uncompressed internal attribute
411 for (i = 0; i < numAttrs; i++)
413 if (toast_action[i] != ' ')
415 if (VARATT_IS_EXTENDED(toast_values[i]))
417 if (att[i]->attstorage != 'x')
419 if (toast_sizes[i] > biggest_size)
422 biggest_size = toast_sizes[i];
426 if (biggest_attno < 0)
430 * Attempt to compress it inline
434 old_value = toast_values[i];
435 new_value = toast_compress_datum(old_value);
437 if (DatumGetPointer(new_value) != NULL)
439 /* successful compression */
441 pfree(DatumGetPointer(old_value));
442 toast_values[i] = new_value;
443 toast_free[i] = true;
444 toast_sizes[i] = VARATT_SIZE(toast_values[i]);
450 /* incompressible data, ignore on subsequent compression passes */
451 toast_action[i] = 'x';
456 * Second we look for attributes of attstorage 'x' or 'e' that
460 while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
461 maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
463 int biggest_attno = -1;
464 int32 biggest_size = MAXALIGN(sizeof(varattrib));
468 * Search for the biggest yet inlined attribute with
469 * attstorage = 'x' or 'e'
472 for (i = 0; i < numAttrs; i++)
474 if (toast_action[i] == 'p')
476 if (VARATT_IS_EXTERNAL(toast_values[i]))
478 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
480 if (toast_sizes[i] > biggest_size)
483 biggest_size = toast_sizes[i];
487 if (biggest_attno < 0)
491 * Store this external
495 old_value = toast_values[i];
496 toast_action[i] = 'p';
497 toast_values[i] = toast_save_datum(rel,
498 newtup->t_data->t_oid,
502 pfree(DatumGetPointer(old_value));
504 toast_free[i] = true;
505 toast_sizes[i] = VARATT_SIZE(toast_values[i]);
512 * Round 3 - this time we take attributes with storage
513 * 'm' into compression
516 while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
519 int biggest_attno = -1;
520 int32 biggest_size = MAXALIGN(sizeof(varattrib));
525 * Search for the biggest yet uncompressed internal attribute
528 for (i = 0; i < numAttrs; i++)
530 if (toast_action[i] != ' ')
532 if (VARATT_IS_EXTENDED(toast_values[i]))
534 if (att[i]->attstorage != 'm')
536 if (toast_sizes[i] > biggest_size)
539 biggest_size = toast_sizes[i];
543 if (biggest_attno < 0)
547 * Attempt to compress it inline
551 old_value = toast_values[i];
552 new_value = toast_compress_datum(old_value);
554 if (DatumGetPointer(new_value) != NULL)
556 /* successful compression */
558 pfree(DatumGetPointer(old_value));
559 toast_values[i] = new_value;
560 toast_free[i] = true;
561 toast_sizes[i] = VARATT_SIZE(toast_values[i]);
567 /* incompressible data, ignore on subsequent compression passes */
568 toast_action[i] = 'x';
573 * Finally we store attributes of type 'm' external
576 while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
577 maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
579 int biggest_attno = -1;
580 int32 biggest_size = MAXALIGN(sizeof(varattrib));
584 * Search for the biggest yet inlined attribute with
588 for (i = 0; i < numAttrs; i++)
590 if (toast_action[i] == 'p')
592 if (VARATT_IS_EXTERNAL(toast_values[i]))
594 if (att[i]->attstorage != 'm')
596 if (toast_sizes[i] > biggest_size)
599 biggest_size = toast_sizes[i];
603 if (biggest_attno < 0)
607 * Store this external
611 old_value = toast_values[i];
612 toast_action[i] = 'p';
613 toast_values[i] = toast_save_datum(rel,
614 newtup->t_data->t_oid,
618 pfree(DatumGetPointer(old_value));
620 toast_free[i] = true;
621 toast_sizes[i] = VARATT_SIZE(toast_values[i]);
628 * In the case we toasted any values, we need to build
629 * a new heap tuple with the changed values.
636 MemoryContext oldcxt;
637 HeapTupleHeader olddata;
640 * Calculate the new size of the tuple
643 new_len = offsetof(HeapTupleHeaderData, t_bits);
645 new_len += BITMAPLEN(numAttrs);
646 new_len = MAXALIGN(new_len);
647 new_len += ComputeDataSize(tupleDesc, toast_values, toast_nulls);
650 * Remember the old memory location of the tuple (for below),
651 * switch to the memory context of the HeapTuple structure
652 * and allocate the new tuple.
655 olddata = newtup->t_data;
656 oldcxt = MemoryContextSwitchTo(newtup->t_datamcxt);
657 new_data = palloc(new_len);
660 * Put the tuple header and the changed values into place
663 memcpy(new_data, newtup->t_data, newtup->t_data->t_hoff);
664 newtup->t_data = (HeapTupleHeader)new_data;
665 newtup->t_len = new_len;
667 DataFill((char *)(MAXALIGN((long)new_data +
668 offsetof(HeapTupleHeaderData, t_bits) +
669 ((has_nulls) ? BITMAPLEN(numAttrs) : 0))),
673 &(newtup->t_data->t_infomask),
674 has_nulls ? newtup->t_data->t_bits : NULL);
677 * In the case we modified a previously modified tuple again,
678 * free the memory from the previous run
681 if ((char *)olddata != ((char *)newtup + HEAPTUPLESIZE))
685 * Switch back to the old memory context
688 MemoryContextSwitchTo(oldcxt);
692 * Free allocated temp values
696 for (i = 0; i < numAttrs; i++)
698 pfree(DatumGetPointer(toast_values[i]));
701 * Delete external values from the old tuple
705 for (i = 0; i < numAttrs; i++)
707 toast_delete_datum(rel,
708 heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull));
713 * toast_compress_datum -
715 * Create a compressed version of a varlena datum
717 * If we fail (ie, compressed result is actually bigger than original)
718 * then return NULL. We must not use compressed data if it'd expand
723 toast_compress_datum(Datum value)
727 tmp = (varattrib *) palloc(sizeof(PGLZ_Header) + VARATT_SIZE(value));
728 pglz_compress(VARATT_DATA(value), VARATT_SIZE(value) - VARHDRSZ,
730 PGLZ_strategy_default);
731 if (VARATT_SIZE(tmp) < VARATT_SIZE(value))
733 /* successful compression */
734 VARATT_SIZEP(tmp) |= VARATT_FLAG_COMPRESSED;
735 return PointerGetDatum(tmp);
739 /* incompressible data */
741 return PointerGetDatum(NULL);
749 * Save one single datum into the secondary relation and return
750 * a varattrib reference for it.
754 toast_save_datum(Relation rel, Oid mainoid, int16 attno, Datum value)
759 InsertIndexResult idxres;
760 TupleDesc toasttupDesc;
764 char chunk_data[VARHDRSZ + TOAST_MAX_CHUNK_SIZE];
771 * Create the varattrib reference
774 result = (varattrib *)palloc(sizeof(varattrib));
776 result->va_header = sizeof(varattrib) | VARATT_FLAG_EXTERNAL;
777 if (VARATT_IS_COMPRESSED(value))
779 result->va_header |= VARATT_FLAG_COMPRESSED;
780 result->va_content.va_external.va_rawsize =
781 ((varattrib *)value)->va_content.va_compressed.va_rawsize;
784 result->va_content.va_external.va_rawsize = VARATT_SIZE(value);
786 result->va_content.va_external.va_extsize =
787 VARATT_SIZE(value) - VARHDRSZ;
788 result->va_content.va_external.va_valueid = newoid();
789 result->va_content.va_external.va_toastrelid =
790 rel->rd_rel->reltoastrelid;
791 result->va_content.va_external.va_toastidxid =
792 rel->rd_rel->reltoastidxid;
793 result->va_content.va_external.va_rowid = mainoid;
794 result->va_content.va_external.va_attno = attno;
797 * Initialize constant parts of the tuple data
800 t_values[0] = ObjectIdGetDatum(result->va_content.va_external.va_valueid);
801 t_values[2] = PointerGetDatum(chunk_data);
807 * Get the data to process
810 data_p = VARATT_DATA(value);
811 data_todo = VARATT_SIZE(value) - VARHDRSZ;
814 * Open the toast relation
817 toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
818 toasttupDesc = toastrel->rd_att;
819 toastidx = index_open(rel->rd_rel->reltoastidxid);
822 * Split up the item into chunks
825 while (data_todo > 0)
828 * Calculate the size of this chunk
831 chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
837 t_values[1] = Int32GetDatum(chunk_seq++);
838 VARATT_SIZEP(chunk_data) = chunk_size + VARHDRSZ;
839 memcpy(VARATT_DATA(chunk_data), data_p, chunk_size);
840 toasttup = heap_formtuple(toasttupDesc, t_values, t_nulls);
841 if (!HeapTupleIsValid(toasttup))
842 elog(ERROR, "Failed to build TOAST tuple");
845 * Store it and create the index entry
848 heap_insert(toastrel, toasttup);
849 idxres = index_insert(toastidx, t_values, t_nulls,
853 elog(ERROR, "Failed to insert index entry for TOAST tuple");
859 heap_freetuple(toasttup);
863 * Move on to next chunk
866 data_todo -= chunk_size;
867 data_p += chunk_size;
871 * Done - close toast relation and return the reference
874 index_close(toastidx);
875 heap_close(toastrel, RowExclusiveLock);
877 return PointerGetDatum(result);
882 * toast_delete_datum -
884 * Delete a single external stored value.
888 toast_delete_datum(Relation rel, Datum value)
890 register varattrib *attr = (varattrib *)value;
893 ScanKeyData toastkey;
894 IndexScanDesc toastscan;
895 HeapTupleData toasttup;
896 RetrieveIndexResult indexRes;
899 if (!VARATT_IS_EXTERNAL(attr))
903 * Open the toast relation and its index
906 toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
908 toastidx = index_open(attr->va_content.va_external.va_toastidxid);
911 * Setup a scan key to fetch from the index by va_valueid
914 ScanKeyEntryInitialize(&toastkey,
917 (RegProcedure) F_OIDEQ,
918 ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
921 * Read the chunks by index
924 toastscan = index_beginscan(toastidx, false, 1, &toastkey);
925 while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
927 toasttup.t_self = indexRes->heap_iptr;
928 heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer);
931 if (!toasttup.t_data)
935 * Have a chunk, delete it
938 heap_delete(toastrel, &toasttup.t_self, NULL);
940 ReleaseBuffer(buffer);
944 * End scan and close relations
947 index_endscan(toastscan);
948 index_close(toastidx);
949 heap_close(toastrel, RowExclusiveLock);
954 * toast_fetch_datum -
956 * Reconstruct an in memory varattrib from the chunks saved
957 * in the toast relation
961 toast_fetch_datum(varattrib *attr)
965 ScanKeyData toastkey;
966 IndexScanDesc toastscan;
967 HeapTupleData toasttup;
969 TupleDesc toasttupDesc;
970 RetrieveIndexResult indexRes;
982 char *chunks_expected;
984 ressize = attr->va_content.va_external.va_extsize;
985 numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
987 chunks_found = palloc(numchunks);
988 chunks_expected = palloc(numchunks);
989 memset(chunks_found, 0, numchunks);
990 memset(chunks_expected, 1, numchunks);
992 result = (varattrib *)palloc(ressize + VARHDRSZ);
993 VARATT_SIZEP(result) = ressize + VARHDRSZ;
994 if (VARATT_IS_COMPRESSED(attr))
995 VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;
998 * Open the toast relation and its index
1001 toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
1003 toasttupDesc = toastrel->rd_att;
1004 toastidx = index_open(attr->va_content.va_external.va_toastidxid);
1007 * Setup a scan key to fetch from the index by va_valueid
1010 ScanKeyEntryInitialize(&toastkey,
1013 (RegProcedure) F_OIDEQ,
1014 ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
1017 * Read the chunks by index
1019 * Note we will not necessarily see the chunks in sequence-number order.
1022 toastscan = index_beginscan(toastidx, false, 1, &toastkey);
1023 while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
1025 toasttup.t_self = indexRes->heap_iptr;
1026 heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer);
1029 if (toasttup.t_data == NULL)
1034 * Have a chunk, extract the sequence number and the data
1037 residx = DatumGetInt32(heap_getattr(ttup, 2, toasttupDesc, &isnull));
1039 chunk = DatumGetPointer(heap_getattr(ttup, 3, toasttupDesc, &isnull));
1041 chunksize = VARATT_SIZE(chunk) - VARHDRSZ;
1044 * Some checks on the data we've found
1047 if (residx < 0 || residx >= numchunks)
1048 elog(ERROR, "unexpected chunk number %d for toast value %d",
1050 attr->va_content.va_external.va_valueid);
1051 if (residx < numchunks-1)
1053 if (chunksize != TOAST_MAX_CHUNK_SIZE)
1054 elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %d",
1056 attr->va_content.va_external.va_valueid);
1060 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
1061 elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %d",
1063 attr->va_content.va_external.va_valueid);
1065 if (chunks_found[residx]++ > 0)
1066 elog(ERROR, "chunk %d for toast value %d appears multiple times",
1068 attr->va_content.va_external.va_valueid);
1071 * Copy the data into proper place in our result
1074 memcpy(((char *)VARATT_DATA(result)) + residx * TOAST_MAX_CHUNK_SIZE,
1078 ReleaseBuffer(buffer);
1082 * Final checks that we successfully fetched the datum
1085 if (memcmp(chunks_found, chunks_expected, numchunks) != 0)
1086 elog(ERROR, "not all toast chunks found for value %d",
1087 attr->va_content.va_external.va_valueid);
1088 pfree(chunks_expected);
1089 pfree(chunks_found);
1092 * End scan and close relations
1095 index_endscan(toastscan);
1096 index_close(toastidx);
1097 heap_close(toastrel, AccessShareLock);
1103 #endif /* TUPLE_TOASTER_ACTIVE */