* Support routines for external and compressed storage of
* variable size attributes.
*
- * Copyright (c) 2000-2008, PostgreSQL Global Development Group
+ * Copyright (c) 2000-2017, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.81 2008/01/01 19:45:46 momjian Exp $
+ * src/backend/access/heap/tuptoaster.c
*
*
* INTERFACE ROUTINES
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "catalog/catalog.h"
+#include "common/pg_lzcompress.h"
+#include "miscadmin.h"
+#include "utils/expandeddatum.h"
#include "utils/fmgroids.h"
-#include "utils/pg_lzcompress.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
#include "utils/typcache.h"
+#include "utils/tqual.h"
#undef TOAST_DEBUG
-/* Size of an EXTERNAL datum that contains a standard TOAST pointer */
-#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external))
-
/*
- * Testing whether an externally-stored value is compressed now requires
- * comparing extsize (the actual length of the external data) to rawsize
- * (the original uncompressed datum's size). The latter includes VARHDRSZ
- * overhead, the former doesn't. We never use compression unless it actually
- * saves space, so we expect either equality or less-than.
+ * The information at the start of the compressed toast data.
*/
-#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
- ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ)
+typedef struct toast_compress_header
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int32 rawsize;
+} toast_compress_header;
/*
- * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
- * into a local "struct varatt_external" toast pointer. This should be
- * just a memcpy, but some versions of gcc seem to produce broken code
- * that assumes the datum contents are aligned. Introducing an explicit
- * intermediate "varattrib_1b_e *" variable seems to fix it.
+ * Utilities for manipulation of header information for compressed
+ * toast entries.
*/
-#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
-do { \
- varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
- Assert(VARSIZE_ANY_EXHDR(attre) == sizeof(toast_pointer)); \
- memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
-} while (0)
-
-
-static void toast_delete_datum(Relation rel, Datum value);
+#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header))
+#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
+#define TOAST_COMPRESS_RAWDATA(ptr) \
+ (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
+#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
+ (((toast_compress_header *) (ptr))->rawsize = (len))
+
+static void toast_delete_datum(Relation rel, Datum value, bool is_speculative);
static Datum toast_save_datum(Relation rel, Datum value,
- bool use_wal, bool use_fsm);
+ struct varlena * oldexternal, int options);
+static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
+static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
static struct varlena *toast_fetch_datum(struct varlena * attr);
static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
int32 sliceoffset, int32 length);
+static struct varlena *toast_decompress_datum(struct varlena * attr);
+static int toast_open_indexes(Relation toastrel,
+ LOCKMODE lock,
+ Relation **toastidxs,
+ int *num_indexes);
+static void toast_close_indexes(Relation *toastidxs, int num_indexes,
+ LOCKMODE lock);
+static void init_toast_snapshot(Snapshot toast_snapshot);
/* ----------
* heap_tuple_fetch_attr -
*
* Public entry point to get back a toasted value from
- * external storage (possibly still in compressed format).
+ * external source (possibly still in compressed format).
*
* This will return a datum that contains all the data internally, ie, not
- * relying on external storage, but it can still be compressed or have a short
- * header.
- ----------
+ * relying on external storage or memory, but it can still be compressed or
+ * have a short header. Note some callers assume that if the input is an
+ * EXTERNAL datum, the result will be a pfree'able chunk.
+ * ----------
*/
struct varlena *
heap_tuple_fetch_attr(struct varlena * attr)
{
struct varlena *result;
- if (VARATT_IS_EXTERNAL(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* This is an external stored plain value
*/
result = toast_fetch_datum(attr);
}
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ /*
+ * This is an indirect pointer --- dereference it
+ */
+ struct varatt_indirect redirect;
+
+ VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+ attr = (struct varlena *) redirect.pointer;
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+ /* recurse if value is still external in some other way */
+ if (VARATT_IS_EXTERNAL(attr))
+ return heap_tuple_fetch_attr(attr);
+
+ /*
+ * Copy into the caller's memory context, in case caller tries to
+ * pfree the result.
+ */
+ result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+ memcpy(result, attr, VARSIZE_ANY(attr));
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /*
+ * This is an expanded-object pointer --- get flat format
+ */
+ ExpandedObjectHeader *eoh;
+ Size resultsize;
+
+ eoh = DatumGetEOHP(PointerGetDatum(attr));
+ resultsize = EOH_get_flat_size(eoh);
+ result = (struct varlena *) palloc(resultsize);
+ EOH_flatten_into(eoh, (void *) result, resultsize);
+ }
else
{
/*
* heap_tuple_untoast_attr -
*
* Public entry point to get back a toasted value from compression
- * or external storage.
+ * or external storage. The result is always non-extended varlena form.
+ *
+ * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
+ * datum, the result will be a pfree'able chunk.
* ----------
*/
struct varlena *
heap_tuple_untoast_attr(struct varlena * attr)
{
- if (VARATT_IS_EXTERNAL(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* This is an externally stored datum --- fetch it back from there
/* If it's compressed, decompress it */
if (VARATT_IS_COMPRESSED(attr))
{
- PGLZ_Header *tmp = (PGLZ_Header *) attr;
+ struct varlena *tmp = attr;
- attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
- SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
- pglz_decompress(tmp, VARDATA(attr));
+ attr = toast_decompress_datum(tmp);
pfree(tmp);
}
}
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ /*
+ * This is an indirect pointer --- dereference it
+ */
+ struct varatt_indirect redirect;
+
+ VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+ attr = (struct varlena *) redirect.pointer;
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+ /* recurse in case value is still extended in some other way */
+ attr = heap_tuple_untoast_attr(attr);
+
+ /* if it isn't, we'd better copy it */
+ if (attr == (struct varlena *) redirect.pointer)
+ {
+ struct varlena *result;
+
+ result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+ memcpy(result, attr, VARSIZE_ANY(attr));
+ attr = result;
+ }
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /*
+ * This is an expanded-object pointer --- get flat format
+ */
+ attr = heap_tuple_fetch_attr(attr);
+ /* flatteners are not allowed to produce compressed/short output */
+ Assert(!VARATT_IS_EXTENDED(attr));
+ }
else if (VARATT_IS_COMPRESSED(attr))
{
/*
* This is a compressed value inside of the main tuple
*/
- PGLZ_Header *tmp = (PGLZ_Header *) attr;
-
- attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
- SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
- pglz_decompress(tmp, VARDATA(attr));
+ attr = toast_decompress_datum(attr);
}
else if (VARATT_IS_SHORT(attr))
{
char *attrdata;
int32 attrsize;
- if (VARATT_IS_EXTERNAL(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
/* fetch it back (compressed marker will get set automatically) */
preslice = toast_fetch_datum(attr);
}
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ struct varatt_indirect redirect;
+
+ VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
+
+ return heap_tuple_untoast_attr_slice(redirect.pointer,
+ sliceoffset, slicelength);
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /* pass it off to heap_tuple_fetch_attr to flatten */
+ preslice = heap_tuple_fetch_attr(attr);
+ }
else
preslice = attr;
+ Assert(!VARATT_IS_EXTERNAL(preslice));
+
if (VARATT_IS_COMPRESSED(preslice))
{
- PGLZ_Header *tmp = (PGLZ_Header *) preslice;
- Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ;
+ struct varlena *tmp = preslice;
- preslice = (struct varlena *) palloc(size);
- SET_VARSIZE(preslice, size);
- pglz_decompress(tmp, VARDATA(preslice));
+ preslice = toast_decompress_datum(tmp);
- if (tmp != (PGLZ_Header *) attr)
+ if (tmp != attr)
pfree(tmp);
}
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
- if (VARATT_IS_EXTERNAL(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/* va_rawsize is the size of the original datum -- including header */
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_rawsize;
}
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ struct varatt_indirect toast_pointer;
+
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
+
+ return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ result = EOH_get_flat_size(DatumGetEOHP(value));
+ }
else if (VARATT_IS_COMPRESSED(attr))
{
/* here, va_rawsize is just the payload size */
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
- if (VARATT_IS_EXTERNAL(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* Attribute is stored externally - return the extsize whether
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_extsize;
}
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ struct varatt_indirect toast_pointer;
+
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+ return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ result = EOH_get_flat_size(DatumGetEOHP(value));
+ }
else if (VARATT_IS_SHORT(attr))
{
result = VARSIZE_SHORT(attr);
* ----------
*/
void
-toast_delete(Relation rel, HeapTuple oldtup)
+toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
{
TupleDesc tupleDesc;
Form_pg_attribute *att;
bool toast_isnull[MaxHeapAttributeNumber];
/*
- * We should only ever be called for tuples of plain relations ---
- * recursing on a toast rel is bad news.
+ * We should only ever be called for tuples of plain relations or
+ * materialized views --- recursing on a toast rel is bad news.
*/
- Assert(rel->rd_rel->relkind == RELKIND_RELATION);
+ Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW);
/*
* Get the tuple descriptor and break down the tuple into fields.
*
- * NOTE: it's debatable whether to use heap_deformtuple() here or just
+ * NOTE: it's debatable whether to use heap_deform_tuple() here or just
* heap_getattr() only the varlena columns. The latter could win if there
* are few varlena columns and many non-varlena ones. However,
- * heap_deformtuple costs only O(N) while the heap_getattr way would cost
+ * heap_deform_tuple costs only O(N) while the heap_getattr way would cost
* O(N^2) if there are many varlena columns, so it seems better to err on
* the side of linear cost. (We won't even be here unless there's at
* least one varlena column, by the way.)
{
Datum value = toast_values[i];
- if (!toast_isnull[i] && VARATT_IS_EXTERNAL(value))
- toast_delete_datum(rel, value);
+ if (toast_isnull[i])
+ continue;
+ else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
+ toast_delete_datum(rel, value, is_speculative);
}
}
}
* Inputs:
* newtup: the candidate new tuple to be inserted
* oldtup: the old row version for UPDATE, or NULL for INSERT
- * use_wal, use_fsm: flags to be passed to heap_insert() for toast rows
+ * options: options to be passed to heap_insert() for toast rows
* Result:
* either newtup if no toasting is needed, or a palloc'd modified tuple
* that is what should actually get stored
*/
HeapTuple
toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
- bool use_wal, bool use_fsm)
+ int options)
{
HeapTuple result_tuple;
TupleDesc tupleDesc;
bool toast_oldisnull[MaxHeapAttributeNumber];
Datum toast_values[MaxHeapAttributeNumber];
Datum toast_oldvalues[MaxHeapAttributeNumber];
+ struct varlena *toast_oldexternal[MaxHeapAttributeNumber];
int32 toast_sizes[MaxHeapAttributeNumber];
bool toast_free[MaxHeapAttributeNumber];
bool toast_delold[MaxHeapAttributeNumber];
/*
- * We should only ever be called for tuples of plain relations ---
- * recursing on a toast rel is bad news.
+ * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super
+ * deletions just normally insert/delete the toast values. It seems
+ * easiest to deal with that here, instead on, potentially, multiple
+ * callers.
+ */
+ options &= ~HEAP_INSERT_SPECULATIVE;
+
+ /*
+ * We should only ever be called for tuples of plain relations or
+ * materialized views --- recursing on a toast rel is bad news.
*/
- Assert(rel->rd_rel->relkind == RELKIND_RELATION);
+ Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW);
/*
* Get the tuple descriptor and break down the tuple(s) into fields.
* ----------
*/
memset(toast_action, ' ', numAttrs * sizeof(char));
+ memset(toast_oldexternal, 0, numAttrs * sizeof(struct varlena *));
memset(toast_free, 0, numAttrs * sizeof(bool));
memset(toast_delold, 0, numAttrs * sizeof(bool));
new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
/*
- * If the old value is an external stored one, check if it has
- * changed so we have to delete it later.
+ * If the old value is stored on disk, check if it has changed so
+ * we have to delete it later.
*/
if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
- VARATT_IS_EXTERNAL(old_value))
+ VARATT_IS_EXTERNAL_ONDISK(old_value))
{
- if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
+ if (toast_isnull[i] || !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
memcmp((char *) old_value, (char *) new_value,
VARSIZE_EXTERNAL(old_value)) != 0)
{
/*
* We took care of UPDATE above, so any external value we find
- * still in the tuple must be someone else's we cannot reuse.
+ * still in the tuple must be someone else's that we cannot reuse
+ * (this includes the case of an out-of-line in-memory datum).
* Fetch it back (without decompression, unless we are forcing
- * PLAIN storage). If necessary, we'll push it out as a new
+ * PLAIN storage). If necessary, we'll push it out as a new
* external value below.
*/
if (VARATT_IS_EXTERNAL(new_value))
{
+ toast_oldexternal[i] = new_value;
if (att[i]->attstorage == 'p')
new_value = heap_tuple_untoast_attr(new_value);
else
/* ----------
* Compress and/or save external until data fits into target length
*
- * 1: Inline compress attributes with attstorage 'x'
+ * 1: Inline compress attributes with attstorage 'x', and store very
+ * large attributes with attstorage 'x' or 'e' external immediately
* 2: Store attributes with attstorage 'x' or 'e' external
* 3: Inline compress attributes with attstorage 'm'
* 4: Store attributes with attstorage 'm' external
*/
/* compute header overhead --- this should match heap_form_tuple() */
- hoff = offsetof(HeapTupleHeaderData, t_bits);
+ hoff = SizeofHeapTupleHeader;
if (has_nulls)
hoff += BITMAPLEN(numAttrs);
if (newtup->t_data->t_infomask & HEAP_HASOID)
hoff += sizeof(Oid);
hoff = MAXALIGN(hoff);
- Assert(hoff == newtup->t_data->t_hoff);
/* now convert to a limit on the tuple data size */
maxDataLen = TOAST_TUPLE_TARGET - hoff;
/*
- * Look for attributes with attstorage 'x' to compress
+ * Look for attributes with attstorage 'x' to compress. Also find large
+ * attributes with attstorage 'x' or 'e', and store them external.
*/
while (heap_compute_data_size(tupleDesc,
toast_values, toast_isnull) > maxDataLen)
Datum new_value;
/*
- * Search for the biggest yet uncompressed internal attribute
+ * Search for the biggest yet unprocessed internal attribute
*/
for (i = 0; i < numAttrs; i++)
{
if (toast_action[i] != ' ')
continue;
- if (VARATT_IS_EXTERNAL(toast_values[i]))
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
continue; /* can't happen, toast_action would be 'p' */
- if (VARATT_IS_COMPRESSED(toast_values[i]))
+ if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
continue;
- if (att[i]->attstorage != 'x')
+ if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
continue;
if (toast_sizes[i] > biggest_size)
{
break;
/*
- * Attempt to compress it inline
+ * Attempt to compress it inline, if it has attstorage 'x'
*/
i = biggest_attno;
- old_value = toast_values[i];
- new_value = toast_compress_datum(old_value);
+ if (att[i]->attstorage == 'x')
+ {
+ old_value = toast_values[i];
+ new_value = toast_compress_datum(old_value);
- if (DatumGetPointer(new_value) != NULL)
+ if (DatumGetPointer(new_value) != NULL)
+ {
+ /* successful compression */
+ if (toast_free[i])
+ pfree(DatumGetPointer(old_value));
+ toast_values[i] = new_value;
+ toast_free[i] = true;
+ toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
+ need_change = true;
+ need_free = true;
+ }
+ else
+ {
+ /* incompressible, ignore on subsequent compression passes */
+ toast_action[i] = 'x';
+ }
+ }
+ else
{
- /* successful compression */
+ /* has attstorage 'e', ignore on subsequent compression passes */
+ toast_action[i] = 'x';
+ }
+
+ /*
+ * If this value is by itself more than maxDataLen (after compression
+ * if any), push it out to the toast table immediately, if possible.
+ * This avoids uselessly compressing other fields in the common case
+ * where we have one long field and several short ones.
+ *
+ * XXX maybe the threshold should be less than maxDataLen?
+ */
+ if (toast_sizes[i] > maxDataLen &&
+ rel->rd_rel->reltoastrelid != InvalidOid)
+ {
+ old_value = toast_values[i];
+ toast_action[i] = 'p';
+ toast_values[i] = toast_save_datum(rel, toast_values[i],
+ toast_oldexternal[i], options);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
- toast_values[i] = new_value;
toast_free[i] = true;
- toast_sizes[i] = VARSIZE(toast_values[i]);
need_change = true;
need_free = true;
}
- else
- {
- /*
- * incompressible data, ignore on subsequent compression passes
- */
- toast_action[i] = 'x';
- }
}
/*
* Second we look for attributes of attstorage 'x' or 'e' that are still
- * inline. But skip this if there's no toast table to push them to.
+ * inline. But skip this if there's no toast table to push them to.
*/
while (heap_compute_data_size(tupleDesc,
toast_values, toast_isnull) > maxDataLen &&
{
if (toast_action[i] == 'p')
continue;
- if (VARATT_IS_EXTERNAL(toast_values[i]))
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
continue; /* can't happen, toast_action would be 'p' */
if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
continue;
old_value = toast_values[i];
toast_action[i] = 'p';
toast_values[i] = toast_save_datum(rel, toast_values[i],
- use_wal, use_fsm);
+ toast_oldexternal[i], options);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
toast_free[i] = true;
{
if (toast_action[i] != ' ')
continue;
- if (VARATT_IS_EXTERNAL(toast_values[i]))
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
continue; /* can't happen, toast_action would be 'p' */
- if (VARATT_IS_COMPRESSED(toast_values[i]))
+ if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
continue;
if (att[i]->attstorage != 'm')
continue;
pfree(DatumGetPointer(old_value));
toast_values[i] = new_value;
toast_free[i] = true;
- toast_sizes[i] = VARSIZE(toast_values[i]);
+ toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
need_change = true;
need_free = true;
}
else
{
- /*
- * incompressible data, ignore on subsequent compression passes
- */
+ /* incompressible, ignore on subsequent compression passes */
toast_action[i] = 'x';
}
}
/*
- * Finally we store attributes of type 'm' external, if possible.
+ * Finally we store attributes of type 'm' externally. At this point we
+ * increase the target tuple size, so that 'm' attributes aren't stored
+ * externally unless really necessary.
*/
+ maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;
+
while (heap_compute_data_size(tupleDesc,
toast_values, toast_isnull) > maxDataLen &&
rel->rd_rel->reltoastrelid != InvalidOid)
{
if (toast_action[i] == 'p')
continue;
- if (VARATT_IS_EXTERNAL(toast_values[i]))
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
continue; /* can't happen, toast_action would be 'p' */
if (att[i]->attstorage != 'm')
continue;
old_value = toast_values[i];
toast_action[i] = 'p';
toast_values[i] = toast_save_datum(rel, toast_values[i],
- use_wal, use_fsm);
+ toast_oldexternal[i], options);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
toast_free[i] = true;
{
HeapTupleHeader olddata = newtup->t_data;
HeapTupleHeader new_data;
- int32 new_len;
+ int32 new_header_len;
int32 new_data_len;
+ int32 new_tuple_len;
/*
- * Calculate the new size of the tuple. Header size should not
- * change, but data size might.
+ * Calculate the new size of the tuple.
+ *
+ * Note: we used to assume here that the old tuple's t_hoff must equal
+ * the new_header_len value, but that was incorrect. The old tuple
+ * might have a smaller-than-current natts, if there's been an ALTER
+ * TABLE ADD COLUMN since it was stored; and that would lead to a
+ * different conclusion about the size of the null bitmap, or even
+ * whether there needs to be one at all.
*/
- new_len = offsetof(HeapTupleHeaderData, t_bits);
+ new_header_len = SizeofHeapTupleHeader;
if (has_nulls)
- new_len += BITMAPLEN(numAttrs);
+ new_header_len += BITMAPLEN(numAttrs);
if (olddata->t_infomask & HEAP_HASOID)
- new_len += sizeof(Oid);
- new_len = MAXALIGN(new_len);
- Assert(new_len == olddata->t_hoff);
+ new_header_len += sizeof(Oid);
+ new_header_len = MAXALIGN(new_header_len);
new_data_len = heap_compute_data_size(tupleDesc,
toast_values, toast_isnull);
- new_len += new_data_len;
+ new_tuple_len = new_header_len + new_data_len;
/*
* Allocate and zero the space needed, and fill HeapTupleData fields.
*/
- result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_len);
- result_tuple->t_len = new_len;
+ result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_tuple_len);
+ result_tuple->t_len = new_tuple_len;
result_tuple->t_self = newtup->t_self;
result_tuple->t_tableOid = newtup->t_tableOid;
new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
result_tuple->t_data = new_data;
/*
- * Put the existing tuple header and the changed values into place
+ * Copy the existing tuple header, but adjust natts and t_hoff.
*/
- memcpy(new_data, olddata, olddata->t_hoff);
+ memcpy(new_data, olddata, SizeofHeapTupleHeader);
+ HeapTupleHeaderSetNatts(new_data, numAttrs);
+ new_data->t_hoff = new_header_len;
+ if (olddata->t_infomask & HEAP_HASOID)
+ HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(olddata));
+ /* Copy over the data, and fill the null bitmap if needed */
heap_fill_tuple(tupleDesc,
toast_values,
toast_isnull,
- (char *) new_data + olddata->t_hoff,
+ (char *) new_data + new_header_len,
new_data_len,
&(new_data->t_infomask),
has_nulls ? new_data->t_bits : NULL);
if (need_delold)
for (i = 0; i < numAttrs; i++)
if (toast_delold[i])
- toast_delete_datum(rel, toast_oldvalues[i]);
+ toast_delete_datum(rel, toast_oldvalues[i], false);
return result_tuple;
}
/* ----------
- * toast_flatten_tuple_attribute -
+ * toast_flatten_tuple -
*
- * If a Datum is of composite type, "flatten" it to contain no toasted fields.
- * This must be invoked on any potentially-composite field that is to be
- * inserted into a tuple. Doing this preserves the invariant that toasting
- * goes only one level deep in a tuple.
+ * "Flatten" a tuple to contain no out-of-line toasted fields.
+ * (This does not eliminate compressed or short-header datums.)
*
- * Note that flattening does not mean expansion of short-header varlenas,
- * so in one sense toasting is allowed within composite datums.
+ * Note: we expect the caller already checked HeapTupleHasExternal(tup),
+ * so there is no need for a short-circuit path.
* ----------
*/
-Datum
-toast_flatten_tuple_attribute(Datum value,
- Oid typeId, int32 typeMod)
+HeapTuple
+toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
{
- TupleDesc tupleDesc;
- HeapTupleHeader olddata;
- HeapTupleHeader new_data;
- int32 new_len;
- int32 new_data_len;
- HeapTupleData tmptup;
- Form_pg_attribute *att;
- int numAttrs;
+ HeapTuple new_tuple;
+ Form_pg_attribute *att = tupleDesc->attrs;
+ int numAttrs = tupleDesc->natts;
int i;
- bool need_change = false;
- bool has_nulls = false;
Datum toast_values[MaxTupleAttributeNumber];
bool toast_isnull[MaxTupleAttributeNumber];
bool toast_free[MaxTupleAttributeNumber];
/*
- * See if it's a composite type, and get the tupdesc if so.
+ * Break down the tuple into fields.
*/
- tupleDesc = lookup_rowtype_tupdesc_noerror(typeId, typeMod, true);
- if (tupleDesc == NULL)
- return value; /* not a composite type */
+ Assert(numAttrs <= MaxTupleAttributeNumber);
+ heap_deform_tuple(tup, tupleDesc, toast_values, toast_isnull);
- att = tupleDesc->attrs;
- numAttrs = tupleDesc->natts;
+ memset(toast_free, 0, numAttrs * sizeof(bool));
+
+ for (i = 0; i < numAttrs; i++)
+ {
+ /*
+ * Look at non-null varlena attributes
+ */
+ if (!toast_isnull[i] && att[i]->attlen == -1)
+ {
+ struct varlena *new_value;
+
+ new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
+ if (VARATT_IS_EXTERNAL(new_value))
+ {
+ new_value = heap_tuple_fetch_attr(new_value);
+ toast_values[i] = PointerGetDatum(new_value);
+ toast_free[i] = true;
+ }
+ }
+ }
/*
- * Break down the tuple into fields.
+ * Form the reconfigured tuple.
*/
- olddata = DatumGetHeapTupleHeader(value);
- Assert(typeId == HeapTupleHeaderGetTypeId(olddata));
- Assert(typeMod == HeapTupleHeaderGetTypMod(olddata));
+ new_tuple = heap_form_tuple(tupleDesc, toast_values, toast_isnull);
+
+ /*
+ * Be sure to copy the tuple's OID and identity fields. We also make a
+ * point of copying visibility info, just in case anybody looks at those
+ * fields in a syscache entry.
+ */
+ if (tupleDesc->tdhasoid)
+ HeapTupleSetOid(new_tuple, HeapTupleGetOid(tup));
+
+ new_tuple->t_self = tup->t_self;
+ new_tuple->t_tableOid = tup->t_tableOid;
+
+ new_tuple->t_data->t_choice = tup->t_data->t_choice;
+ new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
+ new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
+ new_tuple->t_data->t_infomask |=
+ tup->t_data->t_infomask & HEAP_XACT_MASK;
+ new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
+ new_tuple->t_data->t_infomask2 |=
+ tup->t_data->t_infomask2 & HEAP2_XACT_MASK;
+
+ /*
+ * Free allocated temp values
+ */
+ for (i = 0; i < numAttrs; i++)
+ if (toast_free[i])
+ pfree(DatumGetPointer(toast_values[i]));
+
+ return new_tuple;
+}
+
+
+/* ----------
+ * toast_flatten_tuple_to_datum -
+ *
+ * "Flatten" a tuple containing out-of-line toasted fields into a Datum.
+ * The result is always palloc'd in the current memory context.
+ *
+ * We have a general rule that Datums of container types (rows, arrays,
+ * ranges, etc) must not contain any external TOAST pointers. Without
+ * this rule, we'd have to look inside each Datum when preparing a tuple
+ * for storage, which would be expensive and would fail to extend cleanly
+ * to new sorts of container types.
+ *
+ * However, we don't want to say that tuples represented as HeapTuples
+ * can't contain toasted fields, so instead this routine should be called
+ * when such a HeapTuple is being converted into a Datum.
+ *
+ * While we're at it, we decompress any compressed fields too. This is not
+ * necessary for correctness, but reflects an expectation that compression
+ * will be more effective if applied to the whole tuple not individual
+ * fields. We are not so concerned about that that we want to deconstruct
+ * and reconstruct tuples just to get rid of compressed fields, however.
+ * So callers typically won't call this unless they see that the tuple has
+ * at least one external field.
+ *
+ * On the other hand, in-line short-header varlena fields are left alone.
+ * If we "untoasted" them here, they'd just get changed back to short-header
+ * format anyway within heap_fill_tuple.
+ * ----------
+ */
+Datum
+toast_flatten_tuple_to_datum(HeapTupleHeader tup,
+ uint32 tup_len,
+ TupleDesc tupleDesc)
+{
+ HeapTupleHeader new_data;
+ int32 new_header_len;
+ int32 new_data_len;
+ int32 new_tuple_len;
+ HeapTupleData tmptup;
+ Form_pg_attribute *att = tupleDesc->attrs;
+ int numAttrs = tupleDesc->natts;
+ int i;
+ bool has_nulls = false;
+ Datum toast_values[MaxTupleAttributeNumber];
+ bool toast_isnull[MaxTupleAttributeNumber];
+ bool toast_free[MaxTupleAttributeNumber];
+
/* Build a temporary HeapTuple control structure */
- tmptup.t_len = HeapTupleHeaderGetDatumLength(olddata);
+ tmptup.t_len = tup_len;
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
- tmptup.t_data = olddata;
+ tmptup.t_data = tup;
+ /*
+ * Break down the tuple into fields.
+ */
Assert(numAttrs <= MaxTupleAttributeNumber);
heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
new_value = heap_tuple_untoast_attr(new_value);
toast_values[i] = PointerGetDatum(new_value);
toast_free[i] = true;
- need_change = true;
}
}
}
/*
- * If nothing to untoast, just return the original tuple.
- */
- if (!need_change)
- {
- ReleaseTupleDesc(tupleDesc);
- return value;
- }
-
- /*
- * Calculate the new size of the tuple. Header size should not change,
- * but data size might.
+ * Calculate the new size of the tuple.
+ *
+ * This should match the reconstruction code in toast_insert_or_update.
*/
- new_len = offsetof(HeapTupleHeaderData, t_bits);
+ new_header_len = SizeofHeapTupleHeader;
if (has_nulls)
- new_len += BITMAPLEN(numAttrs);
- if (olddata->t_infomask & HEAP_HASOID)
- new_len += sizeof(Oid);
- new_len = MAXALIGN(new_len);
- Assert(new_len == olddata->t_hoff);
+ new_header_len += BITMAPLEN(numAttrs);
+ if (tup->t_infomask & HEAP_HASOID)
+ new_header_len += sizeof(Oid);
+ new_header_len = MAXALIGN(new_header_len);
new_data_len = heap_compute_data_size(tupleDesc,
toast_values, toast_isnull);
- new_len += new_data_len;
+ new_tuple_len = new_header_len + new_data_len;
- new_data = (HeapTupleHeader) palloc0(new_len);
+ new_data = (HeapTupleHeader) palloc0(new_tuple_len);
/*
- * Put the tuple header and the changed values into place
+ * Copy the existing tuple header, but adjust natts and t_hoff.
*/
- memcpy(new_data, olddata, olddata->t_hoff);
-
- HeapTupleHeaderSetDatumLength(new_data, new_len);
-
+ memcpy(new_data, tup, SizeofHeapTupleHeader);
+ HeapTupleHeaderSetNatts(new_data, numAttrs);
+ new_data->t_hoff = new_header_len;
+ if (tup->t_infomask & HEAP_HASOID)
+ HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(tup));
+
+ /* Set the composite-Datum header fields correctly */
+ HeapTupleHeaderSetDatumLength(new_data, new_tuple_len);
+ HeapTupleHeaderSetTypeId(new_data, tupleDesc->tdtypeid);
+ HeapTupleHeaderSetTypMod(new_data, tupleDesc->tdtypmod);
+
+ /* Copy over the data, and fill the null bitmap if needed */
heap_fill_tuple(tupleDesc,
toast_values,
toast_isnull,
- (char *) new_data + olddata->t_hoff,
+ (char *) new_data + new_header_len,
new_data_len,
&(new_data->t_infomask),
has_nulls ? new_data->t_bits : NULL);
for (i = 0; i < numAttrs; i++)
if (toast_free[i])
pfree(DatumGetPointer(toast_values[i]));
- ReleaseTupleDesc(tupleDesc);
return PointerGetDatum(new_data);
}
toast_compress_datum(Datum value)
{
struct varlena *tmp;
- int32 valsize = VARSIZE_ANY_EXHDR(value);
+ int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
+ int32 len;
- Assert(!VARATT_IS_EXTERNAL(value));
- Assert(!VARATT_IS_COMPRESSED(value));
+ Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
+ Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
/*
- * No point in wasting a palloc cycle if value is too short for
- * compression
+ * No point in wasting a palloc cycle if value size is out of the allowed
+ * range for compression
*/
- if (valsize < PGLZ_strategy_default->min_input_size)
+ if (valsize < PGLZ_strategy_default->min_input_size ||
+ valsize > PGLZ_strategy_default->max_input_size)
return PointerGetDatum(NULL);
- tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
- if (pglz_compress(VARDATA_ANY(value), valsize,
- (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
- VARSIZE(tmp) < VARSIZE_ANY(value))
+ tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
+ TOAST_COMPRESS_HDRSZ);
+
+ /*
+ * We recheck the actual size even if pglz_compress() reports success,
+ * because it might be satisfied with having saved as little as one byte
+ * in the compressed data --- which could turn into a net loss once you
+ * consider header and alignment padding. Worst case, the compressed
+ * format might require three padding bytes (plus header, which is
+ * included in VARSIZE(tmp)), whereas the uncompressed format would take
+ * only one header byte and no padding if the value is short enough. So
+ * we insist on a savings of more than 2 bytes to ensure we have a gain.
+ */
+ len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)),
+ valsize,
+ TOAST_COMPRESS_RAWDATA(tmp),
+ PGLZ_strategy_default);
+ if (len >= 0 &&
+ len + TOAST_COMPRESS_HDRSZ < valsize - 2)
{
+ TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize);
+ SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ);
/* successful compression */
return PointerGetDatum(tmp);
}
}
+/* ----------
+ * toast_get_valid_index
+ *
+ * Get OID of valid index associated to given toast relation. A toast
+ * relation can have only one valid index at the same time.
+ */
+Oid
+toast_get_valid_index(Oid toastoid, LOCKMODE lock)
+{
+ int num_indexes;
+ int validIndex;
+ Oid validIndexOid;
+ Relation *toastidxs;
+ Relation toastrel;
+
+ /* Open the toast relation */
+ toastrel = heap_open(toastoid, lock);
+
+ /* Look for the valid index of the toast relation */
+ validIndex = toast_open_indexes(toastrel,
+ lock,
+ &toastidxs,
+ &num_indexes);
+ validIndexOid = RelationGetRelid(toastidxs[validIndex]);
+
+ /* Close the toast relation and all its indexes */
+ toast_close_indexes(toastidxs, num_indexes, lock);
+ heap_close(toastrel, lock);
+
+ return validIndexOid;
+}
+
+
/* ----------
* toast_save_datum -
*
* Save one single datum into the secondary relation and return
* a Datum reference for it.
+ *
+ * rel: the main relation we're working with (not the toast rel!)
+ * value: datum to be pushed to toast storage
+ * oldexternal: if not NULL, toast pointer previously representing the datum
+ * options: options to be passed to heap_insert() for toast rows
* ----------
*/
static Datum
toast_save_datum(Relation rel, Datum value,
- bool use_wal, bool use_fsm)
+ struct varlena * oldexternal, int options)
{
Relation toastrel;
- Relation toastidx;
+ Relation *toastidxs;
HeapTuple toasttup;
TupleDesc toasttupDesc;
Datum t_values[3];
CommandId mycid = GetCurrentCommandId(true);
struct varlena *result;
struct varatt_external toast_pointer;
- struct
+ union
{
struct varlena hdr;
- char data[TOAST_MAX_CHUNK_SIZE];
+ /* this is to make the union big enough for a chunk: */
+ char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
+ /* ensure union is aligned well enough: */
+ int32 align_it;
} chunk_data;
int32 chunk_size;
int32 chunk_seq = 0;
char *data_p;
int32 data_todo;
+ Pointer dval = DatumGetPointer(value);
+ int num_indexes;
+ int validIndex;
+
+ Assert(!VARATT_IS_EXTERNAL(value));
/*
- * Open the toast relation and its index. We can use the index to check
+ * Open the toast relation and its indexes. We can use the index to check
* uniqueness of the OID we assign to the toasted item, even though it has
* additional columns besides OID.
*/
toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
toasttupDesc = toastrel->rd_att;
- toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock);
+
+ /* Open all the toast indexes and look for the valid one */
+ validIndex = toast_open_indexes(toastrel,
+ RowExclusiveLock,
+ &toastidxs,
+ &num_indexes);
/*
* Get the data pointer and length, and compute va_rawsize and va_extsize.
*
* va_extsize is the actual size of the data payload in the toast records.
*/
- if (VARATT_IS_SHORT(value))
+ if (VARATT_IS_SHORT(dval))
{
- data_p = VARDATA_SHORT(value);
- data_todo = VARSIZE_SHORT(value) - VARHDRSZ_SHORT;
+ data_p = VARDATA_SHORT(dval);
+ data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
toast_pointer.va_extsize = data_todo;
}
- else if (VARATT_IS_COMPRESSED(value))
+ else if (VARATT_IS_COMPRESSED(dval))
{
- data_p = VARDATA(value);
- data_todo = VARSIZE(value) - VARHDRSZ;
+ data_p = VARDATA(dval);
+ data_todo = VARSIZE(dval) - VARHDRSZ;
/* rawsize in a compressed datum is just the size of the payload */
- toast_pointer.va_rawsize = VARRAWSIZE_4B_C(value) + VARHDRSZ;
+ toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
toast_pointer.va_extsize = data_todo;
/* Assert that the numbers look like it's compressed */
Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
}
else
{
- data_p = VARDATA(value);
- data_todo = VARSIZE(value) - VARHDRSZ;
- toast_pointer.va_rawsize = VARSIZE(value);
+ data_p = VARDATA(dval);
+ data_todo = VARSIZE(dval) - VARHDRSZ;
+ toast_pointer.va_rawsize = VARSIZE(dval);
toast_pointer.va_extsize = data_todo;
}
- toast_pointer.va_valueid = GetNewOidWithIndex(toastrel, toastidx);
- toast_pointer.va_toastrelid = rel->rd_rel->reltoastrelid;
+ /*
+ * Insert the correct table OID into the result TOAST pointer.
+ *
+ * Normally this is the actual OID of the target toast table, but during
+ * table-rewriting operations such as CLUSTER, we have to insert the OID
+ * of the table's real permanent toast table instead. rd_toastoid is set
+ * if we have to substitute such an OID.
+ */
+ if (OidIsValid(rel->rd_toastoid))
+ toast_pointer.va_toastrelid = rel->rd_toastoid;
+ else
+ toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
+
+ /*
+ * Choose an OID to use as the value ID for this toast value.
+ *
+ * Normally we just choose an unused OID within the toast table. But
+ * during table-rewriting operations where we are preserving an existing
+ * toast table OID, we want to preserve toast value OIDs too. So, if
+ * rd_toastoid is set and we had a prior external value from that same
+ * toast table, re-use its value ID. If we didn't have a prior external
+ * value (which is a corner case, but possible if the table's attstorage
+ * options have been changed), we have to pick a value ID that doesn't
+ * conflict with either new or existing toast value OIDs.
+ */
+ if (!OidIsValid(rel->rd_toastoid))
+ {
+ /* normal case: just choose an unused OID */
+ toast_pointer.va_valueid =
+ GetNewOidWithIndex(toastrel,
+ RelationGetRelid(toastidxs[validIndex]),
+ (AttrNumber) 1);
+ }
+ else
+ {
+ /* rewrite case: check to see if value was in old toast table */
+ toast_pointer.va_valueid = InvalidOid;
+ if (oldexternal != NULL)
+ {
+ struct varatt_external old_toast_pointer;
+
+ Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
+ /* Must copy to access aligned fields */
+ VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
+ if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
+ {
+ /* This value came from the old toast table; reuse its OID */
+ toast_pointer.va_valueid = old_toast_pointer.va_valueid;
+
+ /*
+ * There is a corner case here: the table rewrite might have
+ * to copy both live and recently-dead versions of a row, and
+ * those versions could easily reference the same toast value.
+ * When we copy the second or later version of such a row,
+ * reusing the OID will mean we select an OID that's already
+ * in the new toast table. Check for that, and if so, just
+ * fall through without writing the data again.
+ *
+ * While annoying and ugly-looking, this is a good thing
+ * because it ensures that we wind up with only one copy of
+ * the toast value when there is only one copy in the old
+ * toast table. Before we detected this case, we'd have made
+ * multiple copies, wasting space; and what's worse, the
+ * copies belonging to already-deleted heap tuples would not
+ * be reclaimed by VACUUM.
+ */
+ if (toastrel_valueid_exists(toastrel,
+ toast_pointer.va_valueid))
+ {
+ /* Match, so short-circuit the data storage loop below */
+ data_todo = 0;
+ }
+ }
+ }
+ if (toast_pointer.va_valueid == InvalidOid)
+ {
+ /*
+ * new value; must choose an OID that doesn't conflict in either
+ * old or new toast table
+ */
+ do
+ {
+ toast_pointer.va_valueid =
+ GetNewOidWithIndex(toastrel,
+ RelationGetRelid(toastidxs[validIndex]),
+ (AttrNumber) 1);
+ } while (toastid_valueid_exists(rel->rd_toastoid,
+ toast_pointer.va_valueid));
+ }
+ }
/*
* Initialize constant parts of the tuple data
*/
while (data_todo > 0)
{
+ int i;
+
+ CHECK_FOR_INTERRUPTS();
+
/*
* Calculate the size of this chunk
*/
SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
memcpy(VARDATA(&chunk_data), data_p, chunk_size);
toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
- if (!HeapTupleIsValid(toasttup))
- elog(ERROR, "failed to build TOAST tuple");
- heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm);
+ heap_insert(toastrel, toasttup, mycid, options, NULL);
/*
- * Create the index entry. We cheat a little here by not using
+ * Create the index entry. We cheat a little here by not using
* FormIndexDatum: this relies on the knowledge that the index columns
- * are the same as the initial columns of the table.
+ * are the same as the initial columns of the table for all the
+ * indexes. We also cheat by not providing an IndexInfo: this is okay
+ * for now because btree doesn't need one, but we might have to be
+ * more honest someday.
*
* Note also that there had better not be any user-created index on
* the TOAST table, since we don't bother to update anything else.
*/
- index_insert(toastidx, t_values, t_isnull,
- &(toasttup->t_self),
- toastrel, toastidx->rd_index->indisunique);
+ for (i = 0; i < num_indexes; i++)
+ {
+ /* Only index relations marked as ready can be updated */
+ if (IndexIsReady(toastidxs[i]->rd_index))
+ index_insert(toastidxs[i], t_values, t_isnull,
+ &(toasttup->t_self),
+ toastrel,
+ toastidxs[i]->rd_index->indisunique ?
+ UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+ NULL);
+ }
/*
* Free memory
}
/*
- * Done - close toast relation
+ * Done - close toast relation and its indexes
*/
- index_close(toastidx, RowExclusiveLock);
+ toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
heap_close(toastrel, RowExclusiveLock);
/*
* Create the TOAST pointer value that we'll return
*/
result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
- SET_VARSIZE_EXTERNAL(result, TOAST_POINTER_SIZE);
+ SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
return PointerGetDatum(result);
* ----------
*/
static void
-toast_delete_datum(Relation rel, Datum value)
+toast_delete_datum(Relation rel, Datum value, bool is_speculative)
{
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
struct varatt_external toast_pointer;
Relation toastrel;
- Relation toastidx;
+ Relation *toastidxs;
ScanKeyData toastkey;
- IndexScanDesc toastscan;
+ SysScanDesc toastscan;
HeapTuple toasttup;
+ int num_indexes;
+ int validIndex;
+ SnapshotData SnapshotToast;
- if (!VARATT_IS_EXTERNAL(attr))
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr))
return;
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
- * Open the toast relation and its index
+ * Open the toast relation and its indexes
*/
toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock);
- toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock);
+
+ /* Fetch valid relation used for process */
+ validIndex = toast_open_indexes(toastrel,
+ RowExclusiveLock,
+ &toastidxs,
+ &num_indexes);
/*
- * Setup a scan key to fetch from the index by va_valueid (we don't
- * particularly care whether we see them in sequence or not)
+ * Setup a scan key to find chunks with matching va_valueid
*/
ScanKeyInit(&toastkey,
(AttrNumber) 1,
ObjectIdGetDatum(toast_pointer.va_valueid));
/*
- * Find the chunks by index
+ * Find all the chunks. (We don't actually care whether we see them in
+ * sequence or not, but since we've already locked the index we might as
+ * well use systable_beginscan_ordered.)
*/
- toastscan = index_beginscan(toastrel, toastidx,
- SnapshotToast, 1, &toastkey);
- while ((toasttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
+ init_toast_snapshot(&SnapshotToast);
+ toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+ &SnapshotToast, 1, &toastkey);
+ while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
* Have a chunk, delete it
*/
- simple_heap_delete(toastrel, &toasttup->t_self);
+ if (is_speculative)
+ heap_abort_speculative(toastrel, toasttup);
+ else
+ simple_heap_delete(toastrel, &toasttup->t_self);
}
/*
* End scan and close relations
*/
- index_endscan(toastscan);
- index_close(toastidx, RowExclusiveLock);
+ systable_endscan_ordered(toastscan);
+ toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
heap_close(toastrel, RowExclusiveLock);
}
+/* ----------
+ * toastrel_valueid_exists -
+ *
+ * Test whether a toast value with the given ID exists in the toast relation
+ * ----------
+ */
+static bool
+toastrel_valueid_exists(Relation toastrel, Oid valueid)
+{
+ bool result = false;
+ ScanKeyData toastkey;
+ SysScanDesc toastscan;
+ int num_indexes;
+ int validIndex;
+ Relation *toastidxs;
+ SnapshotData SnapshotToast;
+
+ /* Fetch a valid index relation */
+ validIndex = toast_open_indexes(toastrel,
+ RowExclusiveLock,
+ &toastidxs,
+ &num_indexes);
+
+ /*
+ * Setup a scan key to find chunks with matching va_valueid
+ */
+ ScanKeyInit(&toastkey,
+ (AttrNumber) 1,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(valueid));
+
+ /*
+ * Is there any such chunk?
+ */
+ init_toast_snapshot(&SnapshotToast);
+ toastscan = systable_beginscan(toastrel,
+ RelationGetRelid(toastidxs[validIndex]),
+ true, &SnapshotToast, 1, &toastkey);
+
+ if (systable_getnext(toastscan) != NULL)
+ result = true;
+
+ systable_endscan(toastscan);
+
+ /* Clean up */
+ toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
+
+ return result;
+}
+
+/* ----------
+ * toastid_valueid_exists -
+ *
+ * As above, but work from toast rel's OID not an open relation
+ * ----------
+ */
+static bool
+toastid_valueid_exists(Oid toastrelid, Oid valueid)
+{
+ bool result;
+ Relation toastrel;
+
+ toastrel = heap_open(toastrelid, AccessShareLock);
+
+ result = toastrel_valueid_exists(toastrel, valueid);
+
+ heap_close(toastrel, AccessShareLock);
+
+ return result;
+}
+
+
/* ----------
* toast_fetch_datum -
*
toast_fetch_datum(struct varlena * attr)
{
Relation toastrel;
- Relation toastidx;
+ Relation *toastidxs;
ScanKeyData toastkey;
- IndexScanDesc toastscan;
+ SysScanDesc toastscan;
HeapTuple ttup;
TupleDesc toasttupDesc;
struct varlena *result;
bool isnull;
char *chunkdata;
int32 chunksize;
+ int num_indexes;
+ int validIndex;
+ SnapshotData SnapshotToast;
+
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
SET_VARSIZE(result, ressize + VARHDRSZ);
/*
- * Open the toast relation and its index
+ * Open the toast relation and its indexes
*/
toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
toasttupDesc = toastrel->rd_att;
- toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);
+
+ /* Look for the valid index of the toast relation */
+ validIndex = toast_open_indexes(toastrel,
+ AccessShareLock,
+ &toastidxs,
+ &num_indexes);
/*
* Setup a scan key to fetch from the index by va_valueid
*/
nextidx = 0;
- toastscan = index_beginscan(toastrel, toastidx,
- SnapshotToast, 1, &toastkey);
- while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
+ init_toast_snapshot(&SnapshotToast);
+ toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+ &SnapshotToast, 1, &toastkey);
+ while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
* Have a chunk, extract the sequence number and the data
else
{
/* should never happen */
- elog(ERROR, "found toasted toast chunk");
+ elog(ERROR, "found toasted toast chunk for toast value %u in %s",
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
chunksize = 0; /* keep compiler quiet */
chunkdata = NULL;
}
* Some checks on the data we've found
*/
if (residx != nextidx)
- elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
+ elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
residx, nextidx,
- toast_pointer.va_valueid);
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
if (residx < numchunks - 1)
{
if (chunksize != TOAST_MAX_CHUNK_SIZE)
- elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u",
+ elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
chunksize, (int) TOAST_MAX_CHUNK_SIZE,
residx, numchunks,
- toast_pointer.va_valueid);
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
}
else if (residx == numchunks - 1)
{
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
- elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u",
+ elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
chunksize,
(int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
residx,
- toast_pointer.va_valueid);
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
}
else
- elog(ERROR, "unexpected chunk number %d for toast value %u (out of range %d..%d)",
+ elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
residx,
+ 0, numchunks - 1,
toast_pointer.va_valueid,
- 0, numchunks - 1);
+ RelationGetRelationName(toastrel));
/*
* Copy the data into proper place in our result
* Final checks that we successfully fetched the datum
*/
if (nextidx != numchunks)
- elog(ERROR, "missing chunk number %d for toast value %u",
+ elog(ERROR, "missing chunk number %d for toast value %u in %s",
nextidx,
- toast_pointer.va_valueid);
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
/*
* End scan and close relations
*/
- index_endscan(toastscan);
- index_close(toastidx, AccessShareLock);
+ systable_endscan_ordered(toastscan);
+ toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
heap_close(toastrel, AccessShareLock);
return result;
toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
{
Relation toastrel;
- Relation toastidx;
+ Relation *toastidxs;
ScanKeyData toastkey[3];
int nscankeys;
- IndexScanDesc toastscan;
+ SysScanDesc toastscan;
HeapTuple ttup;
TupleDesc toasttupDesc;
struct varlena *result;
int32 chunksize;
int32 chcpystrt;
int32 chcpyend;
+ int num_indexes;
+ int validIndex;
+ SnapshotData SnapshotToast;
- Assert(VARATT_IS_EXTERNAL(attr));
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
/*
- * Open the toast relation and its index
+ * Open the toast relation and its indexes
*/
toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
toasttupDesc = toastrel->rd_att;
- toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);
+
+ /* Look for the valid index of toast relation */
+ validIndex = toast_open_indexes(toastrel,
+ AccessShareLock,
+ &toastidxs,
+ &num_indexes);
/*
* Setup a scan key to fetch from the index. This is either two keys or
*
* The index is on (valueid, chunkidx) so they will come in order
*/
+ init_toast_snapshot(&SnapshotToast);
nextidx = startchunk;
- toastscan = index_beginscan(toastrel, toastidx,
- SnapshotToast, nscankeys, toastkey);
- while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
+ toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+ &SnapshotToast, nscankeys, toastkey);
+ while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
* Have a chunk, extract the sequence number and the data
else
{
/* should never happen */
- elog(ERROR, "found toasted toast chunk");
+ elog(ERROR, "found toasted toast chunk for toast value %u in %s",
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
chunksize = 0; /* keep compiler quiet */
chunkdata = NULL;
}
* Some checks on the data we've found
*/
if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
- elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
+ elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
residx, nextidx,
- toast_pointer.va_valueid);
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
if (residx < totalchunks - 1)
{
if (chunksize != TOAST_MAX_CHUNK_SIZE)
- elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u when fetching slice",
+ elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
chunksize, (int) TOAST_MAX_CHUNK_SIZE,
residx, totalchunks,
- toast_pointer.va_valueid);
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
}
else if (residx == totalchunks - 1)
{
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
- elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u when fetching slice",
+ elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
chunksize,
(int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
residx,
- toast_pointer.va_valueid);
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
}
else
- elog(ERROR, "unexpected chunk number %d for toast value %u (out of range %d..%d)",
+ elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
residx,
+ 0, totalchunks - 1,
toast_pointer.va_valueid,
- 0, totalchunks - 1);
+ RelationGetRelationName(toastrel));
/*
* Copy the data into proper place in our result
* Final checks that we successfully fetched the datum
*/
if (nextidx != (endchunk + 1))
- elog(ERROR, "missing chunk number %d for toast value %u",
+ elog(ERROR, "missing chunk number %d for toast value %u in %s",
nextidx,
- toast_pointer.va_valueid);
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
/*
* End scan and close relations
*/
- index_endscan(toastscan);
- index_close(toastidx, AccessShareLock);
+ systable_endscan_ordered(toastscan);
+ toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
heap_close(toastrel, AccessShareLock);
return result;
}
+
+/* ----------
+ * toast_decompress_datum -
+ *
+ * Decompress a compressed version of a varlena datum
+ */
+static struct varlena *
+toast_decompress_datum(struct varlena * attr)
+{
+ struct varlena *result;
+
+ Assert(VARATT_IS_COMPRESSED(attr));
+
+ result = (struct varlena *)
+ palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
+ SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
+
+ if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
+ VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
+ VARDATA(result),
+ TOAST_COMPRESS_RAWSIZE(attr)) < 0)
+ elog(ERROR, "compressed data is corrupted");
+
+ return result;
+}
+
+
+/* ----------
+ * toast_open_indexes
+ *
+ * Get an array of the indexes associated to the given toast relation
+ * and return as well the position of the valid index used by the toast
+ * relation in this array. It is the responsibility of the caller of this
+ * function to close the indexes as well as free them.
+ */
+static int
+toast_open_indexes(Relation toastrel,
+ LOCKMODE lock,
+ Relation **toastidxs,
+ int *num_indexes)
+{
+ int i = 0;
+ int res = 0;
+ bool found = false;
+ List *indexlist;
+ ListCell *lc;
+
+ /* Get index list of the toast relation */
+ indexlist = RelationGetIndexList(toastrel);
+ Assert(indexlist != NIL);
+
+ *num_indexes = list_length(indexlist);
+
+ /* Open all the index relations */
+ *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
+ foreach(lc, indexlist)
+ (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
+
+ /* Fetch the first valid index in list */
+ for (i = 0; i < *num_indexes; i++)
+ {
+ Relation toastidx = (*toastidxs)[i];
+
+ if (toastidx->rd_index->indisvalid)
+ {
+ res = i;
+ found = true;
+ break;
+ }
+ }
+
+ /*
+ * Free index list, not necessary anymore as relations are opened and a
+ * valid index has been found.
+ */
+ list_free(indexlist);
+
+ /*
+ * The toast relation should have one valid index, so something is going
+ * wrong if there is nothing.
+ */
+ if (!found)
+ elog(ERROR, "no valid index found for toast relation with Oid %u",
+ RelationGetRelid(toastrel));
+
+ return res;
+}
+
+/* ----------
+ * toast_close_indexes
+ *
+ * Close an array of indexes for a toast relation and free it. This should
+ * be called for a set of indexes opened previously with toast_open_indexes.
+ */
+static void
+toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
+{
+ int i;
+
+ /* Close relations and clean up things */
+ for (i = 0; i < num_indexes; i++)
+ index_close(toastidxs[i], lock);
+ pfree(toastidxs);
+}
+
+/* ----------
+ * init_toast_snapshot
+ *
+ * Initialize an appropriate TOAST snapshot. We must use an MVCC snapshot
+ * to initialize the TOAST snapshot; since we don't know which one to use,
+ * just use the oldest one. This is safe: at worst, we will get a "snapshot
+ * too old" error that might have been avoided otherwise.
+ */
+static void
+init_toast_snapshot(Snapshot toast_snapshot)
+{
+ Snapshot snapshot = GetOldestSnapshot();
+
+ if (snapshot == NULL)
+ elog(ERROR, "no known snapshots");
+
+ InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
+}