* Support routines for external and compressed storage of
* variable size attributes.
*
- * Copyright (c) 2000-2015, PostgreSQL Global Development Group
+ * Copyright (c) 2000-2017, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
#include "catalog/catalog.h"
#include "common/pg_lzcompress.h"
#include "miscadmin.h"
+#include "utils/expandeddatum.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
+#include "utils/snapmgr.h"
#include "utils/typcache.h"
#include "utils/tqual.h"
*/
typedef struct toast_compress_header
{
- int32 vl_len_; /* varlena header (do not touch directly!) */
+ int32 vl_len_; /* varlena header (do not touch directly!) */
int32 rawsize;
} toast_compress_header;
* Utilities for manipulation of header information for compressed
* toast entries.
*/
-#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header))
-#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) ptr)->rawsize)
+#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header))
+#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
#define TOAST_COMPRESS_RAWDATA(ptr) \
- (((char *) ptr) + TOAST_COMPRESS_HDRSZ)
+ (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
- (((toast_compress_header *) ptr)->rawsize = len)
+ (((toast_compress_header *) (ptr))->rawsize = (len))
-static void toast_delete_datum(Relation rel, Datum value);
+static void toast_delete_datum(Relation rel, Datum value, bool is_speculative);
static Datum toast_save_datum(Relation rel, Datum value,
struct varlena * oldexternal, int options);
static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
int *num_indexes);
static void toast_close_indexes(Relation *toastidxs, int num_indexes,
LOCKMODE lock);
+static void init_toast_snapshot(Snapshot toast_snapshot);
/* ----------
*
* This will return a datum that contains all the data internally, ie, not
* relying on external storage or memory, but it can still be compressed or
- * have a short header.
- ----------
+ * have a short header. Note some callers assume that if the input is an
+ * EXTERNAL datum, the result will be a pfree'able chunk.
+ * ----------
*/
struct varlena *
heap_tuple_fetch_attr(struct varlena * attr)
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
/*
- * copy into the caller's memory context. That's not required in all
- * cases but sufficient for now since this is mainly used when we need
- * to persist a Datum for unusually long time, like in a HOLD cursor.
+ * This is an indirect pointer --- dereference it
*/
struct varatt_indirect redirect;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
- /* doesn't make much sense, but better handle it */
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ /* recurse if value is still external in some other way */
+ if (VARATT_IS_EXTERNAL(attr))
return heap_tuple_fetch_attr(attr);
- /* copy datum verbatim */
+ /*
+ * Copy into the caller's memory context, in case caller tries to
+ * pfree the result.
+ */
result = (struct varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
}
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /*
+ * This is an expanded-object pointer --- get flat format
+ */
+ ExpandedObjectHeader *eoh;
+ Size resultsize;
+
+ eoh = DatumGetEOHP(PointerGetDatum(attr));
+ resultsize = EOH_get_flat_size(eoh);
+ result = (struct varlena *) palloc(resultsize);
+ EOH_flatten_into(eoh, (void *) result, resultsize);
+ }
else
{
/*
* heap_tuple_untoast_attr -
*
* Public entry point to get back a toasted value from compression
- * or external storage.
+ * or external storage. The result is always in non-extended varlena form.
+ *
+ * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
+ * datum, the result will be a pfree'able chunk.
* ----------
*/
struct varlena *
if (VARATT_IS_COMPRESSED(attr))
{
struct varlena *tmp = attr;
+
attr = toast_decompress_datum(tmp);
pfree(tmp);
}
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
+ /*
+ * This is an indirect pointer --- dereference it
+ */
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+ /* recurse in case value is still extended in some other way */
attr = heap_tuple_untoast_attr(attr);
+
+ /* if it isn't, we'd better copy it */
+ if (attr == (struct varlena *) redirect.pointer)
+ {
+ struct varlena *result;
+
+ result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+ memcpy(result, attr, VARSIZE_ANY(attr));
+ attr = result;
+ }
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /*
+ * This is an expanded-object pointer --- get flat format
+ */
+ attr = heap_tuple_fetch_attr(attr);
+ /* flatteners are not allowed to produce compressed/short output */
+ Assert(!VARATT_IS_EXTENDED(attr));
}
else if (VARATT_IS_COMPRESSED(attr))
{
return heap_tuple_untoast_attr_slice(redirect.pointer,
sliceoffset, slicelength);
}
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /* pass it off to heap_tuple_fetch_attr to flatten */
+ preslice = heap_tuple_fetch_attr(attr);
+ }
else
preslice = attr;
+ Assert(!VARATT_IS_EXTERNAL(preslice));
+
if (VARATT_IS_COMPRESSED(preslice))
{
struct varlena *tmp = preslice;
+
preslice = toast_decompress_datum(tmp);
if (tmp != attr)
return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
}
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ result = EOH_get_flat_size(DatumGetEOHP(value));
+ }
else if (VARATT_IS_COMPRESSED(attr))
{
/* here, va_rawsize is just the payload size */
return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
}
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ result = EOH_get_flat_size(DatumGetEOHP(value));
+ }
else if (VARATT_IS_SHORT(attr))
{
result = VARSIZE_SHORT(attr);
* ----------
*/
void
-toast_delete(Relation rel, HeapTuple oldtup)
+toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
{
TupleDesc tupleDesc;
Form_pg_attribute *att;
if (toast_isnull[i])
continue;
else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
- toast_delete_datum(rel, value);
- else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value)))
- elog(ERROR, "attempt to delete tuple containing indirect datums");
+ toast_delete_datum(rel, value, is_speculative);
}
}
}
bool toast_free[MaxHeapAttributeNumber];
bool toast_delold[MaxHeapAttributeNumber];
+ /*
+ * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super
+ * deletions just normally insert/delete the toast values. It seems
+ * easiest to deal with that here, rather than in potentially multiple
+ * callers.
+ */
+ options &= ~HEAP_INSERT_SPECULATIVE;
+
/*
* We should only ever be called for tuples of plain relations or
* materialized views --- recursing on a toast rel is bad news.
/*
* We took care of UPDATE above, so any external value we find
- * still in the tuple must be someone else's we cannot reuse.
+ * still in the tuple must be someone else's that we cannot reuse
+ * (this includes the case of an out-of-line in-memory datum).
* Fetch it back (without decompression, unless we are forcing
* PLAIN storage). If necessary, we'll push it out as a new
* external value below.
*/
/* compute header overhead --- this should match heap_form_tuple() */
- hoff = offsetof(HeapTupleHeaderData, t_bits);
+ hoff = SizeofHeapTupleHeader;
if (has_nulls)
hoff += BITMAPLEN(numAttrs);
if (newtup->t_data->t_infomask & HEAP_HASOID)
* different conclusion about the size of the null bitmap, or even
* whether there needs to be one at all.
*/
- new_header_len = offsetof(HeapTupleHeaderData, t_bits);
+ new_header_len = SizeofHeapTupleHeader;
if (has_nulls)
new_header_len += BITMAPLEN(numAttrs);
if (olddata->t_infomask & HEAP_HASOID)
/*
* Copy the existing tuple header, but adjust natts and t_hoff.
*/
- memcpy(new_data, olddata, offsetof(HeapTupleHeaderData, t_bits));
+ memcpy(new_data, olddata, SizeofHeapTupleHeader);
HeapTupleHeaderSetNatts(new_data, numAttrs);
new_data->t_hoff = new_header_len;
if (olddata->t_infomask & HEAP_HASOID)
if (need_delold)
for (i = 0; i < numAttrs; i++)
if (toast_delold[i])
- toast_delete_datum(rel, toast_oldvalues[i]);
+ toast_delete_datum(rel, toast_oldvalues[i], false);
return result_tuple;
}
new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
if (VARATT_IS_EXTERNAL(new_value))
{
- new_value = toast_fetch_datum(new_value);
+ new_value = heap_tuple_fetch_attr(new_value);
toast_values[i] = PointerGetDatum(new_value);
toast_free[i] = true;
}
*
* This should match the reconstruction code in toast_insert_or_update.
*/
- new_header_len = offsetof(HeapTupleHeaderData, t_bits);
+ new_header_len = SizeofHeapTupleHeader;
if (has_nulls)
new_header_len += BITMAPLEN(numAttrs);
if (tup->t_infomask & HEAP_HASOID)
/*
* Copy the existing tuple header, but adjust natts and t_hoff.
*/
- memcpy(new_data, tup, offsetof(HeapTupleHeaderData, t_bits));
+ memcpy(new_data, tup, SizeofHeapTupleHeader);
HeapTupleHeaderSetNatts(new_data, numAttrs);
new_data->t_hoff = new_header_len;
if (tup->t_infomask & HEAP_HASOID)
CommandId mycid = GetCurrentCommandId(true);
struct varlena *result;
struct varatt_external toast_pointer;
- struct
+ union
{
struct varlena hdr;
- char data[TOAST_MAX_CHUNK_SIZE]; /* make struct big enough */
- int32 align_it; /* ensure struct is aligned well enough */
+ /* this is to make the union big enough for a chunk: */
+ char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
+ /* ensure union is aligned well enough: */
+ int32 align_it;
} chunk_data;
int32 chunk_size;
int32 chunk_seq = 0;
* Create the index entry. We cheat a little here by not using
* FormIndexDatum: this relies on the knowledge that the index columns
* are the same as the initial columns of the table for all the
- * indexes.
+ * indexes. We also cheat by not providing an IndexInfo: this is okay
+ * for now because btree doesn't need one, but we might have to be
+ * more honest someday.
*
* Note also that there had better not be any user-created index on
* the TOAST table, since we don't bother to update anything else.
&(toasttup->t_self),
toastrel,
toastidxs[i]->rd_index->indisunique ?
- UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
+ UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+ NULL);
}
/*
* ----------
*/
static void
-toast_delete_datum(Relation rel, Datum value)
+toast_delete_datum(Relation rel, Datum value, bool is_speculative)
{
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
struct varatt_external toast_pointer;
HeapTuple toasttup;
int num_indexes;
int validIndex;
+ SnapshotData SnapshotToast;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
return;
* sequence or not, but since we've already locked the index we might as
* well use systable_beginscan_ordered.)
*/
+ init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
- SnapshotToast, 1, &toastkey);
+ &SnapshotToast, 1, &toastkey);
while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
* Have a chunk, delete it
*/
- simple_heap_delete(toastrel, &toasttup->t_self);
+ if (is_speculative)
+ heap_abort_speculative(toastrel, toasttup);
+ else
+ simple_heap_delete(toastrel, &toasttup->t_self);
}
/*
int num_indexes;
int validIndex;
Relation *toastidxs;
+ SnapshotData SnapshotToast;
/* Fetch a valid index relation */
validIndex = toast_open_indexes(toastrel,
/*
* Is there any such chunk?
*/
+ init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan(toastrel,
RelationGetRelid(toastidxs[validIndex]),
- true, SnapshotToast, 1, &toastkey);
+ true, &SnapshotToast, 1, &toastkey);
if (systable_getnext(toastscan) != NULL)
result = true;
int32 chunksize;
int num_indexes;
int validIndex;
+ SnapshotData SnapshotToast;
- if (VARATT_IS_EXTERNAL_INDIRECT(attr))
- elog(ERROR, "shouldn't be called for indirect tuples");
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
*/
nextidx = 0;
+ init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
- SnapshotToast, 1, &toastkey);
+ &SnapshotToast, 1, &toastkey);
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
int32 chcpyend;
int num_indexes;
int validIndex;
+ SnapshotData SnapshotToast;
- Assert(VARATT_IS_EXTERNAL_ONDISK(attr));
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
*
* The index is on (valueid, chunkidx) so they will come in order
*/
+ init_toast_snapshot(&SnapshotToast);
nextidx = startchunk;
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
- SnapshotToast, nscankeys, toastkey);
+ &SnapshotToast, nscankeys, toastkey);
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
index_close(toastidxs[i], lock);
pfree(toastidxs);
}
+
+/* ----------
+ * init_toast_snapshot
+ *
+ * Initialize an appropriate TOAST snapshot. We must use an MVCC snapshot
+ * to initialize the TOAST snapshot; since we don't know which one to use,
+ * just use the oldest one. This is safe: at worst, we will get a "snapshot
+ * too old" error that might have been avoided otherwise.
+ */
+static void
+init_toast_snapshot(Snapshot toast_snapshot)
+{
+ Snapshot snapshot = GetOldestSnapshot();
+
+ if (snapshot == NULL)
+ elog(ERROR, "no known snapshots");
+
+ InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
+}