granicus.if.org Git - postgresql/blobdiff - src/backend/access/heap/tuptoaster.c
Allow index AMs to cache data across aminsert calls within a SQL command.
[postgresql] / src / backend / access / heap / tuptoaster.c
index f643906da1f92bfce24f69f7875f00cb7dfcf244..19e704800284a45760d708fd989c6b3c5cef740b 100644 (file)
@@ -4,18 +4,21 @@
  *       Support routines for external and compressed storage of
  *       variable size attributes.
  *
- * Copyright (c) 2000-2003, PostgreSQL Global Development Group
+ * Copyright (c) 2000-2017, PostgreSQL Global Development Group
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.40 2003/11/12 21:15:46 tgl Exp $
+ *       src/backend/access/heap/tuptoaster.c
  *
  *
  * INTERFACE ROUTINES
- *             heap_tuple_toast_attrs -
+ *             toast_insert_or_update -
  *                     Try to make a given tuple fit into one page by compressing
  *                     or moving off attributes
  *
+ *             toast_delete -
+ *                     Reclaim toast storage when a tuple is deleted
+ *
  *             heap_tuple_untoast_attr -
  *                     Fetch back a given value from the "secondary" relation
  *
 #include <unistd.h>
 #include <fcntl.h>
 
-#include "access/heapam.h"
 #include "access/genam.h"
+#include "access/heapam.h"
 #include "access/tuptoaster.h"
+#include "access/xact.h"
 #include "catalog/catalog.h"
-#include "utils/rel.h"
-#include "utils/builtins.h"
+#include "common/pg_lzcompress.h"
+#include "miscadmin.h"
+#include "utils/expandeddatum.h"
 #include "utils/fmgroids.h"
-#include "utils/pg_lzcompress.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/typcache.h"
+#include "utils/tqual.h"
 
 
 #undef TOAST_DEBUG
 
-static void toast_delete(Relation rel, HeapTuple oldtup);
-static void toast_delete_datum(Relation rel, Datum value);
-static void toast_insert_or_update(Relation rel, HeapTuple newtup,
-                                          HeapTuple oldtup);
-static Datum toast_save_datum(Relation rel, Datum value);
-static varattrib *toast_fetch_datum(varattrib *attr);
-static varattrib *toast_fetch_datum_slice(varattrib *attr,
-                                               int32 sliceoffset, int32 length);
-
-
-/* ----------
- * heap_tuple_toast_attrs -
- *
- *     This is the central public entry point for toasting from heapam.
- *
- *     Calls the appropriate event specific action.
- * ----------
+/*
+ *     The information at the start of the compressed toast data.
  */
-void
-heap_tuple_toast_attrs(Relation rel, HeapTuple newtup, HeapTuple oldtup)
+typedef struct toast_compress_header
 {
-       if (newtup == NULL)
-               toast_delete(rel, oldtup);
-       else
-               toast_insert_or_update(rel, newtup, oldtup);
-}
+       int32           vl_len_;                /* varlena header (do not touch directly!) */
+       int32           rawsize;
+} toast_compress_header;
+
+/*
+ * Utilities for manipulation of header information for compressed
+ * toast entries.
+ */
+#define TOAST_COMPRESS_HDRSZ           ((int32) sizeof(toast_compress_header))
+#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
+#define TOAST_COMPRESS_RAWDATA(ptr) \
+       (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
+#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
+       (((toast_compress_header *) (ptr))->rawsize = (len))
+
+static void toast_delete_datum(Relation rel, Datum value, bool is_speculative);
+static Datum toast_save_datum(Relation rel, Datum value,
+                                struct varlena * oldexternal, int options);
+static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
+static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
+static struct varlena *toast_fetch_datum(struct varlena * attr);
+static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
+                                               int32 sliceoffset, int32 length);
+static struct varlena *toast_decompress_datum(struct varlena * attr);
+static int toast_open_indexes(Relation toastrel,
+                                  LOCKMODE lock,
+                                  Relation **toastidxs,
+                                  int *num_indexes);
+static void toast_close_indexes(Relation *toastidxs, int num_indexes,
+                                       LOCKMODE lock);
+static void init_toast_snapshot(Snapshot toast_snapshot);
 
 
 /* ----------
  * heap_tuple_fetch_attr -
  *
- *     Public entry point to get back a toasted value
- *     external storage (possibly still in compressed format).
+ *     Public entry point to get back a toasted value from
+ *     external source (possibly still in compressed format).
+ *
+ * This will return a datum that contains all the data internally, ie, not
+ * relying on external storage or memory, but it can still be compressed or
+ * have a short header.  Note some callers assume that if the input is an
+ * EXTERNAL datum, the result will be a pfree'able chunk.
  * ----------
  */
-varattrib *
-heap_tuple_fetch_attr(varattrib *attr)
+struct varlena *
+heap_tuple_fetch_attr(struct varlena * attr)
 {
-       varattrib  *result;
+       struct varlena *result;
 
-       if (VARATT_IS_EXTERNAL(attr))
+       if (VARATT_IS_EXTERNAL_ONDISK(attr))
        {
                /*
                 * This is an external stored plain value
                 */
                result = toast_fetch_datum(attr);
        }
+       else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+       {
+               /*
+                * This is an indirect pointer --- dereference it
+                */
+               struct varatt_indirect redirect;
+
+               VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+               attr = (struct varlena *) redirect.pointer;
+
+               /* nested indirect Datums aren't allowed */
+               Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+               /* recurse if value is still external in some other way */
+               if (VARATT_IS_EXTERNAL(attr))
+                       return heap_tuple_fetch_attr(attr);
+
+               /*
+                * Copy into the caller's memory context, in case caller tries to
+                * pfree the result.
+                */
+               result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+               memcpy(result, attr, VARSIZE_ANY(attr));
+       }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               /*
+                * This is an expanded-object pointer --- get flat format
+                */
+               ExpandedObjectHeader *eoh;
+               Size            resultsize;
+
+               eoh = DatumGetEOHP(PointerGetDatum(attr));
+               resultsize = EOH_get_flat_size(eoh);
+               result = (struct varlena *) palloc(resultsize);
+               EOH_flatten_into(eoh, (void *) result, resultsize);
+       }
        else
        {
                /*
-                * This is a plain value inside of the main tuple - why am I
-                * called?
+                * This is a plain value inside of the main tuple - why am I called?
                 */
                result = attr;
        }
@@ -103,62 +162,88 @@ heap_tuple_fetch_attr(varattrib *attr)
  * heap_tuple_untoast_attr -
  *
  *     Public entry point to get back a toasted value from compression
- *     or external storage.
+ *     or external storage.  The result is always non-extended varlena form.
+ *
+ * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
+ * datum, the result will be a pfree'able chunk.
  * ----------
  */
-varattrib *
-heap_tuple_untoast_attr(varattrib *attr)
+struct varlena *
+heap_tuple_untoast_attr(struct varlena * attr)
 {
-       varattrib  *result;
-
-       if (VARATT_IS_EXTERNAL(attr))
+       if (VARATT_IS_EXTERNAL_ONDISK(attr))
        {
+               /*
+                * This is an externally stored datum --- fetch it back from there
+                */
+               attr = toast_fetch_datum(attr);
+               /* If it's compressed, decompress it */
                if (VARATT_IS_COMPRESSED(attr))
                {
-                       /* ----------
-                        * This is an external stored compressed value
-                        * Fetch it from the toast heap and decompress.
-                        * ----------
-                        */
-                       varattrib  *tmp;
-
-                       tmp = toast_fetch_datum(attr);
-                       result = (varattrib *) palloc(attr->va_content.va_external.va_rawsize
-                                                                                 + VARHDRSZ);
-                       VARATT_SIZEP(result) = attr->va_content.va_external.va_rawsize
-                               + VARHDRSZ;
-                       pglz_decompress((PGLZ_Header *) tmp, VARATT_DATA(result));
+                       struct varlena *tmp = attr;
 
+                       attr = toast_decompress_datum(tmp);
                        pfree(tmp);
                }
-               else
+       }
+       else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+       {
+               /*
+                * This is an indirect pointer --- dereference it
+                */
+               struct varatt_indirect redirect;
+
+               VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+               attr = (struct varlena *) redirect.pointer;
+
+               /* nested indirect Datums aren't allowed */
+               Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+               /* recurse in case value is still extended in some other way */
+               attr = heap_tuple_untoast_attr(attr);
+
+               /* if it isn't, we'd better copy it */
+               if (attr == (struct varlena *) redirect.pointer)
                {
-                       /*
-                        * This is an external stored plain value
-                        */
-                       result = toast_fetch_datum(attr);
+                       struct varlena *result;
+
+                       result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+                       memcpy(result, attr, VARSIZE_ANY(attr));
+                       attr = result;
                }
        }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               /*
+                * This is an expanded-object pointer --- get flat format
+                */
+               attr = heap_tuple_fetch_attr(attr);
+               /* flatteners are not allowed to produce compressed/short output */
+               Assert(!VARATT_IS_EXTENDED(attr));
+       }
        else if (VARATT_IS_COMPRESSED(attr))
        {
                /*
                 * This is a compressed value inside of the main tuple
                 */
-               result = (varattrib *) palloc(attr->va_content.va_compressed.va_rawsize
-                                                                         + VARHDRSZ);
-               VARATT_SIZEP(result) = attr->va_content.va_compressed.va_rawsize
-                       + VARHDRSZ;
-               pglz_decompress((PGLZ_Header *) attr, VARATT_DATA(result));
+               attr = toast_decompress_datum(attr);
        }
-       else
-
+       else if (VARATT_IS_SHORT(attr))
+       {
                /*
-                * This is a plain value inside of the main tuple - why am I
-                * called?
+                * This is a short-header varlena --- convert to 4-byte header format
                 */
-               return attr;
+               Size            data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
+               Size            new_size = data_size + VARHDRSZ;
+               struct varlena *new_attr;
+
+               new_attr = (struct varlena *) palloc(new_size);
+               SET_VARSIZE(new_attr, new_size);
+               memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
+               attr = new_attr;
+       }
 
-       return result;
+       return attr;
 }
 
 
@@ -169,47 +254,73 @@ heap_tuple_untoast_attr(varattrib *attr)
  *             from compression or external storage.
  * ----------
  */
-varattrib *
-heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelength)
+struct varlena *
+heap_tuple_untoast_attr_slice(struct varlena * attr,
+                                                         int32 sliceoffset, int32 slicelength)
 {
-       varattrib  *preslice;
-       varattrib  *result;
+       struct varlena *preslice;
+       struct varlena *result;
+       char       *attrdata;
        int32           attrsize;
 
-       if (VARATT_IS_COMPRESSED(attr))
+       if (VARATT_IS_EXTERNAL_ONDISK(attr))
        {
-               varattrib  *tmp;
+               struct varatt_external toast_pointer;
 
-               if (VARATT_IS_EXTERNAL(attr))
-                       tmp = toast_fetch_datum(attr);
-               else
-               {
-                       tmp = attr;                     /* compressed in main tuple */
-               }
+               VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+               /* fast path for non-compressed external datums */
+               if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+                       return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
+
+               /* fetch it back (compressed marker will get set automatically) */
+               preslice = toast_fetch_datum(attr);
+       }
+       else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+       {
+               struct varatt_indirect redirect;
 
-               preslice = (varattrib *) palloc(attr->va_content.va_external.va_rawsize
-                                                                               + VARHDRSZ);
-               VARATT_SIZEP(preslice) = attr->va_content.va_external.va_rawsize + VARHDRSZ;
-               pglz_decompress((PGLZ_Header *) tmp, VARATT_DATA(preslice));
+               VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+
+               /* nested indirect Datums aren't allowed */
+               Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
+
+               return heap_tuple_untoast_attr_slice(redirect.pointer,
+                                                                                        sliceoffset, slicelength);
+       }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               /* pass it off to heap_tuple_fetch_attr to flatten */
+               preslice = heap_tuple_fetch_attr(attr);
+       }
+       else
+               preslice = attr;
+
+       Assert(!VARATT_IS_EXTERNAL(preslice));
+
+       if (VARATT_IS_COMPRESSED(preslice))
+       {
+               struct varlena *tmp = preslice;
+
+               preslice = toast_decompress_datum(tmp);
 
                if (tmp != attr)
                        pfree(tmp);
        }
+
+       if (VARATT_IS_SHORT(preslice))
+       {
+               attrdata = VARDATA_SHORT(preslice);
+               attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
+       }
        else
        {
-               /* Plain value */
-               if (VARATT_IS_EXTERNAL(attr))
-               {
-                       /* fast path */
-                       return (toast_fetch_datum_slice(attr, sliceoffset, slicelength));
-               }
-               else
-                       preslice = attr;
+               attrdata = VARDATA(preslice);
+               attrsize = VARSIZE(preslice) - VARHDRSZ;
        }
 
        /* slicing of datum for compressed cases and plain value */
 
-       attrsize = VARSIZE(preslice) - VARHDRSZ;
        if (sliceoffset >= attrsize)
        {
                sliceoffset = 0;
@@ -219,10 +330,10 @@ heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelen
        if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
                slicelength = attrsize - sliceoffset;
 
-       result = (varattrib *) palloc(slicelength + VARHDRSZ);
-       VARATT_SIZEP(result) = slicelength + VARHDRSZ;
+       result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+       SET_VARSIZE(result, slicelength + VARHDRSZ);
 
-       memcpy(VARDATA(result), VARDATA(preslice) + sliceoffset, slicelength);
+       memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
 
        if (preslice != attr)
                pfree(preslice);
@@ -235,33 +346,108 @@ heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelen
  * toast_raw_datum_size -
  *
  *     Return the raw (detoasted) size of a varlena datum
+ *     (including the VARHDRSZ header)
  * ----------
  */
 Size
 toast_raw_datum_size(Datum value)
 {
-       varattrib  *attr = (varattrib *) DatumGetPointer(value);
+       struct varlena *attr = (struct varlena *) DatumGetPointer(value);
        Size            result;
 
-       if (VARATT_IS_COMPRESSED(attr))
+       if (VARATT_IS_EXTERNAL_ONDISK(attr))
+       {
+               /* va_rawsize is the size of the original datum -- including header */
+               struct varatt_external toast_pointer;
+
+               VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+               result = toast_pointer.va_rawsize;
+       }
+       else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+       {
+               struct varatt_indirect toast_pointer;
+
+               VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+               /* nested indirect Datums aren't allowed */
+               Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
+
+               return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
+       }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               result = EOH_get_flat_size(DatumGetEOHP(value));
+       }
+       else if (VARATT_IS_COMPRESSED(attr))
+       {
+               /* here, va_rawsize is just the payload size */
+               result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
+       }
+       else if (VARATT_IS_SHORT(attr))
        {
                /*
-                * va_rawsize shows the original data size, whether the datum is
-                * external or not.
+                * we have to normalize the header length to VARHDRSZ or else the
+                * callers of this function will be confused.
                 */
-               result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+               result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
        }
-       else if (VARATT_IS_EXTERNAL(attr))
+       else
+       {
+               /* plain untoasted datum */
+               result = VARSIZE(attr);
+       }
+       return result;
+}
+
+/* ----------
+ * toast_datum_size
+ *
+ *     Return the physical storage size (possibly compressed) of a varlena datum
+ * ----------
+ */
+Size
+toast_datum_size(Datum value)
+{
+       struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+       Size            result;
+
+       if (VARATT_IS_EXTERNAL_ONDISK(attr))
        {
                /*
-                * an uncompressed external attribute has rawsize including the
-                * header (not too consistent!)
+                * Attribute is stored externally - return the extsize whether
+                * compressed or not.  We do not count the size of the toast pointer
+                * ... should we?
                 */
-               result = attr->va_content.va_external.va_rawsize;
+               struct varatt_external toast_pointer;
+
+               VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+               result = toast_pointer.va_extsize;
+       }
+       else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+       {
+               struct varatt_indirect toast_pointer;
+
+               VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+               /* nested indirect Datums aren't allowed: check the target datum */
+               Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
+
+               return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
+       }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               result = EOH_get_flat_size(DatumGetEOHP(value));
+       }
+       else if (VARATT_IS_SHORT(attr))
+       {
+               result = VARSIZE_SHORT(attr);
        }
        else
        {
-               /* plain untoasted datum */
+               /*
+                * Attribute is stored inline either compressed or not, just calculate
+                * the size of the datum in either case.
+                */
                result = VARSIZE(attr);
        }
        return result;
@@ -274,34 +460,55 @@ toast_raw_datum_size(Datum value)
  *     Cascaded delete toast-entries on DELETE
  * ----------
  */
-static void
-toast_delete(Relation rel, HeapTuple oldtup)
+void
+toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
 {
        TupleDesc       tupleDesc;
        Form_pg_attribute *att;
        int                     numAttrs;
        int                     i;
-       Datum           value;
-       bool            isnull;
+       Datum           toast_values[MaxHeapAttributeNumber];
+       bool            toast_isnull[MaxHeapAttributeNumber];
+
+       /*
+        * We should only ever be called for tuples of plain relations or
+        * materialized views --- recursing on a toast rel is bad news.
+        */
+       Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
+                  rel->rd_rel->relkind == RELKIND_MATVIEW);
 
        /*
-        * Get the tuple descriptor, the number of and attribute descriptors.
+        * Get the tuple descriptor and break down the tuple into fields.
+        *
+        * NOTE: it's debatable whether to use heap_deform_tuple() here or just
+        * heap_getattr() only the varlena columns.  The latter could win if there
+        * are few varlena columns and many non-varlena ones. However,
+        * heap_deform_tuple costs only O(N) while the heap_getattr way would cost
+        * O(N^2) if there are many varlena columns, so it seems better to err on
+        * the side of linear cost.  (We won't even be here unless there's at
+        * least one varlena column, by the way.)
         */
        tupleDesc = rel->rd_att;
-       numAttrs = tupleDesc->natts;
        att = tupleDesc->attrs;
+       numAttrs = tupleDesc->natts;
+
+       Assert(numAttrs <= MaxHeapAttributeNumber);
+       heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull);
 
        /*
-        * Check for external stored attributes and delete them from the
-        * secondary relation.
+        * Check for external stored attributes and delete them from the secondary
+        * relation.
         */
        for (i = 0; i < numAttrs; i++)
        {
                if (att[i]->attlen == -1)
                {
-                       value = heap_getattr(oldtup, i + 1, tupleDesc, &isnull);
-                       if (!isnull && VARATT_IS_EXTERNAL(value))
-                               toast_delete_datum(rel, value);
+                       Datum           value = toast_values[i];
+
+                       if (toast_isnull[i])
+                               continue;
+                       else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
+                               toast_delete_datum(rel, value, is_speculative);
                }
        }
 }
@@ -312,17 +519,28 @@ toast_delete(Relation rel, HeapTuple oldtup)
  *
  *     Delete no-longer-used toast-entries and create new ones to
  *     make the new tuple fit on INSERT or UPDATE
+ *
+ * Inputs:
+ *     newtup: the candidate new tuple to be inserted
+ *     oldtup: the old row version for UPDATE, or NULL for INSERT
+ *     options: options to be passed to heap_insert() for toast rows
+ * Result:
+ *     either newtup if no toasting is needed, or a palloc'd modified tuple
+ *     that is what should actually get stored
+ *
+ * NOTE: neither newtup nor oldtup will be modified.  This is a change
+ * from the pre-8.1 API of this routine.
  * ----------
  */
-static void
-toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
+HeapTuple
+toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
+                                          int options)
 {
+       HeapTuple       result_tuple;
        TupleDesc       tupleDesc;
        Form_pg_attribute *att;
        int                     numAttrs;
        int                     i;
-       bool            old_isnull;
-       bool            new_isnull;
 
        bool            need_change = false;
        bool            need_free = false;
@@ -330,21 +548,44 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
        bool            has_nulls = false;
 
        Size            maxDataLen;
+       Size            hoff;
 
        char            toast_action[MaxHeapAttributeNumber];
-       char            toast_nulls[MaxHeapAttributeNumber];
+       bool            toast_isnull[MaxHeapAttributeNumber];
+       bool            toast_oldisnull[MaxHeapAttributeNumber];
        Datum           toast_values[MaxHeapAttributeNumber];
+       Datum           toast_oldvalues[MaxHeapAttributeNumber];
+       struct varlena *toast_oldexternal[MaxHeapAttributeNumber];
        int32           toast_sizes[MaxHeapAttributeNumber];
        bool            toast_free[MaxHeapAttributeNumber];
        bool            toast_delold[MaxHeapAttributeNumber];
 
        /*
-        * Get the tuple descriptor, the number of and attribute descriptors
-        * and the location of the tuple values.
+        * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super
+        * deletions just normally insert/delete the toast values. It seems
+        * easiest to deal with that here, instead of, potentially, multiple
+        * callers.
+        */
+       options &= ~HEAP_INSERT_SPECULATIVE;
+
+       /*
+        * We should only ever be called for tuples of plain relations or
+        * materialized views --- recursing on a toast rel is bad news.
+        */
+       Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
+                  rel->rd_rel->relkind == RELKIND_MATVIEW);
+
+       /*
+        * Get the tuple descriptor and break down the tuple(s) into fields.
         */
        tupleDesc = rel->rd_att;
-       numAttrs = tupleDesc->natts;
        att = tupleDesc->attrs;
+       numAttrs = tupleDesc->natts;
+
+       Assert(numAttrs <= MaxHeapAttributeNumber);
+       heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
+       if (oldtup != NULL)
+               heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);
 
        /* ----------
         * Then collect information about the values given
@@ -353,43 +594,42 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
         *              ' '             default handling
         *              'p'             already processed --- don't touch it
         *              'x'             incompressible, but OK to move off
+        *
+        * NOTE: toast_sizes[i] is only made valid for varlena attributes with
+        *              toast_action[i] different from 'p'.
         * ----------
         */
        memset(toast_action, ' ', numAttrs * sizeof(char));
-       memset(toast_nulls, ' ', numAttrs * sizeof(char));
+       memset(toast_oldexternal, 0, numAttrs * sizeof(struct varlena *));
        memset(toast_free, 0, numAttrs * sizeof(bool));
        memset(toast_delold, 0, numAttrs * sizeof(bool));
+
        for (i = 0; i < numAttrs; i++)
        {
-               varattrib  *old_value;
-               varattrib  *new_value;
+               struct varlena *old_value;
+               struct varlena *new_value;
 
                if (oldtup != NULL)
                {
                        /*
                         * For UPDATE get the old and new values of this attribute
                         */
-                       old_value = (varattrib *) DatumGetPointer(
-                                       heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull));
-                       toast_values[i] =
-                               heap_getattr(newtup, i + 1, tupleDesc, &new_isnull);
-                       new_value = (varattrib *) DatumGetPointer(toast_values[i]);
+                       old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]);
+                       new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
 
                        /*
-                        * If the old value is an external stored one, check if it has
-                        * changed so we have to delete it later.
+                        * If the old value is stored on disk, check if it has changed so
+                        * we have to delete it later.
                         */
-                       if (!old_isnull && att[i]->attlen == -1 &&
-                               VARATT_IS_EXTERNAL(old_value))
+                       if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
+                               VARATT_IS_EXTERNAL_ONDISK(old_value))
                        {
-                               if (new_isnull || !VARATT_IS_EXTERNAL(new_value) ||
-                                       old_value->va_content.va_external.va_valueid !=
-                                       new_value->va_content.va_external.va_valueid ||
-                                       old_value->va_content.va_external.va_toastrelid !=
-                                       new_value->va_content.va_external.va_toastrelid)
+                               if (toast_isnull[i] || !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
+                                       memcmp((char *) old_value, (char *) new_value,
+                                                  VARSIZE_EXTERNAL(old_value)) != 0)
                                {
                                        /*
-                                        * The old external store value isn't needed any more
+                                        * The old external stored value isn't needed any more
                                         * after the update
                                         */
                                        toast_delold[i] = true;
@@ -398,12 +638,11 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                                else
                                {
                                        /*
-                                        * This attribute isn't changed by this update so we
-                                        * reuse the original reference to the old value in
-                                        * the new tuple.
+                                        * This attribute isn't changed by this update so we reuse
+                                        * the original reference to the old value in the new
+                                        * tuple.
                                         */
                                        toast_action[i] = 'p';
-                                       toast_sizes[i] = VARATT_SIZE(toast_values[i]);
                                        continue;
                                }
                        }
@@ -413,23 +652,21 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                        /*
                         * For INSERT simply get the new value
                         */
-                       toast_values[i] =
-                               heap_getattr(newtup, i + 1, tupleDesc, &new_isnull);
+                       new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
                }
 
                /*
                 * Handle NULL attributes
                 */
-               if (new_isnull)
+               if (toast_isnull[i])
                {
                        toast_action[i] = 'p';
-                       toast_nulls[i] = 'n';
                        has_nulls = true;
                        continue;
                }
 
                /*
-                * Now look at varsize attributes
+                * Now look at varlena attributes
                 */
                if (att[i]->attlen == -1)
                {
@@ -441,13 +678,20 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
 
                        /*
                         * We took care of UPDATE above, so any external value we find
-                        * still in the tuple must be someone else's we cannot reuse.
-                        * Expand it to plain (and, probably, toast it again below).
+                        * still in the tuple must be someone else's that we cannot reuse
+                        * (this includes the case of an out-of-line in-memory datum).
+                        * Fetch it back (without decompression, unless we are forcing
+                        * PLAIN storage).  If necessary, we'll push it out as a new
+                        * external value below.
                         */
-                       if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
+                       if (VARATT_IS_EXTERNAL(new_value))
                        {
-                               toast_values[i] = PointerGetDatum(heap_tuple_untoast_attr(
-                                               (varattrib *) DatumGetPointer(toast_values[i])));
+                               toast_oldexternal[i] = new_value;
+                               if (att[i]->attstorage == 'p')
+                                       new_value = heap_tuple_untoast_attr(new_value);
+                               else
+                                       new_value = heap_tuple_fetch_attr(new_value);
+                               toast_values[i] = PointerGetDatum(new_value);
                                toast_free[i] = true;
                                need_change = true;
                                need_free = true;
@@ -456,53 +700,62 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                        /*
                         * Remember the size of this attribute
                         */
-                       toast_sizes[i] = VARATT_SIZE(DatumGetPointer(toast_values[i]));
+                       toast_sizes[i] = VARSIZE_ANY(new_value);
                }
                else
                {
                        /*
-                        * Not a variable size attribute, plain storage always
+                        * Not a varlena attribute, plain storage always
                         */
                        toast_action[i] = 'p';
-                       toast_sizes[i] = att[i]->attlen;
                }
        }
 
        /* ----------
         * Compress and/or save external until data fits into target length
         *
-        *      1: Inline compress attributes with attstorage 'x'
+        *      1: Inline compress attributes with attstorage 'x', and store very
+        *         large attributes with attstorage 'x' or 'e' external immediately
         *      2: Store attributes with attstorage 'x' or 'e' external
         *      3: Inline compress attributes with attstorage 'm'
         *      4: Store attributes with attstorage 'm' external
         * ----------
         */
-       maxDataLen = offsetof(HeapTupleHeaderData, t_bits);
+
+       /* compute header overhead --- this should match heap_form_tuple() */
+       hoff = SizeofHeapTupleHeader;
        if (has_nulls)
-               maxDataLen += BITMAPLEN(numAttrs);
-       maxDataLen = TOAST_TUPLE_TARGET - MAXALIGN(maxDataLen);
+               hoff += BITMAPLEN(numAttrs);
+       if (newtup->t_data->t_infomask & HEAP_HASOID)
+               hoff += sizeof(Oid);
+       hoff = MAXALIGN(hoff);
+       /* now convert to a limit on the tuple data size */
+       maxDataLen = TOAST_TUPLE_TARGET - hoff;
 
        /*
-        * Look for attributes with attstorage 'x' to compress
+        * Look for attributes with attstorage 'x' to compress.  Also find large
+        * attributes with attstorage 'x' or 'e', and store them external.
         */
-       while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
-                  maxDataLen)
+       while (heap_compute_data_size(tupleDesc,
+                                                                 toast_values, toast_isnull) > maxDataLen)
        {
                int                     biggest_attno = -1;
-               int32           biggest_size = MAXALIGN(sizeof(varattrib));
+               int32           biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
                Datum           old_value;
                Datum           new_value;
 
                /*
-                * Search for the biggest yet uncompressed internal attribute
+                * Search for the biggest yet unprocessed internal attribute
                 */
                for (i = 0; i < numAttrs; i++)
                {
                        if (toast_action[i] != ' ')
                                continue;
-                       if (VARATT_IS_EXTENDED(toast_values[i]))
+                       if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
+                               continue;               /* can't happen, toast_action would be 'p' */
+                       if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
                                continue;
-                       if (att[i]->attstorage != 'x')
+                       if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
                                continue;
                        if (toast_sizes[i] > biggest_size)
                        {
@@ -515,42 +768,70 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                        break;
 
                /*
-                * Attempt to compress it inline
+                * Attempt to compress it inline, if it has attstorage 'x'
                 */
                i = biggest_attno;
-               old_value = toast_values[i];
-               new_value = toast_compress_datum(old_value);
+               if (att[i]->attstorage == 'x')
+               {
+                       old_value = toast_values[i];
+                       new_value = toast_compress_datum(old_value);
 
-               if (DatumGetPointer(new_value) != NULL)
+                       if (DatumGetPointer(new_value) != NULL)
+                       {
+                               /* successful compression */
+                               if (toast_free[i])
+                                       pfree(DatumGetPointer(old_value));
+                               toast_values[i] = new_value;
+                               toast_free[i] = true;
+                               toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
+                               need_change = true;
+                               need_free = true;
+                       }
+                       else
+                       {
+                               /* incompressible, ignore on subsequent compression passes */
+                               toast_action[i] = 'x';
+                       }
+               }
+               else
                {
-                       /* successful compression */
+                       /* has attstorage 'e', ignore on subsequent compression passes */
+                       toast_action[i] = 'x';
+               }
+
+               /*
+                * If this value is by itself more than maxDataLen (after compression
+                * if any), push it out to the toast table immediately, if possible.
+                * This avoids uselessly compressing other fields in the common case
+                * where we have one long field and several short ones.
+                *
+                * XXX maybe the threshold should be less than maxDataLen?
+                */
+               if (toast_sizes[i] > maxDataLen &&
+                       rel->rd_rel->reltoastrelid != InvalidOid)
+               {
+                       old_value = toast_values[i];
+                       toast_action[i] = 'p';
+                       toast_values[i] = toast_save_datum(rel, toast_values[i],
+                                                                                          toast_oldexternal[i], options);
                        if (toast_free[i])
                                pfree(DatumGetPointer(old_value));
-                       toast_values[i] = new_value;
                        toast_free[i] = true;
-                       toast_sizes[i] = VARATT_SIZE(toast_values[i]);
                        need_change = true;
                        need_free = true;
                }
-               else
-               {
-                       /*
-                        * incompressible data, ignore on subsequent compression
-                        * passes
-                        */
-                       toast_action[i] = 'x';
-               }
        }
 
        /*
-        * Second we look for attributes of attstorage 'x' or 'e' that are
-        * still inline.
+        * Second we look for attributes of attstorage 'x' or 'e' that are still
+        * inline.  But skip this if there's no toast table to push them to.
         */
-       while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
-                  maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
+       while (heap_compute_data_size(tupleDesc,
+                                                                 toast_values, toast_isnull) > maxDataLen &&
+                  rel->rd_rel->reltoastrelid != InvalidOid)
        {
                int                     biggest_attno = -1;
-               int32           biggest_size = MAXALIGN(sizeof(varattrib));
+               int32           biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
                Datum           old_value;
 
                /*------
@@ -562,8 +843,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                {
                        if (toast_action[i] == 'p')
                                continue;
-                       if (VARATT_IS_EXTERNAL(toast_values[i]))
-                               continue;
+                       if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
+                               continue;               /* can't happen, toast_action would be 'p' */
                        if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
                                continue;
                        if (toast_sizes[i] > biggest_size)
@@ -582,12 +863,11 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                i = biggest_attno;
                old_value = toast_values[i];
                toast_action[i] = 'p';
-               toast_values[i] = toast_save_datum(rel, toast_values[i]);
+               toast_values[i] = toast_save_datum(rel, toast_values[i],
+                                                                                  toast_oldexternal[i], options);
                if (toast_free[i])
                        pfree(DatumGetPointer(old_value));
-
                toast_free[i] = true;
-               toast_sizes[i] = VARATT_SIZE(toast_values[i]);
 
                need_change = true;
                need_free = true;
@@ -597,11 +877,11 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
         * Round 3 - this time we take attributes with storage 'm' into
         * compression
         */
-       while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
-                  maxDataLen)
+       while (heap_compute_data_size(tupleDesc,
+                                                                 toast_values, toast_isnull) > maxDataLen)
        {
                int                     biggest_attno = -1;
-               int32           biggest_size = MAXALIGN(sizeof(varattrib));
+               int32           biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
                Datum           old_value;
                Datum           new_value;
 
@@ -612,7 +892,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                {
                        if (toast_action[i] != ' ')
                                continue;
-                       if (VARATT_IS_EXTENDED(toast_values[i]))
+                       if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
+                               continue;               /* can't happen, toast_action would be 'p' */
+                       if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
                                continue;
                        if (att[i]->attstorage != 'm')
                                continue;
@@ -640,28 +922,30 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                                pfree(DatumGetPointer(old_value));
                        toast_values[i] = new_value;
                        toast_free[i] = true;
-                       toast_sizes[i] = VARATT_SIZE(toast_values[i]);
+                       toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
                        need_change = true;
                        need_free = true;
                }
                else
                {
-                       /*
-                        * incompressible data, ignore on subsequent compression
-                        * passes
-                        */
+                       /* incompressible, ignore on subsequent compression passes */
                        toast_action[i] = 'x';
                }
        }
 
        /*
-        * Finally we store attributes of type 'm' external
+        * Finally we store attributes of type 'm' externally.  At this point we
+        * increase the target tuple size, so that 'm' attributes aren't stored
+        * externally unless really necessary.
         */
-       while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
-                  maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
+       maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;
+
+       while (heap_compute_data_size(tupleDesc,
+                                                                 toast_values, toast_isnull) > maxDataLen &&
+                  rel->rd_rel->reltoastrelid != InvalidOid)
        {
                int                     biggest_attno = -1;
-               int32           biggest_size = MAXALIGN(sizeof(varattrib));
+               int32           biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
                Datum           old_value;
 
                /*--------
@@ -673,8 +957,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                {
                        if (toast_action[i] == 'p')
                                continue;
-                       if (VARATT_IS_EXTERNAL(toast_values[i]))
-                               continue;
+                       if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
+                               continue;               /* can't happen, toast_action would be 'p' */
                        if (att[i]->attstorage != 'm')
                                continue;
                        if (toast_sizes[i] > biggest_size)
@@ -693,66 +977,78 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
                i = biggest_attno;
                old_value = toast_values[i];
                toast_action[i] = 'p';
-               toast_values[i] = toast_save_datum(rel, toast_values[i]);
+               toast_values[i] = toast_save_datum(rel, toast_values[i],
+                                                                                  toast_oldexternal[i], options);
                if (toast_free[i])
                        pfree(DatumGetPointer(old_value));
-
                toast_free[i] = true;
-               toast_sizes[i] = VARATT_SIZE(toast_values[i]);
 
                need_change = true;
                need_free = true;
        }
 
        /*
-        * In the case we toasted any values, we need to build a new heap
-        * tuple with the changed values.
+        * In the case we toasted any values, we need to build a new heap tuple
+        * with the changed values.
         */
        if (need_change)
        {
                HeapTupleHeader olddata = newtup->t_data;
-               char       *new_data;
-               int32           new_len;
+               HeapTupleHeader new_data;
+               int32           new_header_len;
+               int32           new_data_len;
+               int32           new_tuple_len;
 
                /*
-                * Calculate the new size of the tuple.  Header size should not
-                * change, but data size might.
+                * Calculate the new size of the tuple.
+                *
+                * Note: we used to assume here that the old tuple's t_hoff must equal
+                * the new_header_len value, but that was incorrect.  The old tuple
+                * might have a smaller-than-current natts, if there's been an ALTER
+                * TABLE ADD COLUMN since it was stored; and that would lead to a
+                * different conclusion about the size of the null bitmap, or even
+                * whether there needs to be one at all.
                 */
-               new_len = offsetof(HeapTupleHeaderData, t_bits);
+               new_header_len = SizeofHeapTupleHeader;
                if (has_nulls)
-                       new_len += BITMAPLEN(numAttrs);
+                       new_header_len += BITMAPLEN(numAttrs);
                if (olddata->t_infomask & HEAP_HASOID)
-                       new_len += sizeof(Oid);
-               new_len = MAXALIGN(new_len);
-               Assert(new_len == olddata->t_hoff);
-               new_len += ComputeDataSize(tupleDesc, toast_values, toast_nulls);
-
-               /*
-                * Allocate new tuple in same context as old one.
-                */
-               new_data = (char *) MemoryContextAlloc(newtup->t_datamcxt, new_len);
-               newtup->t_data = (HeapTupleHeader) new_data;
-               newtup->t_len = new_len;
+                       new_header_len += sizeof(Oid);
+               new_header_len = MAXALIGN(new_header_len);
+               new_data_len = heap_compute_data_size(tupleDesc,
+                                                                                         toast_values, toast_isnull);
+               new_tuple_len = new_header_len + new_data_len;
 
                /*
-                * Put the tuple header and the changed values into place
+                * Allocate and zero the space needed, and fill HeapTupleData fields.
                 */
-               memcpy(new_data, olddata, olddata->t_hoff);
-
-               DataFill((char *) new_data + olddata->t_hoff,
-                                tupleDesc,
-                                toast_values,
-                                toast_nulls,
-                                &(newtup->t_data->t_infomask),
-                                has_nulls ? newtup->t_data->t_bits : NULL);
+               result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_tuple_len);
+               result_tuple->t_len = new_tuple_len;
+               result_tuple->t_self = newtup->t_self;
+               result_tuple->t_tableOid = newtup->t_tableOid;
+               new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
+               result_tuple->t_data = new_data;
 
                /*
-                * In the case we modified a previously modified tuple again, free
-                * the memory from the previous run
+                * Copy the existing tuple header, but adjust natts and t_hoff.
                 */
-               if ((char *) olddata != ((char *) newtup + HEAPTUPLESIZE))
-                       pfree(olddata);
+               memcpy(new_data, olddata, SizeofHeapTupleHeader);
+               HeapTupleHeaderSetNatts(new_data, numAttrs);
+               new_data->t_hoff = new_header_len;
+               if (olddata->t_infomask & HEAP_HASOID)
+                       HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(olddata));
+
+               /* Copy over the data, and fill the null bitmap if needed */
+               heap_fill_tuple(tupleDesc,
+                                               toast_values,
+                                               toast_isnull,
+                                               (char *) new_data + new_header_len,
+                                               new_data_len,
+                                               &(new_data->t_infomask),
+                                               has_nulls ? new_data->t_bits : NULL);
        }
+       else
+               result_tuple = newtup;
 
        /*
         * Free allocated temp values
@@ -768,8 +1064,227 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
        if (need_delold)
                for (i = 0; i < numAttrs; i++)
                        if (toast_delold[i])
-                               toast_delete_datum(rel,
-                                       heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull));
+                               toast_delete_datum(rel, toast_oldvalues[i], false);
+
+       return result_tuple;
+}
+
+
+/* ----------
+ * toast_flatten_tuple -
+ *
+ *     "Flatten" a tuple to contain no out-of-line toasted fields.
+ *     (This does not eliminate compressed or short-header datums.)
+ *
+ *     Note: we expect the caller already checked HeapTupleHasExternal(tup),
+ *     so there is no need for a short-circuit path.
+ *
+ * tup: the tuple to flatten; it is only read here, never modified
+ * tupleDesc: descriptor used both to deform tup and to form the result
+ *
+ *     Returns the tuple built by heap_form_tuple() from the fetched values,
+ *     with tup's OID (when the descriptor has OIDs), t_self, t_tableOid,
+ *     t_ctid and visibility info copied into it.  Temporary copies of
+ *     fetched values are freed before returning.
+ * ----------
+ */
+HeapTuple
+toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
+{
+       HeapTuple       new_tuple;
+       Form_pg_attribute *att = tupleDesc->attrs;
+       int                     numAttrs = tupleDesc->natts;
+       int                     i;
+       Datum           toast_values[MaxTupleAttributeNumber];
+       bool            toast_isnull[MaxTupleAttributeNumber];
+       bool            toast_free[MaxTupleAttributeNumber];
+
+       /*
+        * Break down the tuple into fields.
+        *
+        * The work arrays above are sized MaxTupleAttributeNumber, hence the
+        * Assert on numAttrs before they are filled.
+        */
+       Assert(numAttrs <= MaxTupleAttributeNumber);
+       heap_deform_tuple(tup, tupleDesc, toast_values, toast_isnull);
+
+       memset(toast_free, 0, numAttrs * sizeof(bool)); /* nothing fetched yet */
+
+       for (i = 0; i < numAttrs; i++)
+       {
+               /*
+                * Look at non-null varlena attributes
+                *
+                * Only attributes with attlen == -1 are examined.  An external
+                * value is replaced by the copy returned from
+                * heap_tuple_fetch_attr(), and toast_free[i] records that this
+                * copy must be pfree'd once the new tuple has been formed.
+                * Values that are not external are passed through untouched.
+                */
+               if (!toast_isnull[i] && att[i]->attlen == -1)
+               {
+                       struct varlena *new_value;
+
+                       new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
+                       if (VARATT_IS_EXTERNAL(new_value))
+                       {
+                               new_value = heap_tuple_fetch_attr(new_value);
+                               toast_values[i] = PointerGetDatum(new_value);
+                               toast_free[i] = true;
+                       }
+               }
+       }
+
+       /*
+        * Form the reconfigured tuple.
+        */
+       new_tuple = heap_form_tuple(tupleDesc, toast_values, toast_isnull);
+
+       /*
+        * Be sure to copy the tuple's OID and identity fields.  We also make a
+        * point of copying visibility info, just in case anybody looks at those
+        * fields in a syscache entry.
+        *
+        * Only the HEAP_XACT_MASK and HEAP2_XACT_MASK bits are taken from the
+        * old tuple's infomask words; all other infomask bits stay as set in
+        * the newly formed tuple.
+        */
+       if (tupleDesc->tdhasoid)
+               HeapTupleSetOid(new_tuple, HeapTupleGetOid(tup));
+
+       new_tuple->t_self = tup->t_self;
+       new_tuple->t_tableOid = tup->t_tableOid;
+
+       new_tuple->t_data->t_choice = tup->t_data->t_choice;
+       new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
+       new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
+       new_tuple->t_data->t_infomask |=
+               tup->t_data->t_infomask & HEAP_XACT_MASK;
+       new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
+       new_tuple->t_data->t_infomask2 |=
+               tup->t_data->t_infomask2 & HEAP2_XACT_MASK;
+
+       /*
+        * Free allocated temp values
+        *
+        * Only the entries flagged in toast_free[] (the copies fetched in the
+        * loop above) are released; the remaining Datums are left alone.
+        */
+       for (i = 0; i < numAttrs; i++)
+               if (toast_free[i])
+                       pfree(DatumGetPointer(toast_values[i]));
+
+       return new_tuple;
+}
+
+
+/* ----------
+ * toast_flatten_tuple_to_datum -
+ *
+ *     "Flatten" a tuple containing out-of-line toasted fields into a Datum.
+ *     The result is always palloc'd in the current memory context.
+ *
+ *     We have a general rule that Datums of container types (rows, arrays,
+ *     ranges, etc) must not contain any external TOAST pointers.  Without
+ *     this rule, we'd have to look inside each Datum when preparing a tuple
+ *     for storage, which would be expensive and would fail to extend cleanly
+ *     to new sorts of container types.
+ *
+ *     However, we don't want to say that tuples represented as HeapTuples
+ *     can't contain toasted fields, so instead this routine should be called
+ *     when such a HeapTuple is being converted into a Datum.
+ *
+ *     While we're at it, we decompress any compressed fields too.  This is not
+ *     necessary for correctness, but reflects an expectation that compression
+ *     will be more effective if applied to the whole tuple not individual
+ *     fields.  That concern is not strong enough to justify deconstructing
+ *     and reconstructing tuples just to get rid of compressed fields, however.
+ *     So callers typically won't call this unless they see that the tuple has
+ *     at least one external field.
+ *
+ *     On the other hand, in-line short-header varlena fields are left alone.
+ *     If we "untoasted" them here, they'd just get changed back to short-header
+ *     format anyway within heap_fill_tuple.
+ *
+ * tup: header of the tuple to flatten; it is only read here
+ * tup_len: length of that tuple, used as t_len of a temporary HeapTupleData
+ * tupleDesc: descriptor used to deform tup; also supplies the type OID and
+ *     typmod stored in the result's composite-Datum header
+ *
+ *     Returns a pointer Datum referencing the newly built tuple header.
+ * ----------
+ */
+Datum
+toast_flatten_tuple_to_datum(HeapTupleHeader tup,
+                                                        uint32 tup_len,
+                                                        TupleDesc tupleDesc)
+{
+       HeapTupleHeader new_data;
+       int32           new_header_len;
+       int32           new_data_len;
+       int32           new_tuple_len;
+       HeapTupleData tmptup;
+       Form_pg_attribute *att = tupleDesc->attrs;
+       int                     numAttrs = tupleDesc->natts;
+       int                     i;
+       bool            has_nulls = false;
+       Datum           toast_values[MaxTupleAttributeNumber];
+       bool            toast_isnull[MaxTupleAttributeNumber];
+       bool            toast_free[MaxTupleAttributeNumber];
+
+       /*
+        * Build a temporary HeapTuple control structure
+        *
+        * We were handed only a bare header plus length, while
+        * heap_deform_tuple() below is called with a HeapTuple.  The identity
+        * fields are set to invalid values here.
+        */
+       tmptup.t_len = tup_len;
+       ItemPointerSetInvalid(&(tmptup.t_self));
+       tmptup.t_tableOid = InvalidOid;
+       tmptup.t_data = tup;
+
+       /*
+        * Break down the tuple into fields.
+        *
+        * The work arrays above are sized MaxTupleAttributeNumber, hence the
+        * Assert on numAttrs before they are filled.
+        */
+       Assert(numAttrs <= MaxTupleAttributeNumber);
+       heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
+
+       memset(toast_free, 0, numAttrs * sizeof(bool)); /* nothing fetched yet */
+
+       for (i = 0; i < numAttrs; i++)
+       {
+               /*
+                * Look at non-null varlena attributes
+                *
+                * Nulls are only counted, so that we know below whether the new
+                * header needs a null bitmap.  Varlena values (attlen == -1) that
+                * are external or compressed are replaced by the result of
+                * heap_tuple_untoast_attr(), and toast_free[i] records that the
+                * replacement must be pfree'd once the tuple has been filled.
+                */
+               if (toast_isnull[i])
+                       has_nulls = true;
+               else if (att[i]->attlen == -1)
+               {
+                       struct varlena *new_value;
+
+                       new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
+                       if (VARATT_IS_EXTERNAL(new_value) ||
+                               VARATT_IS_COMPRESSED(new_value))
+                       {
+                               new_value = heap_tuple_untoast_attr(new_value);
+                               toast_values[i] = PointerGetDatum(new_value);
+                               toast_free[i] = true;
+                       }
+               }
+       }
+
+       /*
+        * Calculate the new size of the tuple.
+        *
+        * This should match the reconstruction code in toast_insert_or_update.
+        *
+        * The header is the fixed part, plus a null bitmap if any attribute is
+        * null, plus room for an OID if the old tuple had one, rounded up with
+        * MAXALIGN.  The data size is recomputed from the flattened values.
+        */
+       new_header_len = SizeofHeapTupleHeader;
+       if (has_nulls)
+               new_header_len += BITMAPLEN(numAttrs);
+       if (tup->t_infomask & HEAP_HASOID)
+               new_header_len += sizeof(Oid);
+       new_header_len = MAXALIGN(new_header_len);
+       new_data_len = heap_compute_data_size(tupleDesc,
+                                                                                 toast_values, toast_isnull);
+       new_tuple_len = new_header_len + new_data_len;
+
+       new_data = (HeapTupleHeader) palloc0(new_tuple_len);    /* zero-filled */
+
+       /*
+        * Copy the existing tuple header, but adjust natts and t_hoff.
+        *
+        * Only the fixed-size part (SizeofHeapTupleHeader bytes) is copied; the
+        * old null bitmap is not, since heap_fill_tuple() below rebuilds it.
+        * The OID, when present, is carried over explicitly.
+        */
+       memcpy(new_data, tup, SizeofHeapTupleHeader);
+       HeapTupleHeaderSetNatts(new_data, numAttrs);
+       new_data->t_hoff = new_header_len;
+       if (tup->t_infomask & HEAP_HASOID)
+               HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(tup));
+
+       /*
+        * Set the composite-Datum header fields correctly
+        *
+        * This is done after the memcpy above, so the length reflects the
+        * rebuilt tuple and the type OID/typmod come from tupleDesc.
+        */
+       HeapTupleHeaderSetDatumLength(new_data, new_tuple_len);
+       HeapTupleHeaderSetTypeId(new_data, tupleDesc->tdtypeid);
+       HeapTupleHeaderSetTypMod(new_data, tupleDesc->tdtypmod);
+
+       /*
+        * Copy over the data, and fill the null bitmap if needed
+        *
+        * The data area starts new_header_len bytes into the new tuple.  A
+        * bitmap pointer is passed only when some attribute is null.
+        */
+       heap_fill_tuple(tupleDesc,
+                                       toast_values,
+                                       toast_isnull,
+                                       (char *) new_data + new_header_len,
+                                       new_data_len,
+                                       &(new_data->t_infomask),
+                                       has_nulls ? new_data->t_bits : NULL);
+
+       /*
+        * Free allocated temp values
+        *
+        * Only the entries flagged in toast_free[] (the copies produced in the
+        * loop above) are released; the remaining Datums are left alone.
+        */
+       for (i = 0; i < numAttrs; i++)
+               if (toast_free[i])
+                       pfree(DatumGetPointer(toast_values[i]));
+
+       return PointerGetDatum(new_data);
 }
 
 
@@ -781,21 +1296,52 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
  *     If we fail (ie, compressed result is actually bigger than original)
  *     then return NULL.  We must not use compressed data if it'd expand
  *     the tuple!
+ *
+ *     We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
+ *     copying them.  But we can't handle external or compressed datums.
  * ----------
  */
 Datum
 toast_compress_datum(Datum value)
 {
-       varattrib  *tmp;
+       struct varlena *tmp;
+       int32           valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
+       int32           len;
+
+       Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
+       Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
+
+       /*
+        * No point in wasting a palloc cycle if value size is out of the allowed
+        * range for compression
+        */
+       if (valsize < PGLZ_strategy_default->min_input_size ||
+               valsize > PGLZ_strategy_default->max_input_size)
+               return PointerGetDatum(NULL);
 
-       tmp = (varattrib *) palloc(sizeof(PGLZ_Header) + VARATT_SIZE(value));
-       pglz_compress(VARATT_DATA(value), VARATT_SIZE(value) - VARHDRSZ,
-                                 (PGLZ_Header *) tmp,
-                                 PGLZ_strategy_default);
-       if (VARATT_SIZE(tmp) < VARATT_SIZE(value))
+       tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
+                                                                       TOAST_COMPRESS_HDRSZ);
+
+       /*
+        * We recheck the actual size even if pglz_compress() reports success,
+        * because it might be satisfied with having saved as little as one byte
+        * in the compressed data --- which could turn into a net loss once you
+        * consider header and alignment padding.  Worst case, the compressed
+        * format might require three padding bytes (plus header, which is
+        * included in VARSIZE(tmp)), whereas the uncompressed format would take
+        * only one header byte and no padding if the value is short enough.  So
+        * we insist on a savings of more than 2 bytes to ensure we have a gain.
+        */
+       len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)),
+                                               valsize,
+                                               TOAST_COMPRESS_RAWDATA(tmp),
+                                               PGLZ_strategy_default);
+       if (len >= 0 &&
+               len + TOAST_COMPRESS_HDRSZ < valsize - 2)
        {
+               TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize);
+               SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ);
                /* successful compression */
-               VARATT_SIZEP(tmp) |= VARATT_FLAG_COMPRESSED;
                return PointerGetDatum(tmp);
        }
        else
@@ -807,82 +1353,238 @@ toast_compress_datum(Datum value)
 }
 
 
+/* ----------
+ * toast_get_valid_index
+ *
+ *     Get the OID of the valid index of the given toast relation (a toast
+ *     relation has only one valid index at a time), using "lock" throughout.
+ */
+Oid
+toast_get_valid_index(Oid toastoid, LOCKMODE lock)
+{
+       int                     num_indexes;
+       int                     validIndex;
+       Oid                     validIndexOid;
+       Relation   *toastidxs;
+       Relation        toastrel;
+
+       /* Open the toast relation with the caller-supplied lock mode */
+       toastrel = heap_open(toastoid, lock);
+
+       /* Open its indexes; validIndex is the array position of the valid one */
+       validIndex = toast_open_indexes(toastrel,
+                                                                       lock,
+                                                                       &toastidxs,
+                                                                       &num_indexes);
+       validIndexOid = RelationGetRelid(toastidxs[validIndex]);
+
+       /* Close the indexes and then the toast relation, passing the same lock */
+       toast_close_indexes(toastidxs, num_indexes, lock);
+       heap_close(toastrel, lock);
+
+       return validIndexOid;
+}
+
+
 /* ----------
  * toast_save_datum -
  *
  *     Save one single datum into the secondary relation and return
- *     a varattrib reference for it.
+ *     a Datum reference for it.
+ *
+ * rel: the main relation we're working with (not the toast rel!)
+ * value: datum to be pushed to toast storage
+ * oldexternal: if not NULL, toast pointer previously representing the datum
+ * options: options to be passed to heap_insert() for toast rows
  * ----------
  */
 static Datum
-toast_save_datum(Relation rel, Datum value)
+toast_save_datum(Relation rel, Datum value,
+                                struct varlena * oldexternal, int options)
 {
        Relation        toastrel;
-       Relation        toastidx;
+       Relation   *toastidxs;
        HeapTuple       toasttup;
-       InsertIndexResult idxres;
        TupleDesc       toasttupDesc;
        Datum           t_values[3];
-       char            t_nulls[3];
-       varattrib  *result;
-       struct
+       bool            t_isnull[3];
+       CommandId       mycid = GetCurrentCommandId(true);
+       struct varlena *result;
+       struct varatt_external toast_pointer;
+       union
        {
                struct varlena hdr;
-               char            data[TOAST_MAX_CHUNK_SIZE];
+               /* this is to make the union big enough for a chunk: */
+               char            data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
+               /* ensure union is aligned well enough: */
+               int32           align_it;
        }                       chunk_data;
        int32           chunk_size;
        int32           chunk_seq = 0;
        char       *data_p;
        int32           data_todo;
+       Pointer         dval = DatumGetPointer(value);
+       int                     num_indexes;
+       int                     validIndex;
+
+       Assert(!VARATT_IS_EXTERNAL(value));
 
        /*
-        * Create the varattrib reference
+        * Open the toast relation and its indexes.  We can use the index to check
+        * uniqueness of the OID we assign to the toasted item, even though it has
+        * additional columns besides OID.
         */
-       result = (varattrib *) palloc(sizeof(varattrib));
+       toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
+       toasttupDesc = toastrel->rd_att;
+
+       /* Open all the toast indexes and look for the valid one */
+       validIndex = toast_open_indexes(toastrel,
+                                                                       RowExclusiveLock,
+                                                                       &toastidxs,
+                                                                       &num_indexes);
 
-       result->va_header = sizeof(varattrib) | VARATT_FLAG_EXTERNAL;
-       if (VARATT_IS_COMPRESSED(value))
+       /*
+        * Get the data pointer and length, and compute va_rawsize and va_extsize.
+        *
+        * va_rawsize is the size of the equivalent fully uncompressed datum, so
+        * we have to adjust for short headers.
+        *
+        * va_extsize is the actual size of the data payload in the toast records.
+        */
+       if (VARATT_IS_SHORT(dval))
        {
-               result->va_header |= VARATT_FLAG_COMPRESSED;
-               result->va_content.va_external.va_rawsize =
-                       ((varattrib *) value)->va_content.va_compressed.va_rawsize;
+               data_p = VARDATA_SHORT(dval);
+               data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
+               toast_pointer.va_rawsize = data_todo + VARHDRSZ;                /* as if not short */
+               toast_pointer.va_extsize = data_todo;
+       }
+       else if (VARATT_IS_COMPRESSED(dval))
+       {
+               data_p = VARDATA(dval);
+               data_todo = VARSIZE(dval) - VARHDRSZ;
+               /* rawsize in a compressed datum is just the size of the payload */
+               toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
+               toast_pointer.va_extsize = data_todo;
+               /* Assert that the numbers look like it's compressed */
+               Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
        }
        else
-               result->va_content.va_external.va_rawsize = VARATT_SIZE(value);
-
-       result->va_content.va_external.va_extsize =
-               VARATT_SIZE(value) - VARHDRSZ;
-       result->va_content.va_external.va_valueid = newoid();
-       result->va_content.va_external.va_toastrelid =
-               rel->rd_rel->reltoastrelid;
+       {
+               data_p = VARDATA(dval);
+               data_todo = VARSIZE(dval) - VARHDRSZ;
+               toast_pointer.va_rawsize = VARSIZE(dval);
+               toast_pointer.va_extsize = data_todo;
+       }
 
        /*
-        * Initialize constant parts of the tuple data
+        * Insert the correct table OID into the result TOAST pointer.
+        *
+        * Normally this is the actual OID of the target toast table, but during
+        * table-rewriting operations such as CLUSTER, we have to insert the OID
+        * of the table's real permanent toast table instead.  rd_toastoid is set
+        * if we have to substitute such an OID.
         */
-       t_values[0] = ObjectIdGetDatum(result->va_content.va_external.va_valueid);
-       t_values[2] = PointerGetDatum(&chunk_data);
-       t_nulls[0] = ' ';
-       t_nulls[1] = ' ';
-       t_nulls[2] = ' ';
+       if (OidIsValid(rel->rd_toastoid))
+               toast_pointer.va_toastrelid = rel->rd_toastoid;
+       else
+               toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
 
        /*
-        * Get the data to process
+        * Choose an OID to use as the value ID for this toast value.
+        *
+        * Normally we just choose an unused OID within the toast table.  But
+        * during table-rewriting operations where we are preserving an existing
+        * toast table OID, we want to preserve toast value OIDs too.  So, if
+        * rd_toastoid is set and we had a prior external value from that same
+        * toast table, re-use its value ID.  If we didn't have a prior external
+        * value (which is a corner case, but possible if the table's attstorage
+        * options have been changed), we have to pick a value ID that doesn't
+        * conflict with either new or existing toast value OIDs.
         */
-       data_p = VARATT_DATA(value);
-       data_todo = VARATT_SIZE(value) - VARHDRSZ;
+       if (!OidIsValid(rel->rd_toastoid))
+       {
+               /* normal case: just choose an unused OID */
+               toast_pointer.va_valueid =
+                       GetNewOidWithIndex(toastrel,
+                                                          RelationGetRelid(toastidxs[validIndex]),
+                                                          (AttrNumber) 1);
+       }
+       else
+       {
+               /* rewrite case: check to see if value was in old toast table */
+               toast_pointer.va_valueid = InvalidOid;
+               if (oldexternal != NULL)
+               {
+                       struct varatt_external old_toast_pointer;
+
+                       Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
+                       /* Must copy to access aligned fields */
+                       VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
+                       if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
+                       {
+                               /* This value came from the old toast table; reuse its OID */
+                               toast_pointer.va_valueid = old_toast_pointer.va_valueid;
+
+                               /*
+                                * There is a corner case here: the table rewrite might have
+                                * to copy both live and recently-dead versions of a row, and
+                                * those versions could easily reference the same toast value.
+                                * When we copy the second or later version of such a row,
+                                * reusing the OID will mean we select an OID that's already
+                                * in the new toast table.  Check for that, and if so, just
+                                * fall through without writing the data again.
+                                *
+                                * While annoying and ugly-looking, this is a good thing
+                                * because it ensures that we wind up with only one copy of
+                                * the toast value when there is only one copy in the old
+                                * toast table.  Before we detected this case, we'd have made
+                                * multiple copies, wasting space; and what's worse, the
+                                * copies belonging to already-deleted heap tuples would not
+                                * be reclaimed by VACUUM.
+                                */
+                               if (toastrel_valueid_exists(toastrel,
+                                                                                       toast_pointer.va_valueid))
+                               {
+                                       /* Match, so short-circuit the data storage loop below */
+                                       data_todo = 0;
+                               }
+                       }
+               }
+               if (toast_pointer.va_valueid == InvalidOid)
+               {
+                       /*
+                        * new value; must choose an OID that doesn't conflict in either
+                        * old or new toast table
+                        */
+                       do
+                       {
+                               toast_pointer.va_valueid =
+                                       GetNewOidWithIndex(toastrel,
+                                                                        RelationGetRelid(toastidxs[validIndex]),
+                                                                          (AttrNumber) 1);
+                       } while (toastid_valueid_exists(rel->rd_toastoid,
+                                                                                       toast_pointer.va_valueid));
+               }
+       }
 
        /*
-        * Open the toast relation
+        * Initialize constant parts of the tuple data
         */
-       toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
-       toasttupDesc = toastrel->rd_att;
-       toastidx = index_open(toastrel->rd_rel->reltoastidxid);
+       t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
+       t_values[2] = PointerGetDatum(&chunk_data);
+       t_isnull[0] = false;
+       t_isnull[1] = false;
+       t_isnull[2] = false;
 
        /*
         * Split up the item into chunks
         */
        while (data_todo > 0)
        {
+               int                     i;
+
+               CHECK_FOR_INTERRUPTS();
+
                /*
                 * Calculate the size of this chunk
                 */
@@ -892,32 +1594,38 @@ toast_save_datum(Relation rel, Datum value)
                 * Build a tuple and store it
                 */
                t_values[1] = Int32GetDatum(chunk_seq++);
-               VARATT_SIZEP(&chunk_data) = chunk_size + VARHDRSZ;
-               memcpy(VARATT_DATA(&chunk_data), data_p, chunk_size);
-               toasttup = heap_formtuple(toasttupDesc, t_values, t_nulls);
-               if (!HeapTupleIsValid(toasttup))
-                       elog(ERROR, "failed to build TOAST tuple");
+               SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
+               memcpy(VARDATA(&chunk_data), data_p, chunk_size);
+               toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
 
-               simple_heap_insert(toastrel, toasttup);
+               heap_insert(toastrel, toasttup, mycid, options, NULL);
 
                /*
-                * Create the index entry.      We cheat a little here by not using
-                * FormIndexDatum: this relies on the knowledge that the index
-                * columns are the same as the initial columns of the table.
+                * Create the index entry.  We cheat a little here by not using
+                * FormIndexDatum: this relies on the knowledge that the index columns
+                * are the same as the initial columns of the table for all the
+                * indexes.  We also cheat by not providing an IndexInfo: this is okay
+                * for now because btree doesn't need one, but we might have to be
+                * more honest someday.
                 *
                 * Note also that there had better not be any user-created index on
                 * the TOAST table, since we don't bother to update anything else.
                 */
-               idxres = index_insert(toastidx, t_values, t_nulls,
-                                                         &(toasttup->t_self),
-                                                         toastrel, toastidx->rd_index->indisunique);
-               if (idxres == NULL)
-                       elog(ERROR, "failed to insert index entry for TOAST tuple");
+               for (i = 0; i < num_indexes; i++)
+               {
+                       /* Only index relations marked as ready can be updated */
+                       if (IndexIsReady(toastidxs[i]->rd_index))
+                               index_insert(toastidxs[i], t_values, t_isnull,
+                                                        &(toasttup->t_self),
+                                                        toastrel,
+                                                        toastidxs[i]->rd_index->indisunique ?
+                                                        UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+                                                        NULL);
+               }
 
                /*
                 * Free memory
                 */
-               pfree(idxres);
                heap_freetuple(toasttup);
 
                /*
@@ -928,11 +1636,18 @@ toast_save_datum(Relation rel, Datum value)
        }
 
        /*
-        * Done - close toast relation and return the reference
+        * Done - close toast relation and its indexes
         */
-       index_close(toastidx);
+       toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
        heap_close(toastrel, RowExclusiveLock);
 
+       /*
+        * Create the TOAST pointer value that we'll return
+        */
+       result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
+       SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
+       memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
+
        return PointerGetDatum(result);
 }
 
@@ -944,96 +1659,201 @@ toast_save_datum(Relation rel, Datum value)
  * ----------
  */
 static void
-toast_delete_datum(Relation rel, Datum value)
+toast_delete_datum(Relation rel, Datum value, bool is_speculative)
 {
-       varattrib  *attr = (varattrib *) DatumGetPointer(value);
+       struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+       struct varatt_external toast_pointer;
        Relation        toastrel;
-       Relation        toastidx;
+       Relation   *toastidxs;
        ScanKeyData toastkey;
-       IndexScanDesc toastscan;
+       SysScanDesc toastscan;
        HeapTuple       toasttup;
+       int                     num_indexes;
+       int                     validIndex;
+       SnapshotData SnapshotToast;
 
-       if (!VARATT_IS_EXTERNAL(attr))
+       if (!VARATT_IS_EXTERNAL_ONDISK(attr))
                return;
 
+       /* Copy the toast pointer out of attr first so its fields are aligned */
+       VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
        /*
-        * Open the toast relation and it's index
+        * Open the toast relation and its indexes
         */
-       toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
-                                                RowExclusiveLock);
-       toastidx = index_open(toastrel->rd_rel->reltoastidxid);
+       toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock);
+
+       /* Open all the toast indexes and look for the valid one */
+       validIndex = toast_open_indexes(toastrel,
+                                                                       RowExclusiveLock,
+                                                                       &toastidxs,
+                                                                       &num_indexes);
 
        /*
-        * Setup a scan key to fetch from the index by va_valueid (we don't
-        * particularly care whether we see them in sequence or not)
+        * Set up a scan key to find chunks with matching va_valueid
         */
        ScanKeyInit(&toastkey,
                                (AttrNumber) 1,
                                BTEqualStrategyNumber, F_OIDEQ,
-                               ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
+                               ObjectIdGetDatum(toast_pointer.va_valueid));
 
        /*
-        * Find the chunks by index
+        * Find all the chunks.  (We don't actually care whether we see them in
+        * sequence or not, but since we've already locked the index we might as
+        * well use systable_beginscan_ordered.)
         */
-       toastscan = index_beginscan(toastrel, toastidx, SnapshotToast,
-                                                               1, &toastkey);
-       while ((toasttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
+       init_toast_snapshot(&SnapshotToast);
+       toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+                                                                                  &SnapshotToast, 1, &toastkey);
+       while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
        {
                /*
                 * Have a chunk, delete it
                 */
-               simple_heap_delete(toastrel, &toasttup->t_self);
+               if (is_speculative) /* caller asked for the speculative-abort path */
+                       heap_abort_speculative(toastrel, toasttup);
+               else
+                       simple_heap_delete(toastrel, &toasttup->t_self);
        }
 
        /*
         * End scan and close relations
         */
-       index_endscan(toastscan);
-       index_close(toastidx);
+       systable_endscan_ordered(toastscan);
+       toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
        heap_close(toastrel, RowExclusiveLock);
 }
 
 
+/* ----------
+ * toastrel_valueid_exists -
+ *
+ *     Return true if the open toast relation has any chunk with the given value ID
+ * ----------
+ */
+static bool
+toastrel_valueid_exists(Relation toastrel, Oid valueid)
+{
+       bool            result = false;
+       ScanKeyData toastkey;
+       SysScanDesc toastscan;
+       int                     num_indexes;
+       int                     validIndex;
+       Relation   *toastidxs;
+       SnapshotData SnapshotToast;
+
+       /* Open the toast indexes; validIndex is the position of the valid one */
+       validIndex = toast_open_indexes(toastrel,
+                                                                       RowExclusiveLock,
+                                                                       &toastidxs,
+                                                                       &num_indexes);
+
+       /*
+        * Set up a scan key to find chunks with matching va_valueid
+        */
+       ScanKeyInit(&toastkey,
+                               (AttrNumber) 1,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(valueid));
+
+       /*
+        * Is there any such chunk?  A single fetched tuple is enough to say yes.
+        */
+       init_toast_snapshot(&SnapshotToast);
+       toastscan = systable_beginscan(toastrel,
+                                                                  RelationGetRelid(toastidxs[validIndex]),
+                                                                  true, &SnapshotToast, 1, &toastkey);
+
+       if (systable_getnext(toastscan) != NULL)
+               result = true;
+
+       systable_endscan(toastscan);
+
+       /* Close the indexes opened above; toastrel itself is left open */
+       toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
+
+       return result;
+}
+
+/* ----------
+ * toastid_valueid_exists -
+ *
+ *     Same test as toastrel_valueid_exists, but given the toast rel's OID
+ * ----------
+ */
+static bool
+toastid_valueid_exists(Oid toastrelid, Oid valueid)
+{
+       bool            result;
+       Relation        toastrel;
+
+       toastrel = heap_open(toastrelid, AccessShareLock); /* open just for the probe */
+
+       result = toastrel_valueid_exists(toastrel, valueid);
+
+       heap_close(toastrel, AccessShareLock);
+
+       return result;
+}
+
+
 /* ----------
  * toast_fetch_datum -
  *
- *     Reconstruct an in memory varattrib from the chunks saved
+ *     Reconstruct an in memory Datum from the chunks saved
  *     in the toast relation
  * ----------
  */
-static varattrib *
-toast_fetch_datum(varattrib *attr)
+static struct varlena *
+toast_fetch_datum(struct varlena * attr)
 {
        Relation        toastrel;
-       Relation        toastidx;
+       Relation   *toastidxs;
        ScanKeyData toastkey;
-       IndexScanDesc toastscan;
+       SysScanDesc toastscan;
        HeapTuple       ttup;
        TupleDesc       toasttupDesc;
-       varattrib  *result;
+       struct varlena *result;
+       struct varatt_external toast_pointer;
        int32           ressize;
        int32           residx,
                                nextidx;
        int32           numchunks;
        Pointer         chunk;
        bool            isnull;
+       char       *chunkdata;
        int32           chunksize;
+       int                     num_indexes;
+       int                     validIndex;
+       SnapshotData SnapshotToast;
+
+       if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+               elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
+
+       /* Must copy to access aligned fields */
+       VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
-       ressize = attr->va_content.va_external.va_extsize;
+       ressize = toast_pointer.va_extsize;
        numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
 
-       result = (varattrib *) palloc(ressize + VARHDRSZ);
-       VARATT_SIZEP(result) = ressize + VARHDRSZ;
-       if (VARATT_IS_COMPRESSED(attr))
-               VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;
+       result = (struct varlena *) palloc(ressize + VARHDRSZ);
+
+       if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+               SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
+       else
+               SET_VARSIZE(result, ressize + VARHDRSZ);
 
        /*
-        * Open the toast relation and its index
+        * Open the toast relation and its indexes
         */
-       toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
-                                                AccessShareLock);
+       toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
        toasttupDesc = toastrel->rd_att;
-       toastidx = index_open(toastrel->rd_rel->reltoastidxid);
+
+       /* Look for the valid index of the toast relation */
+       validIndex = toast_open_indexes(toastrel,
+                                                                       AccessShareLock,
+                                                                       &toastidxs,
+                                                                       &num_indexes);
 
        /*
         * Setup a scan key to fetch from the index by va_valueid
@@ -1041,61 +1861,89 @@ toast_fetch_datum(varattrib *attr)
        ScanKeyInit(&toastkey,
                                (AttrNumber) 1,
                                BTEqualStrategyNumber, F_OIDEQ,
-                               ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
+                               ObjectIdGetDatum(toast_pointer.va_valueid));
 
        /*
         * Read the chunks by index
         *
         * Note that because the index is actually on (valueid, chunkidx) we will
-        * see the chunks in chunkidx order, even though we didn't explicitly
-        * ask for it.
+        * see the chunks in chunkidx order, even though we didn't explicitly ask
+        * for it.
         */
        nextidx = 0;
 
-       toastscan = index_beginscan(toastrel, toastidx, SnapshotToast,
-                                                               1, &toastkey);
-       while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
+       init_toast_snapshot(&SnapshotToast);
+       toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+                                                                                  &SnapshotToast, 1, &toastkey);
+       while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
        {
                /*
                 * Have a chunk, extract the sequence number and the data
                 */
-               residx = DatumGetInt32(heap_getattr(ttup, 2, toasttupDesc, &isnull));
+               residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
                Assert(!isnull);
-               chunk = DatumGetPointer(heap_getattr(ttup, 3, toasttupDesc, &isnull));
+               chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
                Assert(!isnull);
-               chunksize = VARATT_SIZE(chunk) - VARHDRSZ;
+               if (!VARATT_IS_EXTENDED(chunk))
+               {
+                       chunksize = VARSIZE(chunk) - VARHDRSZ;
+                       chunkdata = VARDATA(chunk);
+               }
+               else if (VARATT_IS_SHORT(chunk))
+               {
+                       /* could happen due to heap_form_tuple doing its thing */
+                       chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
+                       chunkdata = VARDATA_SHORT(chunk);
+               }
+               else
+               {
+                       /* should never happen */
+                       elog(ERROR, "found toasted toast chunk for toast value %u in %s",
+                                toast_pointer.va_valueid,
+                                RelationGetRelationName(toastrel));
+                       chunksize = 0;          /* keep compiler quiet */
+                       chunkdata = NULL;
+               }
 
                /*
                 * Some checks on the data we've found
                 */
                if (residx != nextidx)
-                       elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
+                       elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
                                 residx, nextidx,
-                                attr->va_content.va_external.va_valueid);
+                                toast_pointer.va_valueid,
+                                RelationGetRelationName(toastrel));
                if (residx < numchunks - 1)
                {
                        if (chunksize != TOAST_MAX_CHUNK_SIZE)
-                               elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
-                                        chunksize, residx,
-                                        attr->va_content.va_external.va_valueid);
+                               elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
+                                        chunksize, (int) TOAST_MAX_CHUNK_SIZE,
+                                        residx, numchunks,
+                                        toast_pointer.va_valueid,
+                                        RelationGetRelationName(toastrel));
                }
-               else if (residx < numchunks)
+               else if (residx == numchunks - 1)
                {
                        if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
-                               elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
-                                        chunksize, residx,
-                                        attr->va_content.va_external.va_valueid);
+                               elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
+                                        chunksize,
+                                        (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
+                                        residx,
+                                        toast_pointer.va_valueid,
+                                        RelationGetRelationName(toastrel));
                }
                else
-                       elog(ERROR, "unexpected chunk number %d for toast value %u",
+                       elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
                                 residx,
-                                attr->va_content.va_external.va_valueid);
+                                0, numchunks - 1,
+                                toast_pointer.va_valueid,
+                                RelationGetRelationName(toastrel));
 
                /*
                 * Copy the data into proper place in our result
                 */
-               memcpy(((char *) VARATT_DATA(result)) + residx * TOAST_MAX_CHUNK_SIZE,
-                          VARATT_DATA(chunk),
+               memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
+                          chunkdata,
                           chunksize);
 
                nextidx++;
@@ -1105,15 +1953,16 @@ toast_fetch_datum(varattrib *attr)
         * Final checks that we successfully fetched the datum
         */
        if (nextidx != numchunks)
-               elog(ERROR, "missing chunk number %d for toast value %u",
+               elog(ERROR, "missing chunk number %d for toast value %u in %s",
                         nextidx,
-                        attr->va_content.va_external.va_valueid);
+                        toast_pointer.va_valueid,
+                        RelationGetRelationName(toastrel));
 
        /*
         * End scan and close relations
         */
-       index_endscan(toastscan);
-       index_close(toastidx);
+       systable_endscan_ordered(toastscan);
+       toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
        heap_close(toastrel, AccessShareLock);
 
        return result;
@@ -1122,21 +1971,22 @@ toast_fetch_datum(varattrib *attr)
 /* ----------
  * toast_fetch_datum_slice -
  *
- *     Reconstruct a segment of a varattrib from the chunks saved
+ *     Reconstruct a segment of a Datum from the chunks saved
  *     in the toast relation
  * ----------
  */
-static varattrib *
-toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
+static struct varlena *
+toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
 {
        Relation        toastrel;
-       Relation        toastidx;
+       Relation   *toastidxs;
        ScanKeyData toastkey[3];
        int                     nscankeys;
-       IndexScanDesc toastscan;
+       SysScanDesc toastscan;
        HeapTuple       ttup;
        TupleDesc       toasttupDesc;
-       varattrib  *result;
+       struct varlena *result;
+       struct varatt_external toast_pointer;
        int32           attrsize;
        int32           residx;
        int32           nextidx;
@@ -1148,11 +1998,27 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
        int                     totalchunks;
        Pointer         chunk;
        bool            isnull;
+       char       *chunkdata;
        int32           chunksize;
        int32           chcpystrt;
        int32           chcpyend;
+       int                     num_indexes;
+       int                     validIndex;
+       SnapshotData SnapshotToast;
+
+       if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+               elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
+
+       /* Must copy to access aligned fields */
+       VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
-       attrsize = attr->va_content.va_external.va_extsize;
+       /*
+        * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
+        * we can't return a compressed datum which is meaningful to toast later
+        */
+       Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+
+       attrsize = toast_pointer.va_extsize;
        totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
 
        if (sliceoffset >= attrsize)
@@ -1164,14 +2030,15 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
        if (((sliceoffset + length) > attrsize) || length < 0)
                length = attrsize - sliceoffset;
 
-       result = (varattrib *) palloc(length + VARHDRSZ);
-       VARATT_SIZEP(result) = length + VARHDRSZ;
+       result = (struct varlena *) palloc(length + VARHDRSZ);
 
-       if (VARATT_IS_COMPRESSED(attr))
-               VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;
+       if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+               SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+       else
+               SET_VARSIZE(result, length + VARHDRSZ);
 
        if (length == 0)
-               return (result);                /* Can save a lot of work at this point! */
+               return result;                  /* Can save a lot of work at this point! */
 
        startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
        endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
@@ -1181,21 +2048,25 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
        endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
 
        /*
-        * Open the toast relation and it's index
+        * Open the toast relation and its indexes
         */
-       toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
-                                                AccessShareLock);
+       toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
        toasttupDesc = toastrel->rd_att;
-       toastidx = index_open(toastrel->rd_rel->reltoastidxid);
+
+       /* Look for the valid index of toast relation */
+       validIndex = toast_open_indexes(toastrel,
+                                                                       AccessShareLock,
+                                                                       &toastidxs,
+                                                                       &num_indexes);
 
        /*
-        * Setup a scan key to fetch from the index. This is either two keys
-        * or three depending on the number of chunks.
+        * Setup a scan key to fetch from the index. This is either two keys or
+        * three depending on the number of chunks.
         */
        ScanKeyInit(&toastkey[0],
                                (AttrNumber) 1,
                                BTEqualStrategyNumber, F_OIDEQ,
-                               ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
+                               ObjectIdGetDatum(toast_pointer.va_valueid));
 
        /*
         * Use equality condition for one chunk, a range condition otherwise:
@@ -1226,41 +2097,73 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
         *
         * The index is on (valueid, chunkidx) so they will come in order
         */
+       init_toast_snapshot(&SnapshotToast);
        nextidx = startchunk;
-       toastscan = index_beginscan(toastrel, toastidx, SnapshotToast,
-                                                               nscankeys, toastkey);
-       while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
+       toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+                                                                               &SnapshotToast, nscankeys, toastkey);
+       while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
        {
                /*
                 * Have a chunk, extract the sequence number and the data
                 */
-               residx = DatumGetInt32(heap_getattr(ttup, 2, toasttupDesc, &isnull));
+               residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
                Assert(!isnull);
-               chunk = DatumGetPointer(heap_getattr(ttup, 3, toasttupDesc, &isnull));
+               chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
                Assert(!isnull);
-               chunksize = VARATT_SIZE(chunk) - VARHDRSZ;
+               if (!VARATT_IS_EXTENDED(chunk))
+               {
+                       chunksize = VARSIZE(chunk) - VARHDRSZ;
+                       chunkdata = VARDATA(chunk);
+               }
+               else if (VARATT_IS_SHORT(chunk))
+               {
+                       /* could happen due to heap_form_tuple doing its thing */
+                       chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
+                       chunkdata = VARDATA_SHORT(chunk);
+               }
+               else
+               {
+                       /* should never happen */
+                       elog(ERROR, "found toasted toast chunk for toast value %u in %s",
+                                toast_pointer.va_valueid,
+                                RelationGetRelationName(toastrel));
+                       chunksize = 0;          /* keep compiler quiet */
+                       chunkdata = NULL;
+               }
 
                /*
                 * Some checks on the data we've found
                 */
                if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
-                       elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
+                       elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
                                 residx, nextidx,
-                                attr->va_content.va_external.va_valueid);
+                                toast_pointer.va_valueid,
+                                RelationGetRelationName(toastrel));
                if (residx < totalchunks - 1)
                {
                        if (chunksize != TOAST_MAX_CHUNK_SIZE)
-                               elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
-                                        chunksize, residx,
-                                        attr->va_content.va_external.va_valueid);
+                               elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
+                                        chunksize, (int) TOAST_MAX_CHUNK_SIZE,
+                                        residx, totalchunks,
+                                        toast_pointer.va_valueid,
+                                        RelationGetRelationName(toastrel));
                }
-               else
+               else if (residx == totalchunks - 1)
                {
                        if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
-                               elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
-                                        chunksize, residx,
-                                        attr->va_content.va_external.va_valueid);
+                               elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
+                                        chunksize,
+                                        (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
+                                        residx,
+                                        toast_pointer.va_valueid,
+                                        RelationGetRelationName(toastrel));
                }
+               else
+                       elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
+                                residx,
+                                0, totalchunks - 1,
+                                toast_pointer.va_valueid,
+                                RelationGetRelationName(toastrel));
 
                /*
                 * Copy the data into proper place in our result
@@ -1272,9 +2175,9 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
                if (residx == endchunk)
                        chcpyend = endoffset;
 
-               memcpy(((char *) VARATT_DATA(result)) +
+               memcpy(VARDATA(result) +
                           (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
-                          VARATT_DATA(chunk) + chcpystrt,
+                          chunkdata + chcpystrt,
                           (chcpyend - chcpystrt) + 1);
 
                nextidx++;
@@ -1284,16 +2187,140 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
         * Final checks that we successfully fetched the datum
         */
        if (nextidx != (endchunk + 1))
-               elog(ERROR, "missing chunk number %d for toast value %u",
+               elog(ERROR, "missing chunk number %d for toast value %u in %s",
                         nextidx,
-                        attr->va_content.va_external.va_valueid);
+                        toast_pointer.va_valueid,
+                        RelationGetRelationName(toastrel));
 
        /*
         * End scan and close relations
         */
-       index_endscan(toastscan);
-       index_close(toastidx);
+       systable_endscan_ordered(toastscan);
+       toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
        heap_close(toastrel, AccessShareLock);
 
        return result;
 }
+
+/* ----------
+ * toast_decompress_datum -
+ *
+ * Decompress a compressed version of a varlena datum
+ *
+ * attr must satisfy VARATT_IS_COMPRESSED (checked with Assert only).
+ *
+ * The result is a newly palloc'd varlena of TOAST_COMPRESS_RAWSIZE(attr)
+ * + VARHDRSZ bytes whose size header is set with SET_VARSIZE and whose
+ * data area is filled by pglz_decompress.  attr itself is not modified or
+ * freed here.
+ *
+ * A negative return from pglz_decompress is reported with elog(ERROR)
+ * as "compressed data is corrupted".
+ * ----------
+ */
+static struct varlena *
+toast_decompress_datum(struct varlena * attr)
+{
+       struct varlena *result;
+
+       Assert(VARATT_IS_COMPRESSED(attr));
+
+       result = (struct varlena *)
+               palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
+       SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
+
+       if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
+                                               VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
+                                               VARDATA(result),
+                                               TOAST_COMPRESS_RAWSIZE(attr)) < 0)
+               elog(ERROR, "compressed data is corrupted");
+
+       return result;
+}
+
+
+/* ----------
+ * toast_open_indexes
+ *
+ *     Get an array of the indexes associated to the given toast relation
+ *     and return as well the position of the valid index used by the toast
+ *     relation in this array. It is the responsibility of the caller of this
+ *     function to close the indexes as well as free them.
+ *
+ *     toastrel: toast relation whose index list is fetched
+ *     lock: lock mode passed to index_open for every index in the list
+ *     toastidxs: output; set to a palloc'd array of the opened indexes
+ *     num_indexes: output; set to the number of entries in *toastidxs
+ *
+ *     Returns the array position of the first index whose indisvalid flag
+ *     is set.  If no index is valid, an error is raised with elog(ERROR).
+ * ----------
+ */
+static int
+toast_open_indexes(Relation toastrel,
+                                  LOCKMODE lock,
+                                  Relation **toastidxs,
+                                  int *num_indexes)
+{
+       int                     i = 0;
+       int                     res = 0;
+       bool            found = false;
+       List       *indexlist;
+       ListCell   *lc;
+
+       /*
+        * Get index list of the toast relation.  The list holds index OIDs
+        * and is expected to be non-empty.
+        */
+       indexlist = RelationGetIndexList(toastrel);
+       Assert(indexlist != NIL);
+
+       *num_indexes = list_length(indexlist);
+
+       /*
+        * Open all the index relations, storing them in list order in a
+        * freshly allocated array handed back through *toastidxs.
+        */
+       *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
+       foreach(lc, indexlist)
+               (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
+
+       /*
+        * Fetch the first valid index in list; i is reused as the array
+        * position here.
+        */
+       for (i = 0; i < *num_indexes; i++)
+       {
+               Relation        toastidx = (*toastidxs)[i];
+
+               if (toastidx->rd_index->indisvalid)
+               {
+                       res = i;
+                       found = true;
+                       break;
+               }
+       }
+
+       /*
+        * Free index list, not necessary anymore as relations are opened and
+        * the search for a valid index is complete (whether or not one was
+        * found is checked just below).
+        */
+       list_free(indexlist);
+
+       /*
+        * The toast relation should have one valid index, so something is going
+        * wrong if there is nothing.
+        */
+       if (!found)
+               elog(ERROR, "no valid index found for toast relation with Oid %u",
+                        RelationGetRelid(toastrel));
+
+       return res;
+}
+
+/* ----------
+ * toast_close_indexes
+ *
+ *     Close an array of indexes for a toast relation and free it. This should
+ *     be called for a set of indexes opened previously with toast_open_indexes.
+ *
+ *     toastidxs: array of index relations; released with pfree on return
+ *     num_indexes: number of entries in toastidxs
+ *     lock: lock mode passed to index_close for every entry
+ * ----------
+ */
+static void
+toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
+{
+       int                     i;
+
+       /*
+        * Close relations and clean up things: every entry is closed with the
+        * given lock mode, then the array itself is freed.
+        */
+       for (i = 0; i < num_indexes; i++)
+               index_close(toastidxs[i], lock);
+       pfree(toastidxs);
+}
+
+/* ----------
+ * init_toast_snapshot
+ *
+ *     Initialize an appropriate TOAST snapshot.  We must use an MVCC snapshot
+ *     to initialize the TOAST snapshot; since we don't know which one to use,
+ *     just use the oldest one.  This is safe: at worst, we will get a "snapshot
+ *     too old" error that might have been avoided otherwise.
+ *
+ *     toast_snapshot: caller-provided snapshot that is initialized in place
+ *     by InitToastSnapshot, using the lsn and whenTaken fields of the
+ *     snapshot returned by GetOldestSnapshot().
+ *
+ *     Raises an error via elog(ERROR) ("no known snapshots") when
+ *     GetOldestSnapshot() returns NULL.
+ * ----------
+ */
+static void
+init_toast_snapshot(Snapshot toast_snapshot)
+{
+       Snapshot        snapshot = GetOldestSnapshot();
+
+       if (snapshot == NULL)
+               elog(ERROR, "no known snapshots");
+
+       InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
+}