granicus.if.org Git - postgresql/blobdiff - src/backend/access/heap/tuptoaster.c
Allow index AMs to cache data across aminsert calls within a SQL command.
[postgresql] / src / backend / access / heap / tuptoaster.c
index d19e7220c548c061ad9289ab2dd244462a73bffb..19e704800284a45760d708fd989c6b3c5cef740b 100644 (file)
@@ -4,7 +4,7 @@
  *       Support routines for external and compressed storage of
  *       variable size attributes.
  *
- * Copyright (c) 2000-2015, PostgreSQL Global Development Group
+ * Copyright (c) 2000-2017, PostgreSQL Global Development Group
  *
  *
  * IDENTIFICATION
 #include "catalog/catalog.h"
 #include "common/pg_lzcompress.h"
 #include "miscadmin.h"
+#include "utils/expandeddatum.h"
 #include "utils/fmgroids.h"
 #include "utils/rel.h"
+#include "utils/snapmgr.h"
 #include "utils/typcache.h"
 #include "utils/tqual.h"
 
@@ -50,7 +52,7 @@
  */
 typedef struct toast_compress_header
 {
-       int32           vl_len_;        /* varlena header (do not touch directly!) */
+       int32           vl_len_;                /* varlena header (do not touch directly!) */
        int32           rawsize;
 } toast_compress_header;
 
@@ -58,14 +60,14 @@ typedef struct toast_compress_header
  * Utilities for manipulation of header information for compressed
  * toast entries.
  */
-#define        TOAST_COMPRESS_HDRSZ            ((int32) sizeof(toast_compress_header))
-#define TOAST_COMPRESS_RAWSIZE(ptr)    (((toast_compress_header *) ptr)->rawsize)
+#define TOAST_COMPRESS_HDRSZ           ((int32) sizeof(toast_compress_header))
+#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
 #define TOAST_COMPRESS_RAWDATA(ptr) \
-       (((char *) ptr) + TOAST_COMPRESS_HDRSZ)
+       (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
 #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
-       (((toast_compress_header *) ptr)->rawsize = len)
+       (((toast_compress_header *) (ptr))->rawsize = (len))
 
-static void toast_delete_datum(Relation rel, Datum value);
+static void toast_delete_datum(Relation rel, Datum value, bool is_speculative);
 static Datum toast_save_datum(Relation rel, Datum value,
                                 struct varlena * oldexternal, int options);
 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
@@ -80,6 +82,7 @@ static int toast_open_indexes(Relation toastrel,
                                   int *num_indexes);
 static void toast_close_indexes(Relation *toastidxs, int num_indexes,
                                        LOCKMODE lock);
+static void init_toast_snapshot(Snapshot toast_snapshot);
 
 
 /* ----------
@@ -90,8 +93,9 @@ static void toast_close_indexes(Relation *toastidxs, int num_indexes,
  *
  * This will return a datum that contains all the data internally, ie, not
  * relying on external storage or memory, but it can still be compressed or
- * have a short header.
- ----------
+ * have a short header.  Note some callers assume that if the input is an
+ * EXTERNAL datum, the result will be a pfree'able chunk.
+ * ----------
  */
 struct varlena *
 heap_tuple_fetch_attr(struct varlena * attr)
@@ -108,9 +112,7 @@ heap_tuple_fetch_attr(struct varlena * attr)
        else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
        {
                /*
-                * copy into the caller's memory context. That's not required in all
-                * cases but sufficient for now since this is mainly used when we need
-                * to persist a Datum for unusually long time, like in a HOLD cursor.
+                * This is an indirect pointer --- dereference it
                 */
                struct varatt_indirect redirect;
 
@@ -120,14 +122,30 @@ heap_tuple_fetch_attr(struct varlena * attr)
                /* nested indirect Datums aren't allowed */
                Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
 
-               /* doesn't make much sense, but better handle it */
-               if (VARATT_IS_EXTERNAL_ONDISK(attr))
+               /* recurse if value is still external in some other way */
+               if (VARATT_IS_EXTERNAL(attr))
                        return heap_tuple_fetch_attr(attr);
 
-               /* copy datum verbatim */
+               /*
+                * Copy into the caller's memory context, in case caller tries to
+                * pfree the result.
+                */
                result = (struct varlena *) palloc(VARSIZE_ANY(attr));
                memcpy(result, attr, VARSIZE_ANY(attr));
        }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               /*
+                * This is an expanded-object pointer --- get flat format
+                */
+               ExpandedObjectHeader *eoh;
+               Size            resultsize;
+
+               eoh = DatumGetEOHP(PointerGetDatum(attr));
+               resultsize = EOH_get_flat_size(eoh);
+               result = (struct varlena *) palloc(resultsize);
+               EOH_flatten_into(eoh, (void *) result, resultsize);
+       }
        else
        {
                /*
@@ -144,7 +162,10 @@ heap_tuple_fetch_attr(struct varlena * attr)
  * heap_tuple_untoast_attr -
  *
  *     Public entry point to get back a toasted value from compression
- *     or external storage.
+ *     or external storage.  The result is always non-extended varlena form.
+ *
+ * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
+ * datum, the result will be a pfree'able chunk.
  * ----------
  */
 struct varlena *
@@ -160,12 +181,16 @@ heap_tuple_untoast_attr(struct varlena * attr)
                if (VARATT_IS_COMPRESSED(attr))
                {
                        struct varlena *tmp = attr;
+
                        attr = toast_decompress_datum(tmp);
                        pfree(tmp);
                }
        }
        else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
        {
+               /*
+                * This is an indirect pointer --- dereference it
+                */
                struct varatt_indirect redirect;
 
                VARATT_EXTERNAL_GET_POINTER(redirect, attr);
@@ -174,7 +199,27 @@ heap_tuple_untoast_attr(struct varlena * attr)
                /* nested indirect Datums aren't allowed */
                Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
 
+               /* recurse in case value is still extended in some other way */
                attr = heap_tuple_untoast_attr(attr);
+
+               /* if it isn't, we'd better copy it */
+               if (attr == (struct varlena *) redirect.pointer)
+               {
+                       struct varlena *result;
+
+                       result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+                       memcpy(result, attr, VARSIZE_ANY(attr));
+                       attr = result;
+               }
+       }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               /*
+                * This is an expanded-object pointer --- get flat format
+                */
+               attr = heap_tuple_fetch_attr(attr);
+               /* flatteners are not allowed to produce compressed/short output */
+               Assert(!VARATT_IS_EXTENDED(attr));
        }
        else if (VARATT_IS_COMPRESSED(attr))
        {
@@ -243,12 +288,20 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
                return heap_tuple_untoast_attr_slice(redirect.pointer,
                                                                                         sliceoffset, slicelength);
        }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               /* pass it off to heap_tuple_fetch_attr to flatten */
+               preslice = heap_tuple_fetch_attr(attr);
+       }
        else
                preslice = attr;
 
+       Assert(!VARATT_IS_EXTERNAL(preslice));
+
        if (VARATT_IS_COMPRESSED(preslice))
        {
                struct varlena *tmp = preslice;
+
                preslice = toast_decompress_datum(tmp);
 
                if (tmp != attr)
@@ -321,6 +374,10 @@ toast_raw_datum_size(Datum value)
 
                return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
        }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               result = EOH_get_flat_size(DatumGetEOHP(value));
+       }
        else if (VARATT_IS_COMPRESSED(attr))
        {
                /* here, va_rawsize is just the payload size */
@@ -377,6 +434,10 @@ toast_datum_size(Datum value)
 
                return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
        }
+       else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+       {
+               result = EOH_get_flat_size(DatumGetEOHP(value));
+       }
        else if (VARATT_IS_SHORT(attr))
        {
                result = VARSIZE_SHORT(attr);
@@ -400,7 +461,7 @@ toast_datum_size(Datum value)
  * ----------
  */
 void
-toast_delete(Relation rel, HeapTuple oldtup)
+toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
 {
        TupleDesc       tupleDesc;
        Form_pg_attribute *att;
@@ -447,9 +508,7 @@ toast_delete(Relation rel, HeapTuple oldtup)
                        if (toast_isnull[i])
                                continue;
                        else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
-                               toast_delete_datum(rel, value);
-                       else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value)))
-                               elog(ERROR, "attempt to delete tuple containing indirect datums");
+                               toast_delete_datum(rel, value, is_speculative);
                }
        }
 }
@@ -501,6 +560,14 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
        bool            toast_free[MaxHeapAttributeNumber];
        bool            toast_delold[MaxHeapAttributeNumber];
 
+       /*
+        * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super
+        * deletions just normally insert/delete the toast values. It seems
+        * easiest to deal with that here, instead of in, potentially, multiple
+        * callers.
+        */
+       options &= ~HEAP_INSERT_SPECULATIVE;
+
        /*
         * We should only ever be called for tuples of plain relations or
         * materialized views --- recursing on a toast rel is bad news.
@@ -611,7 +678,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 
                        /*
                         * We took care of UPDATE above, so any external value we find
-                        * still in the tuple must be someone else's we cannot reuse.
+                        * still in the tuple must be someone else's that we cannot reuse
+                        * (this includes the case of an out-of-line in-memory datum).
                         * Fetch it back (without decompression, unless we are forcing
                         * PLAIN storage).  If necessary, we'll push it out as a new
                         * external value below.
@@ -655,7 +723,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
         */
 
        /* compute header overhead --- this should match heap_form_tuple() */
-       hoff = offsetof(HeapTupleHeaderData, t_bits);
+       hoff = SizeofHeapTupleHeader;
        if (has_nulls)
                hoff += BITMAPLEN(numAttrs);
        if (newtup->t_data->t_infomask & HEAP_HASOID)
@@ -941,7 +1009,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
                 * different conclusion about the size of the null bitmap, or even
                 * whether there needs to be one at all.
                 */
-               new_header_len = offsetof(HeapTupleHeaderData, t_bits);
+               new_header_len = SizeofHeapTupleHeader;
                if (has_nulls)
                        new_header_len += BITMAPLEN(numAttrs);
                if (olddata->t_infomask & HEAP_HASOID)
@@ -964,7 +1032,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
                /*
                 * Copy the existing tuple header, but adjust natts and t_hoff.
                 */
-               memcpy(new_data, olddata, offsetof(HeapTupleHeaderData, t_bits));
+               memcpy(new_data, olddata, SizeofHeapTupleHeader);
                HeapTupleHeaderSetNatts(new_data, numAttrs);
                new_data->t_hoff = new_header_len;
                if (olddata->t_infomask & HEAP_HASOID)
@@ -996,7 +1064,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
        if (need_delold)
                for (i = 0; i < numAttrs; i++)
                        if (toast_delold[i])
-                               toast_delete_datum(rel, toast_oldvalues[i]);
+                               toast_delete_datum(rel, toast_oldvalues[i], false);
 
        return result_tuple;
 }
@@ -1043,7 +1111,7 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
                        new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
                        if (VARATT_IS_EXTERNAL(new_value))
                        {
-                               new_value = toast_fetch_datum(new_value);
+                               new_value = heap_tuple_fetch_attr(new_value);
                                toast_values[i] = PointerGetDatum(new_value);
                                toast_free[i] = true;
                        }
@@ -1174,7 +1242,7 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup,
         *
         * This should match the reconstruction code in toast_insert_or_update.
         */
-       new_header_len = offsetof(HeapTupleHeaderData, t_bits);
+       new_header_len = SizeofHeapTupleHeader;
        if (has_nulls)
                new_header_len += BITMAPLEN(numAttrs);
        if (tup->t_infomask & HEAP_HASOID)
@@ -1189,7 +1257,7 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup,
        /*
         * Copy the existing tuple header, but adjust natts and t_hoff.
         */
-       memcpy(new_data, tup, offsetof(HeapTupleHeaderData, t_bits));
+       memcpy(new_data, tup, SizeofHeapTupleHeader);
        HeapTupleHeaderSetNatts(new_data, numAttrs);
        new_data->t_hoff = new_header_len;
        if (tup->t_infomask & HEAP_HASOID)
@@ -1343,11 +1411,13 @@ toast_save_datum(Relation rel, Datum value,
        CommandId       mycid = GetCurrentCommandId(true);
        struct varlena *result;
        struct varatt_external toast_pointer;
-       struct
+       union
        {
                struct varlena hdr;
-               char            data[TOAST_MAX_CHUNK_SIZE]; /* make struct big enough */
-               int32           align_it;       /* ensure struct is aligned well enough */
+               /* this is to make the union big enough for a chunk: */
+               char            data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
+               /* ensure union is aligned well enough: */
+               int32           align_it;
        }                       chunk_data;
        int32           chunk_size;
        int32           chunk_seq = 0;
@@ -1534,7 +1604,9 @@ toast_save_datum(Relation rel, Datum value,
                 * Create the index entry.  We cheat a little here by not using
                 * FormIndexDatum: this relies on the knowledge that the index columns
                 * are the same as the initial columns of the table for all the
-                * indexes.
+                * indexes.  We also cheat by not providing an IndexInfo: this is okay
+                * for now because btree doesn't need one, but we might have to be
+                * more honest someday.
                 *
                 * Note also that there had better not be any user-created index on
                 * the TOAST table, since we don't bother to update anything else.
@@ -1547,7 +1619,8 @@ toast_save_datum(Relation rel, Datum value,
                                                         &(toasttup->t_self),
                                                         toastrel,
                                                         toastidxs[i]->rd_index->indisunique ?
-                                                        UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
+                                                        UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+                                                        NULL);
                }
 
                /*
@@ -1586,7 +1659,7 @@ toast_save_datum(Relation rel, Datum value,
  * ----------
  */
 static void
-toast_delete_datum(Relation rel, Datum value)
+toast_delete_datum(Relation rel, Datum value, bool is_speculative)
 {
        struct varlena *attr = (struct varlena *) DatumGetPointer(value);
        struct varatt_external toast_pointer;
@@ -1597,6 +1670,7 @@ toast_delete_datum(Relation rel, Datum value)
        HeapTuple       toasttup;
        int                     num_indexes;
        int                     validIndex;
+       SnapshotData SnapshotToast;
 
        if (!VARATT_IS_EXTERNAL_ONDISK(attr))
                return;
@@ -1628,14 +1702,18 @@ toast_delete_datum(Relation rel, Datum value)
         * sequence or not, but since we've already locked the index we might as
         * well use systable_beginscan_ordered.)
         */
+       init_toast_snapshot(&SnapshotToast);
        toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
-                                                                                  SnapshotToast, 1, &toastkey);
+                                                                                  &SnapshotToast, 1, &toastkey);
        while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
        {
                /*
                 * Have a chunk, delete it
                 */
-               simple_heap_delete(toastrel, &toasttup->t_self);
+               if (is_speculative)
+                       heap_abort_speculative(toastrel, toasttup);
+               else
+                       simple_heap_delete(toastrel, &toasttup->t_self);
        }
 
        /*
@@ -1662,6 +1740,7 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
        int                     num_indexes;
        int                     validIndex;
        Relation   *toastidxs;
+       SnapshotData SnapshotToast;
 
        /* Fetch a valid index relation */
        validIndex = toast_open_indexes(toastrel,
@@ -1680,9 +1759,10 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
        /*
         * Is there any such chunk?
         */
+       init_toast_snapshot(&SnapshotToast);
        toastscan = systable_beginscan(toastrel,
                                                                   RelationGetRelid(toastidxs[validIndex]),
-                                                                  true, SnapshotToast, 1, &toastkey);
+                                                                  true, &SnapshotToast, 1, &toastkey);
 
        if (systable_getnext(toastscan) != NULL)
                result = true;
@@ -1745,9 +1825,10 @@ toast_fetch_datum(struct varlena * attr)
        int32           chunksize;
        int                     num_indexes;
        int                     validIndex;
+       SnapshotData SnapshotToast;
 
-       if (VARATT_IS_EXTERNAL_INDIRECT(attr))
-               elog(ERROR, "shouldn't be called for indirect tuples");
+       if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+               elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
 
        /* Must copy to access aligned fields */
        VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
@@ -1791,8 +1872,9 @@ toast_fetch_datum(struct varlena * attr)
         */
        nextidx = 0;
 
+       init_toast_snapshot(&SnapshotToast);
        toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
-                                                                                  SnapshotToast, 1, &toastkey);
+                                                                                  &SnapshotToast, 1, &toastkey);
        while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
        {
                /*
@@ -1922,8 +2004,10 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
        int32           chcpyend;
        int                     num_indexes;
        int                     validIndex;
+       SnapshotData SnapshotToast;
 
-       Assert(VARATT_IS_EXTERNAL_ONDISK(attr));
+       if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+               elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
 
        /* Must copy to access aligned fields */
        VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
@@ -2013,9 +2097,10 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
         *
         * The index is on (valueid, chunkidx) so they will come in order
         */
+       init_toast_snapshot(&SnapshotToast);
        nextidx = startchunk;
        toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
-                                                                                SnapshotToast, nscankeys, toastkey);
+                                                                               &SnapshotToast, nscankeys, toastkey);
        while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
        {
                /*
@@ -2220,3 +2305,22 @@ toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
                index_close(toastidxs[i], lock);
        pfree(toastidxs);
 }
+
+/* ----------
+ * init_toast_snapshot
+ *
+ *     Fill in caller-supplied *toast_snapshot as a TOAST snapshot.  An MVCC
+ *     snapshot is needed to initialize it; since we don't know which one to
+ *     use, we take the oldest (ERROR if none).  This is safe: at worst, we
+ *     get a "snapshot too old" error that might have been avoided otherwise.
+ */
+static void
+init_toast_snapshot(Snapshot toast_snapshot)
+{
+       Snapshot        snapshot = GetOldestSnapshot();
+
+       if (snapshot == NULL)
+               elog(ERROR, "no known snapshots");
+
+       InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
+}