]> granicus.if.org Git - postgresql/blob - src/backend/access/heap/tuptoaster.c
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
[postgresql] / src / backend / access / heap / tuptoaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * tuptoaster.c
4  *        Support routines for external and compressed storage of
5  *        variable size attributes.
6  *
7  * Copyright (c) 2000-2015, PostgreSQL Global Development Group
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/access/heap/tuptoaster.c
12  *
13  *
14  * INTERFACE ROUTINES
15  *              toast_insert_or_update -
16  *                      Try to make a given tuple fit into one page by compressing
17  *                      or moving off attributes
18  *
19  *              toast_delete -
20  *                      Reclaim toast storage when a tuple is deleted
21  *
22  *              heap_tuple_untoast_attr -
23  *                      Fetch back a given value from the "secondary" relation
24  *
25  *-------------------------------------------------------------------------
26  */
27
28 #include "postgres.h"
29
30 #include <unistd.h>
31 #include <fcntl.h>
32
33 #include "access/genam.h"
34 #include "access/heapam.h"
35 #include "access/tuptoaster.h"
36 #include "access/xact.h"
37 #include "catalog/catalog.h"
38 #include "common/pg_lzcompress.h"
39 #include "miscadmin.h"
40 #include "utils/fmgroids.h"
41 #include "utils/rel.h"
42 #include "utils/typcache.h"
43 #include "utils/tqual.h"
44
45
46 #undef TOAST_DEBUG
47
48 /*
49  *      The information at the start of the compressed toast data.
50  */
51 typedef struct toast_compress_header
52 {
53         int32           vl_len_;                /* varlena header (do not touch directly!) */
54         int32           rawsize;
55 } toast_compress_header;
56
/*
 * Accessors for the toast_compress_header that leads a compressed toast
 * entry.  The macros taking "ptr" expect a pointer to the first byte of the
 * compressed datum (i.e. to its header).
 *
 *	TOAST_COMPRESS_HDRSZ			size of the header, as an int32
 *	TOAST_COMPRESS_RAWSIZE(ptr)		the header's rawsize field
 *	TOAST_COMPRESS_RAWDATA(ptr)		address of the first byte after the header
 *	TOAST_COMPRESS_SET_RAWSIZE(ptr, len)	store len into the rawsize field
 */
#define TOAST_COMPRESS_HDRSZ \
	((int32) sizeof(toast_compress_header))
#define TOAST_COMPRESS_RAWSIZE(ptr) \
	(((toast_compress_header *) (ptr))->rawsize)
#define TOAST_COMPRESS_RAWDATA(ptr) \
	(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
	(((toast_compress_header *) (ptr))->rawsize = (len))
67
68 static void toast_delete_datum(Relation rel, Datum value);
69 static Datum toast_save_datum(Relation rel, Datum value,
70                                  struct varlena * oldexternal, int options);
71 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
72 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
73 static struct varlena *toast_fetch_datum(struct varlena * attr);
74 static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
75                                                 int32 sliceoffset, int32 length);
76 static struct varlena *toast_decompress_datum(struct varlena * attr);
77 static int toast_open_indexes(Relation toastrel,
78                                    LOCKMODE lock,
79                                    Relation **toastidxs,
80                                    int *num_indexes);
81 static void toast_close_indexes(Relation *toastidxs, int num_indexes,
82                                         LOCKMODE lock);
83
84
85 /* ----------
86  * heap_tuple_fetch_attr -
87  *
88  *      Public entry point to get back a toasted value from
89  *      external source (possibly still in compressed format).
90  *
91  * This will return a datum that contains all the data internally, ie, not
92  * relying on external storage or memory, but it can still be compressed or
93  * have a short header.  Note some callers assume that if the input is an
94  * EXTERNAL datum, the result will be a pfree'able chunk.
95  * ----------
96  */
97 struct varlena *
98 heap_tuple_fetch_attr(struct varlena * attr)
99 {
100         struct varlena *result;
101
102         if (VARATT_IS_EXTERNAL_ONDISK(attr))
103         {
104                 /*
105                  * This is an external stored plain value
106                  */
107                 result = toast_fetch_datum(attr);
108         }
109         else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
110         {
111                 /*
112                  * This is an indirect pointer --- dereference it
113                  */
114                 struct varatt_indirect redirect;
115
116                 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
117                 attr = (struct varlena *) redirect.pointer;
118
119                 /* nested indirect Datums aren't allowed */
120                 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
121
122                 /* recurse if value is still external in some other way */
123                 if (VARATT_IS_EXTERNAL(attr))
124                         return heap_tuple_fetch_attr(attr);
125
126                 /*
127                  * Copy into the caller's memory context, in case caller tries to
128                  * pfree the result.
129                  */
130                 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
131                 memcpy(result, attr, VARSIZE_ANY(attr));
132         }
133         else
134         {
135                 /*
136                  * This is a plain value inside of the main tuple - why am I called?
137                  */
138                 result = attr;
139         }
140
141         return result;
142 }
143
144
145 /* ----------
146  * heap_tuple_untoast_attr -
147  *
148  *      Public entry point to get back a toasted value from compression
149  *      or external storage.  The result is always non-extended varlena form.
150  *
151  * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
152  * datum, the result will be a pfree'able chunk.
153  * ----------
154  */
155 struct varlena *
156 heap_tuple_untoast_attr(struct varlena * attr)
157 {
158         if (VARATT_IS_EXTERNAL_ONDISK(attr))
159         {
160                 /*
161                  * This is an externally stored datum --- fetch it back from there
162                  */
163                 attr = toast_fetch_datum(attr);
164                 /* If it's compressed, decompress it */
165                 if (VARATT_IS_COMPRESSED(attr))
166                 {
167                         struct varlena *tmp = attr;
168
169                         attr = toast_decompress_datum(tmp);
170                         pfree(tmp);
171                 }
172         }
173         else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
174         {
175                 /*
176                  * This is an indirect pointer --- dereference it
177                  */
178                 struct varatt_indirect redirect;
179
180                 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
181                 attr = (struct varlena *) redirect.pointer;
182
183                 /* nested indirect Datums aren't allowed */
184                 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
185
186                 /* recurse in case value is still extended in some other way */
187                 attr = heap_tuple_untoast_attr(attr);
188
189                 /* if it isn't, we'd better copy it */
190                 if (attr == (struct varlena *) redirect.pointer)
191                 {
192                         struct varlena *result;
193
194                         result = (struct varlena *) palloc(VARSIZE_ANY(attr));
195                         memcpy(result, attr, VARSIZE_ANY(attr));
196                         attr = result;
197                 }
198         }
199         else if (VARATT_IS_COMPRESSED(attr))
200         {
201                 /*
202                  * This is a compressed value inside of the main tuple
203                  */
204                 attr = toast_decompress_datum(attr);
205         }
206         else if (VARATT_IS_SHORT(attr))
207         {
208                 /*
209                  * This is a short-header varlena --- convert to 4-byte header format
210                  */
211                 Size            data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
212                 Size            new_size = data_size + VARHDRSZ;
213                 struct varlena *new_attr;
214
215                 new_attr = (struct varlena *) palloc(new_size);
216                 SET_VARSIZE(new_attr, new_size);
217                 memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
218                 attr = new_attr;
219         }
220
221         return attr;
222 }
223
224
225 /* ----------
226  * heap_tuple_untoast_attr_slice -
227  *
228  *              Public entry point to get back part of a toasted value
229  *              from compression or external storage.
230  * ----------
231  */
232 struct varlena *
233 heap_tuple_untoast_attr_slice(struct varlena * attr,
234                                                           int32 sliceoffset, int32 slicelength)
235 {
236         struct varlena *preslice;
237         struct varlena *result;
238         char       *attrdata;
239         int32           attrsize;
240
241         if (VARATT_IS_EXTERNAL_ONDISK(attr))
242         {
243                 struct varatt_external toast_pointer;
244
245                 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
246
247                 /* fast path for non-compressed external datums */
248                 if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
249                         return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
250
251                 /* fetch it back (compressed marker will get set automatically) */
252                 preslice = toast_fetch_datum(attr);
253         }
254         else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
255         {
256                 struct varatt_indirect redirect;
257
258                 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
259
260                 /* nested indirect Datums aren't allowed */
261                 Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
262
263                 return heap_tuple_untoast_attr_slice(redirect.pointer,
264                                                                                          sliceoffset, slicelength);
265         }
266         else
267                 preslice = attr;
268
269         Assert(!VARATT_IS_EXTERNAL(preslice));
270
271         if (VARATT_IS_COMPRESSED(preslice))
272         {
273                 struct varlena *tmp = preslice;
274
275                 preslice = toast_decompress_datum(tmp);
276
277                 if (tmp != attr)
278                         pfree(tmp);
279         }
280
281         if (VARATT_IS_SHORT(preslice))
282         {
283                 attrdata = VARDATA_SHORT(preslice);
284                 attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
285         }
286         else
287         {
288                 attrdata = VARDATA(preslice);
289                 attrsize = VARSIZE(preslice) - VARHDRSZ;
290         }
291
292         /* slicing of datum for compressed cases and plain value */
293
294         if (sliceoffset >= attrsize)
295         {
296                 sliceoffset = 0;
297                 slicelength = 0;
298         }
299
300         if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
301                 slicelength = attrsize - sliceoffset;
302
303         result = (struct varlena *) palloc(slicelength + VARHDRSZ);
304         SET_VARSIZE(result, slicelength + VARHDRSZ);
305
306         memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
307
308         if (preslice != attr)
309                 pfree(preslice);
310
311         return result;
312 }
313
314
315 /* ----------
316  * toast_raw_datum_size -
317  *
318  *      Return the raw (detoasted) size of a varlena datum
319  *      (including the VARHDRSZ header)
320  * ----------
321  */
322 Size
323 toast_raw_datum_size(Datum value)
324 {
325         struct varlena *attr = (struct varlena *) DatumGetPointer(value);
326         Size            result;
327
328         if (VARATT_IS_EXTERNAL_ONDISK(attr))
329         {
330                 /* va_rawsize is the size of the original datum -- including header */
331                 struct varatt_external toast_pointer;
332
333                 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
334                 result = toast_pointer.va_rawsize;
335         }
336         else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
337         {
338                 struct varatt_indirect toast_pointer;
339
340                 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
341
342                 /* nested indirect Datums aren't allowed */
343                 Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
344
345                 return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
346         }
347         else if (VARATT_IS_COMPRESSED(attr))
348         {
349                 /* here, va_rawsize is just the payload size */
350                 result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
351         }
352         else if (VARATT_IS_SHORT(attr))
353         {
354                 /*
355                  * we have to normalize the header length to VARHDRSZ or else the
356                  * callers of this function will be confused.
357                  */
358                 result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
359         }
360         else
361         {
362                 /* plain untoasted datum */
363                 result = VARSIZE(attr);
364         }
365         return result;
366 }
367
368 /* ----------
369  * toast_datum_size
370  *
371  *      Return the physical storage size (possibly compressed) of a varlena datum
372  * ----------
373  */
374 Size
375 toast_datum_size(Datum value)
376 {
377         struct varlena *attr = (struct varlena *) DatumGetPointer(value);
378         Size            result;
379
380         if (VARATT_IS_EXTERNAL_ONDISK(attr))
381         {
382                 /*
383                  * Attribute is stored externally - return the extsize whether
384                  * compressed or not.  We do not count the size of the toast pointer
385                  * ... should we?
386                  */
387                 struct varatt_external toast_pointer;
388
389                 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
390                 result = toast_pointer.va_extsize;
391         }
392         else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
393         {
394                 struct varatt_indirect toast_pointer;
395
396                 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
397
398                 /* nested indirect Datums aren't allowed */
399                 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
400
401                 return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
402         }
403         else if (VARATT_IS_SHORT(attr))
404         {
405                 result = VARSIZE_SHORT(attr);
406         }
407         else
408         {
409                 /*
410                  * Attribute is stored inline either compressed or not, just calculate
411                  * the size of the datum in either case.
412                  */
413                 result = VARSIZE(attr);
414         }
415         return result;
416 }
417
418
419 /* ----------
420  * toast_delete -
421  *
422  *      Cascaded delete toast-entries on DELETE
423  * ----------
424  */
425 void
426 toast_delete(Relation rel, HeapTuple oldtup)
427 {
428         TupleDesc       tupleDesc;
429         Form_pg_attribute *att;
430         int                     numAttrs;
431         int                     i;
432         Datum           toast_values[MaxHeapAttributeNumber];
433         bool            toast_isnull[MaxHeapAttributeNumber];
434
435         /*
436          * We should only ever be called for tuples of plain relations or
437          * materialized views --- recursing on a toast rel is bad news.
438          */
439         Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
440                    rel->rd_rel->relkind == RELKIND_MATVIEW);
441
442         /*
443          * Get the tuple descriptor and break down the tuple into fields.
444          *
445          * NOTE: it's debatable whether to use heap_deform_tuple() here or just
446          * heap_getattr() only the varlena columns.  The latter could win if there
447          * are few varlena columns and many non-varlena ones. However,
448          * heap_deform_tuple costs only O(N) while the heap_getattr way would cost
449          * O(N^2) if there are many varlena columns, so it seems better to err on
450          * the side of linear cost.  (We won't even be here unless there's at
451          * least one varlena column, by the way.)
452          */
453         tupleDesc = rel->rd_att;
454         att = tupleDesc->attrs;
455         numAttrs = tupleDesc->natts;
456
457         Assert(numAttrs <= MaxHeapAttributeNumber);
458         heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull);
459
460         /*
461          * Check for external stored attributes and delete them from the secondary
462          * relation.
463          */
464         for (i = 0; i < numAttrs; i++)
465         {
466                 if (att[i]->attlen == -1)
467                 {
468                         Datum           value = toast_values[i];
469
470                         if (toast_isnull[i])
471                                 continue;
472                         else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
473                                 toast_delete_datum(rel, value);
474                 }
475         }
476 }
477
478
479 /* ----------
480  * toast_insert_or_update -
481  *
482  *      Delete no-longer-used toast-entries and create new ones to
483  *      make the new tuple fit on INSERT or UPDATE
484  *
485  * Inputs:
486  *      newtup: the candidate new tuple to be inserted
487  *      oldtup: the old row version for UPDATE, or NULL for INSERT
488  *      options: options to be passed to heap_insert() for toast rows
489  * Result:
490  *      either newtup if no toasting is needed, or a palloc'd modified tuple
491  *      that is what should actually get stored
492  *
493  * NOTE: neither newtup nor oldtup will be modified.  This is a change
494  * from the pre-8.1 API of this routine.
495  * ----------
496  */
497 HeapTuple
498 toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
499                                            int options)
500 {
501         HeapTuple       result_tuple;
502         TupleDesc       tupleDesc;
503         Form_pg_attribute *att;
504         int                     numAttrs;
505         int                     i;
506
507         bool            need_change = false;
508         bool            need_free = false;
509         bool            need_delold = false;
510         bool            has_nulls = false;
511
512         Size            maxDataLen;
513         Size            hoff;
514
515         char            toast_action[MaxHeapAttributeNumber];
516         bool            toast_isnull[MaxHeapAttributeNumber];
517         bool            toast_oldisnull[MaxHeapAttributeNumber];
518         Datum           toast_values[MaxHeapAttributeNumber];
519         Datum           toast_oldvalues[MaxHeapAttributeNumber];
520         struct varlena *toast_oldexternal[MaxHeapAttributeNumber];
521         int32           toast_sizes[MaxHeapAttributeNumber];
522         bool            toast_free[MaxHeapAttributeNumber];
523         bool            toast_delold[MaxHeapAttributeNumber];
524
525         /*
526          * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super
527          * deletions just normally insert/delete the toast values. It seems
528          * easiest to deal with that here, instead on, potentially, multiple
529          * callers.
530          */
531         options &= ~HEAP_INSERT_SPECULATIVE;
532
533         /*
534          * We should only ever be called for tuples of plain relations or
535          * materialized views --- recursing on a toast rel is bad news.
536          */
537         Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
538                    rel->rd_rel->relkind == RELKIND_MATVIEW);
539
540         /*
541          * Get the tuple descriptor and break down the tuple(s) into fields.
542          */
543         tupleDesc = rel->rd_att;
544         att = tupleDesc->attrs;
545         numAttrs = tupleDesc->natts;
546
547         Assert(numAttrs <= MaxHeapAttributeNumber);
548         heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
549         if (oldtup != NULL)
550                 heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);
551
552         /* ----------
553          * Then collect information about the values given
554          *
555          * NOTE: toast_action[i] can have these values:
556          *              ' '             default handling
557          *              'p'             already processed --- don't touch it
558          *              'x'             incompressible, but OK to move off
559          *
560          * NOTE: toast_sizes[i] is only made valid for varlena attributes with
561          *              toast_action[i] different from 'p'.
562          * ----------
563          */
564         memset(toast_action, ' ', numAttrs * sizeof(char));
565         memset(toast_oldexternal, 0, numAttrs * sizeof(struct varlena *));
566         memset(toast_free, 0, numAttrs * sizeof(bool));
567         memset(toast_delold, 0, numAttrs * sizeof(bool));
568
569         for (i = 0; i < numAttrs; i++)
570         {
571                 struct varlena *old_value;
572                 struct varlena *new_value;
573
574                 if (oldtup != NULL)
575                 {
576                         /*
577                          * For UPDATE get the old and new values of this attribute
578                          */
579                         old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]);
580                         new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
581
582                         /*
583                          * If the old value is stored on disk, check if it has changed so
584                          * we have to delete it later.
585                          */
586                         if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
587                                 VARATT_IS_EXTERNAL_ONDISK(old_value))
588                         {
589                                 if (toast_isnull[i] || !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
590                                         memcmp((char *) old_value, (char *) new_value,
591                                                    VARSIZE_EXTERNAL(old_value)) != 0)
592                                 {
593                                         /*
594                                          * The old external stored value isn't needed any more
595                                          * after the update
596                                          */
597                                         toast_delold[i] = true;
598                                         need_delold = true;
599                                 }
600                                 else
601                                 {
602                                         /*
603                                          * This attribute isn't changed by this update so we reuse
604                                          * the original reference to the old value in the new
605                                          * tuple.
606                                          */
607                                         toast_action[i] = 'p';
608                                         continue;
609                                 }
610                         }
611                 }
612                 else
613                 {
614                         /*
615                          * For INSERT simply get the new value
616                          */
617                         new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
618                 }
619
620                 /*
621                  * Handle NULL attributes
622                  */
623                 if (toast_isnull[i])
624                 {
625                         toast_action[i] = 'p';
626                         has_nulls = true;
627                         continue;
628                 }
629
630                 /*
631                  * Now look at varlena attributes
632                  */
633                 if (att[i]->attlen == -1)
634                 {
635                         /*
636                          * If the table's attribute says PLAIN always, force it so.
637                          */
638                         if (att[i]->attstorage == 'p')
639                                 toast_action[i] = 'p';
640
641                         /*
642                          * We took care of UPDATE above, so any external value we find
643                          * still in the tuple must be someone else's that we cannot reuse
644                          * (this includes the case of an out-of-line in-memory datum).
645                          * Fetch it back (without decompression, unless we are forcing
646                          * PLAIN storage).  If necessary, we'll push it out as a new
647                          * external value below.
648                          */
649                         if (VARATT_IS_EXTERNAL(new_value))
650                         {
651                                 toast_oldexternal[i] = new_value;
652                                 if (att[i]->attstorage == 'p')
653                                         new_value = heap_tuple_untoast_attr(new_value);
654                                 else
655                                         new_value = heap_tuple_fetch_attr(new_value);
656                                 toast_values[i] = PointerGetDatum(new_value);
657                                 toast_free[i] = true;
658                                 need_change = true;
659                                 need_free = true;
660                         }
661
662                         /*
663                          * Remember the size of this attribute
664                          */
665                         toast_sizes[i] = VARSIZE_ANY(new_value);
666                 }
667                 else
668                 {
669                         /*
670                          * Not a varlena attribute, plain storage always
671                          */
672                         toast_action[i] = 'p';
673                 }
674         }
675
676         /* ----------
677          * Compress and/or save external until data fits into target length
678          *
679          *      1: Inline compress attributes with attstorage 'x', and store very
680          *         large attributes with attstorage 'x' or 'e' external immediately
681          *      2: Store attributes with attstorage 'x' or 'e' external
682          *      3: Inline compress attributes with attstorage 'm'
683          *      4: Store attributes with attstorage 'm' external
684          * ----------
685          */
686
687         /* compute header overhead --- this should match heap_form_tuple() */
688         hoff = SizeofHeapTupleHeader;
689         if (has_nulls)
690                 hoff += BITMAPLEN(numAttrs);
691         if (newtup->t_data->t_infomask & HEAP_HASOID)
692                 hoff += sizeof(Oid);
693         hoff = MAXALIGN(hoff);
694         /* now convert to a limit on the tuple data size */
695         maxDataLen = TOAST_TUPLE_TARGET - hoff;
696
697         /*
698          * Look for attributes with attstorage 'x' to compress.  Also find large
699          * attributes with attstorage 'x' or 'e', and store them external.
700          */
701         while (heap_compute_data_size(tupleDesc,
702                                                                   toast_values, toast_isnull) > maxDataLen)
703         {
704                 int                     biggest_attno = -1;
705                 int32           biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
706                 Datum           old_value;
707                 Datum           new_value;
708
709                 /*
710                  * Search for the biggest yet unprocessed internal attribute
711                  */
712                 for (i = 0; i < numAttrs; i++)
713                 {
714                         if (toast_action[i] != ' ')
715                                 continue;
716                         if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
717                                 continue;               /* can't happen, toast_action would be 'p' */
718                         if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
719                                 continue;
720                         if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
721                                 continue;
722                         if (toast_sizes[i] > biggest_size)
723                         {
724                                 biggest_attno = i;
725                                 biggest_size = toast_sizes[i];
726                         }
727                 }
728
729                 if (biggest_attno < 0)
730                         break;
731
732                 /*
733                  * Attempt to compress it inline, if it has attstorage 'x'
734                  */
735                 i = biggest_attno;
736                 if (att[i]->attstorage == 'x')
737                 {
738                         old_value = toast_values[i];
739                         new_value = toast_compress_datum(old_value);
740
741                         if (DatumGetPointer(new_value) != NULL)
742                         {
743                                 /* successful compression */
744                                 if (toast_free[i])
745                                         pfree(DatumGetPointer(old_value));
746                                 toast_values[i] = new_value;
747                                 toast_free[i] = true;
748                                 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
749                                 need_change = true;
750                                 need_free = true;
751                         }
752                         else
753                         {
754                                 /* incompressible, ignore on subsequent compression passes */
755                                 toast_action[i] = 'x';
756                         }
757                 }
758                 else
759                 {
760                         /* has attstorage 'e', ignore on subsequent compression passes */
761                         toast_action[i] = 'x';
762                 }
763
764                 /*
765                  * If this value is by itself more than maxDataLen (after compression
766                  * if any), push it out to the toast table immediately, if possible.
767                  * This avoids uselessly compressing other fields in the common case
768                  * where we have one long field and several short ones.
769                  *
770                  * XXX maybe the threshold should be less than maxDataLen?
771                  */
772                 if (toast_sizes[i] > maxDataLen &&
773                         rel->rd_rel->reltoastrelid != InvalidOid)
774                 {
775                         old_value = toast_values[i];
776                         toast_action[i] = 'p';
777                         toast_values[i] = toast_save_datum(rel, toast_values[i],
778                                                                                            toast_oldexternal[i], options);
779                         if (toast_free[i])
780                                 pfree(DatumGetPointer(old_value));
781                         toast_free[i] = true;
782                         need_change = true;
783                         need_free = true;
784                 }
785         }
786
787         /*
788          * Second we look for attributes of attstorage 'x' or 'e' that are still
789          * inline.  But skip this if there's no toast table to push them to.
790          */
791         while (heap_compute_data_size(tupleDesc,
792                                                                   toast_values, toast_isnull) > maxDataLen &&
793                    rel->rd_rel->reltoastrelid != InvalidOid)
794         {
795                 int                     biggest_attno = -1;
796                 int32           biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
797                 Datum           old_value;
798
799                 /*------
800                  * Search for the biggest yet inlined attribute with
801                  * attstorage equals 'x' or 'e'
802                  *------
803                  */
804                 for (i = 0; i < numAttrs; i++)
805                 {
806                         if (toast_action[i] == 'p')
807                                 continue;
808                         if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
809                                 continue;               /* can't happen, toast_action would be 'p' */
810                         if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
811                                 continue;
812                         if (toast_sizes[i] > biggest_size)
813                         {
814                                 biggest_attno = i;
815                                 biggest_size = toast_sizes[i];
816                         }
817                 }
818
819                 if (biggest_attno < 0)
820                         break;
821
822                 /*
823                  * Store this external
824                  */
825                 i = biggest_attno;
826                 old_value = toast_values[i];
827                 toast_action[i] = 'p';
828                 toast_values[i] = toast_save_datum(rel, toast_values[i],
829                                                                                    toast_oldexternal[i], options);
830                 if (toast_free[i])
831                         pfree(DatumGetPointer(old_value));
832                 toast_free[i] = true;
833
834                 need_change = true;
835                 need_free = true;
836         }
837
838         /*
839          * Round 3 - this time we take attributes with storage 'm' into
840          * compression
841          */
842         while (heap_compute_data_size(tupleDesc,
843                                                                   toast_values, toast_isnull) > maxDataLen)
844         {
845                 int                     biggest_attno = -1;
846                 int32           biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
847                 Datum           old_value;
848                 Datum           new_value;
849
850                 /*
851                  * Search for the biggest yet uncompressed internal attribute
852                  */
853                 for (i = 0; i < numAttrs; i++)
854                 {
855                         if (toast_action[i] != ' ')
856                                 continue;
857                         if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
858                                 continue;               /* can't happen, toast_action would be 'p' */
859                         if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
860                                 continue;
861                         if (att[i]->attstorage != 'm')
862                                 continue;
863                         if (toast_sizes[i] > biggest_size)
864                         {
865                                 biggest_attno = i;
866                                 biggest_size = toast_sizes[i];
867                         }
868                 }
869
870                 if (biggest_attno < 0)
871                         break;
872
873                 /*
874                  * Attempt to compress it inline
875                  */
876                 i = biggest_attno;
877                 old_value = toast_values[i];
878                 new_value = toast_compress_datum(old_value);
879
880                 if (DatumGetPointer(new_value) != NULL)
881                 {
882                         /* successful compression */
883                         if (toast_free[i])
884                                 pfree(DatumGetPointer(old_value));
885                         toast_values[i] = new_value;
886                         toast_free[i] = true;
887                         toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
888                         need_change = true;
889                         need_free = true;
890                 }
891                 else
892                 {
893                         /* incompressible, ignore on subsequent compression passes */
894                         toast_action[i] = 'x';
895                 }
896         }
897
898         /*
899          * Finally we store attributes of type 'm' externally.  At this point we
900          * increase the target tuple size, so that 'm' attributes aren't stored
901          * externally unless really necessary.
902          */
903         maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;
904
905         while (heap_compute_data_size(tupleDesc,
906                                                                   toast_values, toast_isnull) > maxDataLen &&
907                    rel->rd_rel->reltoastrelid != InvalidOid)
908         {
909                 int                     biggest_attno = -1;
910                 int32           biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
911                 Datum           old_value;
912
913                 /*--------
914                  * Search for the biggest yet inlined attribute with
915                  * attstorage = 'm'
916                  *--------
917                  */
918                 for (i = 0; i < numAttrs; i++)
919                 {
920                         if (toast_action[i] == 'p')
921                                 continue;
922                         if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
923                                 continue;               /* can't happen, toast_action would be 'p' */
924                         if (att[i]->attstorage != 'm')
925                                 continue;
926                         if (toast_sizes[i] > biggest_size)
927                         {
928                                 biggest_attno = i;
929                                 biggest_size = toast_sizes[i];
930                         }
931                 }
932
933                 if (biggest_attno < 0)
934                         break;
935
936                 /*
937                  * Store this external
938                  */
939                 i = biggest_attno;
940                 old_value = toast_values[i];
941                 toast_action[i] = 'p';
942                 toast_values[i] = toast_save_datum(rel, toast_values[i],
943                                                                                    toast_oldexternal[i], options);
944                 if (toast_free[i])
945                         pfree(DatumGetPointer(old_value));
946                 toast_free[i] = true;
947
948                 need_change = true;
949                 need_free = true;
950         }
951
952         /*
953          * In the case we toasted any values, we need to build a new heap tuple
954          * with the changed values.
955          */
956         if (need_change)
957         {
958                 HeapTupleHeader olddata = newtup->t_data;
959                 HeapTupleHeader new_data;
960                 int32           new_header_len;
961                 int32           new_data_len;
962                 int32           new_tuple_len;
963
964                 /*
965                  * Calculate the new size of the tuple.
966                  *
967                  * Note: we used to assume here that the old tuple's t_hoff must equal
968                  * the new_header_len value, but that was incorrect.  The old tuple
969                  * might have a smaller-than-current natts, if there's been an ALTER
970                  * TABLE ADD COLUMN since it was stored; and that would lead to a
971                  * different conclusion about the size of the null bitmap, or even
972                  * whether there needs to be one at all.
973                  */
974                 new_header_len = SizeofHeapTupleHeader;
975                 if (has_nulls)
976                         new_header_len += BITMAPLEN(numAttrs);
977                 if (olddata->t_infomask & HEAP_HASOID)
978                         new_header_len += sizeof(Oid);
979                 new_header_len = MAXALIGN(new_header_len);
980                 new_data_len = heap_compute_data_size(tupleDesc,
981                                                                                           toast_values, toast_isnull);
982                 new_tuple_len = new_header_len + new_data_len;
983
984                 /*
985                  * Allocate and zero the space needed, and fill HeapTupleData fields.
986                  */
987                 result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_tuple_len);
988                 result_tuple->t_len = new_tuple_len;
989                 result_tuple->t_self = newtup->t_self;
990                 result_tuple->t_tableOid = newtup->t_tableOid;
991                 new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
992                 result_tuple->t_data = new_data;
993
994                 /*
995                  * Copy the existing tuple header, but adjust natts and t_hoff.
996                  */
997                 memcpy(new_data, olddata, SizeofHeapTupleHeader);
998                 HeapTupleHeaderSetNatts(new_data, numAttrs);
999                 new_data->t_hoff = new_header_len;
1000                 if (olddata->t_infomask & HEAP_HASOID)
1001                         HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(olddata));
1002
1003                 /* Copy over the data, and fill the null bitmap if needed */
1004                 heap_fill_tuple(tupleDesc,
1005                                                 toast_values,
1006                                                 toast_isnull,
1007                                                 (char *) new_data + new_header_len,
1008                                                 new_data_len,
1009                                                 &(new_data->t_infomask),
1010                                                 has_nulls ? new_data->t_bits : NULL);
1011         }
1012         else
1013                 result_tuple = newtup;
1014
1015         /*
1016          * Free allocated temp values
1017          */
1018         if (need_free)
1019                 for (i = 0; i < numAttrs; i++)
1020                         if (toast_free[i])
1021                                 pfree(DatumGetPointer(toast_values[i]));
1022
1023         /*
1024          * Delete external values from the old tuple
1025          */
1026         if (need_delold)
1027                 for (i = 0; i < numAttrs; i++)
1028                         if (toast_delold[i])
1029                                 toast_delete_datum(rel, toast_oldvalues[i]);
1030
1031         return result_tuple;
1032 }
1033
1034
1035 /* ----------
1036  * toast_flatten_tuple -
1037  *
1038  *      "Flatten" a tuple to contain no out-of-line toasted fields.
1039  *      (This does not eliminate compressed or short-header datums.)
1040  *
1041  *      Note: we expect the caller already checked HeapTupleHasExternal(tup),
1042  *      so there is no need for a short-circuit path.
1043  * ----------
1044  */
1045 HeapTuple
1046 toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
1047 {
1048         HeapTuple       new_tuple;
1049         Form_pg_attribute *att = tupleDesc->attrs;
1050         int                     numAttrs = tupleDesc->natts;
1051         int                     i;
1052         Datum           toast_values[MaxTupleAttributeNumber];
1053         bool            toast_isnull[MaxTupleAttributeNumber];
1054         bool            toast_free[MaxTupleAttributeNumber];
1055
1056         /*
1057          * Break down the tuple into fields.
1058          */
1059         Assert(numAttrs <= MaxTupleAttributeNumber);
1060         heap_deform_tuple(tup, tupleDesc, toast_values, toast_isnull);
1061
1062         memset(toast_free, 0, numAttrs * sizeof(bool));
1063
1064         for (i = 0; i < numAttrs; i++)
1065         {
1066                 /*
1067                  * Look at non-null varlena attributes
1068                  */
1069                 if (!toast_isnull[i] && att[i]->attlen == -1)
1070                 {
1071                         struct varlena *new_value;
1072
1073                         new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1074                         if (VARATT_IS_EXTERNAL(new_value))
1075                         {
1076                                 new_value = heap_tuple_fetch_attr(new_value);
1077                                 toast_values[i] = PointerGetDatum(new_value);
1078                                 toast_free[i] = true;
1079                         }
1080                 }
1081         }
1082
1083         /*
1084          * Form the reconfigured tuple.
1085          */
1086         new_tuple = heap_form_tuple(tupleDesc, toast_values, toast_isnull);
1087
1088         /*
1089          * Be sure to copy the tuple's OID and identity fields.  We also make a
1090          * point of copying visibility info, just in case anybody looks at those
1091          * fields in a syscache entry.
1092          */
1093         if (tupleDesc->tdhasoid)
1094                 HeapTupleSetOid(new_tuple, HeapTupleGetOid(tup));
1095
1096         new_tuple->t_self = tup->t_self;
1097         new_tuple->t_tableOid = tup->t_tableOid;
1098
1099         new_tuple->t_data->t_choice = tup->t_data->t_choice;
1100         new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
1101         new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
1102         new_tuple->t_data->t_infomask |=
1103                 tup->t_data->t_infomask & HEAP_XACT_MASK;
1104         new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
1105         new_tuple->t_data->t_infomask2 |=
1106                 tup->t_data->t_infomask2 & HEAP2_XACT_MASK;
1107
1108         /*
1109          * Free allocated temp values
1110          */
1111         for (i = 0; i < numAttrs; i++)
1112                 if (toast_free[i])
1113                         pfree(DatumGetPointer(toast_values[i]));
1114
1115         return new_tuple;
1116 }
1117
1118
1119 /* ----------
1120  * toast_flatten_tuple_to_datum -
1121  *
1122  *      "Flatten" a tuple containing out-of-line toasted fields into a Datum.
1123  *      The result is always palloc'd in the current memory context.
1124  *
1125  *      We have a general rule that Datums of container types (rows, arrays,
1126  *      ranges, etc) must not contain any external TOAST pointers.  Without
1127  *      this rule, we'd have to look inside each Datum when preparing a tuple
1128  *      for storage, which would be expensive and would fail to extend cleanly
1129  *      to new sorts of container types.
1130  *
1131  *      However, we don't want to say that tuples represented as HeapTuples
1132  *      can't contain toasted fields, so instead this routine should be called
1133  *      when such a HeapTuple is being converted into a Datum.
1134  *
1135  *      While we're at it, we decompress any compressed fields too.  This is not
1136  *      necessary for correctness, but reflects an expectation that compression
1137  *      will be more effective if applied to the whole tuple not individual
1138  *      fields.  We are not so concerned about that that we want to deconstruct
1139  *      and reconstruct tuples just to get rid of compressed fields, however.
1140  *      So callers typically won't call this unless they see that the tuple has
1141  *      at least one external field.
1142  *
1143  *      On the other hand, in-line short-header varlena fields are left alone.
1144  *      If we "untoasted" them here, they'd just get changed back to short-header
1145  *      format anyway within heap_fill_tuple.
1146  * ----------
1147  */
1148 Datum
1149 toast_flatten_tuple_to_datum(HeapTupleHeader tup,
1150                                                          uint32 tup_len,
1151                                                          TupleDesc tupleDesc)
1152 {
1153         HeapTupleHeader new_data;
1154         int32           new_header_len;
1155         int32           new_data_len;
1156         int32           new_tuple_len;
1157         HeapTupleData tmptup;
1158         Form_pg_attribute *att = tupleDesc->attrs;
1159         int                     numAttrs = tupleDesc->natts;
1160         int                     i;
1161         bool            has_nulls = false;
1162         Datum           toast_values[MaxTupleAttributeNumber];
1163         bool            toast_isnull[MaxTupleAttributeNumber];
1164         bool            toast_free[MaxTupleAttributeNumber];
1165
1166         /* Build a temporary HeapTuple control structure */
1167         tmptup.t_len = tup_len;
1168         ItemPointerSetInvalid(&(tmptup.t_self));
1169         tmptup.t_tableOid = InvalidOid;
1170         tmptup.t_data = tup;
1171
1172         /*
1173          * Break down the tuple into fields.
1174          */
1175         Assert(numAttrs <= MaxTupleAttributeNumber);
1176         heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
1177
1178         memset(toast_free, 0, numAttrs * sizeof(bool));
1179
1180         for (i = 0; i < numAttrs; i++)
1181         {
1182                 /*
1183                  * Look at non-null varlena attributes
1184                  */
1185                 if (toast_isnull[i])
1186                         has_nulls = true;
1187                 else if (att[i]->attlen == -1)
1188                 {
1189                         struct varlena *new_value;
1190
1191                         new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1192                         if (VARATT_IS_EXTERNAL(new_value) ||
1193                                 VARATT_IS_COMPRESSED(new_value))
1194                         {
1195                                 new_value = heap_tuple_untoast_attr(new_value);
1196                                 toast_values[i] = PointerGetDatum(new_value);
1197                                 toast_free[i] = true;
1198                         }
1199                 }
1200         }
1201
1202         /*
1203          * Calculate the new size of the tuple.
1204          *
1205          * This should match the reconstruction code in toast_insert_or_update.
1206          */
1207         new_header_len = SizeofHeapTupleHeader;
1208         if (has_nulls)
1209                 new_header_len += BITMAPLEN(numAttrs);
1210         if (tup->t_infomask & HEAP_HASOID)
1211                 new_header_len += sizeof(Oid);
1212         new_header_len = MAXALIGN(new_header_len);
1213         new_data_len = heap_compute_data_size(tupleDesc,
1214                                                                                   toast_values, toast_isnull);
1215         new_tuple_len = new_header_len + new_data_len;
1216
1217         new_data = (HeapTupleHeader) palloc0(new_tuple_len);
1218
1219         /*
1220          * Copy the existing tuple header, but adjust natts and t_hoff.
1221          */
1222         memcpy(new_data, tup, SizeofHeapTupleHeader);
1223         HeapTupleHeaderSetNatts(new_data, numAttrs);
1224         new_data->t_hoff = new_header_len;
1225         if (tup->t_infomask & HEAP_HASOID)
1226                 HeapTupleHeaderSetOid(new_data, HeapTupleHeaderGetOid(tup));
1227
1228         /* Set the composite-Datum header fields correctly */
1229         HeapTupleHeaderSetDatumLength(new_data, new_tuple_len);
1230         HeapTupleHeaderSetTypeId(new_data, tupleDesc->tdtypeid);
1231         HeapTupleHeaderSetTypMod(new_data, tupleDesc->tdtypmod);
1232
1233         /* Copy over the data, and fill the null bitmap if needed */
1234         heap_fill_tuple(tupleDesc,
1235                                         toast_values,
1236                                         toast_isnull,
1237                                         (char *) new_data + new_header_len,
1238                                         new_data_len,
1239                                         &(new_data->t_infomask),
1240                                         has_nulls ? new_data->t_bits : NULL);
1241
1242         /*
1243          * Free allocated temp values
1244          */
1245         for (i = 0; i < numAttrs; i++)
1246                 if (toast_free[i])
1247                         pfree(DatumGetPointer(toast_values[i]));
1248
1249         return PointerGetDatum(new_data);
1250 }
1251
1252
1253 /* ----------
1254  * toast_compress_datum -
1255  *
1256  *      Create a compressed version of a varlena datum
1257  *
1258  *      If we fail (ie, compressed result is actually bigger than original)
1259  *      then return NULL.  We must not use compressed data if it'd expand
1260  *      the tuple!
1261  *
1262  *      We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
1263  *      copying them.  But we can't handle external or compressed datums.
1264  * ----------
1265  */
1266 Datum
1267 toast_compress_datum(Datum value)
1268 {
1269         struct varlena *tmp;
1270         int32           valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
1271         int32           len;
1272
1273         Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
1274         Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
1275
1276         /*
1277          * No point in wasting a palloc cycle if value size is out of the allowed
1278          * range for compression
1279          */
1280         if (valsize < PGLZ_strategy_default->min_input_size ||
1281                 valsize > PGLZ_strategy_default->max_input_size)
1282                 return PointerGetDatum(NULL);
1283
1284         tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
1285                                                                         TOAST_COMPRESS_HDRSZ);
1286
1287         /*
1288          * We recheck the actual size even if pglz_compress() reports success,
1289          * because it might be satisfied with having saved as little as one byte
1290          * in the compressed data --- which could turn into a net loss once you
1291          * consider header and alignment padding.  Worst case, the compressed
1292          * format might require three padding bytes (plus header, which is
1293          * included in VARSIZE(tmp)), whereas the uncompressed format would take
1294          * only one header byte and no padding if the value is short enough.  So
1295          * we insist on a savings of more than 2 bytes to ensure we have a gain.
1296          */
1297         len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)),
1298                                                 valsize,
1299                                                 TOAST_COMPRESS_RAWDATA(tmp),
1300                                                 PGLZ_strategy_default);
1301         if (len >= 0 &&
1302                 len + TOAST_COMPRESS_HDRSZ < valsize - 2)
1303         {
1304                 TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize);
1305                 SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ);
1306                 /* successful compression */
1307                 return PointerGetDatum(tmp);
1308         }
1309         else
1310         {
1311                 /* incompressible data */
1312                 pfree(tmp);
1313                 return PointerGetDatum(NULL);
1314         }
1315 }
1316
1317
1318 /* ----------
1319  * toast_get_valid_index
1320  *
1321  *      Get OID of valid index associated to given toast relation. A toast
1322  *      relation can have only one valid index at the same time.
1323  */
1324 Oid
1325 toast_get_valid_index(Oid toastoid, LOCKMODE lock)
1326 {
1327         int                     num_indexes;
1328         int                     validIndex;
1329         Oid                     validIndexOid;
1330         Relation   *toastidxs;
1331         Relation        toastrel;
1332
1333         /* Open the toast relation */
1334         toastrel = heap_open(toastoid, lock);
1335
1336         /* Look for the valid index of the toast relation */
1337         validIndex = toast_open_indexes(toastrel,
1338                                                                         lock,
1339                                                                         &toastidxs,
1340                                                                         &num_indexes);
1341         validIndexOid = RelationGetRelid(toastidxs[validIndex]);
1342
1343         /* Close the toast relation and all its indexes */
1344         toast_close_indexes(toastidxs, num_indexes, lock);
1345         heap_close(toastrel, lock);
1346
1347         return validIndexOid;
1348 }
1349
1350
1351 /* ----------
1352  * toast_save_datum -
1353  *
1354  *      Save one single datum into the secondary relation and return
1355  *      a Datum reference for it.
1356  *
1357  * rel: the main relation we're working with (not the toast rel!)
1358  * value: datum to be pushed to toast storage
1359  * oldexternal: if not NULL, toast pointer previously representing the datum
1360  * options: options to be passed to heap_insert() for toast rows
1361  * ----------
1362  */
1363 static Datum
1364 toast_save_datum(Relation rel, Datum value,
1365                                  struct varlena * oldexternal, int options)
1366 {
1367         Relation        toastrel;
1368         Relation   *toastidxs;
1369         HeapTuple       toasttup;
1370         TupleDesc       toasttupDesc;
1371         Datum           t_values[3];
1372         bool            t_isnull[3];
1373         CommandId       mycid = GetCurrentCommandId(true);
1374         struct varlena *result;
1375         struct varatt_external toast_pointer;
1376         union
1377         {
1378                 struct varlena hdr;
1379                 /* this is to make the union big enough for a chunk: */
1380                 char            data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
1381                 /* ensure union is aligned well enough: */
1382                 int32           align_it;
1383         }                       chunk_data;
1384         int32           chunk_size;
1385         int32           chunk_seq = 0;
1386         char       *data_p;
1387         int32           data_todo;
1388         Pointer         dval = DatumGetPointer(value);
1389         int                     num_indexes;
1390         int                     validIndex;
1391
1392         Assert(!VARATT_IS_EXTERNAL(value));
1393
1394         /*
1395          * Open the toast relation and its indexes.  We can use the index to check
1396          * uniqueness of the OID we assign to the toasted item, even though it has
1397          * additional columns besides OID.
1398          */
1399         toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
1400         toasttupDesc = toastrel->rd_att;
1401
1402         /* Open all the toast indexes and look for the valid one */
1403         validIndex = toast_open_indexes(toastrel,
1404                                                                         RowExclusiveLock,
1405                                                                         &toastidxs,
1406                                                                         &num_indexes);
1407
1408         /*
1409          * Get the data pointer and length, and compute va_rawsize and va_extsize.
1410          *
1411          * va_rawsize is the size of the equivalent fully uncompressed datum, so
1412          * we have to adjust for short headers.
1413          *
1414          * va_extsize is the actual size of the data payload in the toast records.
1415          */
1416         if (VARATT_IS_SHORT(dval))
1417         {
1418                 data_p = VARDATA_SHORT(dval);
1419                 data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
1420                 toast_pointer.va_rawsize = data_todo + VARHDRSZ;                /* as if not short */
1421                 toast_pointer.va_extsize = data_todo;
1422         }
1423         else if (VARATT_IS_COMPRESSED(dval))
1424         {
1425                 data_p = VARDATA(dval);
1426                 data_todo = VARSIZE(dval) - VARHDRSZ;
1427                 /* rawsize in a compressed datum is just the size of the payload */
1428                 toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
1429                 toast_pointer.va_extsize = data_todo;
1430                 /* Assert that the numbers look like it's compressed */
1431                 Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1432         }
1433         else
1434         {
1435                 data_p = VARDATA(dval);
1436                 data_todo = VARSIZE(dval) - VARHDRSZ;
1437                 toast_pointer.va_rawsize = VARSIZE(dval);
1438                 toast_pointer.va_extsize = data_todo;
1439         }
1440
1441         /*
1442          * Insert the correct table OID into the result TOAST pointer.
1443          *
1444          * Normally this is the actual OID of the target toast table, but during
1445          * table-rewriting operations such as CLUSTER, we have to insert the OID
1446          * of the table's real permanent toast table instead.  rd_toastoid is set
1447          * if we have to substitute such an OID.
1448          */
1449         if (OidIsValid(rel->rd_toastoid))
1450                 toast_pointer.va_toastrelid = rel->rd_toastoid;
1451         else
1452                 toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
1453
1454         /*
1455          * Choose an OID to use as the value ID for this toast value.
1456          *
1457          * Normally we just choose an unused OID within the toast table.  But
1458          * during table-rewriting operations where we are preserving an existing
1459          * toast table OID, we want to preserve toast value OIDs too.  So, if
1460          * rd_toastoid is set and we had a prior external value from that same
1461          * toast table, re-use its value ID.  If we didn't have a prior external
1462          * value (which is a corner case, but possible if the table's attstorage
1463          * options have been changed), we have to pick a value ID that doesn't
1464          * conflict with either new or existing toast value OIDs.
1465          */
1466         if (!OidIsValid(rel->rd_toastoid))
1467         {
1468                 /* normal case: just choose an unused OID */
1469                 toast_pointer.va_valueid =
1470                         GetNewOidWithIndex(toastrel,
1471                                                            RelationGetRelid(toastidxs[validIndex]),
1472                                                            (AttrNumber) 1);
1473         }
1474         else
1475         {
1476                 /* rewrite case: check to see if value was in old toast table */
1477                 toast_pointer.va_valueid = InvalidOid;
1478                 if (oldexternal != NULL)
1479                 {
1480                         struct varatt_external old_toast_pointer;
1481
1482                         Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
1483                         /* Must copy to access aligned fields */
1484                         VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
1485                         if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
1486                         {
1487                                 /* This value came from the old toast table; reuse its OID */
1488                                 toast_pointer.va_valueid = old_toast_pointer.va_valueid;
1489
1490                                 /*
1491                                  * There is a corner case here: the table rewrite might have
1492                                  * to copy both live and recently-dead versions of a row, and
1493                                  * those versions could easily reference the same toast value.
1494                                  * When we copy the second or later version of such a row,
1495                                  * reusing the OID will mean we select an OID that's already
1496                                  * in the new toast table.  Check for that, and if so, just
1497                                  * fall through without writing the data again.
1498                                  *
1499                                  * While annoying and ugly-looking, this is a good thing
1500                                  * because it ensures that we wind up with only one copy of
1501                                  * the toast value when there is only one copy in the old
1502                                  * toast table.  Before we detected this case, we'd have made
1503                                  * multiple copies, wasting space; and what's worse, the
1504                                  * copies belonging to already-deleted heap tuples would not
1505                                  * be reclaimed by VACUUM.
1506                                  */
1507                                 if (toastrel_valueid_exists(toastrel,
1508                                                                                         toast_pointer.va_valueid))
1509                                 {
1510                                         /* Match, so short-circuit the data storage loop below */
1511                                         data_todo = 0;
1512                                 }
1513                         }
1514                 }
1515                 if (toast_pointer.va_valueid == InvalidOid)
1516                 {
1517                         /*
1518                          * new value; must choose an OID that doesn't conflict in either
1519                          * old or new toast table
1520                          */
1521                         do
1522                         {
1523                                 toast_pointer.va_valueid =
1524                                         GetNewOidWithIndex(toastrel,
1525                                                                          RelationGetRelid(toastidxs[validIndex]),
1526                                                                            (AttrNumber) 1);
1527                         } while (toastid_valueid_exists(rel->rd_toastoid,
1528                                                                                         toast_pointer.va_valueid));
1529                 }
1530         }
1531
1532         /*
1533          * Initialize constant parts of the tuple data
1534          */
1535         t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
1536         t_values[2] = PointerGetDatum(&chunk_data);
1537         t_isnull[0] = false;
1538         t_isnull[1] = false;
1539         t_isnull[2] = false;
1540
1541         /*
1542          * Split up the item into chunks
1543          */
1544         while (data_todo > 0)
1545         {
1546                 int                     i;
1547
1548                 CHECK_FOR_INTERRUPTS();
1549
1550                 /*
1551                  * Calculate the size of this chunk
1552                  */
1553                 chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
1554
1555                 /*
1556                  * Build a tuple and store it
1557                  */
1558                 t_values[1] = Int32GetDatum(chunk_seq++);
1559                 SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
1560                 memcpy(VARDATA(&chunk_data), data_p, chunk_size);
1561                 toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
1562
1563                 heap_insert(toastrel, toasttup, mycid, options, NULL);
1564
1565                 /*
1566                  * Create the index entry.  We cheat a little here by not using
1567                  * FormIndexDatum: this relies on the knowledge that the index columns
1568                  * are the same as the initial columns of the table for all the
1569                  * indexes.
1570                  *
1571                  * Note also that there had better not be any user-created index on
1572                  * the TOAST table, since we don't bother to update anything else.
1573                  */
1574                 for (i = 0; i < num_indexes; i++)
1575                 {
1576                         /* Only index relations marked as ready can be updated */
1577                         if (IndexIsReady(toastidxs[i]->rd_index))
1578                                 index_insert(toastidxs[i], t_values, t_isnull,
1579                                                          &(toasttup->t_self),
1580                                                          toastrel,
1581                                                          toastidxs[i]->rd_index->indisunique ?
1582                                                          UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
1583                 }
1584
1585                 /*
1586                  * Free memory
1587                  */
1588                 heap_freetuple(toasttup);
1589
1590                 /*
1591                  * Move on to next chunk
1592                  */
1593                 data_todo -= chunk_size;
1594                 data_p += chunk_size;
1595         }
1596
1597         /*
1598          * Done - close toast relation and its indexes
1599          */
1600         toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1601         heap_close(toastrel, RowExclusiveLock);
1602
1603         /*
1604          * Create the TOAST pointer value that we'll return
1605          */
1606         result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
1607         SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
1608         memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
1609
1610         return PointerGetDatum(result);
1611 }
1612
1613
1614 /* ----------
1615  * toast_delete_datum -
1616  *
1617  *      Delete a single external stored value.
1618  * ----------
1619  */
1620 static void
1621 toast_delete_datum(Relation rel, Datum value)
1622 {
1623         struct varlena *attr = (struct varlena *) DatumGetPointer(value);
1624         struct varatt_external toast_pointer;
1625         Relation        toastrel;
1626         Relation   *toastidxs;
1627         ScanKeyData toastkey;
1628         SysScanDesc toastscan;
1629         HeapTuple       toasttup;
1630         int                     num_indexes;
1631         int                     validIndex;
1632
1633         if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1634                 return;
1635
1636         /* Must copy to access aligned fields */
1637         VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1638
1639         /*
1640          * Open the toast relation and its indexes
1641          */
1642         toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock);
1643
1644         /* Fetch valid relation used for process */
1645         validIndex = toast_open_indexes(toastrel,
1646                                                                         RowExclusiveLock,
1647                                                                         &toastidxs,
1648                                                                         &num_indexes);
1649
1650         /*
1651          * Setup a scan key to find chunks with matching va_valueid
1652          */
1653         ScanKeyInit(&toastkey,
1654                                 (AttrNumber) 1,
1655                                 BTEqualStrategyNumber, F_OIDEQ,
1656                                 ObjectIdGetDatum(toast_pointer.va_valueid));
1657
1658         /*
1659          * Find all the chunks.  (We don't actually care whether we see them in
1660          * sequence or not, but since we've already locked the index we might as
1661          * well use systable_beginscan_ordered.)
1662          */
1663         toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
1664                                                                                    SnapshotToast, 1, &toastkey);
1665         while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1666         {
1667                 /*
1668                  * Have a chunk, delete it
1669                  */
1670                 simple_heap_delete(toastrel, &toasttup->t_self);
1671         }
1672
1673         /*
1674          * End scan and close relations
1675          */
1676         systable_endscan_ordered(toastscan);
1677         toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1678         heap_close(toastrel, RowExclusiveLock);
1679 }
1680
1681
1682 /* ----------
1683  * toastrel_valueid_exists -
1684  *
1685  *      Test whether a toast value with the given ID exists in the toast relation
1686  * ----------
1687  */
1688 static bool
1689 toastrel_valueid_exists(Relation toastrel, Oid valueid)
1690 {
1691         bool            result = false;
1692         ScanKeyData toastkey;
1693         SysScanDesc toastscan;
1694         int                     num_indexes;
1695         int                     validIndex;
1696         Relation   *toastidxs;
1697
1698         /* Fetch a valid index relation */
1699         validIndex = toast_open_indexes(toastrel,
1700                                                                         RowExclusiveLock,
1701                                                                         &toastidxs,
1702                                                                         &num_indexes);
1703
1704         /*
1705          * Setup a scan key to find chunks with matching va_valueid
1706          */
1707         ScanKeyInit(&toastkey,
1708                                 (AttrNumber) 1,
1709                                 BTEqualStrategyNumber, F_OIDEQ,
1710                                 ObjectIdGetDatum(valueid));
1711
1712         /*
1713          * Is there any such chunk?
1714          */
1715         toastscan = systable_beginscan(toastrel,
1716                                                                    RelationGetRelid(toastidxs[validIndex]),
1717                                                                    true, SnapshotToast, 1, &toastkey);
1718
1719         if (systable_getnext(toastscan) != NULL)
1720                 result = true;
1721
1722         systable_endscan(toastscan);
1723
1724         /* Clean up */
1725         toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1726
1727         return result;
1728 }
1729
1730 /* ----------
1731  * toastid_valueid_exists -
1732  *
1733  *      As above, but work from toast rel's OID not an open relation
1734  * ----------
1735  */
1736 static bool
1737 toastid_valueid_exists(Oid toastrelid, Oid valueid)
1738 {
1739         bool            result;
1740         Relation        toastrel;
1741
1742         toastrel = heap_open(toastrelid, AccessShareLock);
1743
1744         result = toastrel_valueid_exists(toastrel, valueid);
1745
1746         heap_close(toastrel, AccessShareLock);
1747
1748         return result;
1749 }
1750
1751
1752 /* ----------
1753  * toast_fetch_datum -
1754  *
1755  *      Reconstruct an in memory Datum from the chunks saved
1756  *      in the toast relation
1757  * ----------
1758  */
1759 static struct varlena *
1760 toast_fetch_datum(struct varlena * attr)
1761 {
1762         Relation        toastrel;
1763         Relation   *toastidxs;
1764         ScanKeyData toastkey;
1765         SysScanDesc toastscan;
1766         HeapTuple       ttup;
1767         TupleDesc       toasttupDesc;
1768         struct varlena *result;
1769         struct varatt_external toast_pointer;
1770         int32           ressize;
1771         int32           residx,
1772                                 nextidx;
1773         int32           numchunks;
1774         Pointer         chunk;
1775         bool            isnull;
1776         char       *chunkdata;
1777         int32           chunksize;
1778         int                     num_indexes;
1779         int                     validIndex;
1780
1781         if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1782                 elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
1783
1784         /* Must copy to access aligned fields */
1785         VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1786
1787         ressize = toast_pointer.va_extsize;
1788         numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1789
1790         result = (struct varlena *) palloc(ressize + VARHDRSZ);
1791
1792         if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1793                 SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
1794         else
1795                 SET_VARSIZE(result, ressize + VARHDRSZ);
1796
1797         /*
1798          * Open the toast relation and its indexes
1799          */
1800         toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
1801         toasttupDesc = toastrel->rd_att;
1802
1803         /* Look for the valid index of the toast relation */
1804         validIndex = toast_open_indexes(toastrel,
1805                                                                         AccessShareLock,
1806                                                                         &toastidxs,
1807                                                                         &num_indexes);
1808
1809         /*
1810          * Setup a scan key to fetch from the index by va_valueid
1811          */
1812         ScanKeyInit(&toastkey,
1813                                 (AttrNumber) 1,
1814                                 BTEqualStrategyNumber, F_OIDEQ,
1815                                 ObjectIdGetDatum(toast_pointer.va_valueid));
1816
1817         /*
1818          * Read the chunks by index
1819          *
1820          * Note that because the index is actually on (valueid, chunkidx) we will
1821          * see the chunks in chunkidx order, even though we didn't explicitly ask
1822          * for it.
1823          */
1824         nextidx = 0;
1825
1826         toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
1827                                                                                    SnapshotToast, 1, &toastkey);
1828         while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1829         {
1830                 /*
1831                  * Have a chunk, extract the sequence number and the data
1832                  */
1833                 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
1834                 Assert(!isnull);
1835                 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
1836                 Assert(!isnull);
1837                 if (!VARATT_IS_EXTENDED(chunk))
1838                 {
1839                         chunksize = VARSIZE(chunk) - VARHDRSZ;
1840                         chunkdata = VARDATA(chunk);
1841                 }
1842                 else if (VARATT_IS_SHORT(chunk))
1843                 {
1844                         /* could happen due to heap_form_tuple doing its thing */
1845                         chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1846                         chunkdata = VARDATA_SHORT(chunk);
1847                 }
1848                 else
1849                 {
1850                         /* should never happen */
1851                         elog(ERROR, "found toasted toast chunk for toast value %u in %s",
1852                                  toast_pointer.va_valueid,
1853                                  RelationGetRelationName(toastrel));
1854                         chunksize = 0;          /* keep compiler quiet */
1855                         chunkdata = NULL;
1856                 }
1857
1858                 /*
1859                  * Some checks on the data we've found
1860                  */
1861                 if (residx != nextidx)
1862                         elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
1863                                  residx, nextidx,
1864                                  toast_pointer.va_valueid,
1865                                  RelationGetRelationName(toastrel));
1866                 if (residx < numchunks - 1)
1867                 {
1868                         if (chunksize != TOAST_MAX_CHUNK_SIZE)
1869                                 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
1870                                          chunksize, (int) TOAST_MAX_CHUNK_SIZE,
1871                                          residx, numchunks,
1872                                          toast_pointer.va_valueid,
1873                                          RelationGetRelationName(toastrel));
1874                 }
1875                 else if (residx == numchunks - 1)
1876                 {
1877                         if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
1878                                 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
1879                                          chunksize,
1880                                          (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
1881                                          residx,
1882                                          toast_pointer.va_valueid,
1883                                          RelationGetRelationName(toastrel));
1884                 }
1885                 else
1886                         elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
1887                                  residx,
1888                                  0, numchunks - 1,
1889                                  toast_pointer.va_valueid,
1890                                  RelationGetRelationName(toastrel));
1891
1892                 /*
1893                  * Copy the data into proper place in our result
1894                  */
1895                 memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
1896                            chunkdata,
1897                            chunksize);
1898
1899                 nextidx++;
1900         }
1901
1902         /*
1903          * Final checks that we successfully fetched the datum
1904          */
1905         if (nextidx != numchunks)
1906                 elog(ERROR, "missing chunk number %d for toast value %u in %s",
1907                          nextidx,
1908                          toast_pointer.va_valueid,
1909                          RelationGetRelationName(toastrel));
1910
1911         /*
1912          * End scan and close relations
1913          */
1914         systable_endscan_ordered(toastscan);
1915         toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
1916         heap_close(toastrel, AccessShareLock);
1917
1918         return result;
1919 }
1920
1921 /* ----------
1922  * toast_fetch_datum_slice -
1923  *
1924  *      Reconstruct a segment of a Datum from the chunks saved
1925  *      in the toast relation
1926  * ----------
1927  */
1928 static struct varlena *
1929 toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
1930 {
1931         Relation        toastrel;
1932         Relation   *toastidxs;
1933         ScanKeyData toastkey[3];
1934         int                     nscankeys;
1935         SysScanDesc toastscan;
1936         HeapTuple       ttup;
1937         TupleDesc       toasttupDesc;
1938         struct varlena *result;
1939         struct varatt_external toast_pointer;
1940         int32           attrsize;
1941         int32           residx;
1942         int32           nextidx;
1943         int                     numchunks;
1944         int                     startchunk;
1945         int                     endchunk;
1946         int32           startoffset;
1947         int32           endoffset;
1948         int                     totalchunks;
1949         Pointer         chunk;
1950         bool            isnull;
1951         char       *chunkdata;
1952         int32           chunksize;
1953         int32           chcpystrt;
1954         int32           chcpyend;
1955         int                     num_indexes;
1956         int                     validIndex;
1957
1958         if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1959                 elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
1960
1961         /* Must copy to access aligned fields */
1962         VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1963
1964         /*
1965          * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
1966          * we can't return a compressed datum which is meaningful to toast later
1967          */
1968         Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1969
1970         attrsize = toast_pointer.va_extsize;
1971         totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1972
1973         if (sliceoffset >= attrsize)
1974         {
1975                 sliceoffset = 0;
1976                 length = 0;
1977         }
1978
1979         if (((sliceoffset + length) > attrsize) || length < 0)
1980                 length = attrsize - sliceoffset;
1981
1982         result = (struct varlena *) palloc(length + VARHDRSZ);
1983
1984         if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1985                 SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
1986         else
1987                 SET_VARSIZE(result, length + VARHDRSZ);
1988
1989         if (length == 0)
1990                 return result;                  /* Can save a lot of work at this point! */
1991
1992         startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
1993         endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
1994         numchunks = (endchunk - startchunk) + 1;
1995
1996         startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
1997         endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
1998
1999         /*
2000          * Open the toast relation and its indexes
2001          */
2002         toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
2003         toasttupDesc = toastrel->rd_att;
2004
2005         /* Look for the valid index of toast relation */
2006         validIndex = toast_open_indexes(toastrel,
2007                                                                         AccessShareLock,
2008                                                                         &toastidxs,
2009                                                                         &num_indexes);
2010
2011         /*
2012          * Setup a scan key to fetch from the index. This is either two keys or
2013          * three depending on the number of chunks.
2014          */
2015         ScanKeyInit(&toastkey[0],
2016                                 (AttrNumber) 1,
2017                                 BTEqualStrategyNumber, F_OIDEQ,
2018                                 ObjectIdGetDatum(toast_pointer.va_valueid));
2019
2020         /*
2021          * Use equality condition for one chunk, a range condition otherwise:
2022          */
2023         if (numchunks == 1)
2024         {
2025                 ScanKeyInit(&toastkey[1],
2026                                         (AttrNumber) 2,
2027                                         BTEqualStrategyNumber, F_INT4EQ,
2028                                         Int32GetDatum(startchunk));
2029                 nscankeys = 2;
2030         }
2031         else
2032         {
2033                 ScanKeyInit(&toastkey[1],
2034                                         (AttrNumber) 2,
2035                                         BTGreaterEqualStrategyNumber, F_INT4GE,
2036                                         Int32GetDatum(startchunk));
2037                 ScanKeyInit(&toastkey[2],
2038                                         (AttrNumber) 2,
2039                                         BTLessEqualStrategyNumber, F_INT4LE,
2040                                         Int32GetDatum(endchunk));
2041                 nscankeys = 3;
2042         }
2043
2044         /*
2045          * Read the chunks by index
2046          *
2047          * The index is on (valueid, chunkidx) so they will come in order
2048          */
2049         nextidx = startchunk;
2050         toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
2051                                                                                  SnapshotToast, nscankeys, toastkey);
2052         while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
2053         {
2054                 /*
2055                  * Have a chunk, extract the sequence number and the data
2056                  */
2057                 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
2058                 Assert(!isnull);
2059                 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
2060                 Assert(!isnull);
2061                 if (!VARATT_IS_EXTENDED(chunk))
2062                 {
2063                         chunksize = VARSIZE(chunk) - VARHDRSZ;
2064                         chunkdata = VARDATA(chunk);
2065                 }
2066                 else if (VARATT_IS_SHORT(chunk))
2067                 {
2068                         /* could happen due to heap_form_tuple doing its thing */
2069                         chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
2070                         chunkdata = VARDATA_SHORT(chunk);
2071                 }
2072                 else
2073                 {
2074                         /* should never happen */
2075                         elog(ERROR, "found toasted toast chunk for toast value %u in %s",
2076                                  toast_pointer.va_valueid,
2077                                  RelationGetRelationName(toastrel));
2078                         chunksize = 0;          /* keep compiler quiet */
2079                         chunkdata = NULL;
2080                 }
2081
2082                 /*
2083                  * Some checks on the data we've found
2084                  */
2085                 if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
2086                         elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
2087                                  residx, nextidx,
2088                                  toast_pointer.va_valueid,
2089                                  RelationGetRelationName(toastrel));
2090                 if (residx < totalchunks - 1)
2091                 {
2092                         if (chunksize != TOAST_MAX_CHUNK_SIZE)
2093                                 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
2094                                          chunksize, (int) TOAST_MAX_CHUNK_SIZE,
2095                                          residx, totalchunks,
2096                                          toast_pointer.va_valueid,
2097                                          RelationGetRelationName(toastrel));
2098                 }
2099                 else if (residx == totalchunks - 1)
2100                 {
2101                         if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
2102                                 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
2103                                          chunksize,
2104                                          (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
2105                                          residx,
2106                                          toast_pointer.va_valueid,
2107                                          RelationGetRelationName(toastrel));
2108                 }
2109                 else
2110                         elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
2111                                  residx,
2112                                  0, totalchunks - 1,
2113                                  toast_pointer.va_valueid,
2114                                  RelationGetRelationName(toastrel));
2115
2116                 /*
2117                  * Copy the data into proper place in our result
2118                  */
2119                 chcpystrt = 0;
2120                 chcpyend = chunksize - 1;
2121                 if (residx == startchunk)
2122                         chcpystrt = startoffset;
2123                 if (residx == endchunk)
2124                         chcpyend = endoffset;
2125
2126                 memcpy(VARDATA(result) +
2127                            (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
2128                            chunkdata + chcpystrt,
2129                            (chcpyend - chcpystrt) + 1);
2130
2131                 nextidx++;
2132         }
2133
2134         /*
2135          * Final checks that we successfully fetched the datum
2136          */
2137         if (nextidx != (endchunk + 1))
2138                 elog(ERROR, "missing chunk number %d for toast value %u in %s",
2139                          nextidx,
2140                          toast_pointer.va_valueid,
2141                          RelationGetRelationName(toastrel));
2142
2143         /*
2144          * End scan and close relations
2145          */
2146         systable_endscan_ordered(toastscan);
2147         toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
2148         heap_close(toastrel, AccessShareLock);
2149
2150         return result;
2151 }
2152
2153 /* ----------
2154  * toast_decompress_datum -
2155  *
2156  * Decompress a compressed version of a varlena datum
2157  */
2158 static struct varlena *
2159 toast_decompress_datum(struct varlena * attr)
2160 {
2161         struct varlena *result;
2162
2163         Assert(VARATT_IS_COMPRESSED(attr));
2164
2165         result = (struct varlena *)
2166                 palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
2167         SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
2168
2169         if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
2170                                                 VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
2171                                                 VARDATA(result),
2172                                                 TOAST_COMPRESS_RAWSIZE(attr)) < 0)
2173                 elog(ERROR, "compressed data is corrupted");
2174
2175         return result;
2176 }
2177
2178
2179 /* ----------
2180  * toast_open_indexes
2181  *
2182  *      Get an array of the indexes associated to the given toast relation
2183  *      and return as well the position of the valid index used by the toast
2184  *      relation in this array. It is the responsibility of the caller of this
2185  *      function to close the indexes as well as free them.
2186  */
2187 static int
2188 toast_open_indexes(Relation toastrel,
2189                                    LOCKMODE lock,
2190                                    Relation **toastidxs,
2191                                    int *num_indexes)
2192 {
2193         int                     i = 0;
2194         int                     res = 0;
2195         bool            found = false;
2196         List       *indexlist;
2197         ListCell   *lc;
2198
2199         /* Get index list of the toast relation */
2200         indexlist = RelationGetIndexList(toastrel);
2201         Assert(indexlist != NIL);
2202
2203         *num_indexes = list_length(indexlist);
2204
2205         /* Open all the index relations */
2206         *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
2207         foreach(lc, indexlist)
2208                 (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
2209
2210         /* Fetch the first valid index in list */
2211         for (i = 0; i < *num_indexes; i++)
2212         {
2213                 Relation        toastidx = (*toastidxs)[i];
2214
2215                 if (toastidx->rd_index->indisvalid)
2216                 {
2217                         res = i;
2218                         found = true;
2219                         break;
2220                 }
2221         }
2222
2223         /*
2224          * Free index list, not necessary anymore as relations are opened and a
2225          * valid index has been found.
2226          */
2227         list_free(indexlist);
2228
2229         /*
2230          * The toast relation should have one valid index, so something is going
2231          * wrong if there is nothing.
2232          */
2233         if (!found)
2234                 elog(ERROR, "no valid index found for toast relation with Oid %u",
2235                          RelationGetRelid(toastrel));
2236
2237         return res;
2238 }
2239
2240 /* ----------
2241  * toast_close_indexes
2242  *
2243  *      Close an array of indexes for a toast relation and free it. This should
2244  *      be called for a set of indexes opened previously with toast_open_indexes.
2245  */
2246 static void
2247 toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
2248 {
2249         int                     i;
2250
2251         /* Close relations and clean up things */
2252         for (i = 0; i < num_indexes; i++)
2253                 index_close(toastidxs[i], lock);
2254         pfree(toastidxs);
2255 }