From 1dc5ebc9077ab742079ce5dac9a6664248d42916 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 14 May 2015 12:08:40 -0400 Subject: [PATCH] Support "expanded" objects, particularly arrays, for better performance. This patch introduces the ability for complex datatypes to have an in-memory representation that is different from their on-disk format. On-disk formats are typically optimized for minimal size, and in any case they can't contain pointers, so they are often not well-suited for computation. Now a datatype can invent an "expanded" in-memory format that is better suited for its operations, and then pass that around among the C functions that operate on the datatype. There are also provisions (rudimentary as yet) to allow an expanded object to be modified in-place under suitable conditions, so that operations like assignment to an element of an array need not involve copying the entire array. The initial application for this feature is arrays, but it is not hard to foresee using it for other container types like JSON, XML and hstore. I have hopes that it will be useful to PostGIS as well. In this initial implementation, a few heuristics have been hard-wired into plpgsql to improve performance for arrays that are stored in plpgsql variables. We would like to generalize those hacks so that other datatypes can obtain similar improvements, but figuring out some appropriate APIs is left as a task for future work. (The heuristics themselves are probably not optimal yet, either, as they sometimes force expansion of arrays that would be better left alone.) Preliminary performance testing shows impressive speed gains for plpgsql functions that do element-by-element access or update of large arrays. There are other cases that get a little slower, as a result of added array format conversions; but we can hope to improve anything that's annoyingly bad. In any case most applications should see a net win. 
Tom Lane, reviewed by Andres Freund --- doc/src/sgml/storage.sgml | 42 +- doc/src/sgml/xtypes.sgml | 71 ++ src/backend/access/common/heaptuple.c | 45 +- src/backend/access/heap/tuptoaster.c | 36 + src/backend/executor/execQual.c | 12 +- src/backend/executor/execTuples.c | 47 ++ src/backend/executor/nodeSubqueryscan.c | 8 + src/backend/executor/spi.c | 21 + src/backend/utils/adt/Makefile | 9 +- src/backend/utils/adt/array_expanded.c | 455 +++++++++++ src/backend/utils/adt/array_userfuncs.c | 105 +-- src/backend/utils/adt/arrayfuncs.c | 991 ++++++++++++++---------- src/backend/utils/adt/datum.c | 86 +- src/backend/utils/adt/expandeddatum.c | 163 ++++ src/backend/utils/mmgr/mcxt.c | 4 + src/include/executor/spi.h | 1 + src/include/executor/tuptable.h | 1 + src/include/nodes/primnodes.h | 4 + src/include/postgres.h | 30 + src/include/utils/array.h | 143 +++- src/include/utils/arrayaccess.h | 133 ++++ src/include/utils/datum.h | 8 +- src/include/utils/expandeddatum.h | 151 ++++ src/pl/plpgsql/src/pl_comp.c | 16 + src/pl/plpgsql/src/pl_exec.c | 301 ++++++- src/pl/plpgsql/src/pl_gram.y | 3 + src/pl/plpgsql/src/plpgsql.h | 2 + 27 files changed, 2362 insertions(+), 526 deletions(-) create mode 100644 src/backend/utils/adt/array_expanded.c create mode 100644 src/backend/utils/adt/expandeddatum.c create mode 100644 src/include/utils/arrayaccess.h create mode 100644 src/include/utils/expandeddatum.h diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index d8c52875d8..e5b7b4b68d 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -503,8 +503,9 @@ comparison table, in which all the HTML pages were cut down to 7 kB to fit. TOAST pointers can point to data that is not on disk, but is elsewhere in the memory of the current server process. Such pointers obviously cannot be long-lived, but they are nonetheless useful. There -is currently just one sub-case: -pointers to indirect data. 
+are currently two sub-cases: +pointers to indirect data and +pointers to expanded data. @@ -518,6 +519,43 @@ that the referenced data survives for as long as the pointer could exist, and there is no infrastructure to help with this. + +Expanded TOAST pointers are useful for complex data types +whose on-disk representation is not especially suited for computational +purposes. As an example, the standard varlena representation of a +PostgreSQL array includes dimensionality information, a +nulls bitmap if there are any null elements, then the values of all the +elements in order. When the element type itself is variable-length, the +only way to find the N'th element is to scan through all the +preceding elements. This representation is appropriate for on-disk storage +because of its compactness, but for computations with the array it's much +nicer to have an expanded or deconstructed +representation in which all the element starting locations have been +identified. The TOAST pointer mechanism supports this need by +allowing a pass-by-reference Datum to point to either a standard varlena +value (the on-disk representation) or a TOAST pointer that +points to an expanded representation somewhere in memory. The details of +this expanded representation are up to the data type, though it must have +a standard header and meet the other API requirements given +in src/include/utils/expandeddatum.h. C-level functions +working with the data type can choose to handle either representation. +Functions that do not know about the expanded representation, but simply +apply PG_DETOAST_DATUM to their inputs, will automatically +receive the traditional varlena representation; so support for an expanded +representation can be introduced incrementally, one function at a time. + + + +TOAST pointers to expanded values are further broken down +into read-write and read-only pointers. 
+The pointed-to representation is the same either way, but a function that +receives a read-write pointer is allowed to modify the referenced value +in-place, whereas one that receives a read-only pointer must not; it must +first create a copy if it wants to make a modified version of the value. +This distinction and some associated conventions make it possible to avoid +unnecessary copying of expanded values during query execution. + + For all types of in-memory TOAST pointer, the TOAST management code ensures that no such pointer datum can accidentally get diff --git a/doc/src/sgml/xtypes.sgml b/doc/src/sgml/xtypes.sgml index 2459616281..ac0b8a2943 100644 --- a/doc/src/sgml/xtypes.sgml +++ b/doc/src/sgml/xtypes.sgml @@ -300,6 +300,77 @@ CREATE TYPE complex ( + + Another feature that's enabled by TOAST support is the + possibility of having an expanded in-memory data + representation that is more convenient to work with than the format that + is stored on disk. The regular or flat varlena storage format + is ultimately just a blob of bytes; it cannot for example contain + pointers, since it may get copied to other locations in memory. + For complex data types, the flat format may be quite expensive to work + with, so PostgreSQL provides a way to expand + the flat format into a representation that is more suited to computation, + and then pass that format in-memory between functions of the data type. + + + + To use expanded storage, a data type must define an expanded format that + follows the rules given in src/include/utils/expandeddatum.h, + and provide functions to expand a flat varlena value into + expanded format and flatten the expanded format back to the + regular varlena representation. Then ensure that all C functions for + the data type can accept either representation, possibly by converting + one into the other immediately upon receipt. 
This does not require fixing + all existing functions for the data type at once, because the standard + PG_DETOAST_DATUM macro is defined to convert expanded inputs + into regular flat format. Therefore, existing functions that work with + the flat varlena format will continue to work, though slightly + inefficiently, with expanded inputs; they need not be converted until and + unless better performance is important. + + + + C functions that know how to work with an expanded representation + typically fall into two categories: those that can only handle expanded + format, and those that can handle either expanded or flat varlena inputs. + The former are easier to write but may be less efficient overall, because + converting a flat input to expanded form for use by a single function may + cost more than is saved by operating on the expanded format. + When only expanded format need be handled, conversion of flat inputs to + expanded form can be hidden inside an argument-fetching macro, so that + the function appears no more complex than one working with traditional + varlena input. + To handle both types of input, write an argument-fetching function that + will detoast external, short-header, and compressed varlena inputs, but + not expanded inputs. Such a function can be defined as returning a + pointer to a union of the flat varlena format and the expanded format. + Callers can use the VARATT_IS_EXPANDED_HEADER() macro to + determine which format they received. + + + + The TOAST infrastructure not only allows regular varlena + values to be distinguished from expanded values, but also + distinguishes read-write and read-only pointers to + expanded values. C functions that only need to examine an expanded + value, or will only change it in safe and non-semantically-visible ways, + need not care which type of pointer they receive. 
C functions that + produce a modified version of an input value are allowed to modify an + expanded input value in-place if they receive a read-write pointer, but + must not modify the input if they receive a read-only pointer; in that + case they have to copy the value first, producing a new value to modify. + A C function that has constructed a new expanded value should always + return a read-write pointer to it. Also, a C function that is modifying + a read-write expanded value in-place should take care to leave the value + in a sane state if it fails partway through. + + + + For examples of working with expanded values, see the standard array + infrastructure, particularly + src/backend/utils/adt/array_expanded.c. + + diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index f58f81e1ed..09aea79c99 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -60,6 +60,7 @@ #include "access/sysattr.h" #include "access/tuptoaster.h" #include "executor/tuptable.h" +#include "utils/expandeddatum.h" /* Does att's datatype allow packing into the 1-byte-header varlena format? 
*/ @@ -93,13 +94,15 @@ heap_compute_data_size(TupleDesc tupleDesc, for (i = 0; i < numberOfAttributes; i++) { Datum val; + Form_pg_attribute atti; if (isnull[i]) continue; val = values[i]; + atti = att[i]; - if (ATT_IS_PACKABLE(att[i]) && + if (ATT_IS_PACKABLE(atti) && VARATT_CAN_MAKE_SHORT(DatumGetPointer(val))) { /* @@ -108,11 +111,21 @@ heap_compute_data_size(TupleDesc tupleDesc, */ data_length += VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(val)); } + else if (atti->attlen == -1 && + VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(val))) + { + /* + * we want to flatten the expanded value so that the constructed + * tuple doesn't depend on it + */ + data_length = att_align_nominal(data_length, atti->attalign); + data_length += EOH_get_flat_size(DatumGetEOHP(val)); + } else { - data_length = att_align_datum(data_length, att[i]->attalign, - att[i]->attlen, val); - data_length = att_addlength_datum(data_length, att[i]->attlen, + data_length = att_align_datum(data_length, atti->attalign, + atti->attlen, val); + data_length = att_addlength_datum(data_length, atti->attlen, val); } } @@ -203,10 +216,26 @@ heap_fill_tuple(TupleDesc tupleDesc, *infomask |= HEAP_HASVARWIDTH; if (VARATT_IS_EXTERNAL(val)) { - *infomask |= HEAP_HASEXTERNAL; - /* no alignment, since it's short by definition */ - data_length = VARSIZE_EXTERNAL(val); - memcpy(data, val, data_length); + if (VARATT_IS_EXTERNAL_EXPANDED(val)) + { + /* + * we want to flatten the expanded value so that the + * constructed tuple doesn't depend on it + */ + ExpandedObjectHeader *eoh = DatumGetEOHP(values[i]); + + data = (char *) att_align_nominal(data, + att[i]->attalign); + data_length = EOH_get_flat_size(eoh); + EOH_flatten_into(eoh, data, data_length); + } + else + { + *infomask |= HEAP_HASEXTERNAL; + /* no alignment, since it's short by definition */ + data_length = VARSIZE_EXTERNAL(val); + memcpy(data, val, data_length); + } } else if (VARATT_IS_SHORT(val)) { diff --git a/src/backend/access/heap/tuptoaster.c 
b/src/backend/access/heap/tuptoaster.c index 274155ad0c..b9691a57be 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -37,6 +37,7 @@ #include "catalog/catalog.h" #include "common/pg_lzcompress.h" #include "miscadmin.h" +#include "utils/expandeddatum.h" #include "utils/fmgroids.h" #include "utils/rel.h" #include "utils/typcache.h" @@ -130,6 +131,19 @@ heap_tuple_fetch_attr(struct varlena * attr) result = (struct varlena *) palloc(VARSIZE_ANY(attr)); memcpy(result, attr, VARSIZE_ANY(attr)); } + else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) + { + /* + * This is an expanded-object pointer --- get flat format + */ + ExpandedObjectHeader *eoh; + Size resultsize; + + eoh = DatumGetEOHP(PointerGetDatum(attr)); + resultsize = EOH_get_flat_size(eoh); + result = (struct varlena *) palloc(resultsize); + EOH_flatten_into(eoh, (void *) result, resultsize); + } else { /* @@ -196,6 +210,15 @@ heap_tuple_untoast_attr(struct varlena * attr) attr = result; } } + else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) + { + /* + * This is an expanded-object pointer --- get flat format + */ + attr = heap_tuple_fetch_attr(attr); + /* flatteners are not allowed to produce compressed/short output */ + Assert(!VARATT_IS_EXTENDED(attr)); + } else if (VARATT_IS_COMPRESSED(attr)) { /* @@ -263,6 +286,11 @@ heap_tuple_untoast_attr_slice(struct varlena * attr, return heap_tuple_untoast_attr_slice(redirect.pointer, sliceoffset, slicelength); } + else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) + { + /* pass it off to heap_tuple_fetch_attr to flatten */ + preslice = heap_tuple_fetch_attr(attr); + } else preslice = attr; @@ -344,6 +372,10 @@ toast_raw_datum_size(Datum value) return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer)); } + else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) + { + result = EOH_get_flat_size(DatumGetEOHP(value)); + } else if (VARATT_IS_COMPRESSED(attr)) { /* here, va_rawsize is just the payload size */ @@ -400,6 +432,10 @@ 
toast_datum_size(Datum value) return toast_datum_size(PointerGetDatum(toast_pointer.pointer)); } + else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) + { + result = EOH_get_flat_size(DatumGetEOHP(value)); + } else if (VARATT_IS_SHORT(attr)) { result = VARSIZE_SHORT(attr); diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index d94fe581df..e5994112a4 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -4248,7 +4248,6 @@ ExecEvalArrayCoerceExpr(ArrayCoerceExprState *astate, { ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) astate->xprstate.expr; Datum result; - ArrayType *array; FunctionCallInfoData locfcinfo; result = ExecEvalExpr(astate->arg, econtext, isNull, isDone); @@ -4265,14 +4264,12 @@ ExecEvalArrayCoerceExpr(ArrayCoerceExprState *astate, if (!OidIsValid(acoerce->elemfuncid)) { /* Detoast input array if necessary, and copy in any case */ - array = DatumGetArrayTypePCopy(result); + ArrayType *array = DatumGetArrayTypePCopy(result); + ARR_ELEMTYPE(array) = astate->resultelemtype; PG_RETURN_ARRAYTYPE_P(array); } - /* Detoast input array if necessary, but don't make a useless copy */ - array = DatumGetArrayTypeP(result); - /* Initialize function cache if first time through */ if (astate->elemfunc.fn_oid == InvalidOid) { @@ -4302,15 +4299,14 @@ ExecEvalArrayCoerceExpr(ArrayCoerceExprState *astate, */ InitFunctionCallInfoData(locfcinfo, &(astate->elemfunc), 3, InvalidOid, NULL, NULL); - locfcinfo.arg[0] = PointerGetDatum(array); + locfcinfo.arg[0] = result; locfcinfo.arg[1] = Int32GetDatum(acoerce->resulttypmod); locfcinfo.arg[2] = BoolGetDatum(acoerce->isExplicit); locfcinfo.argnull[0] = false; locfcinfo.argnull[1] = false; locfcinfo.argnull[2] = false; - return array_map(&locfcinfo, ARR_ELEMTYPE(array), astate->resultelemtype, - astate->amstate); + return array_map(&locfcinfo, astate->resultelemtype, astate->amstate); } /* ---------------------------------------------------------------- diff --git 
a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 753754dce6..a05d8b1115 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -88,6 +88,7 @@ #include "nodes/nodeFuncs.h" #include "storage/bufmgr.h" #include "utils/builtins.h" +#include "utils/expandeddatum.h" #include "utils/lsyscache.h" #include "utils/typcache.h" @@ -812,6 +813,52 @@ ExecCopySlot(TupleTableSlot *dstslot, TupleTableSlot *srcslot) return ExecStoreTuple(newTuple, dstslot, InvalidBuffer, true); } +/* -------------------------------- + * ExecMakeSlotContentsReadOnly + * Mark any R/W expanded datums in the slot as read-only. + * + * This is needed when a slot that might contain R/W datum references is to be + * used as input for general expression evaluation. Since the expression(s) + * might contain more than one Var referencing the same R/W datum, we could + * get wrong answers if functions acting on those Vars thought they could + * modify the expanded value in-place. + * + * For notational reasons, we return the same slot passed in. + * -------------------------------- + */ +TupleTableSlot * +ExecMakeSlotContentsReadOnly(TupleTableSlot *slot) +{ + /* + * sanity checks + */ + Assert(slot != NULL); + Assert(slot->tts_tupleDescriptor != NULL); + Assert(!slot->tts_isempty); + + /* + * If the slot contains a physical tuple, it can't contain any expanded + * datums, because we flatten those when making a physical tuple. This + * might change later; but for now, we need do nothing unless the slot is + * virtual. 
+ */ + if (slot->tts_tuple == NULL) + { + Form_pg_attribute *att = slot->tts_tupleDescriptor->attrs; + int attnum; + + for (attnum = 0; attnum < slot->tts_nvalid; attnum++) + { + slot->tts_values[attnum] = + MakeExpandedObjectReadOnly(slot->tts_values[attnum], + slot->tts_isnull[attnum], + att[attnum]->attlen); + } + } + + return slot; +} + /* ---------------------------------------------------------------- * convenience initialization routines diff --git a/src/backend/executor/nodeSubqueryscan.c b/src/backend/executor/nodeSubqueryscan.c index 3f66e243d2..e5d1e540c4 100644 --- a/src/backend/executor/nodeSubqueryscan.c +++ b/src/backend/executor/nodeSubqueryscan.c @@ -56,7 +56,15 @@ SubqueryNext(SubqueryScanState *node) * We just return the subplan's result slot, rather than expending extra * cycles for ExecCopySlot(). (Our own ScanTupleSlot is used only for * EvalPlanQual rechecks.) + * + * We do need to mark the slot contents read-only to prevent interference + * between different functions reading the same datum from the slot. It's + * a bit hokey to do this to the subplan's slot, but should be safe + * enough. 
*/ + if (!TupIsNull(slot)) + slot = ExecMakeSlotContentsReadOnly(slot); + return slot; } diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 557d153f2a..472de41f9b 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1015,6 +1015,27 @@ SPI_pfree(void *pointer) pfree(pointer); } +Datum +SPI_datumTransfer(Datum value, bool typByVal, int typLen) +{ + MemoryContext oldcxt = NULL; + Datum result; + + if (_SPI_curid + 1 == _SPI_connected) /* connected */ + { + if (_SPI_current != &(_SPI_stack[_SPI_curid + 1])) + elog(ERROR, "SPI stack corrupted"); + oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt); + } + + result = datumTransfer(value, typByVal, typLen); + + if (oldcxt) + MemoryContextSwitchTo(oldcxt); + + return result; +} + void SPI_freetuple(HeapTuple tuple) { diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 1f1bee7fb5..3ed0b4409e 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -16,10 +16,11 @@ endif endif # keep this list arranged alphabetically or it gets to be a mess -OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ - array_userfuncs.o arrayutils.o ascii.o bool.o \ - cash.o char.o date.o datetime.o datum.o dbsize.o domains.o \ - encode.o enum.o float.o format_type.o formatting.o genfile.o \ +OBJS = acl.o arrayfuncs.o array_expanded.o array_selfuncs.o \ + array_typanalyze.o array_userfuncs.o arrayutils.o ascii.o \ + bool.o cash.o char.o date.o datetime.o datum.o dbsize.o domains.o \ + encode.o enum.o expandeddatum.o \ + float.o format_type.o formatting.o genfile.o \ geo_ops.o geo_selfuncs.o inet_cidr_ntop.o inet_net_pton.o int.o \ int8.o json.o jsonb.o jsonb_gin.o jsonb_op.o jsonb_util.o \ jsonfuncs.o like.o lockfuncs.o mac.o misc.o nabstime.o name.o \ diff --git a/src/backend/utils/adt/array_expanded.c b/src/backend/utils/adt/array_expanded.c new file mode 100644 index 0000000000..97fd4440c8 --- /dev/null +++ 
b/src/backend/utils/adt/array_expanded.c @@ -0,0 +1,455 @@ +/*------------------------------------------------------------------------- + * + * array_expanded.c + * Basic functions for manipulating expanded arrays. + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/array_expanded.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tupmacs.h" +#include "utils/array.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" + + +/* "Methods" required for an expanded object */ +static Size EA_get_flat_size(ExpandedObjectHeader *eohptr); +static void EA_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size); + +static const ExpandedObjectMethods EA_methods = +{ + EA_get_flat_size, + EA_flatten_into +}; + +/* Other local functions */ +static void copy_byval_expanded_array(ExpandedArrayHeader *eah, + ExpandedArrayHeader *oldeah); + + +/* + * expand_array: convert an array Datum into an expanded array + * + * The expanded object will be a child of parentcontext. + * + * Some callers can provide cache space to avoid repeated lookups of element + * type data across calls; if so, pass a metacache pointer, making sure that + * metacache->element_type is initialized to InvalidOid before first call. + * If no cross-call caching is required, pass NULL for metacache. + */ +Datum +expand_array(Datum arraydatum, MemoryContext parentcontext, + ArrayMetaState *metacache) +{ + ArrayType *array; + ExpandedArrayHeader *eah; + MemoryContext objcxt; + MemoryContext oldcxt; + ArrayMetaState fakecache; + + /* + * Allocate private context for expanded object. We start by assuming + * that the array won't be very large; but if it does grow a lot, don't + * constrain aset.c's large-context behavior. 
+ */ + objcxt = AllocSetContextCreate(parentcontext, + "expanded array", + ALLOCSET_SMALL_MINSIZE, + ALLOCSET_SMALL_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + /* Set up expanded array header */ + eah = (ExpandedArrayHeader *) + MemoryContextAlloc(objcxt, sizeof(ExpandedArrayHeader)); + + EOH_init_header(&eah->hdr, &EA_methods, objcxt); + eah->ea_magic = EA_MAGIC; + + /* If the source is an expanded array, we may be able to optimize */ + if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum))) + { + ExpandedArrayHeader *oldeah = (ExpandedArrayHeader *) DatumGetEOHP(arraydatum); + + Assert(oldeah->ea_magic == EA_MAGIC); + + /* + * Update caller's cache if provided; we don't need it this time, but + * next call might be for a non-expanded source array. Furthermore, + * if the caller didn't provide a cache area, use some local storage + * to cache anyway, thereby avoiding a catalog lookup in the case + * where we fall through to the flat-copy code path. + */ + if (metacache == NULL) + metacache = &fakecache; + metacache->element_type = oldeah->element_type; + metacache->typlen = oldeah->typlen; + metacache->typbyval = oldeah->typbyval; + metacache->typalign = oldeah->typalign; + + /* + * If element type is pass-by-value and we have a Datum-array + * representation, just copy the source's metadata and Datum/isnull + * arrays. The original flat array, if present at all, adds no + * additional information so we need not copy it. + */ + if (oldeah->typbyval && oldeah->dvalues != NULL) + { + copy_byval_expanded_array(eah, oldeah); + /* return a R/W pointer to the expanded array */ + return EOHPGetRWDatum(&eah->hdr); + } + + /* + * Otherwise, either we have only a flat representation or the + * elements are pass-by-reference. In either case, the best thing + * seems to be to copy the source as a flat representation and then + * deconstruct that later if necessary. 
For the pass-by-ref case, we + * could perhaps save some cycles with custom code that generates the + * deconstructed representation in parallel with copying the values, + * but it would be a lot of extra code for fairly marginal gain. So, + * fall through into the flat-source code path. + */ + } + + /* + * Detoast and copy source array into private context, as a flat array. + * + * Note that this coding risks leaking some memory in the private context + * if we have to fetch data from a TOAST table; however, experimentation + * says that the leak is minimal. Doing it this way saves a copy step, + * which seems worthwhile, especially if the array is large enough to need + * external storage. + */ + oldcxt = MemoryContextSwitchTo(objcxt); + array = DatumGetArrayTypePCopy(arraydatum); + MemoryContextSwitchTo(oldcxt); + + eah->ndims = ARR_NDIM(array); + /* note these pointers point into the fvalue header! */ + eah->dims = ARR_DIMS(array); + eah->lbound = ARR_LBOUND(array); + + /* Save array's element-type data for possible use later */ + eah->element_type = ARR_ELEMTYPE(array); + if (metacache && metacache->element_type == eah->element_type) + { + /* We have a valid cache of representational data */ + eah->typlen = metacache->typlen; + eah->typbyval = metacache->typbyval; + eah->typalign = metacache->typalign; + } + else + { + /* No, so look it up */ + get_typlenbyvalalign(eah->element_type, + &eah->typlen, + &eah->typbyval, + &eah->typalign); + /* Update cache if provided */ + if (metacache) + { + metacache->element_type = eah->element_type; + metacache->typlen = eah->typlen; + metacache->typbyval = eah->typbyval; + metacache->typalign = eah->typalign; + } + } + + /* we don't make a deconstructed representation now */ + eah->dvalues = NULL; + eah->dnulls = NULL; + eah->dvalueslen = 0; + eah->nelems = 0; + eah->flat_size = 0; + + /* remember we have a flat representation */ + eah->fvalue = array; + eah->fstartptr = ARR_DATA_PTR(array); + eah->fendptr = ((char *) 
array) + ARR_SIZE(array); + + /* return a R/W pointer to the expanded array */ + return EOHPGetRWDatum(&eah->hdr); +} + +/* + * helper for expand_array(): copy pass-by-value Datum-array representation + */ +static void +copy_byval_expanded_array(ExpandedArrayHeader *eah, + ExpandedArrayHeader *oldeah) +{ + MemoryContext objcxt = eah->hdr.eoh_context; + int ndims = oldeah->ndims; + int dvalueslen = oldeah->dvalueslen; + + /* Copy array dimensionality information */ + eah->ndims = ndims; + /* We can alloc both dimensionality arrays with one palloc */ + eah->dims = (int *) MemoryContextAlloc(objcxt, ndims * 2 * sizeof(int)); + eah->lbound = eah->dims + ndims; + /* .. but don't assume the source's arrays are contiguous */ + memcpy(eah->dims, oldeah->dims, ndims * sizeof(int)); + memcpy(eah->lbound, oldeah->lbound, ndims * sizeof(int)); + + /* Copy element-type data */ + eah->element_type = oldeah->element_type; + eah->typlen = oldeah->typlen; + eah->typbyval = oldeah->typbyval; + eah->typalign = oldeah->typalign; + + /* Copy the deconstructed representation */ + eah->dvalues = (Datum *) MemoryContextAlloc(objcxt, + dvalueslen * sizeof(Datum)); + memcpy(eah->dvalues, oldeah->dvalues, dvalueslen * sizeof(Datum)); + if (oldeah->dnulls) + { + eah->dnulls = (bool *) MemoryContextAlloc(objcxt, + dvalueslen * sizeof(bool)); + memcpy(eah->dnulls, oldeah->dnulls, dvalueslen * sizeof(bool)); + } + else + eah->dnulls = NULL; + eah->dvalueslen = dvalueslen; + eah->nelems = oldeah->nelems; + eah->flat_size = oldeah->flat_size; + + /* we don't make a flat representation */ + eah->fvalue = NULL; + eah->fstartptr = NULL; + eah->fendptr = NULL; +} + +/* + * get_flat_size method for expanded arrays + */ +static Size +EA_get_flat_size(ExpandedObjectHeader *eohptr) +{ + ExpandedArrayHeader *eah = (ExpandedArrayHeader *) eohptr; + int nelems; + int ndims; + Datum *dvalues; + bool *dnulls; + Size nbytes; + int i; + + Assert(eah->ea_magic == EA_MAGIC); + + /* Easy if we have a valid 
flattened value */ + if (eah->fvalue) + return ARR_SIZE(eah->fvalue); + + /* If we have a cached size value, believe that */ + if (eah->flat_size) + return eah->flat_size; + + /* + * Compute space needed by examining dvalues/dnulls. Note that the result + * array will have a nulls bitmap if dnulls isn't NULL, even if the array + * doesn't actually contain any nulls now. + */ + nelems = eah->nelems; + ndims = eah->ndims; + Assert(nelems == ArrayGetNItems(ndims, eah->dims)); + dvalues = eah->dvalues; + dnulls = eah->dnulls; + nbytes = 0; + for (i = 0; i < nelems; i++) + { + if (dnulls && dnulls[i]) + continue; + nbytes = att_addlength_datum(nbytes, eah->typlen, dvalues[i]); + nbytes = att_align_nominal(nbytes, eah->typalign); + /* check for overflow of total request */ + if (!AllocSizeIsValid(nbytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); + } + + if (dnulls) + nbytes += ARR_OVERHEAD_WITHNULLS(ndims, nelems); + else + nbytes += ARR_OVERHEAD_NONULLS(ndims); + + /* cache for next time */ + eah->flat_size = nbytes; + + return nbytes; +} + +/* + * flatten_into method for expanded arrays + */ +static void +EA_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size) +{ + ExpandedArrayHeader *eah = (ExpandedArrayHeader *) eohptr; + ArrayType *aresult = (ArrayType *) result; + int nelems; + int ndims; + int32 dataoffset; + + Assert(eah->ea_magic == EA_MAGIC); + + /* Easy if we have a valid flattened value */ + if (eah->fvalue) + { + Assert(allocated_size == ARR_SIZE(eah->fvalue)); + memcpy(result, eah->fvalue, allocated_size); + return; + } + + /* Else allocation should match previous get_flat_size result */ + Assert(allocated_size == eah->flat_size); + + /* Fill result array from dvalues/dnulls */ + nelems = eah->nelems; + ndims = eah->ndims; + + if (eah->dnulls) + dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nelems); + else + dataoffset = 0; /* marker 
for no null bitmap */ + + /* We must ensure that any pad space is zero-filled */ + memset(aresult, 0, allocated_size); + + SET_VARSIZE(aresult, allocated_size); + aresult->ndim = ndims; + aresult->dataoffset = dataoffset; + aresult->elemtype = eah->element_type; + memcpy(ARR_DIMS(aresult), eah->dims, ndims * sizeof(int)); + memcpy(ARR_LBOUND(aresult), eah->lbound, ndims * sizeof(int)); + + CopyArrayEls(aresult, + eah->dvalues, eah->dnulls, nelems, + eah->typlen, eah->typbyval, eah->typalign, + false); +} + +/* + * Argument fetching support code + */ + +/* + * DatumGetExpandedArray: get a writable expanded array from an input argument + * + * Caution: if the input is a read/write pointer, this returns the input + * argument; so callers must be sure that their changes are "safe", that is + * they cannot leave the array in a corrupt state. + */ +ExpandedArrayHeader * +DatumGetExpandedArray(Datum d) +{ + /* If it's a writable expanded array already, just return it */ + if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))) + { + ExpandedArrayHeader *eah = (ExpandedArrayHeader *) DatumGetEOHP(d); + + Assert(eah->ea_magic == EA_MAGIC); + return eah; + } + + /* Else expand the hard way */ + d = expand_array(d, CurrentMemoryContext, NULL); + return (ExpandedArrayHeader *) DatumGetEOHP(d); +} + +/* + * As above, when caller has the ability to cache element type info + */ +ExpandedArrayHeader * +DatumGetExpandedArrayX(Datum d, ArrayMetaState *metacache) +{ + /* If it's a writable expanded array already, just return it */ + if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))) + { + ExpandedArrayHeader *eah = (ExpandedArrayHeader *) DatumGetEOHP(d); + + Assert(eah->ea_magic == EA_MAGIC); + /* Update cache if provided */ + if (metacache) + { + metacache->element_type = eah->element_type; + metacache->typlen = eah->typlen; + metacache->typbyval = eah->typbyval; + metacache->typalign = eah->typalign; + } + return eah; + } + + /* Else expand using caller's cache if any */ + d 
= expand_array(d, CurrentMemoryContext, metacache); + return (ExpandedArrayHeader *) DatumGetEOHP(d); +} + +/* + * DatumGetAnyArray: return either an expanded array or a detoasted varlena + * array. The result must not be modified in-place. + */ +AnyArrayType * +DatumGetAnyArray(Datum d) +{ + ExpandedArrayHeader *eah; + + /* + * If it's an expanded array (RW or RO), return the header pointer. + */ + if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(d))) + { + eah = (ExpandedArrayHeader *) DatumGetEOHP(d); + Assert(eah->ea_magic == EA_MAGIC); + return (AnyArrayType *) eah; + } + + /* Else do regular detoasting as needed */ + return (AnyArrayType *) PG_DETOAST_DATUM(d); +} + +/* + * Create the Datum/isnull representation of an expanded array object + * if we didn't do so previously + */ +void +deconstruct_expanded_array(ExpandedArrayHeader *eah) +{ + if (eah->dvalues == NULL) + { + MemoryContext oldcxt = MemoryContextSwitchTo(eah->hdr.eoh_context); + Datum *dvalues; + bool *dnulls; + int nelems; + + dnulls = NULL; + deconstruct_array(eah->fvalue, + eah->element_type, + eah->typlen, eah->typbyval, eah->typalign, + &dvalues, + ARR_HASNULL(eah->fvalue) ? &dnulls : NULL, + &nelems); + + /* + * Update header only after successful completion of this step. If + * deconstruct_array fails partway through, worst consequence is some + * leaked memory in the object's context. If the caller fails at a + * later point, that's fine, since the deconstructed representation is + * valid anyhow. 
+ */ + eah->dvalues = dvalues; + eah->dnulls = dnulls; + eah->dvalueslen = eah->nelems = nelems; + MemoryContextSwitchTo(oldcxt); + } +} diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c index 4177d2da17..f7b57da48e 100644 --- a/src/backend/utils/adt/array_userfuncs.c +++ b/src/backend/utils/adt/array_userfuncs.c @@ -25,22 +25,36 @@ static Datum array_position_common(FunctionCallInfo fcinfo); /* * fetch_array_arg_replace_nulls * - * Fetch an array-valued argument; if it's null, construct an empty array - * value of the proper data type. Also cache basic element type information - * in fn_extra. + * Fetch an array-valued argument in expanded form; if it's null, construct an + * empty array value of the proper data type. Also cache basic element type + * information in fn_extra. + * + * Caution: if the input is a read/write pointer, this returns the input + * argument; so callers must be sure that their changes are "safe", that is + * they cannot leave the array in a corrupt state. 
*/ -static ArrayType * +static ExpandedArrayHeader * fetch_array_arg_replace_nulls(FunctionCallInfo fcinfo, int argno) { - ArrayType *v; + ExpandedArrayHeader *eah; Oid element_type; ArrayMetaState *my_extra; - /* First collect the array value */ + /* If first time through, create datatype cache struct */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + my_extra = (ArrayMetaState *) + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra->element_type = InvalidOid; + fcinfo->flinfo->fn_extra = my_extra; + } + + /* Now collect the array value */ if (!PG_ARGISNULL(argno)) { - v = PG_GETARG_ARRAYTYPE_P(argno); - element_type = ARR_ELEMTYPE(v); + eah = PG_GETARG_EXPANDED_ARRAYX(argno, my_extra); } else { @@ -57,30 +71,12 @@ fetch_array_arg_replace_nulls(FunctionCallInfo fcinfo, int argno) (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("input data type is not an array"))); - v = construct_empty_array(element_type); - } - - /* Now cache required info, which might change from call to call */ - my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; - if (my_extra == NULL) - { - my_extra = (ArrayMetaState *) - MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, - sizeof(ArrayMetaState)); - my_extra->element_type = InvalidOid; - fcinfo->flinfo->fn_extra = my_extra; - } - - if (my_extra->element_type != element_type) - { - get_typlenbyvalalign(element_type, - &my_extra->typlen, - &my_extra->typbyval, - &my_extra->typalign); - my_extra->element_type = element_type; + eah = construct_empty_expanded_array(element_type, + CurrentMemoryContext, + my_extra); } - return v; + return eah; } /*----------------------------------------------------------------------------- @@ -91,29 +87,29 @@ fetch_array_arg_replace_nulls(FunctionCallInfo fcinfo, int argno) Datum array_append(PG_FUNCTION_ARGS) { - ArrayType *v; + ExpandedArrayHeader *eah; Datum newelem; bool isNull; - ArrayType *result; + Datum result; int *dimv, *lb; int indx; 
ArrayMetaState *my_extra; - v = fetch_array_arg_replace_nulls(fcinfo, 0); + eah = fetch_array_arg_replace_nulls(fcinfo, 0); isNull = PG_ARGISNULL(1); if (isNull) newelem = (Datum) 0; else newelem = PG_GETARG_DATUM(1); - if (ARR_NDIM(v) == 1) + if (eah->ndims == 1) { /* append newelem */ int ub; - lb = ARR_LBOUND(v); - dimv = ARR_DIMS(v); + lb = eah->lbound; + dimv = eah->dims; ub = dimv[0] + lb[0] - 1; indx = ub + 1; @@ -123,7 +119,7 @@ array_append(PG_FUNCTION_ARGS) (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("integer out of range"))); } - else if (ARR_NDIM(v) == 0) + else if (eah->ndims == 0) indx = 1; else ereport(ERROR, @@ -133,10 +129,11 @@ array_append(PG_FUNCTION_ARGS) /* Perform element insertion */ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; - result = array_set(v, 1, &indx, newelem, isNull, + result = array_set_element(EOHPGetRWDatum(&eah->hdr), + 1, &indx, newelem, isNull, -1, my_extra->typlen, my_extra->typbyval, my_extra->typalign); - PG_RETURN_ARRAYTYPE_P(result); + PG_RETURN_DATUM(result); } /*----------------------------------------------------------------------------- @@ -147,12 +144,13 @@ array_append(PG_FUNCTION_ARGS) Datum array_prepend(PG_FUNCTION_ARGS) { - ArrayType *v; + ExpandedArrayHeader *eah; Datum newelem; bool isNull; - ArrayType *result; + Datum result; int *lb; int indx; + int lb0; ArrayMetaState *my_extra; isNull = PG_ARGISNULL(0); @@ -160,13 +158,14 @@ array_prepend(PG_FUNCTION_ARGS) newelem = (Datum) 0; else newelem = PG_GETARG_DATUM(0); - v = fetch_array_arg_replace_nulls(fcinfo, 1); + eah = fetch_array_arg_replace_nulls(fcinfo, 1); - if (ARR_NDIM(v) == 1) + if (eah->ndims == 1) { /* prepend newelem */ - lb = ARR_LBOUND(v); + lb = eah->lbound; indx = lb[0] - 1; + lb0 = lb[0]; /* overflow? 
*/ if (indx > lb[0]) @@ -174,8 +173,11 @@ array_prepend(PG_FUNCTION_ARGS) (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("integer out of range"))); } - else if (ARR_NDIM(v) == 0) + else if (eah->ndims == 0) + { indx = 1; + lb0 = 1; + } else ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), @@ -184,14 +186,19 @@ array_prepend(PG_FUNCTION_ARGS) /* Perform element insertion */ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; - result = array_set(v, 1, &indx, newelem, isNull, + result = array_set_element(EOHPGetRWDatum(&eah->hdr), + 1, &indx, newelem, isNull, -1, my_extra->typlen, my_extra->typbyval, my_extra->typalign); /* Readjust result's LB to match the input's, as expected for prepend */ - if (ARR_NDIM(v) == 1) - ARR_LBOUND(result)[0] = ARR_LBOUND(v)[0]; + Assert(result == EOHPGetRWDatum(&eah->hdr)); + if (eah->ndims == 1) + { + /* This is ok whether we've deconstructed or not */ + eah->lbound[0] = lb0; + } - PG_RETURN_ARRAYTYPE_P(result); + PG_RETURN_DATUM(result); } /*----------------------------------------------------------------------------- diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 9117a5515a..82d79977d7 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -20,11 +20,15 @@ #endif #include +/* See arrayaccess.h */ +#define ARRAYACCESS_INCLUDE_DEFINITIONS + #include "access/htup_details.h" #include "catalog/pg_type.h" #include "funcapi.h" #include "libpq/pqformat.h" #include "utils/array.h" +#include "utils/arrayaccess.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" @@ -42,6 +46,12 @@ bool Array_nulls = true; */ #define ASSGN "=" +#define AARR_FREE_IF_COPY(array,n) \ + do { \ + if (!VARATT_IS_EXPANDED_HEADER(array)) \ + PG_FREE_IF_COPY(array, n); \ + } while (0) + typedef enum { ARRAY_NO_LEVEL, @@ -93,10 +103,16 @@ static void ReadArrayBinary(StringInfo buf, int nitems, int typlen, bool typbyval, char typalign, Datum *values, 
bool *nulls, bool *hasnulls, int32 *nbytes); -static void CopyArrayEls(ArrayType *array, - Datum *values, bool *nulls, int nitems, - int typlen, bool typbyval, char typalign, - bool freedata); +static Datum array_get_element_expanded(Datum arraydatum, + int nSubscripts, int *indx, + int arraytyplen, + int elmlen, bool elmbyval, char elmalign, + bool *isNull); +static Datum array_set_element_expanded(Datum arraydatum, + int nSubscripts, int *indx, + Datum dataValue, bool isNull, + int arraytyplen, + int elmlen, bool elmbyval, char elmalign); static bool array_get_isnull(const bits8 *nullbitmap, int offset); static void array_set_isnull(bits8 *nullbitmap, int offset, bool isNull); static Datum ArrayCast(char *value, bool byval, int len); @@ -939,7 +955,7 @@ ReadArrayStr(char *arrayStr, * the values are not toasted. (Doing it here doesn't work since the * caller has already allocated space for the array...) */ -static void +void CopyArrayEls(ArrayType *array, Datum *values, bool *nulls, @@ -997,8 +1013,8 @@ CopyArrayEls(ArrayType *array, Datum array_out(PG_FUNCTION_ARGS) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); - Oid element_type = ARR_ELEMTYPE(v); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); + Oid element_type = AARR_ELEMTYPE(v); int typlen; bool typbyval; char typalign; @@ -1014,8 +1030,6 @@ array_out(PG_FUNCTION_ARGS) * * +2 allows for assignment operator + trailing null */ - bits8 *bitmap; - int bitmask; bool *needquotes, needdims = false; int nitems, @@ -1027,6 +1041,7 @@ array_out(PG_FUNCTION_ARGS) int ndim, *dims, *lb; + array_iter iter; ArrayMetaState *my_extra; /* @@ -1061,9 +1076,9 @@ array_out(PG_FUNCTION_ARGS) typalign = my_extra->typalign; typdelim = my_extra->typdelim; - ndim = ARR_NDIM(v); - dims = ARR_DIMS(v); - lb = ARR_LBOUND(v); + ndim = AARR_NDIM(v); + dims = AARR_DIMS(v); + lb = AARR_LBOUND(v); nitems = ArrayGetNItems(ndim, dims); if (nitems == 0) @@ -1094,16 +1109,19 @@ array_out(PG_FUNCTION_ARGS) needquotes = (bool *) palloc(nitems * 
sizeof(bool)); overall_length = 1; /* don't forget to count \0 at end. */ - p = ARR_DATA_PTR(v); - bitmap = ARR_NULLBITMAP(v); - bitmask = 1; + array_iter_setup(&iter, v); for (i = 0; i < nitems; i++) { + Datum itemvalue; + bool isnull; bool needquote; /* Get source element, checking for NULL */ - if (bitmap && (*bitmap & bitmask) == 0) + itemvalue = array_iter_next(&iter, &isnull, i, + typlen, typbyval, typalign); + + if (isnull) { values[i] = pstrdup("NULL"); overall_length += 4; @@ -1111,12 +1129,7 @@ array_out(PG_FUNCTION_ARGS) } else { - Datum itemvalue; - - itemvalue = fetch_att(p, typbyval, typlen); values[i] = OutputFunctionCall(&my_extra->proc, itemvalue); - p = att_addlength_pointer(p, typlen, p); - p = (char *) att_align_nominal(p, typalign); /* count data plus backslashes; detect chars needing quotes */ if (values[i][0] == '\0') @@ -1149,17 +1162,6 @@ array_out(PG_FUNCTION_ARGS) overall_length += 2; /* and the comma */ overall_length += 1; - - /* advance bitmap pointer if any */ - if (bitmap) - { - bitmask <<= 1; - if (bitmask == 0x100) - { - bitmap++; - bitmask = 1; - } - } } /* @@ -1534,19 +1536,18 @@ ReadArrayBinary(StringInfo buf, Datum array_send(PG_FUNCTION_ARGS) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); - Oid element_type = ARR_ELEMTYPE(v); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); + Oid element_type = AARR_ELEMTYPE(v); int typlen; bool typbyval; char typalign; - char *p; - bits8 *bitmap; - int bitmask; int nitems, i; int ndim, - *dim; + *dim, + *lb; StringInfoData buf; + array_iter iter; ArrayMetaState *my_extra; /* @@ -1583,60 +1584,49 @@ array_send(PG_FUNCTION_ARGS) typbyval = my_extra->typbyval; typalign = my_extra->typalign; - ndim = ARR_NDIM(v); - dim = ARR_DIMS(v); + ndim = AARR_NDIM(v); + dim = AARR_DIMS(v); + lb = AARR_LBOUND(v); nitems = ArrayGetNItems(ndim, dim); pq_begintypsend(&buf); /* Send the array header information */ pq_sendint(&buf, ndim, 4); - pq_sendint(&buf, ARR_HASNULL(v) ? 
1 : 0, 4); + pq_sendint(&buf, AARR_HASNULL(v) ? 1 : 0, 4); pq_sendint(&buf, element_type, sizeof(Oid)); for (i = 0; i < ndim; i++) { - pq_sendint(&buf, ARR_DIMS(v)[i], 4); - pq_sendint(&buf, ARR_LBOUND(v)[i], 4); + pq_sendint(&buf, dim[i], 4); + pq_sendint(&buf, lb[i], 4); } /* Send the array elements using the element's own sendproc */ - p = ARR_DATA_PTR(v); - bitmap = ARR_NULLBITMAP(v); - bitmask = 1; + array_iter_setup(&iter, v); for (i = 0; i < nitems; i++) { + Datum itemvalue; + bool isnull; + /* Get source element, checking for NULL */ - if (bitmap && (*bitmap & bitmask) == 0) + itemvalue = array_iter_next(&iter, &isnull, i, + typlen, typbyval, typalign); + + if (isnull) { /* -1 length means a NULL */ pq_sendint(&buf, -1, 4); } else { - Datum itemvalue; bytea *outputbytes; - itemvalue = fetch_att(p, typbyval, typlen); outputbytes = SendFunctionCall(&my_extra->proc, itemvalue); pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4); pq_sendbytes(&buf, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); pfree(outputbytes); - - p = att_addlength_pointer(p, typlen, p); - p = (char *) att_align_nominal(p, typalign); - } - - /* advance bitmap pointer if any */ - if (bitmap) - { - bitmask <<= 1; - if (bitmask == 0x100) - { - bitmap++; - bitmask = 1; - } } } @@ -1650,13 +1640,13 @@ array_send(PG_FUNCTION_ARGS) Datum array_ndims(PG_FUNCTION_ARGS) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); /* Sanity check: does it look like an array at all? 
*/ - if (ARR_NDIM(v) <= 0 || ARR_NDIM(v) > MAXDIM) + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) PG_RETURN_NULL(); - PG_RETURN_INT32(ARR_NDIM(v)); + PG_RETURN_INT32(AARR_NDIM(v)); } /* @@ -1666,7 +1656,7 @@ array_ndims(PG_FUNCTION_ARGS) Datum array_dims(PG_FUNCTION_ARGS) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); char *p; int i; int *dimv, @@ -1680,14 +1670,14 @@ array_dims(PG_FUNCTION_ARGS) char buf[MAXDIM * 33 + 1]; /* Sanity check: does it look like an array at all? */ - if (ARR_NDIM(v) <= 0 || ARR_NDIM(v) > MAXDIM) + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) PG_RETURN_NULL(); - dimv = ARR_DIMS(v); - lb = ARR_LBOUND(v); + dimv = AARR_DIMS(v); + lb = AARR_LBOUND(v); p = buf; - for (i = 0; i < ARR_NDIM(v); i++) + for (i = 0; i < AARR_NDIM(v); i++) { sprintf(p, "[%d:%d]", lb[i], dimv[i] + lb[i] - 1); p += strlen(p); @@ -1704,20 +1694,20 @@ array_dims(PG_FUNCTION_ARGS) Datum array_lower(PG_FUNCTION_ARGS) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); int reqdim = PG_GETARG_INT32(1); int *lb; int result; /* Sanity check: does it look like an array at all? */ - if (ARR_NDIM(v) <= 0 || ARR_NDIM(v) > MAXDIM) + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) PG_RETURN_NULL(); /* Sanity check: was the requested dim valid */ - if (reqdim <= 0 || reqdim > ARR_NDIM(v)) + if (reqdim <= 0 || reqdim > AARR_NDIM(v)) PG_RETURN_NULL(); - lb = ARR_LBOUND(v); + lb = AARR_LBOUND(v); result = lb[reqdim - 1]; PG_RETURN_INT32(result); @@ -1731,22 +1721,22 @@ array_lower(PG_FUNCTION_ARGS) Datum array_upper(PG_FUNCTION_ARGS) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); int reqdim = PG_GETARG_INT32(1); int *dimv, *lb; int result; /* Sanity check: does it look like an array at all? 
*/ - if (ARR_NDIM(v) <= 0 || ARR_NDIM(v) > MAXDIM) + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) PG_RETURN_NULL(); /* Sanity check: was the requested dim valid */ - if (reqdim <= 0 || reqdim > ARR_NDIM(v)) + if (reqdim <= 0 || reqdim > AARR_NDIM(v)) PG_RETURN_NULL(); - lb = ARR_LBOUND(v); - dimv = ARR_DIMS(v); + lb = AARR_LBOUND(v); + dimv = AARR_DIMS(v); result = dimv[reqdim - 1] + lb[reqdim - 1] - 1; @@ -1761,20 +1751,20 @@ array_upper(PG_FUNCTION_ARGS) Datum array_length(PG_FUNCTION_ARGS) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); int reqdim = PG_GETARG_INT32(1); int *dimv; int result; /* Sanity check: does it look like an array at all? */ - if (ARR_NDIM(v) <= 0 || ARR_NDIM(v) > MAXDIM) + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) PG_RETURN_NULL(); /* Sanity check: was the requested dim valid */ - if (reqdim <= 0 || reqdim > ARR_NDIM(v)) + if (reqdim <= 0 || reqdim > AARR_NDIM(v)) PG_RETURN_NULL(); - dimv = ARR_DIMS(v); + dimv = AARR_DIMS(v); result = dimv[reqdim - 1]; @@ -1788,9 +1778,9 @@ array_length(PG_FUNCTION_ARGS) Datum array_cardinality(PG_FUNCTION_ARGS) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); - PG_RETURN_INT32(ArrayGetNItems(ARR_NDIM(v), ARR_DIMS(v))); + PG_RETURN_INT32(ArrayGetNItems(AARR_NDIM(v), AARR_DIMS(v))); } @@ -1825,7 +1815,6 @@ array_get_element(Datum arraydatum, char elmalign, bool *isNull) { - ArrayType *array; int i, ndim, *dim, @@ -1850,10 +1839,22 @@ array_get_element(Datum arraydatum, arraydataptr = (char *) DatumGetPointer(arraydatum); arraynullsptr = NULL; } + else if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum))) + { + /* expanded array: let's do this in a separate function */ + return array_get_element_expanded(arraydatum, + nSubscripts, + indx, + arraytyplen, + elmlen, + elmbyval, + elmalign, + isNull); + } else { - /* detoast input array if necessary */ - array = DatumGetArrayTypeP(arraydatum); + /* detoast array if 
necessary, producing normal varlena input */ + ArrayType *array = DatumGetArrayTypeP(arraydatum); ndim = ARR_NDIM(array); dim = ARR_DIMS(array); @@ -1902,6 +1903,88 @@ array_get_element(Datum arraydatum, return ArrayCast(retptr, elmbyval, elmlen); } +/* + * Implementation of array_get_element() for an expanded array + */ +static Datum +array_get_element_expanded(Datum arraydatum, + int nSubscripts, int *indx, + int arraytyplen, + int elmlen, bool elmbyval, char elmalign, + bool *isNull) +{ + ExpandedArrayHeader *eah; + int i, + ndim, + *dim, + *lb, + offset; + Datum *dvalues; + bool *dnulls; + + eah = (ExpandedArrayHeader *) DatumGetEOHP(arraydatum); + Assert(eah->ea_magic == EA_MAGIC); + + /* sanity-check caller's info against object */ + Assert(arraytyplen == -1); + Assert(elmlen == eah->typlen); + Assert(elmbyval == eah->typbyval); + Assert(elmalign == eah->typalign); + + ndim = eah->ndims; + dim = eah->dims; + lb = eah->lbound; + + /* + * Return NULL for invalid subscript + */ + if (ndim != nSubscripts || ndim <= 0 || ndim > MAXDIM) + { + *isNull = true; + return (Datum) 0; + } + for (i = 0; i < ndim; i++) + { + if (indx[i] < lb[i] || indx[i] >= (dim[i] + lb[i])) + { + *isNull = true; + return (Datum) 0; + } + } + + /* + * Calculate the element number + */ + offset = ArrayGetOffset(nSubscripts, dim, lb, indx); + + /* + * Deconstruct array if we didn't already. Note that we apply this even + * if the input is nominally read-only: it should be safe enough. + */ + deconstruct_expanded_array(eah); + + dvalues = eah->dvalues; + dnulls = eah->dnulls; + + /* + * Check for NULL array element + */ + if (dnulls && dnulls[offset]) + { + *isNull = true; + return (Datum) 0; + } + + /* + * OK, get the element. It's OK to return a pass-by-ref value as a + * pointer into the expanded array, for the same reason that regular + * array_get_element can return a pointer into flat arrays: the value is + * assumed not to change for as long as the Datum reference can exist. 
+ */ + *isNull = false; + return dvalues[offset]; +} + /* * array_get_slice : * This routine takes an array and a range of indices (upperIndex and @@ -2083,7 +2166,9 @@ array_get_slice(Datum arraydatum, * * Result: * A new array is returned, just like the old except for the one - * modified entry. The original array object is not changed. + * modified entry. The original array object is not changed, + * unless what is passed is a read-write reference to an expanded + * array object; in that case the expanded array is updated in-place. * * For one-dimensional arrays only, we allow the array to be extended * by assigning to a position outside the existing subscript range; any @@ -2166,6 +2251,20 @@ array_set_element(Datum arraydatum, if (elmlen == -1 && !isNull) dataValue = PointerGetDatum(PG_DETOAST_DATUM(dataValue)); + if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum))) + { + /* expanded array: let's do this in a separate function */ + return array_set_element_expanded(arraydatum, + nSubscripts, + indx, + dataValue, + isNull, + arraytyplen, + elmlen, + elmbyval, + elmalign); + } + /* detoast input array if necessary */ array = DatumGetArrayTypeP(arraydatum); @@ -2354,6 +2453,251 @@ array_set_element(Datum arraydatum, return PointerGetDatum(newarray); } +/* + * Implementation of array_set_element() for an expanded array + * + * Note: as with any operation on a read/write expanded object, we must + * take pains not to leave the object in a corrupt state if we fail partway + * through. 
+ */ +static Datum +array_set_element_expanded(Datum arraydatum, + int nSubscripts, int *indx, + Datum dataValue, bool isNull, + int arraytyplen, + int elmlen, bool elmbyval, char elmalign) +{ + ExpandedArrayHeader *eah; + Datum *dvalues; + bool *dnulls; + int i, + ndim, + dim[MAXDIM], + lb[MAXDIM], + offset; + bool dimschanged, + newhasnulls; + int addedbefore, + addedafter; + char *oldValue; + + /* Convert to R/W object if not so already */ + eah = DatumGetExpandedArray(arraydatum); + + /* Sanity-check caller's info against object; we don't use it otherwise */ + Assert(arraytyplen == -1); + Assert(elmlen == eah->typlen); + Assert(elmbyval == eah->typbyval); + Assert(elmalign == eah->typalign); + + /* + * Copy dimension info into local storage. This allows us to modify the + * dimensions if needed, while not messing up the expanded value if we + * fail partway through. + */ + ndim = eah->ndims; + Assert(ndim >= 0 && ndim <= MAXDIM); + memcpy(dim, eah->dims, ndim * sizeof(int)); + memcpy(lb, eah->lbound, ndim * sizeof(int)); + dimschanged = false; + + /* + * if number of dims is zero, i.e. an empty array, create an array with + * nSubscripts dimensions, and set the lower bounds to the supplied + * subscripts. + */ + if (ndim == 0) + { + /* + * Allocate adequate space for new dimension info. This is harmless + * if we fail later. + */ + Assert(nSubscripts > 0 && nSubscripts <= MAXDIM); + eah->dims = (int *) MemoryContextAllocZero(eah->hdr.eoh_context, + nSubscripts * sizeof(int)); + eah->lbound = (int *) MemoryContextAllocZero(eah->hdr.eoh_context, + nSubscripts * sizeof(int)); + + /* Update local copies of dimension info */ + ndim = nSubscripts; + for (i = 0; i < nSubscripts; i++) + { + dim[i] = 0; + lb[i] = indx[i]; + } + dimschanged = true; + } + else if (ndim != nSubscripts) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + /* + * Deconstruct array if we didn't already. 
(Someday maybe add a special + * case path for fixed-length, no-nulls cases, where we can overwrite an + * element in place without ever deconstructing. But today is not that + * day.) + */ + deconstruct_expanded_array(eah); + + /* + * Copy new element into array's context, if needed (we assume it's + * already detoasted, so no junk should be created). If we fail further + * down, this memory is leaked, but that's reasonably harmless. + */ + if (!eah->typbyval && !isNull) + { + MemoryContext oldcxt = MemoryContextSwitchTo(eah->hdr.eoh_context); + + dataValue = datumCopy(dataValue, false, eah->typlen); + MemoryContextSwitchTo(oldcxt); + } + + dvalues = eah->dvalues; + dnulls = eah->dnulls; + + newhasnulls = ((dnulls != NULL) || isNull); + addedbefore = addedafter = 0; + + /* + * Check subscripts (this logic matches original array_set_element) + */ + if (ndim == 1) + { + if (indx[0] < lb[0]) + { + addedbefore = lb[0] - indx[0]; + dim[0] += addedbefore; + lb[0] = indx[0]; + dimschanged = true; + if (addedbefore > 1) + newhasnulls = true; /* will insert nulls */ + } + if (indx[0] >= (dim[0] + lb[0])) + { + addedafter = indx[0] - (dim[0] + lb[0]) + 1; + dim[0] += addedafter; + dimschanged = true; + if (addedafter > 1) + newhasnulls = true; /* will insert nulls */ + } + } + else + { + /* + * XXX currently we do not support extending multi-dimensional arrays + * during assignment + */ + for (i = 0; i < ndim; i++) + { + if (indx[i] < lb[i] || + indx[i] >= (dim[i] + lb[i])) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array subscript out of range"))); + } + } + + /* Now we can calculate linear offset of target item in array */ + offset = ArrayGetOffset(nSubscripts, dim, lb, indx); + + /* Physically enlarge existing dvalues/dnulls arrays if needed */ + if (dim[0] > eah->dvalueslen) + { + /* We want some extra space if we're enlarging */ + int newlen = dim[0] + dim[0] / 8; + + newlen = Max(newlen, dim[0]); /* integer overflow guard */ + eah->dvalues = 
dvalues = (Datum *) + repalloc(dvalues, newlen * sizeof(Datum)); + if (dnulls) + eah->dnulls = dnulls = (bool *) + repalloc(dnulls, newlen * sizeof(bool)); + eah->dvalueslen = newlen; + } + + /* + * If we need a nulls bitmap and don't already have one, create it, being + * sure to mark all existing entries as not null. + */ + if (newhasnulls && dnulls == NULL) + eah->dnulls = dnulls = (bool *) + MemoryContextAllocZero(eah->hdr.eoh_context, + eah->dvalueslen * sizeof(bool)); + + /* + * We now have all the needed space allocated, so we're ready to make + * irreversible changes. Be very wary of allowing failure below here. + */ + + /* Flattened value will no longer represent array accurately */ + eah->fvalue = NULL; + /* And we don't know the flattened size either */ + eah->flat_size = 0; + + /* Update dimensionality info if needed */ + if (dimschanged) + { + eah->ndims = ndim; + memcpy(eah->dims, dim, ndim * sizeof(int)); + memcpy(eah->lbound, lb, ndim * sizeof(int)); + } + + /* Reposition items if needed, and fill addedbefore items with nulls */ + if (addedbefore > 0) + { + memmove(dvalues + addedbefore, dvalues, eah->nelems * sizeof(Datum)); + for (i = 0; i < addedbefore; i++) + dvalues[i] = (Datum) 0; + if (dnulls) + { + memmove(dnulls + addedbefore, dnulls, eah->nelems * sizeof(bool)); + for (i = 0; i < addedbefore; i++) + dnulls[i] = true; + } + eah->nelems += addedbefore; + } + + /* fill addedafter items with nulls */ + if (addedafter > 0) + { + for (i = 0; i < addedafter; i++) + dvalues[eah->nelems + i] = (Datum) 0; + if (dnulls) + { + for (i = 0; i < addedafter; i++) + dnulls[eah->nelems + i] = true; + } + eah->nelems += addedafter; + } + + /* Grab old element value for pfree'ing, if needed. */ + if (!eah->typbyval && (dnulls == NULL || !dnulls[offset])) + oldValue = (char *) DatumGetPointer(dvalues[offset]); + else + oldValue = NULL; + + /* And finally we can insert the new element. 
*/ + dvalues[offset] = dataValue; + if (dnulls) + dnulls[offset] = isNull; + + /* + * Free old element if needed; this keeps repeated element replacements + * from bloating the array's storage. If the pfree somehow fails, it + * won't corrupt the array. + */ + if (oldValue) + { + /* Don't try to pfree a part of the original flat array */ + if (oldValue < eah->fstartptr || oldValue >= eah->fendptr) + pfree(oldValue); + } + + /* Done, return standard TOAST pointer for object */ + return EOHPGetRWDatum(&eah->hdr); +} + /* * array_set_slice : * This routine sets the value of a range of array locations (specified @@ -2734,8 +3078,6 @@ array_set(ArrayType *array, int nSubscripts, int *indx, * the function fn(), and if nargs > 1 then argument positions after the * first must be preset to the additional values to be passed. The * first argument position initially holds the input array value. - * * inpType: OID of element type of input array. This must be the same as, - * or binary-compatible with, the first argument type of fn(). * * retType: OID of element type of output array. This must be the same as, * or binary-compatible with, the result type of fn(). * * amstate: workspace for array_map. Must be zeroed by caller before @@ -2749,14 +3091,12 @@ array_set(ArrayType *array, int nSubscripts, int *indx, * the array are OK however. 
*/ Datum -array_map(FunctionCallInfo fcinfo, Oid inpType, Oid retType, - ArrayMapState *amstate) +array_map(FunctionCallInfo fcinfo, Oid retType, ArrayMapState *amstate) { - ArrayType *v; + AnyArrayType *v; ArrayType *result; Datum *values; bool *nulls; - Datum elt; int *dim; int ndim; int nitems; @@ -2764,15 +3104,14 @@ array_map(FunctionCallInfo fcinfo, Oid inpType, Oid retType, int32 nbytes = 0; int32 dataoffset; bool hasnulls; + Oid inpType; int inp_typlen; bool inp_typbyval; char inp_typalign; int typlen; bool typbyval; char typalign; - char *s; - bits8 *bitmap; - int bitmask; + array_iter iter; ArrayMetaState *inp_extra; ArrayMetaState *ret_extra; @@ -2781,12 +3120,11 @@ array_map(FunctionCallInfo fcinfo, Oid inpType, Oid retType, elog(ERROR, "invalid nargs: %d", fcinfo->nargs); if (PG_ARGISNULL(0)) elog(ERROR, "null input array"); - v = PG_GETARG_ARRAYTYPE_P(0); - - Assert(ARR_ELEMTYPE(v) == inpType); + v = PG_GETARG_ANY_ARRAY(0); - ndim = ARR_NDIM(v); - dim = ARR_DIMS(v); + inpType = AARR_ELEMTYPE(v); + ndim = AARR_NDIM(v); + dim = AARR_DIMS(v); nitems = ArrayGetNItems(ndim, dim); /* Check for empty array */ @@ -2833,9 +3171,7 @@ array_map(FunctionCallInfo fcinfo, Oid inpType, Oid retType, nulls = (bool *) palloc(nitems * sizeof(bool)); /* Loop over source data */ - s = ARR_DATA_PTR(v); - bitmap = ARR_NULLBITMAP(v); - bitmask = 1; + array_iter_setup(&iter, v); hasnulls = false; for (i = 0; i < nitems; i++) @@ -2843,18 +3179,8 @@ array_map(FunctionCallInfo fcinfo, Oid inpType, Oid retType, bool callit = true; /* Get source element, checking for NULL */ - if (bitmap && (*bitmap & bitmask) == 0) - { - fcinfo->argnull[0] = true; - } - else - { - elt = fetch_att(s, inp_typbyval, inp_typlen); - s = att_addlength_datum(s, inp_typlen, elt); - s = (char *) att_align_nominal(s, inp_typalign); - fcinfo->arg[0] = elt; - fcinfo->argnull[0] = false; - } + fcinfo->arg[0] = array_iter_next(&iter, &fcinfo->argnull[0], i, + inp_typlen, inp_typbyval, inp_typalign); /* * Apply 
the given function to source elt and extra args. @@ -2899,17 +3225,6 @@ array_map(FunctionCallInfo fcinfo, Oid inpType, Oid retType, errmsg("array size exceeds the maximum allowed (%d)", (int) MaxAllocSize))); } - - /* advance bitmap pointer if any */ - if (bitmap) - { - bitmask <<= 1; - if (bitmask == 0x100) - { - bitmap++; - bitmask = 1; - } - } } /* Allocate and initialize the result array */ @@ -2928,7 +3243,8 @@ array_map(FunctionCallInfo fcinfo, Oid inpType, Oid retType, result->ndim = ndim; result->dataoffset = dataoffset; result->elemtype = retType; - memcpy(ARR_DIMS(result), ARR_DIMS(v), 2 * ndim * sizeof(int)); + memcpy(ARR_DIMS(result), AARR_DIMS(v), ndim * sizeof(int)); + memcpy(ARR_LBOUND(result), AARR_LBOUND(v), ndim * sizeof(int)); /* * Note: do not risk trying to pfree the results of the called function @@ -3091,6 +3407,23 @@ construct_empty_array(Oid elmtype) return result; } +/* + * construct_empty_expanded_array: make an empty expanded array + * given only type information. (metacache can be NULL if not needed.) 
+ */ +ExpandedArrayHeader * +construct_empty_expanded_array(Oid element_type, + MemoryContext parentcontext, + ArrayMetaState *metacache) +{ + ArrayType *array = construct_empty_array(element_type); + Datum d; + + d = expand_array(PointerGetDatum(array), parentcontext, metacache); + pfree(array); + return (ExpandedArrayHeader *) DatumGetEOHP(d); +} + /* * deconstruct_array --- simple method for extracting data from an array * @@ -3229,36 +3562,36 @@ array_contains_nulls(ArrayType *array) Datum array_eq(PG_FUNCTION_ARGS) { - ArrayType *array1 = PG_GETARG_ARRAYTYPE_P(0); - ArrayType *array2 = PG_GETARG_ARRAYTYPE_P(1); + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY(1); Oid collation = PG_GET_COLLATION(); - int ndims1 = ARR_NDIM(array1); - int ndims2 = ARR_NDIM(array2); - int *dims1 = ARR_DIMS(array1); - int *dims2 = ARR_DIMS(array2); - Oid element_type = ARR_ELEMTYPE(array1); + int ndims1 = AARR_NDIM(array1); + int ndims2 = AARR_NDIM(array2); + int *dims1 = AARR_DIMS(array1); + int *dims2 = AARR_DIMS(array2); + int *lbs1 = AARR_LBOUND(array1); + int *lbs2 = AARR_LBOUND(array2); + Oid element_type = AARR_ELEMTYPE(array1); bool result = true; int nitems; TypeCacheEntry *typentry; int typlen; bool typbyval; char typalign; - char *ptr1; - char *ptr2; - bits8 *bitmap1; - bits8 *bitmap2; - int bitmask; + array_iter it1; + array_iter it2; int i; FunctionCallInfoData locfcinfo; - if (element_type != ARR_ELEMTYPE(array2)) + if (element_type != AARR_ELEMTYPE(array2)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot compare arrays of different element types"))); /* fast path if the arrays do not have the same dimensionality */ if (ndims1 != ndims2 || - memcmp(dims1, dims2, 2 * ndims1 * sizeof(int)) != 0) + memcmp(dims1, dims2, ndims1 * sizeof(int)) != 0 || + memcmp(lbs1, lbs2, ndims1 * sizeof(int)) != 0) result = false; else { @@ -3293,11 +3626,8 @@ array_eq(PG_FUNCTION_ARGS) /* Loop over source data */ nitems = 
ArrayGetNItems(ndims1, dims1); - ptr1 = ARR_DATA_PTR(array1); - ptr2 = ARR_DATA_PTR(array2); - bitmap1 = ARR_NULLBITMAP(array1); - bitmap2 = ARR_NULLBITMAP(array2); - bitmask = 1; /* use same bitmask for both arrays */ + array_iter_setup(&it1, array1); + array_iter_setup(&it2, array2); for (i = 0; i < nitems; i++) { @@ -3308,42 +3638,10 @@ array_eq(PG_FUNCTION_ARGS) bool oprresult; /* Get elements, checking for NULL */ - if (bitmap1 && (*bitmap1 & bitmask) == 0) - { - isnull1 = true; - elt1 = (Datum) 0; - } - else - { - isnull1 = false; - elt1 = fetch_att(ptr1, typbyval, typlen); - ptr1 = att_addlength_pointer(ptr1, typlen, ptr1); - ptr1 = (char *) att_align_nominal(ptr1, typalign); - } - - if (bitmap2 && (*bitmap2 & bitmask) == 0) - { - isnull2 = true; - elt2 = (Datum) 0; - } - else - { - isnull2 = false; - elt2 = fetch_att(ptr2, typbyval, typlen); - ptr2 = att_addlength_pointer(ptr2, typlen, ptr2); - ptr2 = (char *) att_align_nominal(ptr2, typalign); - } - - /* advance bitmap pointers if any */ - bitmask <<= 1; - if (bitmask == 0x100) - { - if (bitmap1) - bitmap1++; - if (bitmap2) - bitmap2++; - bitmask = 1; - } + elt1 = array_iter_next(&it1, &isnull1, i, + typlen, typbyval, typalign); + elt2 = array_iter_next(&it2, &isnull2, i, + typlen, typbyval, typalign); /* * We consider two NULLs equal; NULL and not-NULL are unequal. @@ -3374,8 +3672,8 @@ array_eq(PG_FUNCTION_ARGS) } /* Avoid leaking memory when handed toasted input. 
*/ - PG_FREE_IF_COPY(array1, 0); - PG_FREE_IF_COPY(array2, 1); + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); PG_RETURN_BOOL(result); } @@ -3435,31 +3733,28 @@ btarraycmp(PG_FUNCTION_ARGS) static int array_cmp(FunctionCallInfo fcinfo) { - ArrayType *array1 = PG_GETARG_ARRAYTYPE_P(0); - ArrayType *array2 = PG_GETARG_ARRAYTYPE_P(1); + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY(1); Oid collation = PG_GET_COLLATION(); - int ndims1 = ARR_NDIM(array1); - int ndims2 = ARR_NDIM(array2); - int *dims1 = ARR_DIMS(array1); - int *dims2 = ARR_DIMS(array2); + int ndims1 = AARR_NDIM(array1); + int ndims2 = AARR_NDIM(array2); + int *dims1 = AARR_DIMS(array1); + int *dims2 = AARR_DIMS(array2); int nitems1 = ArrayGetNItems(ndims1, dims1); int nitems2 = ArrayGetNItems(ndims2, dims2); - Oid element_type = ARR_ELEMTYPE(array1); + Oid element_type = AARR_ELEMTYPE(array1); int result = 0; TypeCacheEntry *typentry; int typlen; bool typbyval; char typalign; int min_nitems; - char *ptr1; - char *ptr2; - bits8 *bitmap1; - bits8 *bitmap2; - int bitmask; + array_iter it1; + array_iter it2; int i; FunctionCallInfoData locfcinfo; - if (element_type != ARR_ELEMTYPE(array2)) + if (element_type != AARR_ELEMTYPE(array2)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot compare arrays of different element types"))); @@ -3495,11 +3790,8 @@ array_cmp(FunctionCallInfo fcinfo) /* Loop over source data */ min_nitems = Min(nitems1, nitems2); - ptr1 = ARR_DATA_PTR(array1); - ptr2 = ARR_DATA_PTR(array2); - bitmap1 = ARR_NULLBITMAP(array1); - bitmap2 = ARR_NULLBITMAP(array2); - bitmask = 1; /* use same bitmask for both arrays */ + array_iter_setup(&it1, array1); + array_iter_setup(&it2, array2); for (i = 0; i < min_nitems; i++) { @@ -3510,42 +3802,8 @@ array_cmp(FunctionCallInfo fcinfo) int32 cmpresult; /* Get elements, checking for NULL */ - if (bitmap1 && (*bitmap1 & bitmask) == 0) - { - isnull1 = true; - elt1 = (Datum) 0; - 
} - else - { - isnull1 = false; - elt1 = fetch_att(ptr1, typbyval, typlen); - ptr1 = att_addlength_pointer(ptr1, typlen, ptr1); - ptr1 = (char *) att_align_nominal(ptr1, typalign); - } - - if (bitmap2 && (*bitmap2 & bitmask) == 0) - { - isnull2 = true; - elt2 = (Datum) 0; - } - else - { - isnull2 = false; - elt2 = fetch_att(ptr2, typbyval, typlen); - ptr2 = att_addlength_pointer(ptr2, typlen, ptr2); - ptr2 = (char *) att_align_nominal(ptr2, typalign); - } - - /* advance bitmap pointers if any */ - bitmask <<= 1; - if (bitmask == 0x100) - { - if (bitmap1) - bitmap1++; - if (bitmap2) - bitmap2++; - bitmask = 1; - } + elt1 = array_iter_next(&it1, &isnull1, i, typlen, typbyval, typalign); + elt2 = array_iter_next(&it2, &isnull2, i, typlen, typbyval, typalign); /* * We consider two NULLs equal; NULL > not-NULL. @@ -3604,8 +3862,7 @@ array_cmp(FunctionCallInfo fcinfo) result = (ndims1 < ndims2) ? -1 : 1; else { - /* this relies on LB array immediately following DIMS array */ - for (i = 0; i < ndims1 * 2; i++) + for (i = 0; i < ndims1; i++) { if (dims1[i] != dims2[i]) { @@ -3613,12 +3870,26 @@ array_cmp(FunctionCallInfo fcinfo) break; } } + if (result == 0) + { + int *lbound1 = AARR_LBOUND(array1); + int *lbound2 = AARR_LBOUND(array2); + + for (i = 0; i < ndims1; i++) + { + if (lbound1[i] != lbound2[i]) + { + result = (lbound1[i] < lbound2[i]) ? -1 : 1; + break; + } + } + } } } /* Avoid leaking memory when handed toasted input. 
*/ - PG_FREE_IF_COPY(array1, 0); - PG_FREE_IF_COPY(array2, 1); + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); return result; } @@ -3633,20 +3904,18 @@ array_cmp(FunctionCallInfo fcinfo) Datum hash_array(PG_FUNCTION_ARGS) { - ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); - int ndims = ARR_NDIM(array); - int *dims = ARR_DIMS(array); - Oid element_type = ARR_ELEMTYPE(array); + AnyArrayType *array = PG_GETARG_ANY_ARRAY(0); + int ndims = AARR_NDIM(array); + int *dims = AARR_DIMS(array); + Oid element_type = AARR_ELEMTYPE(array); uint32 result = 1; int nitems; TypeCacheEntry *typentry; int typlen; bool typbyval; char typalign; - char *ptr; - bits8 *bitmap; - int bitmask; int i; + array_iter iter; FunctionCallInfoData locfcinfo; /* @@ -3680,28 +3949,24 @@ hash_array(PG_FUNCTION_ARGS) /* Loop over source data */ nitems = ArrayGetNItems(ndims, dims); - ptr = ARR_DATA_PTR(array); - bitmap = ARR_NULLBITMAP(array); - bitmask = 1; + array_iter_setup(&iter, array); for (i = 0; i < nitems; i++) { + Datum elt; + bool isnull; uint32 elthash; /* Get element, checking for NULL */ - if (bitmap && (*bitmap & bitmask) == 0) + elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign); + + if (isnull) { /* Treat nulls as having hashvalue 0 */ elthash = 0; } else { - Datum elt; - - elt = fetch_att(ptr, typbyval, typlen); - ptr = att_addlength_pointer(ptr, typlen, ptr); - ptr = (char *) att_align_nominal(ptr, typalign); - /* Apply the hash function */ locfcinfo.arg[0] = elt; locfcinfo.argnull[0] = false; @@ -3709,17 +3974,6 @@ hash_array(PG_FUNCTION_ARGS) elthash = DatumGetUInt32(FunctionCallInvoke(&locfcinfo)); } - /* advance bitmap pointer if any */ - if (bitmap) - { - bitmask <<= 1; - if (bitmask == 0x100) - { - bitmap++; - bitmask = 1; - } - } - /* * Combine hash values of successive elements by multiplying the * current value by 31 and adding on the new element's hash value. 
@@ -3735,7 +3989,7 @@ hash_array(PG_FUNCTION_ARGS) } /* Avoid leaking memory when handed toasted input. */ - PG_FREE_IF_COPY(array, 0); + AARR_FREE_IF_COPY(array, 0); PG_RETURN_UINT32(result); } @@ -3756,11 +4010,11 @@ hash_array(PG_FUNCTION_ARGS) * When matchall is false, return true if any members of array1 are in array2. */ static bool -array_contain_compare(ArrayType *array1, ArrayType *array2, Oid collation, +array_contain_compare(AnyArrayType *array1, AnyArrayType *array2, Oid collation, bool matchall, void **fn_extra) { bool result = matchall; - Oid element_type = ARR_ELEMTYPE(array1); + Oid element_type = AARR_ELEMTYPE(array1); TypeCacheEntry *typentry; int nelems1; Datum *values2; @@ -3769,14 +4023,12 @@ array_contain_compare(ArrayType *array1, ArrayType *array2, Oid collation, int typlen; bool typbyval; char typalign; - char *ptr1; - bits8 *bitmap1; - int bitmask; int i; int j; + array_iter it1; FunctionCallInfoData locfcinfo; - if (element_type != ARR_ELEMTYPE(array2)) + if (element_type != AARR_ELEMTYPE(array2)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot compare arrays of different element types"))); @@ -3809,8 +4061,18 @@ array_contain_compare(ArrayType *array1, ArrayType *array2, Oid collation, * worthwhile to use deconstruct_array on it. We scan array1 the hard way * however, since we very likely won't need to look at all of it. */ - deconstruct_array(array2, element_type, typlen, typbyval, typalign, - &values2, &nulls2, &nelems2); + if (VARATT_IS_EXPANDED_HEADER(array2)) + { + /* This should be safe even if input is read-only */ + deconstruct_expanded_array(&(array2->xpn)); + values2 = array2->xpn.dvalues; + nulls2 = array2->xpn.dnulls; + nelems2 = array2->xpn.nelems; + } + else + deconstruct_array(&(array2->flt), + element_type, typlen, typbyval, typalign, + &values2, &nulls2, &nelems2); /* * Apply the comparison operator to each pair of array elements. 
@@ -3819,10 +4081,8 @@ array_contain_compare(ArrayType *array1, ArrayType *array2, Oid collation, collation, NULL, NULL); /* Loop over source data */ - nelems1 = ArrayGetNItems(ARR_NDIM(array1), ARR_DIMS(array1)); - ptr1 = ARR_DATA_PTR(array1); - bitmap1 = ARR_NULLBITMAP(array1); - bitmask = 1; + nelems1 = ArrayGetNItems(AARR_NDIM(array1), AARR_DIMS(array1)); + array_iter_setup(&it1, array1); for (i = 0; i < nelems1; i++) { @@ -3830,27 +4090,7 @@ array_contain_compare(ArrayType *array1, ArrayType *array2, Oid collation, bool isnull1; /* Get element, checking for NULL */ - if (bitmap1 && (*bitmap1 & bitmask) == 0) - { - isnull1 = true; - elt1 = (Datum) 0; - } - else - { - isnull1 = false; - elt1 = fetch_att(ptr1, typbyval, typlen); - ptr1 = att_addlength_pointer(ptr1, typlen, ptr1); - ptr1 = (char *) att_align_nominal(ptr1, typalign); - } - - /* advance bitmap pointer if any */ - bitmask <<= 1; - if (bitmask == 0x100) - { - if (bitmap1) - bitmap1++; - bitmask = 1; - } + elt1 = array_iter_next(&it1, &isnull1, i, typlen, typbyval, typalign); /* * We assume that the comparison operator is strict, so a NULL can't @@ -3909,17 +4149,14 @@ array_contain_compare(ArrayType *array1, ArrayType *array2, Oid collation, } } - pfree(values2); - pfree(nulls2); - return result; } Datum arrayoverlap(PG_FUNCTION_ARGS) { - ArrayType *array1 = PG_GETARG_ARRAYTYPE_P(0); - ArrayType *array2 = PG_GETARG_ARRAYTYPE_P(1); + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY(1); Oid collation = PG_GET_COLLATION(); bool result; @@ -3927,8 +4164,8 @@ arrayoverlap(PG_FUNCTION_ARGS) &fcinfo->flinfo->fn_extra); /* Avoid leaking memory when handed toasted input. 
*/ - PG_FREE_IF_COPY(array1, 0); - PG_FREE_IF_COPY(array2, 1); + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); PG_RETURN_BOOL(result); } @@ -3936,8 +4173,8 @@ arrayoverlap(PG_FUNCTION_ARGS) Datum arraycontains(PG_FUNCTION_ARGS) { - ArrayType *array1 = PG_GETARG_ARRAYTYPE_P(0); - ArrayType *array2 = PG_GETARG_ARRAYTYPE_P(1); + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY(1); Oid collation = PG_GET_COLLATION(); bool result; @@ -3945,8 +4182,8 @@ arraycontains(PG_FUNCTION_ARGS) &fcinfo->flinfo->fn_extra); /* Avoid leaking memory when handed toasted input. */ - PG_FREE_IF_COPY(array1, 0); - PG_FREE_IF_COPY(array2, 1); + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); PG_RETURN_BOOL(result); } @@ -3954,8 +4191,8 @@ arraycontains(PG_FUNCTION_ARGS) Datum arraycontained(PG_FUNCTION_ARGS) { - ArrayType *array1 = PG_GETARG_ARRAYTYPE_P(0); - ArrayType *array2 = PG_GETARG_ARRAYTYPE_P(1); + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY(1); Oid collation = PG_GET_COLLATION(); bool result; @@ -3963,8 +4200,8 @@ arraycontained(PG_FUNCTION_ARGS) &fcinfo->flinfo->fn_extra); /* Avoid leaking memory when handed toasted input. */ - PG_FREE_IF_COPY(array1, 0); - PG_FREE_IF_COPY(array2, 1); + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); PG_RETURN_BOOL(result); } @@ -4702,7 +4939,8 @@ initArrayResult(Oid element_type, MemoryContext rcontext, bool subcontext) MemoryContextAlloc(arr_context, sizeof(ArrayBuildState)); astate->mcontext = arr_context; astate->private_cxt = subcontext; - astate->alen = (subcontext ? 64 : 8); /* arbitrary starting array size */ + astate->alen = (subcontext ? 
64 : 8); /* arbitrary starting array + * size */ astate->dvalues = (Datum *) MemoryContextAlloc(arr_context, astate->alen * sizeof(Datum)); astate->dnulls = (bool *) @@ -4878,10 +5116,11 @@ initArrayResultArr(Oid array_type, Oid element_type, MemoryContext rcontext, bool subcontext) { ArrayBuildStateArr *astate; - MemoryContext arr_context = rcontext; /* by default use the parent ctx */ + MemoryContext arr_context = rcontext; /* by default use the parent + * ctx */ /* Lookup element type, unless element_type already provided */ - if (! OidIsValid(element_type)) + if (!OidIsValid(element_type)) { element_type = get_element_type(array_type); @@ -5259,31 +5498,19 @@ makeArrayResultAny(ArrayBuildStateAny *astate, Datum array_larger(PG_FUNCTION_ARGS) { - ArrayType *v1, - *v2, - *result; - - v1 = PG_GETARG_ARRAYTYPE_P(0); - v2 = PG_GETARG_ARRAYTYPE_P(1); - - result = ((array_cmp(fcinfo) > 0) ? v1 : v2); - - PG_RETURN_ARRAYTYPE_P(result); + if (array_cmp(fcinfo) > 0) + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); + else + PG_RETURN_DATUM(PG_GETARG_DATUM(1)); } Datum array_smaller(PG_FUNCTION_ARGS) { - ArrayType *v1, - *v2, - *result; - - v1 = PG_GETARG_ARRAYTYPE_P(0); - v2 = PG_GETARG_ARRAYTYPE_P(1); - - result = ((array_cmp(fcinfo) < 0) ? v1 : v2); - - PG_RETURN_ARRAYTYPE_P(result); + if (array_cmp(fcinfo) < 0) + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); + else + PG_RETURN_DATUM(PG_GETARG_DATUM(1)); } @@ -5308,7 +5535,7 @@ generate_subscripts(PG_FUNCTION_ARGS) /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + AnyArrayType *v = PG_GETARG_ANY_ARRAY(0); int reqdim = PG_GETARG_INT32(1); int *lb, *dimv; @@ -5317,11 +5544,11 @@ generate_subscripts(PG_FUNCTION_ARGS) funcctx = SRF_FIRSTCALL_INIT(); /* Sanity check: does it look like an array at all? 
*/ - if (ARR_NDIM(v) <= 0 || ARR_NDIM(v) > MAXDIM) + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) SRF_RETURN_DONE(funcctx); /* Sanity check: was the requested dim valid */ - if (reqdim <= 0 || reqdim > ARR_NDIM(v)) + if (reqdim <= 0 || reqdim > AARR_NDIM(v)) SRF_RETURN_DONE(funcctx); /* @@ -5330,8 +5557,8 @@ generate_subscripts(PG_FUNCTION_ARGS) oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); fctx = (generate_subscripts_fctx *) palloc(sizeof(generate_subscripts_fctx)); - lb = ARR_LBOUND(v); - dimv = ARR_DIMS(v); + lb = AARR_LBOUND(v); + dimv = AARR_DIMS(v); fctx->lower = lb[reqdim - 1]; fctx->upper = dimv[reqdim - 1] + lb[reqdim - 1] - 1; @@ -5650,11 +5877,9 @@ array_unnest(PG_FUNCTION_ARGS) { typedef struct { - ArrayType *arr; + array_iter iter; int nextelem; int numelems; - char *elemdataptr; /* this moves with nextelem */ - bits8 *arraynullsptr; /* this does not */ int16 elmlen; bool elmbyval; char elmalign; @@ -5667,7 +5892,7 @@ array_unnest(PG_FUNCTION_ARGS) /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) { - ArrayType *arr; + AnyArrayType *arr; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); @@ -5684,23 +5909,28 @@ array_unnest(PG_FUNCTION_ARGS) * and not before. (If no detoast happens, we assume the originally * passed array will stick around till then.) 
*/ - arr = PG_GETARG_ARRAYTYPE_P(0); + arr = PG_GETARG_ANY_ARRAY(0); /* allocate memory for user context */ fctx = (array_unnest_fctx *) palloc(sizeof(array_unnest_fctx)); /* initialize state */ - fctx->arr = arr; + array_iter_setup(&fctx->iter, arr); fctx->nextelem = 0; - fctx->numelems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); - - fctx->elemdataptr = ARR_DATA_PTR(arr); - fctx->arraynullsptr = ARR_NULLBITMAP(arr); + fctx->numelems = ArrayGetNItems(AARR_NDIM(arr), AARR_DIMS(arr)); - get_typlenbyvalalign(ARR_ELEMTYPE(arr), - &fctx->elmlen, - &fctx->elmbyval, - &fctx->elmalign); + if (VARATT_IS_EXPANDED_HEADER(arr)) + { + /* we can just grab the type data from expanded array */ + fctx->elmlen = arr->xpn.typlen; + fctx->elmbyval = arr->xpn.typbyval; + fctx->elmalign = arr->xpn.typalign; + } + else + get_typlenbyvalalign(AARR_ELEMTYPE(arr), + &fctx->elmlen, + &fctx->elmbyval, + &fctx->elmalign); funcctx->user_fctx = fctx; MemoryContextSwitchTo(oldcontext); @@ -5715,32 +5945,8 @@ array_unnest(PG_FUNCTION_ARGS) int offset = fctx->nextelem++; Datum elem; - /* - * Check for NULL array element - */ - if (array_get_isnull(fctx->arraynullsptr, offset)) - { - fcinfo->isnull = true; - elem = (Datum) 0; - /* elemdataptr does not move */ - } - else - { - /* - * OK, get the element - */ - char *ptr = fctx->elemdataptr; - - fcinfo->isnull = false; - elem = ArrayCast(ptr, fctx->elmbyval, fctx->elmlen); - - /* - * Advance elemdataptr over it - */ - ptr = att_addlength_pointer(ptr, fctx->elmlen, ptr); - ptr = (char *) att_align_nominal(ptr, fctx->elmalign); - fctx->elemdataptr = ptr; - } + elem = array_iter_next(&fctx->iter, &fcinfo->isnull, offset, + fctx->elmlen, fctx->elmbyval, fctx->elmalign); SRF_RETURN_NEXT(funcctx, elem); } @@ -5992,7 +6198,8 @@ array_replace_internal(ArrayType *array, result->ndim = ndim; result->dataoffset = dataoffset; result->elemtype = element_type; - memcpy(ARR_DIMS(result), ARR_DIMS(array), 2 * ndim * sizeof(int)); + memcpy(ARR_DIMS(result), 
ARR_DIMS(array), ndim * sizeof(int)); + memcpy(ARR_LBOUND(result), ARR_LBOUND(array), ndim * sizeof(int)); if (remove) { diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c index 014eca5185..e8af0304c0 100644 --- a/src/backend/utils/adt/datum.c +++ b/src/backend/utils/adt/datum.c @@ -12,8 +12,9 @@ * *------------------------------------------------------------------------- */ + /* - * In the implementation of the next routines we assume the following: + * In the implementation of these routines we assume the following: * * A) if a type is "byVal" then all the information is stored in the * Datum itself (i.e. no pointers involved!). In this case the @@ -34,11 +35,15 @@ * * Note that we do not treat "toasted" datums specially; therefore what * will be copied or compared is the compressed data or toast reference. + * An exception is made for datumCopy() of an expanded object, however, + * because most callers expect to get a simple contiguous (and pfree'able) + * result from datumCopy(). See also datumTransfer(). */ #include "postgres.h" #include "utils/datum.h" +#include "utils/expandeddatum.h" /*------------------------------------------------------------------------- @@ -46,6 +51,7 @@ * * Find the "real" size of a datum, given the datum value, * whether it is a "by value", and the declared type length. + * (For TOAST pointer datums, this is the size of the pointer datum.) * * This is essentially an out-of-line version of the att_addlength_datum() * macro in access/tupmacs.h. We do a tad more error checking though. @@ -106,9 +112,16 @@ datumGetSize(Datum value, bool typByVal, int typLen) /*------------------------------------------------------------------------- * datumCopy * - * make a copy of a datum + * Make a copy of a non-NULL datum. * * If the datatype is pass-by-reference, memory is obtained with palloc(). + * + * If the value is a reference to an expanded object, we flatten into memory + * obtained with palloc(). 
We need to copy because one of the main uses of + * this function is to copy a datum out of a transient memory context that's + * about to be destroyed, and the expanded object is probably in a child + * context that will also go away. Moreover, many callers assume that the + * result is a single pfree-able chunk. *------------------------------------------------------------------------- */ Datum @@ -118,44 +131,71 @@ datumCopy(Datum value, bool typByVal, int typLen) if (typByVal) res = value; + else if (typLen == -1) + { + /* It is a varlena datatype */ + struct varlena *vl = (struct varlena *) DatumGetPointer(value); + + if (VARATT_IS_EXTERNAL_EXPANDED(vl)) + { + /* Flatten into the caller's memory context */ + ExpandedObjectHeader *eoh = DatumGetEOHP(value); + Size resultsize; + char *resultptr; + + resultsize = EOH_get_flat_size(eoh); + resultptr = (char *) palloc(resultsize); + EOH_flatten_into(eoh, (void *) resultptr, resultsize); + res = PointerGetDatum(resultptr); + } + else + { + /* Otherwise, just copy the varlena datum verbatim */ + Size realSize; + char *resultptr; + + realSize = (Size) VARSIZE_ANY(vl); + resultptr = (char *) palloc(realSize); + memcpy(resultptr, vl, realSize); + res = PointerGetDatum(resultptr); + } + } else { + /* Pass by reference, but not varlena, so not toasted */ Size realSize; - char *s; - - if (DatumGetPointer(value) == NULL) - return PointerGetDatum(NULL); + char *resultptr; realSize = datumGetSize(value, typByVal, typLen); - s = (char *) palloc(realSize); - memcpy(s, DatumGetPointer(value), realSize); - res = PointerGetDatum(s); + resultptr = (char *) palloc(realSize); + memcpy(resultptr, DatumGetPointer(value), realSize); + res = PointerGetDatum(resultptr); } return res; } /*------------------------------------------------------------------------- - * datumFree + * datumTransfer * - * Free the space occupied by a datum CREATED BY "datumCopy" + * Transfer a non-NULL datum into the current memory context. 
* - * NOTE: DO NOT USE THIS ROUTINE with datums returned by heap_getattr() etc. - * ONLY datums created by "datumCopy" can be freed! + * This is equivalent to datumCopy() except when the datum is a read-write + * pointer to an expanded object. In that case we merely reparent the object + * into the current context, and return its standard R/W pointer (in case the + * given one is a transient pointer of shorter lifespan). *------------------------------------------------------------------------- */ -#ifdef NOT_USED -void -datumFree(Datum value, bool typByVal, int typLen) +Datum +datumTransfer(Datum value, bool typByVal, int typLen) { - if (!typByVal) - { - Pointer s = DatumGetPointer(value); - - pfree(s); - } + if (!typByVal && typLen == -1 && + VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value))) + value = TransferExpandedObject(value, CurrentMemoryContext); + else + value = datumCopy(value, typByVal, typLen); + return value; } -#endif /*------------------------------------------------------------------------- * datumIsEqual diff --git a/src/backend/utils/adt/expandeddatum.c b/src/backend/utils/adt/expandeddatum.c new file mode 100644 index 0000000000..039671bc7d --- /dev/null +++ b/src/backend/utils/adt/expandeddatum.c @@ -0,0 +1,163 @@ +/*------------------------------------------------------------------------- + * + * expandeddatum.c + * Support functions for "expanded" value representations. + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/expandeddatum.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "utils/expandeddatum.h" +#include "utils/memutils.h" + +/* + * DatumGetEOHP + * + * Given a Datum that is an expanded-object reference, extract the pointer. 
+ * + * This is a bit tedious since the pointer may not be properly aligned; + * compare VARATT_EXTERNAL_GET_POINTER(). + */ +ExpandedObjectHeader * +DatumGetEOHP(Datum d) +{ + varattrib_1b_e *datum = (varattrib_1b_e *) DatumGetPointer(d); + varatt_expanded ptr; + + Assert(VARATT_IS_EXTERNAL_EXPANDED(datum)); + memcpy(&ptr, VARDATA_EXTERNAL(datum), sizeof(ptr)); + Assert(VARATT_IS_EXPANDED_HEADER(ptr.eohptr)); + return ptr.eohptr; +} + +/* + * EOH_init_header + * + * Initialize the common header of an expanded object. + * + * The main thing this encapsulates is initializing the TOAST pointers. + */ +void +EOH_init_header(ExpandedObjectHeader *eohptr, + const ExpandedObjectMethods *methods, + MemoryContext obj_context) +{ + varatt_expanded ptr; + + eohptr->vl_len_ = EOH_HEADER_MAGIC; + eohptr->eoh_methods = methods; + eohptr->eoh_context = obj_context; + + ptr.eohptr = eohptr; + + SET_VARTAG_EXTERNAL(eohptr->eoh_rw_ptr, VARTAG_EXPANDED_RW); + memcpy(VARDATA_EXTERNAL(eohptr->eoh_rw_ptr), &ptr, sizeof(ptr)); + + SET_VARTAG_EXTERNAL(eohptr->eoh_ro_ptr, VARTAG_EXPANDED_RO); + memcpy(VARDATA_EXTERNAL(eohptr->eoh_ro_ptr), &ptr, sizeof(ptr)); +} + +/* + * EOH_get_flat_size + * EOH_flatten_into + * + * Convenience functions for invoking the "methods" of an expanded object. + */ + +Size +EOH_get_flat_size(ExpandedObjectHeader *eohptr) +{ + return (*eohptr->eoh_methods->get_flat_size) (eohptr); +} + +void +EOH_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size) +{ + (*eohptr->eoh_methods->flatten_into) (eohptr, result, allocated_size); +} + +/* + * Does the Datum represent a writable expanded object? 
+ */ +bool +DatumIsReadWriteExpandedObject(Datum d, bool isnull, int16 typlen) +{ + /* Reject if it's NULL or not a varlena type */ + if (isnull || typlen != -1) + return false; + + /* Reject if not a read-write expanded-object pointer */ + if (!VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))) + return false; + + return true; +} + +/* + * If the Datum represents a R/W expanded object, change it to R/O. + * Otherwise return the original Datum. + */ +Datum +MakeExpandedObjectReadOnly(Datum d, bool isnull, int16 typlen) +{ + ExpandedObjectHeader *eohptr; + + /* Nothing to do if it's NULL or not a varlena type */ + if (isnull || typlen != -1) + return d; + + /* Nothing to do if not a read-write expanded-object pointer */ + if (!VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))) + return d; + + /* Now safe to extract the object pointer */ + eohptr = DatumGetEOHP(d); + + /* Return the built-in read-only pointer instead of given pointer */ + return EOHPGetRODatum(eohptr); +} + +/* + * Transfer ownership of an expanded object to a new parent memory context. + * The object must be referenced by a R/W pointer, and what we return is + * always its "standard" R/W pointer, which is certain to have the same + * lifespan as the object itself. (The passed-in pointer might not, and + * in any case wouldn't provide a unique identifier if it's not that one.) + */ +Datum +TransferExpandedObject(Datum d, MemoryContext new_parent) +{ + ExpandedObjectHeader *eohptr = DatumGetEOHP(d); + + /* Assert caller gave a R/W pointer */ + Assert(VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))); + + /* Transfer ownership */ + MemoryContextSetParent(eohptr->eoh_context, new_parent); + + /* Return the object's standard read-write pointer */ + return EOHPGetRWDatum(eohptr); +} + +/* + * Delete an expanded object (must be referenced by a R/W pointer). 
+ */ +void +DeleteExpandedObject(Datum d) +{ + ExpandedObjectHeader *eohptr = DatumGetEOHP(d); + + /* Assert caller gave a R/W pointer */ + Assert(VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))); + + /* Kill it */ + MemoryContextDelete(eohptr->eoh_context); +} diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index c42a6b6005..34f4e7202f 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -323,6 +323,10 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent) AssertArg(MemoryContextIsValid(context)); AssertArg(context != new_parent); + /* Fast path if it's got correct parent already */ + if (new_parent == context->parent) + return; + /* Delink from existing parent, if any */ if (context->parent) { diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h index 9e912ba7bf..fbcae0cce7 100644 --- a/src/include/executor/spi.h +++ b/src/include/executor/spi.h @@ -124,6 +124,7 @@ extern char *SPI_getnspname(Relation rel); extern void *SPI_palloc(Size size); extern void *SPI_repalloc(void *pointer, Size size); extern void SPI_pfree(void *pointer); +extern Datum SPI_datumTransfer(Datum value, bool typByVal, int typLen); extern void SPI_freetuple(HeapTuple pointer); extern void SPI_freetuptable(SPITupleTable *tuptable); diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h index 48f84bfe20..00686b0441 100644 --- a/src/include/executor/tuptable.h +++ b/src/include/executor/tuptable.h @@ -163,6 +163,7 @@ extern Datum ExecFetchSlotTupleDatum(TupleTableSlot *slot); extern HeapTuple ExecMaterializeSlot(TupleTableSlot *slot); extern TupleTableSlot *ExecCopySlot(TupleTableSlot *dstslot, TupleTableSlot *srcslot); +extern TupleTableSlot *ExecMakeSlotContentsReadOnly(TupleTableSlot *slot); /* in access/common/heaptuple.c */ extern Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull); diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h 
index f10ae4efa8..4a4dd7e9ef 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -309,6 +309,10 @@ typedef struct WindowFunc * Note: the result datatype is the element type when fetching a single * element; but it is the array type when doing subarray fetch or either * type of store. + * + * Note: for the cases where an array is returned, if refexpr yields a R/W + * expanded array, then the implementation is allowed to modify that object + * in-place and return the same object. * ---------------- */ typedef struct ArrayRef diff --git a/src/include/postgres.h b/src/include/postgres.h index be37313fa5..ccf1605455 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -87,6 +87,23 @@ typedef struct varatt_indirect struct varlena *pointer; /* Pointer to in-memory varlena */ } varatt_indirect; +/* + * struct varatt_expanded is a "TOAST pointer" representing an out-of-line + * Datum that is stored in memory, in some type-specific, not necessarily + * physically contiguous format that is convenient for computation not + * storage. APIs for this, in particular the definition of struct + * ExpandedObjectHeader, are in src/include/utils/expandeddatum.h. + * + * Note that just as for struct varatt_external, this struct is stored + * unaligned within any containing tuple. + */ +typedef struct ExpandedObjectHeader ExpandedObjectHeader; + +typedef struct varatt_expanded +{ + ExpandedObjectHeader *eohptr; +} varatt_expanded; + /* * Type tag for the various sorts of "TOAST pointer" datums. 
The peculiar * value for VARTAG_ONDISK comes from a requirement for on-disk compatibility @@ -95,11 +112,18 @@ typedef struct varatt_indirect typedef enum vartag_external { VARTAG_INDIRECT = 1, + VARTAG_EXPANDED_RO = 2, + VARTAG_EXPANDED_RW = 3, VARTAG_ONDISK = 18 } vartag_external; +/* this test relies on the specific tag values above */ +#define VARTAG_IS_EXPANDED(tag) \ + (((tag) & ~1) == VARTAG_EXPANDED_RO) + #define VARTAG_SIZE(tag) \ ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ + VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \ (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \ TrapMacro(true, "unrecognized TOAST vartag")) @@ -294,6 +318,12 @@ typedef struct (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK) #define VARATT_IS_EXTERNAL_INDIRECT(PTR) \ (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT) +#define VARATT_IS_EXTERNAL_EXPANDED_RO(PTR) \ + (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_EXPANDED_RO) +#define VARATT_IS_EXTERNAL_EXPANDED_RW(PTR) \ + (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_EXPANDED_RW) +#define VARATT_IS_EXTERNAL_EXPANDED(PTR) \ + (VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) #define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR) #define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR)) diff --git a/src/include/utils/array.h b/src/include/utils/array.h index 0a488e7b0c..c25b80d272 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -45,6 +45,11 @@ * We support subscripting on these types, but array_in() and array_out() * only work with varlena arrays. * + * In addition, arrays are a major user of the "expanded object" TOAST + * infrastructure. This allows a varlena array to be converted to a + * separate representation that may include "deconstructed" Datum/isnull + * arrays holding the elements. 
+ * * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -57,6 +62,8 @@ #define ARRAY_H #include "fmgr.h" +#include "utils/expandeddatum.h" + /* * Arrays are varlena objects, so must meet the varlena convention that @@ -74,6 +81,86 @@ typedef struct Oid elemtype; /* element type OID */ } ArrayType; +/* + * An expanded array is contained within a private memory context (as + * all expanded objects must be) and has a control structure as below. + * + * The expanded array might contain a regular "flat" array if that was the + * original input and we've not modified it significantly. Otherwise, the + * contents are represented by Datum/isnull arrays plus dimensionality and + * type information. We could also have both forms, if we've deconstructed + * the original array for access purposes but not yet changed it. For pass- + * by-reference element types, the Datums would point into the flat array in + * this situation. Once we start modifying array elements, new pass-by-ref + * elements are separately palloc'd within the memory context. + */ +#define EA_MAGIC 689375833 /* ID for debugging crosschecks */ + +typedef struct ExpandedArrayHeader +{ + /* Standard header for expanded objects */ + ExpandedObjectHeader hdr; + + /* Magic value identifying an expanded array (for debugging only) */ + int ea_magic; + + /* Dimensionality info (always valid) */ + int ndims; /* # of dimensions */ + int *dims; /* array dimensions */ + int *lbound; /* index lower bounds for each dimension */ + + /* Element type info (always valid) */ + Oid element_type; /* element type OID */ + int16 typlen; /* needed info about element datatype */ + bool typbyval; + char typalign; + + /* + * If we have a Datum-array representation of the array, it's kept here; + * else dvalues/dnulls are NULL. 
The dvalues and dnulls arrays are always + * palloc'd within the object private context, but may change size from + * time to time. For pass-by-ref element types, dvalues entries might + * point either into the fstartptr..fendptr area, or to separately + * palloc'd chunks. Elements should always be fully detoasted, as they + * are in the standard flat representation. + * + * Even when dvalues is valid, dnulls can be NULL if there are no null + * elements. + */ + Datum *dvalues; /* array of Datums */ + bool *dnulls; /* array of is-null flags for Datums */ + int dvalueslen; /* allocated length of above arrays */ + int nelems; /* number of valid entries in above arrays */ + + /* + * flat_size is the current space requirement for the flat equivalent of + * the expanded array, if known; otherwise it's 0. We store this to make + * consecutive calls of get_flat_size cheap. + */ + Size flat_size; + + /* + * fvalue points to the flat representation if it is valid, else it is + * NULL. If we have or ever had a flat representation then + * fstartptr/fendptr point to the start and end+1 of its data area; this + * is so that we can tell which Datum pointers point into the flat + * representation rather than being pointers to separately palloc'd data. + */ + ArrayType *fvalue; /* must be a fully detoasted array */ + char *fstartptr; /* start of its data area */ + char *fendptr; /* end+1 of its data area */ +} ExpandedArrayHeader; + +/* + * Functions that can handle either a "flat" varlena array or an expanded + * array use this union to work with their input. 
+ */ +typedef union AnyArrayType +{ + ArrayType flt; + ExpandedArrayHeader xpn; +} AnyArrayType; + /* * working state for accumArrayResult() and friends * note that the input must be scalars (legal array elements) @@ -151,17 +238,24 @@ typedef struct ArrayMapState /* ArrayIteratorData is private in arrayfuncs.c */ typedef struct ArrayIteratorData *ArrayIterator; -/* - * fmgr macros for array objects - */ +/* fmgr macros for regular varlena array objects */ #define DatumGetArrayTypeP(X) ((ArrayType *) PG_DETOAST_DATUM(X)) #define DatumGetArrayTypePCopy(X) ((ArrayType *) PG_DETOAST_DATUM_COPY(X)) #define PG_GETARG_ARRAYTYPE_P(n) DatumGetArrayTypeP(PG_GETARG_DATUM(n)) #define PG_GETARG_ARRAYTYPE_P_COPY(n) DatumGetArrayTypePCopy(PG_GETARG_DATUM(n)) #define PG_RETURN_ARRAYTYPE_P(x) PG_RETURN_POINTER(x) +/* fmgr macros for expanded array objects */ +#define PG_GETARG_EXPANDED_ARRAY(n) DatumGetExpandedArray(PG_GETARG_DATUM(n)) +#define PG_GETARG_EXPANDED_ARRAYX(n, metacache) \ + DatumGetExpandedArrayX(PG_GETARG_DATUM(n), metacache) +#define PG_RETURN_EXPANDED_ARRAY(x) PG_RETURN_DATUM(EOHPGetRWDatum(&(x)->hdr)) + +/* fmgr macros for AnyArrayType (ie, get either varlena or expanded form) */ +#define PG_GETARG_ANY_ARRAY(n) DatumGetAnyArray(PG_GETARG_DATUM(n)) + /* - * Access macros for array header fields. + * Access macros for varlena array header fields. * * ARR_DIMS returns a pointer to an array of array dimensions (number of * elements along the various array axes). @@ -209,6 +303,22 @@ typedef struct ArrayIteratorData *ArrayIterator; #define ARR_DATA_PTR(a) \ (((char *) (a)) + ARR_DATA_OFFSET(a)) +/* + * Macros for working with AnyArrayType inputs. Beware multiple references! + */ +#define AARR_NDIM(a) \ + (VARATT_IS_EXPANDED_HEADER(a) ? (a)->xpn.ndims : ARR_NDIM(&(a)->flt)) +#define AARR_HASNULL(a) \ + (VARATT_IS_EXPANDED_HEADER(a) ? \ + ((a)->xpn.dvalues != NULL ? 
(a)->xpn.dnulls != NULL : ARR_HASNULL((a)->xpn.fvalue)) : \ + ARR_HASNULL(&(a)->flt)) +#define AARR_ELEMTYPE(a) \ + (VARATT_IS_EXPANDED_HEADER(a) ? (a)->xpn.element_type : ARR_ELEMTYPE(&(a)->flt)) +#define AARR_DIMS(a) \ + (VARATT_IS_EXPANDED_HEADER(a) ? (a)->xpn.dims : ARR_DIMS(&(a)->flt)) +#define AARR_LBOUND(a) \ + (VARATT_IS_EXPANDED_HEADER(a) ? (a)->xpn.lbound : ARR_LBOUND(&(a)->flt)) + /* * GUC parameter @@ -250,6 +360,15 @@ extern Datum array_remove(PG_FUNCTION_ARGS); extern Datum array_replace(PG_FUNCTION_ARGS); extern Datum width_bucket_array(PG_FUNCTION_ARGS); +extern void CopyArrayEls(ArrayType *array, + Datum *values, + bool *nulls, + int nitems, + int typlen, + bool typbyval, + char typalign, + bool freedata); + extern Datum array_get_element(Datum arraydatum, int nSubscripts, int *indx, int arraytyplen, int elmlen, bool elmbyval, char elmalign, bool *isNull); @@ -271,7 +390,7 @@ extern ArrayType *array_set(ArrayType *array, int nSubscripts, int *indx, Datum dataValue, bool isNull, int arraytyplen, int elmlen, bool elmbyval, char elmalign); -extern Datum array_map(FunctionCallInfo fcinfo, Oid inpType, Oid retType, +extern Datum array_map(FunctionCallInfo fcinfo, Oid retType, ArrayMapState *amstate); extern void array_bitmap_copy(bits8 *destbitmap, int destoffset, @@ -288,6 +407,9 @@ extern ArrayType *construct_md_array(Datum *elems, int *lbs, Oid elmtype, int elmlen, bool elmbyval, char elmalign); extern ArrayType *construct_empty_array(Oid elmtype); +extern ExpandedArrayHeader *construct_empty_expanded_array(Oid element_type, + MemoryContext parentcontext, + ArrayMetaState *metacache); extern void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, @@ -340,6 +462,17 @@ extern void mda_get_offset_values(int n, int *dist, const int *prod, const int * extern int mda_next_tuple(int n, int *curr, const int *span); extern int32 *ArrayGetIntegerTypmods(ArrayType *arr, int *n); +/* + * prototypes for functions defined in 
array_expanded.c + */ +extern Datum expand_array(Datum arraydatum, MemoryContext parentcontext, + ArrayMetaState *metacache); +extern ExpandedArrayHeader *DatumGetExpandedArray(Datum d); +extern ExpandedArrayHeader *DatumGetExpandedArrayX(Datum d, + ArrayMetaState *metacache); +extern AnyArrayType *DatumGetAnyArray(Datum d); +extern void deconstruct_expanded_array(ExpandedArrayHeader *eah); + /* * prototypes for functions defined in array_userfuncs.c */ diff --git a/src/include/utils/arrayaccess.h b/src/include/utils/arrayaccess.h new file mode 100644 index 0000000000..72575d4a82 --- /dev/null +++ b/src/include/utils/arrayaccess.h @@ -0,0 +1,133 @@ +/*------------------------------------------------------------------------- + * + * arrayaccess.h + * Declarations for element-by-element access to Postgres arrays. + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/utils/arrayaccess.h + * + *------------------------------------------------------------------------- + */ +#ifndef ARRAYACCESS_H +#define ARRAYACCESS_H + +#include "access/tupmacs.h" +#include "utils/array.h" + + +/* + * Functions for iterating through elements of a flat or expanded array. + * These require a state struct "array_iter iter". + * + * Use "array_iter_setup(&iter, arrayptr);" to prepare to iterate, and + * "datumvar = array_iter_next(&iter, &isnullvar, index, ...);" to fetch + * the next element into datumvar/isnullvar. + * "index" must be the zero-origin element number; we make caller provide + * this since caller is generally counting the elements anyway. Despite + * that, these functions can only fetch elements sequentially. 
+ */ + +typedef struct array_iter +{ + /* datumptr being NULL or not tells if we have flat or expanded array */ + + /* Fields used when we have an expanded array */ + Datum *datumptr; /* Pointer to Datum array */ + bool *isnullptr; /* Pointer to isnull array */ + + /* Fields used when we have a flat array */ + char *dataptr; /* Current spot in the data area */ + bits8 *bitmapptr; /* Current byte of the nulls bitmap, or NULL */ + int bitmask; /* mask for current bit in nulls bitmap */ +} array_iter; + +/* + * We want the functions below to be inline; but if the compiler doesn't + * support that, fall back on providing them as regular functions. See + * STATIC_IF_INLINE in c.h. + */ +#ifndef PG_USE_INLINE +extern void array_iter_setup(array_iter *it, AnyArrayType *a); +extern Datum array_iter_next(array_iter *it, bool *isnull, int i, + int elmlen, bool elmbyval, char elmalign); +#endif /* !PG_USE_INLINE */ + +#if defined(PG_USE_INLINE) || defined(ARRAYACCESS_INCLUDE_DEFINITIONS) + +STATIC_IF_INLINE void +array_iter_setup(array_iter *it, AnyArrayType *a) +{ + if (VARATT_IS_EXPANDED_HEADER(a)) + { + if (a->xpn.dvalues) + { + it->datumptr = a->xpn.dvalues; + it->isnullptr = a->xpn.dnulls; + /* we must fill all fields to prevent compiler warnings */ + it->dataptr = NULL; + it->bitmapptr = NULL; + } + else + { + /* Work with flat array embedded in the expanded datum */ + it->datumptr = NULL; + it->isnullptr = NULL; + it->dataptr = ARR_DATA_PTR(a->xpn.fvalue); + it->bitmapptr = ARR_NULLBITMAP(a->xpn.fvalue); + } + } + else + { + it->datumptr = NULL; + it->isnullptr = NULL; + it->dataptr = ARR_DATA_PTR(&a->flt); + it->bitmapptr = ARR_NULLBITMAP(&a->flt); + } + it->bitmask = 1; +} + +STATIC_IF_INLINE Datum +array_iter_next(array_iter *it, bool *isnull, int i, + int elmlen, bool elmbyval, char elmalign) +{ + Datum ret; + + if (it->datumptr) + { + ret = it->datumptr[i]; + *isnull = it->isnullptr ? 
it->isnullptr[i] : false; + } + else + { + if (it->bitmapptr && (*(it->bitmapptr) & it->bitmask) == 0) + { + *isnull = true; + ret = (Datum) 0; + } + else + { + *isnull = false; + ret = fetch_att(it->dataptr, elmbyval, elmlen); + it->dataptr = att_addlength_pointer(it->dataptr, elmlen, + it->dataptr); + it->dataptr = (char *) att_align_nominal(it->dataptr, elmalign); + } + it->bitmask <<= 1; + if (it->bitmask == 0x100) + { + if (it->bitmapptr) + it->bitmapptr++; + it->bitmask = 1; + } + } + + return ret; +} + +#endif /* defined(PG_USE_INLINE) || + * defined(ARRAYACCESS_INCLUDE_DEFINITIONS) */ + +#endif /* ARRAYACCESS_H */ diff --git a/src/include/utils/datum.h b/src/include/utils/datum.h index 663414b793..c572f790a5 100644 --- a/src/include/utils/datum.h +++ b/src/include/utils/datum.h @@ -24,18 +24,18 @@ extern Size datumGetSize(Datum value, bool typByVal, int typLen); /* - * datumCopy - make a copy of a datum. + * datumCopy - make a copy of a non-NULL datum. * * If the datatype is pass-by-reference, memory is obtained with palloc(). */ extern Datum datumCopy(Datum value, bool typByVal, int typLen); /* - * datumFree - free a datum previously allocated by datumCopy, if any. + * datumTransfer - transfer a non-NULL datum into the current memory context. * - * Does nothing if datatype is pass-by-value. + * Differs from datumCopy() in its handling of read-write expanded objects. */ -extern void datumFree(Datum value, bool typByVal, int typLen); +extern Datum datumTransfer(Datum value, bool typByVal, int typLen); /* * datumIsEqual diff --git a/src/include/utils/expandeddatum.h b/src/include/utils/expandeddatum.h new file mode 100644 index 0000000000..331be910dd --- /dev/null +++ b/src/include/utils/expandeddatum.h @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------- + * + * expandeddatum.h + * Declarations for access to "expanded" value representations. 
+ * + * Complex data types, particularly container types such as arrays and + * records, usually have on-disk representations that are compact but not + * especially convenient to modify. What's more, when we do modify them, + * having to recopy all the rest of the value can be extremely inefficient. + * Therefore, we provide a notion of an "expanded" representation that is used + * only in memory and is optimized more for computation than storage. + * The format appearing on disk is called the data type's "flattened" + * representation, since it is required to be a contiguous blob of bytes -- + * but the type can have an expanded representation that is not. Data types + * must provide means to translate an expanded representation back to + * flattened form. + * + * An expanded object is meant to survive across multiple operations, but + * not to be enormously long-lived; for example it might be a local variable + * in a PL/pgSQL procedure. So its extra bulk compared to the on-disk format + * is a worthwhile trade-off. + * + * References to expanded objects are a type of TOAST pointer. + * Because of longstanding conventions in Postgres, this means that the + * flattened form of such an object must always be a varlena object. + * Fortunately that's no restriction in practice. + * + * There are actually two kinds of TOAST pointers for expanded objects: + * read-only and read-write pointers. Possession of one of the latter + * authorizes a function to modify the value in-place rather than copying it + * as would normally be required. Functions should always return a read-write + * pointer to any new expanded object they create. Functions that modify an + * argument value in-place must take care that they do not corrupt the old + * value if they fail partway through. 
+ * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/utils/expandeddatum.h + * + *------------------------------------------------------------------------- + */ +#ifndef EXPANDEDDATUM_H +#define EXPANDEDDATUM_H + +/* Size of an EXTERNAL datum that contains a pointer to an expanded object */ +#define EXPANDED_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_expanded)) + +/* + * "Methods" that must be provided for any expanded object. + * + * get_flat_size: compute space needed for flattened representation (total, + * including header). + * + * flatten_into: construct flattened representation in the caller-allocated + * space at *result, of size allocated_size (which will always be the result + * of a preceding get_flat_size call; it's passed for cross-checking). + * + * The flattened representation must be a valid in-line, non-compressed, + * 4-byte-header varlena object. + * + * Note: construction of a heap tuple from an expanded datum calls + * get_flat_size twice, so it's worthwhile to make sure that that doesn't + * incur too much overhead. + */ +typedef Size (*EOM_get_flat_size_method) (ExpandedObjectHeader *eohptr); +typedef void (*EOM_flatten_into_method) (ExpandedObjectHeader *eohptr, + void *result, Size allocated_size); + +/* Struct of function pointers for an expanded object's methods */ +typedef struct ExpandedObjectMethods +{ + EOM_get_flat_size_method get_flat_size; + EOM_flatten_into_method flatten_into; +} ExpandedObjectMethods; + +/* + * Every expanded object must contain this header; typically the header + * is embedded in some larger struct that adds type-specific fields. + * + * It is presumed that the header object and all subsidiary data are stored + * in eoh_context, so that the object can be freed by deleting that context, + * or its storage lifespan can be altered by reparenting the context. 
+ * (In principle the object could own additional resources, such as malloc'd + * storage, and use a memory context reset callback to free them upon reset or + * deletion of eoh_context.) + * + * We set up two TOAST pointers within the standard header, one read-write + * and one read-only. This allows functions to return either kind of pointer + * without making an additional allocation, and in particular without worrying + * whether a separately palloc'd object would have sufficient lifespan. + * But note that these pointers are just a convenience; a pointer object + * appearing somewhere else would still be legal. + * + * The typedef declaration for this appears in postgres.h. + */ +struct ExpandedObjectHeader +{ + /* Phony varlena header */ + int32 vl_len_; /* always EOH_HEADER_MAGIC, see below */ + + /* Pointer to methods required for object type */ + const ExpandedObjectMethods *eoh_methods; + + /* Memory context containing this header and subsidiary data */ + MemoryContext eoh_context; + + /* Standard R/W TOAST pointer for this object is kept here */ + char eoh_rw_ptr[EXPANDED_POINTER_SIZE]; + + /* Standard R/O TOAST pointer for this object is kept here */ + char eoh_ro_ptr[EXPANDED_POINTER_SIZE]; +}; + +/* + * Particularly for read-only functions, it is handy to be able to work with + * either regular "flat" varlena inputs or expanded inputs of the same data + * type. To allow determining which case an argument-fetching function has + * returned, the first int32 of an ExpandedObjectHeader always contains -1 + * (EOH_HEADER_MAGIC to the code). This works since no 4-byte-header varlena + * could have that as its first 4 bytes. Caution: we could not reliably tell + * the difference between an ExpandedObjectHeader and a short-header object + * with this trick. However, it works fine if the argument fetching code + * always returns either a 4-byte-header flat object or an expanded object. 
+ */ +#define EOH_HEADER_MAGIC (-1) +#define VARATT_IS_EXPANDED_HEADER(PTR) \ + (((ExpandedObjectHeader *) (PTR))->vl_len_ == EOH_HEADER_MAGIC) + +/* + * Generic support functions for expanded objects. + * (More of these might be worth inlining later.) + */ + +#define EOHPGetRWDatum(eohptr) PointerGetDatum((eohptr)->eoh_rw_ptr) +#define EOHPGetRODatum(eohptr) PointerGetDatum((eohptr)->eoh_ro_ptr) + +extern ExpandedObjectHeader *DatumGetEOHP(Datum d); +extern void EOH_init_header(ExpandedObjectHeader *eohptr, + const ExpandedObjectMethods *methods, + MemoryContext obj_context); +extern Size EOH_get_flat_size(ExpandedObjectHeader *eohptr); +extern void EOH_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size); +extern bool DatumIsReadWriteExpandedObject(Datum d, bool isnull, int16 typlen); +extern Datum MakeExpandedObjectReadOnly(Datum d, bool isnull, int16 typlen); +extern Datum TransferExpandedObject(Datum d, MemoryContext new_parent); +extern void DeleteExpandedObject(Datum d); + +#endif /* EXPANDEDDATUM_H */ diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 650cc48c09..0ff20860f3 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -2200,6 +2200,22 @@ build_datatype(HeapTuple typeTup, int32 typmod, Oid collation) typ->collation = typeStruct->typcollation; if (OidIsValid(collation) && OidIsValid(typ->collation)) typ->collation = collation; + /* Detect if type is true array, or domain thereof */ + /* NB: this is only used to decide whether to apply expand_array */ + if (typeStruct->typtype == TYPTYPE_BASE) + { + /* this test should match what get_element_type() checks */ + typ->typisarray = (typeStruct->typlen == -1 && + OidIsValid(typeStruct->typelem)); + } + else if (typeStruct->typtype == TYPTYPE_DOMAIN) + { + /* we can short-circuit looking up base types if it's not varlena */ + typ->typisarray = (typeStruct->typlen == -1 && + 
OidIsValid(get_base_element_type(typeStruct->typbasetype))); + } + else + typ->typisarray = false; typ->atttypmod = typmod; return typ; diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index deefb1f9de..aac7cdaf7c 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -34,6 +34,7 @@ #include "utils/array.h" #include "utils/builtins.h" #include "utils/datum.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -173,6 +174,8 @@ static void exec_prepare_plan(PLpgSQL_execstate *estate, static bool exec_simple_check_node(Node *node); static void exec_simple_check_plan(PLpgSQL_expr *expr); static void exec_simple_recheck_plan(PLpgSQL_expr *expr, CachedPlan *cplan); +static void exec_check_rw_parameter(PLpgSQL_expr *expr, int target_dno); +static bool contains_target_param(Node *node, int *target_dno); static bool exec_eval_simple_expr(PLpgSQL_execstate *estate, PLpgSQL_expr *expr, Datum *result, @@ -312,6 +315,44 @@ plpgsql_exec_function(PLpgSQL_function *func, FunctionCallInfo fcinfo, var->value = fcinfo->arg[i]; var->isnull = fcinfo->argnull[i]; var->freeval = false; + + /* + * Force any array-valued parameter to be stored in + * expanded form in our local variable, in hopes of + * improving efficiency of uses of the variable. (This is + * a hack, really: why only arrays? Need more thought + * about which cases are likely to win. See also + * typisarray-specific heuristic in exec_assign_value.) + * + * Special cases: If passed a R/W expanded pointer, assume + * we can commandeer the object rather than having to copy + * it. If passed a R/O expanded pointer, just keep it as + * the value of the variable for the moment. (We'll force + * it to R/W if the variable gets modified, but that may + * very well never happen.) 
+ */ + if (!var->isnull && var->datatype->typisarray) + { + if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(var->value))) + { + /* take ownership of R/W object */ + var->value = TransferExpandedObject(var->value, + CurrentMemoryContext); + var->freeval = true; + } + else if (VARATT_IS_EXTERNAL_EXPANDED_RO(DatumGetPointer(var->value))) + { + /* R/O pointer, keep it as-is until assigned to */ + } + else + { + /* flat array, so force to expanded form */ + var->value = expand_array(var->value, + CurrentMemoryContext, + NULL); + var->freeval = true; + } + } } break; @@ -477,18 +518,14 @@ plpgsql_exec_function(PLpgSQL_function *func, FunctionCallInfo fcinfo, /* * If the function's return type isn't by value, copy the value - * into upper executor memory context. + * into upper executor memory context. However, if we have a R/W + * expanded datum, we can just transfer its ownership out to the + * upper executor context. */ if (!fcinfo->isnull && !func->fn_retbyval) - { - Size len; - void *tmp; - - len = datumGetSize(estate.retval, false, func->fn_rettyplen); - tmp = SPI_palloc(len); - memcpy(tmp, DatumGetPointer(estate.retval), len); - estate.retval = PointerGetDatum(tmp); - } + estate.retval = SPI_datumTransfer(estate.retval, + false, + func->fn_rettyplen); } } @@ -2476,6 +2513,13 @@ exec_stmt_return(PLpgSQL_execstate *estate, PLpgSQL_stmt_return *stmt) * Special case path when the RETURN expression is a simple variable * reference; in particular, this path is always taken in functions with * one or more OUT parameters. + * + * This special case is especially efficient for returning variables that + * have R/W expanded values: we can put the R/W pointer directly into + * estate->retval, leading to transferring the value to the caller's + * context cheaply. If we went through exec_eval_expr we'd end up with a + * R/O pointer. It's okay to skip MakeExpandedObjectReadOnly here since + * we know we won't need the variable's value within the function anymore. 
*/ if (stmt->retvarno >= 0) { @@ -2604,6 +2648,11 @@ exec_stmt_return_next(PLpgSQL_execstate *estate, * Special case path when the RETURN NEXT expression is a simple variable * reference; in particular, this path is always taken in functions with * one or more OUT parameters. + * + * Unlike exec_stmt_return, there's no special win here for R/W + * expanded values, since they'll have to get flattened to go into the + * tuplestore. Indeed, we'd better make them R/O to avoid any risk of the + * casting step changing them in-place. */ if (stmt->retvarno >= 0) { @@ -2622,6 +2671,11 @@ exec_stmt_return_next(PLpgSQL_execstate *estate, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("wrong result type supplied in RETURN NEXT"))); + /* let's be very paranoid about the cast step */ + retval = MakeExpandedObjectReadOnly(retval, + isNull, + var->datatype->typlen); + /* coerce type if needed */ retval = exec_cast_value(estate, retval, @@ -3333,6 +3387,13 @@ exec_prepare_plan(PLpgSQL_execstate *estate, /* Check to see if it's a simple expression */ exec_simple_check_plan(expr); + + /* + * Mark expression as not using a read-write param. exec_assign_value has + * to take steps to override this if appropriate; that seems cleaner than + * adding parameters to all other callers. + */ + expr->rwparam = -1; } @@ -4071,6 +4132,19 @@ exec_assign_expr(PLpgSQL_execstate *estate, PLpgSQL_datum *target, Oid valtype; int32 valtypmod; + /* + * If first time through, create a plan for this expression, and then see + * if we can pass the target variable as a read-write parameter to the + * expression. (This is a bit messy, but it seems cleaner than modifying + * the API of exec_eval_expr for the purpose.)
+ */ + if (expr->plan == NULL) + { + exec_prepare_plan(estate, expr, 0); + if (target->dtype == PLPGSQL_DTYPE_VAR) + exec_check_rw_parameter(expr, target->dno); + } + value = exec_eval_expr(estate, expr, &isnull, &valtype, &valtypmod); exec_assign_value(estate, target, value, isnull, valtype, valtypmod); exec_eval_cleanup(estate); @@ -4140,26 +4214,51 @@ exec_assign_value(PLpgSQL_execstate *estate, /* * If type is by-reference, copy the new value (which is * probably in the eval_econtext) into the procedure's memory - * context. + * context. But if it's a read/write reference to an expanded + * object, no physical copy needs to happen; at most we need + * to reparent the object's memory context. + * + * If it's an array, we force the value to be stored in R/W + * expanded form. This wins if the function later does, say, + * a lot of array subscripting operations on the variable, and + * otherwise might lose. We might need to use a different + * heuristic, but it's too soon to tell. Also, are there + * cases where it'd be useful to force non-array values into + * expanded form? */ if (!var->datatype->typbyval && !isNull) - newvalue = datumCopy(newvalue, - false, - var->datatype->typlen); + { + if (var->datatype->typisarray && + !VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(newvalue))) + { + /* array and not already R/W, so apply expand_array */ + newvalue = expand_array(newvalue, + CurrentMemoryContext, + NULL); + } + else + { + /* else transfer value if R/W, else just datumCopy */ + newvalue = datumTransfer(newvalue, + false, + var->datatype->typlen); + } + } /* - * Now free the old value. (We can't do this any earlier - * because of the possibility that we are assigning the var's - * old value to it, eg "foo := foo". We could optimize out - * the assignment altogether in such cases, but it's too - * infrequent to be worth testing for.) + * Now free the old value, unless it's the same as the new + * value (ie, we're doing "foo := foo"). 
Note that for + * expanded objects, this test is necessary and cannot + * reliably be made any earlier; we have to be looking at the + * object's standard R/W pointer to be sure pointer equality + * is meaningful. */ - free_var(var); + if (var->value != newvalue || var->isnull || isNull) + free_var(var); var->value = newvalue; var->isnull = isNull; - if (!var->datatype->typbyval && !isNull) - var->freeval = true; + var->freeval = (!var->datatype->typbyval && !isNull); break; } @@ -4505,10 +4604,14 @@ exec_assign_value(PLpgSQL_execstate *estate, * * At present this doesn't handle PLpgSQL_expr or PLpgSQL_arrayelem datums. * - * NOTE: caller must not modify the returned value, since it points right - * at the stored value in the case of pass-by-reference datatypes. In some - * cases we have to palloc a return value, and in such cases we put it into - * the estate's short-term memory context. + * NOTE: the returned Datum points right at the stored value in the case of + * pass-by-reference datatypes. Generally callers should take care not to + * modify the stored value. Some callers intentionally manipulate variables + * referenced by R/W expanded pointers, though; it is those callers' + * responsibility that the results are semantically OK. + * + * In some cases we have to palloc a return value, and in such cases we put + * it into the estate's short-term memory context. */ static void exec_eval_datum(PLpgSQL_execstate *estate, @@ -5216,6 +5319,9 @@ exec_eval_simple_expr(PLpgSQL_execstate *estate, { /* It got replanned ... is it still simple? 
*/ exec_simple_recheck_plan(expr, cplan); + /* better recheck r/w safety, as well */ + if (expr->rwparam >= 0) + exec_check_rw_parameter(expr, expr->rwparam); if (expr->expr_simple_expr == NULL) { /* Ooops, release refcount and fail */ @@ -5362,7 +5468,13 @@ setup_param_list(PLpgSQL_execstate *estate, PLpgSQL_expr *expr) */ MemSet(paramLI->params, 0, estate->ndatums * sizeof(ParamExternData)); - /* Instantiate values for "safe" parameters of the expression */ + /* + * Instantiate values for "safe" parameters of the expression. One of + * them might be the variable the expression result will be assigned + * to, in which case we can pass the variable's value as-is even if + * it's a read-write expanded object; otherwise, convert read-write + * pointers to read-only pointers for safety. + */ dno = -1; while ((dno = bms_next_member(expr->paramnos, dno)) >= 0) { @@ -5373,7 +5485,12 @@ setup_param_list(PLpgSQL_execstate *estate, PLpgSQL_expr *expr) PLpgSQL_var *var = (PLpgSQL_var *) datum; ParamExternData *prm = &paramLI->params[dno]; - prm->value = var->value; + if (dno == expr->rwparam) + prm->value = var->value; + else + prm->value = MakeExpandedObjectReadOnly(var->value, + var->isnull, + var->datatype->typlen); prm->isnull = var->isnull; prm->pflags = PARAM_FLAG_CONST; prm->ptype = var->datatype->typoid; @@ -5442,6 +5559,15 @@ plpgsql_param_fetch(ParamListInfo params, int paramid) exec_eval_datum(estate, datum, &prm->ptype, &prmtypmod, &prm->value, &prm->isnull); + + /* + * If it's a read/write expanded datum, convert reference to read-only, + * unless it's safe to pass as read-write.
+ */ + if (datum->dtype == PLPGSQL_DTYPE_VAR && dno != expr->rwparam) + prm->value = MakeExpandedObjectReadOnly(prm->value, + prm->isnull, + ((PLpgSQL_var *) datum)->datatype->typlen); } @@ -6384,6 +6510,113 @@ exec_simple_recheck_plan(PLpgSQL_expr *expr, CachedPlan *cplan) expr->expr_simple_typmod = exprTypmod((Node *) tle->expr); } +/* + * exec_check_rw_parameter --- can we pass expanded object as read/write param? + * + * If we have an assignment like "x := array_append(x, foo)" in which the + * top-level function is trusted not to corrupt its argument in case of an + * error, then when x has an expanded object as value, it is safe to pass the + * value as a read/write pointer and let the function modify the value + * in-place. + * + * This function checks for a safe expression, and sets expr->rwparam to the + * dno of the target variable (x) if safe, or -1 if not safe. + */ +static void +exec_check_rw_parameter(PLpgSQL_expr *expr, int target_dno) +{ + Oid funcid; + List *fargs; + ListCell *lc; + + /* Assume unsafe */ + expr->rwparam = -1; + + /* + * If the expression isn't simple, there's no point in trying to optimize + * (because the exec_run_select code path will flatten any expanded result + * anyway). Even without that, this seems like a good safety restriction. + */ + if (expr->expr_simple_expr == NULL) + return; + + /* + * If target variable isn't referenced by expression, no need to look + * further. + */ + if (!bms_is_member(target_dno, expr->paramnos)) + return; + + /* + * Top level of expression must be a simple FuncExpr or OpExpr. + */ + if (IsA(expr->expr_simple_expr, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) expr->expr_simple_expr; + + funcid = fexpr->funcid; + fargs = fexpr->args; + } + else if (IsA(expr->expr_simple_expr, OpExpr)) + { + OpExpr *opexpr = (OpExpr *) expr->expr_simple_expr; + + funcid = opexpr->opfuncid; + fargs = opexpr->args; + } + else + return; + + /* + * The top-level function must be one that we trust to be "safe". 
+ * Currently we hard-wire the list, but it would be very desirable to + * allow extensions to mark their functions as safe ... + */ + if (!(funcid == F_ARRAY_APPEND || + funcid == F_ARRAY_PREPEND)) + return; + + /* + * The target variable (in the form of a Param) must only appear as a + * direct argument of the top-level function. + */ + foreach(lc, fargs) + { + Node *arg = (Node *) lfirst(lc); + + /* A Param is OK, whether it's the target variable or not */ + if (arg && IsA(arg, Param)) + continue; + /* Otherwise, argument expression must not reference target */ + if (contains_target_param(arg, &target_dno)) + return; + } + + /* OK, we can pass target as a read-write parameter */ + expr->rwparam = target_dno; +} + +/* + * Recursively check for a Param referencing the target variable + */ +static bool +contains_target_param(Node *node, int *target_dno) +{ + if (node == NULL) + return false; + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind == PARAM_EXTERN && + param->paramid == *target_dno + 1) + return true; + return false; + } + return expression_tree_walker(node, contains_target_param, + (void *) target_dno); +} + /* ---------- * exec_set_found Set the global found variable to true/false * ---------- @@ -6540,7 +6773,12 @@ free_var(PLpgSQL_var *var) { if (var->freeval) { - pfree(DatumGetPointer(var->value)); + if (DatumIsReadWriteExpandedObject(var->value, + var->isnull, + var->datatype->typlen)) + DeleteExpandedObject(var->value); + else + pfree(DatumGetPointer(var->value)); var->freeval = false; } } @@ -6750,8 +6988,9 @@ format_expr_params(PLpgSQL_execstate *estate, curvar = (PLpgSQL_var *) estate->datums[dno]; - exec_eval_datum(estate, (PLpgSQL_datum *) curvar, &paramtypeid, - &paramtypmod, &paramdatum, &paramisnull); + exec_eval_datum(estate, (PLpgSQL_datum *) curvar, + &paramtypeid, &paramtypmod, + &paramdatum, &paramisnull); appendStringInfo(&paramstr, "%s%s = ", paramno > 0 ? 
", " : "", diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 4026e417a1..00978909a3 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -2625,6 +2625,7 @@ read_sql_construct(int until, expr->query = pstrdup(ds.data); expr->plan = NULL; expr->paramnos = NULL; + expr->rwparam = -1; expr->ns = plpgsql_ns_top(); pfree(ds.data); @@ -2849,6 +2850,7 @@ make_execsql_stmt(int firsttoken, int location) expr->query = pstrdup(ds.data); expr->plan = NULL; expr->paramnos = NULL; + expr->rwparam = -1; expr->ns = plpgsql_ns_top(); pfree(ds.data); @@ -3732,6 +3734,7 @@ read_cursor_args(PLpgSQL_var *cursor, int until, const char *expected) expr->query = pstrdup(ds.data); expr->plan = NULL; expr->paramnos = NULL; + expr->rwparam = -1; expr->ns = plpgsql_ns_top(); pfree(ds.data); diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h index bec773a787..93c2504641 100644 --- a/src/pl/plpgsql/src/plpgsql.h +++ b/src/pl/plpgsql/src/plpgsql.h @@ -183,6 +183,7 @@ typedef struct char typtype; Oid typrelid; Oid collation; /* from pg_type, but can be overridden */ + bool typisarray; /* is "true" array, or domain over one */ int32 atttypmod; /* typmod (taken from someplace else) */ } PLpgSQL_type; @@ -216,6 +217,7 @@ typedef struct PLpgSQL_expr char *query; SPIPlanPtr plan; Bitmapset *paramnos; /* all dnos referenced by this query */ + int rwparam; /* dno of read/write param, or -1 if none */ /* function containing this expr (not set until we first parse query) */ struct PLpgSQL_function *func; -- 2.40.0