From 3f50ba27cf417eb57fd310c2a88f76a6ea6b966e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 27 Jun 2006 02:51:40 +0000 Subject: [PATCH] Create infrastructure for 'MinimalTuple' representation of in-memory tuples with less header overhead than a regular HeapTuple, per my recent proposal. Teach TupleTableSlot code how to deal with these. As proof of concept, change tuplestore.c to store MinimalTuples instead of HeapTuples. Future patches will expand the concept to other places where it is useful. --- contrib/tablefunc/tablefunc.c | 22 ++- src/backend/access/common/heaptuple.c | 178 +++++++++++++++++++++++- src/backend/commands/portalcmds.c | 13 +- src/backend/executor/execTuples.c | 139 ++++++++++++++++-- src/backend/executor/nodeFunctionscan.c | 15 +- src/backend/executor/nodeMaterial.c | 44 +++--- src/backend/executor/tstoreReceiver.c | 4 +- src/backend/tcop/pquery.c | 12 +- src/backend/utils/sort/tuplestore.c | 130 +++++++++++++---- src/include/access/heapam.h | 10 +- src/include/access/htup.h | 71 +++++++++- src/include/executor/tuptable.h | 25 +++- src/include/utils/tuplestore.h | 24 ++-- 13 files changed, 558 insertions(+), 129 deletions(-) diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c index f5f751e72f..7ab363972e 100644 --- a/contrib/tablefunc/tablefunc.c +++ b/contrib/tablefunc/tablefunc.c @@ -934,19 +934,16 @@ get_crosstab_tuplestore(char *sql, */ if (lastrowid != NULL) { - /* - * switch to appropriate context while storing the tuple - */ - SPIcontext = MemoryContextSwitchTo(per_query_ctx); - /* rowid changed, flush the previous output row */ tuple = BuildTupleFromCStrings(attinmeta, values); + + /* switch to appropriate context while storing the tuple */ + SPIcontext = MemoryContextSwitchTo(per_query_ctx); tuplestore_puttuple(tupstore, tuple); + MemoryContextSwitchTo(SPIcontext); + for (j = 0; j < result_ncols; j++) xpfree(values[j]); - - /* now reset the context */ - MemoryContextSwitchTo(SPIcontext); } values[0] = rowid; 
@@ -970,16 +967,13 @@ get_crosstab_tuplestore(char *sql, lastrowid = pstrdup(rowid); } - /* switch to appropriate context while storing the tuple */ - SPIcontext = MemoryContextSwitchTo(per_query_ctx); - /* flush the last output row */ tuple = BuildTupleFromCStrings(attinmeta, values); - tuplestore_puttuple(tupstore, tuple); - /* now reset the context */ + /* switch to appropriate context while storing the tuple */ + SPIcontext = MemoryContextSwitchTo(per_query_ctx); + tuplestore_puttuple(tupstore, tuple); MemoryContextSwitchTo(SPIcontext); - } if (SPI_finish() != SPI_OK_FINISH) diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 9d77ab2779..7ec314379b 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -16,7 +16,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.106 2006/03/05 15:58:20 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.107 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1295,6 +1295,8 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) { if (tuple == NULL) /* internal error */ elog(ERROR, "cannot extract system attribute from virtual tuple"); + if (slot->tts_mintuple) /* internal error */ + elog(ERROR, "cannot extract system attribute from minimal tuple"); return heap_getsysattr(tuple, attnum, tupleDesc, isnull); } @@ -1479,6 +1481,8 @@ slot_attisnull(TupleTableSlot *slot, int attnum) { if (tuple == NULL) /* internal error */ elog(ERROR, "cannot extract system attribute from virtual tuple"); + if (slot->tts_mintuple) /* internal error */ + elog(ERROR, "cannot extract system attribute from minimal tuple"); return heap_attisnull(tuple, attnum); } @@ -1505,9 +1509,8 @@ slot_attisnull(TupleTableSlot *slot, int attnum) return heap_attisnull(tuple, attnum); } -/* ---------------- - * heap_freetuple - * ---------------- 
+/* + * heap_freetuple */ void heap_freetuple(HeapTuple htup) @@ -1516,6 +1519,173 @@ heap_freetuple(HeapTuple htup) } +/* + * heap_form_minimal_tuple + * construct a MinimalTuple from the given values[] and isnull[] arrays, + * which are of the length indicated by tupleDescriptor->natts + * + * This is exactly like heap_form_tuple() except that the result is a + * "minimal" tuple lacking a HeapTupleData header as well as room for system + * columns. + * + * The result is allocated in the current memory context. + */ +MinimalTuple +heap_form_minimal_tuple(TupleDesc tupleDescriptor, + Datum *values, + bool *isnull) +{ + MinimalTuple tuple; /* return tuple */ + unsigned long len; + int hoff; + bool hasnull = false; + Form_pg_attribute *att = tupleDescriptor->attrs; + int numberOfAttributes = tupleDescriptor->natts; + int i; + + if (numberOfAttributes > MaxTupleAttributeNumber) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("number of columns (%d) exceeds limit (%d)", + numberOfAttributes, MaxTupleAttributeNumber))); + + /* + * Check for nulls and embedded tuples; expand any toasted attributes in + * embedded tuples. This preserves the invariant that toasting can only + * go one level deep. + * + * We can skip calling toast_flatten_tuple_attribute() if the attribute + * couldn't possibly be of composite type. All composite datums are + * varlena and have alignment 'd'; furthermore they aren't arrays. Also, + * if an attribute is already toasted, it must have been sent to disk + * already and so cannot contain toasted attributes. 
+ */ + for (i = 0; i < numberOfAttributes; i++) + { + if (isnull[i]) + hasnull = true; + else if (att[i]->attlen == -1 && + att[i]->attalign == 'd' && + att[i]->attndims == 0 && + !VARATT_IS_EXTENDED(values[i])) + { + values[i] = toast_flatten_tuple_attribute(values[i], + att[i]->atttypid, + att[i]->atttypmod); + } + } + + /* + * Determine total space needed + */ + len = offsetof(MinimalTupleData, t_bits); + + if (hasnull) + len += BITMAPLEN(numberOfAttributes); + + if (tupleDescriptor->tdhasoid) + len += sizeof(Oid); + + hoff = len = MAXALIGN(len); /* align user data safely */ + + len += heap_compute_data_size(tupleDescriptor, values, isnull); + + /* + * Allocate and zero the space needed. + */ + tuple = (MinimalTuple) palloc0(len); + + /* + * And fill in the information. + */ + tuple->t_len = len; + tuple->t_natts = numberOfAttributes; + tuple->t_hoff = hoff + MINIMAL_TUPLE_OFFSET; + + if (tupleDescriptor->tdhasoid) /* else leave infomask = 0 */ + tuple->t_infomask = HEAP_HASOID; + + heap_fill_tuple(tupleDescriptor, + values, + isnull, + (char *) tuple + hoff, + &tuple->t_infomask, + (hasnull ? tuple->t_bits : NULL)); + + return tuple; +} + +/* + * heap_free_minimal_tuple + */ +void +heap_free_minimal_tuple(MinimalTuple mtup) +{ + pfree(mtup); +} + +/* + * heap_copy_minimal_tuple + * copy a MinimalTuple + * + * The result is allocated in the current memory context. + */ +MinimalTuple +heap_copy_minimal_tuple(MinimalTuple mtup) +{ + MinimalTuple result; + + result = (MinimalTuple) palloc(mtup->t_len); + memcpy(result, mtup, mtup->t_len); + return result; +} + +/* + * heap_tuple_from_minimal_tuple + * create a HeapTuple by copying from a MinimalTuple; + * system columns are filled with zeroes + * + * The result is allocated in the current memory context. + * The HeapTuple struct, tuple header, and tuple data are all allocated + * as a single palloc() block. 
+ */ +HeapTuple +heap_tuple_from_minimal_tuple(MinimalTuple mtup) +{ + HeapTuple result; + uint32 len = mtup->t_len + MINIMAL_TUPLE_OFFSET; + + result = (HeapTuple) palloc(HEAPTUPLESIZE + len); + result->t_len = len; + ItemPointerSetInvalid(&(result->t_self)); + result->t_tableOid = InvalidOid; + result->t_data = (HeapTupleHeader) ((char *) result + HEAPTUPLESIZE); + memcpy((char *) result->t_data + MINIMAL_TUPLE_OFFSET, mtup, mtup->t_len); + memset(result->t_data, 0, offsetof(HeapTupleHeaderData, t_natts)); + return result; +} + +/* + * minimal_tuple_from_heap_tuple + * create a MinimalTuple by copying from a HeapTuple + * + * The result is allocated in the current memory context. + */ +MinimalTuple +minimal_tuple_from_heap_tuple(HeapTuple htup) +{ + MinimalTuple result; + uint32 len; + + Assert(htup->t_len > MINIMAL_TUPLE_OFFSET); + len = htup->t_len - MINIMAL_TUPLE_OFFSET; + result = (MinimalTuple) palloc(len); + memcpy(result, (char *) htup->t_data + MINIMAL_TUPLE_OFFSET, len); + result->t_len = len; + return result; +} + + /* ---------------- * heap_addheader * diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 403b5c7064..c4c55c9cf3 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/portalcmds.c,v 1.46 2006/03/05 15:58:24 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/portalcmds.c,v 1.47 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -411,17 +411,8 @@ PersistHoldablePortal(Portal portal) for (store_pos = 0; store_pos < portal->portalPos; store_pos++) { - HeapTuple tup; - bool should_free; - - tup = tuplestore_gettuple(portal->holdStore, true, - &should_free); - - if (tup == NULL) + if (!tuplestore_advance(portal->holdStore, true)) elog(ERROR, "unexpected end of tuple stream"); - - if (should_free) - pfree(tup); } } } diff --git 
a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 971822525f..f03d738619 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.94 2006/06/16 18:42:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.95 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,6 +25,8 @@ * TABLE CREATE/DELETE * ExecCreateTupleTable - create a new tuple table * ExecDropTupleTable - destroy a table + * MakeSingleTupleTableSlot - make a single-slot table + * ExecDropSingleTupleTableSlot - destroy same * * SLOT RESERVATION * ExecAllocTableSlot - find an available slot in the table @@ -32,9 +34,11 @@ * SLOT ACCESSORS * ExecSetSlotDescriptor - set a slot's tuple descriptor * ExecStoreTuple - store a physical tuple in the slot + * ExecStoreMinimalTuple - store a minimal physical tuple in the slot * ExecClearTuple - clear contents of a slot * ExecStoreVirtualTuple - mark slot as containing a virtual tuple * ExecCopySlotTuple - build a physical tuple from a slot + * ExecCopySlotMinimalTuple - build a minimal physical tuple from a slot * ExecMaterializeSlot - convert virtual to physical storage * ExecCopySlot - copy one slot's contents to another * @@ -150,6 +154,7 @@ ExecCreateTupleTable(int tableSize) slot->tts_nvalid = 0; slot->tts_values = NULL; slot->tts_isnull = NULL; + slot->tts_mintuple = NULL; } return newtable; @@ -227,6 +232,7 @@ MakeSingleTupleTableSlot(TupleDesc tupdesc) slot->tts_nvalid = 0; slot->tts_values = NULL; slot->tts_isnull = NULL; + slot->tts_mintuple = NULL; ExecSetSlotDescriptor(slot, tupdesc); @@ -405,7 +411,12 @@ ExecStoreTuple(HeapTuple tuple, * Free any old physical tuple belonging to the slot. 
*/ if (slot->tts_shouldFree) - heap_freetuple(slot->tts_tuple); + { + if (slot->tts_mintuple) + heap_free_minimal_tuple(slot->tts_mintuple); + else + heap_freetuple(slot->tts_tuple); + } /* * Store the new tuple into the specified slot. @@ -413,6 +424,7 @@ ExecStoreTuple(HeapTuple tuple, slot->tts_isempty = false; slot->tts_shouldFree = shouldFree; slot->tts_tuple = tuple; + slot->tts_mintuple = NULL; /* Mark extracted state invalid */ slot->tts_nvalid = 0; @@ -438,6 +450,63 @@ ExecStoreTuple(HeapTuple tuple, return slot; } +/* -------------------------------- + * ExecStoreMinimalTuple + * + * Like ExecStoreTuple, but insert a "minimal" tuple into the slot. + * + * No 'buffer' parameter since minimal tuples are never stored in relations. + * -------------------------------- + */ +TupleTableSlot * +ExecStoreMinimalTuple(MinimalTuple mtup, + TupleTableSlot *slot, + bool shouldFree) +{ + /* + * sanity checks + */ + Assert(mtup != NULL); + Assert(slot != NULL); + Assert(slot->tts_tupleDescriptor != NULL); + + /* + * Free any old physical tuple belonging to the slot. + */ + if (slot->tts_shouldFree) + { + if (slot->tts_mintuple) + heap_free_minimal_tuple(slot->tts_mintuple); + else + heap_freetuple(slot->tts_tuple); + } + + /* + * Drop the pin on the referenced buffer, if there is one. + */ + if (BufferIsValid(slot->tts_buffer)) + ReleaseBuffer(slot->tts_buffer); + + slot->tts_buffer = InvalidBuffer; + + /* + * Store the new tuple into the specified slot. 
+ */ + slot->tts_isempty = false; + slot->tts_shouldFree = shouldFree; + slot->tts_tuple = &slot->tts_minhdr; + slot->tts_mintuple = mtup; + + slot->tts_minhdr.t_len = mtup->t_len + MINIMAL_TUPLE_OFFSET; + slot->tts_minhdr.t_data = (HeapTupleHeader) ((char *) mtup - MINIMAL_TUPLE_OFFSET); + /* no need to set t_self or t_tableOid since we won't allow access */ + + /* Mark extracted state invalid */ + slot->tts_nvalid = 0; + + return slot; +} + /* -------------------------------- * ExecClearTuple * @@ -458,9 +527,15 @@ ExecClearTuple(TupleTableSlot *slot) /* slot in which to store tuple */ * Free the old physical tuple if necessary. */ if (slot->tts_shouldFree) - heap_freetuple(slot->tts_tuple); + { + if (slot->tts_mintuple) + heap_free_minimal_tuple(slot->tts_mintuple); + else + heap_freetuple(slot->tts_tuple); + } slot->tts_tuple = NULL; + slot->tts_mintuple = NULL; slot->tts_shouldFree = false; /* @@ -540,10 +615,10 @@ ExecStoreAllNullTuple(TupleTableSlot *slot) /* -------------------------------- * ExecCopySlotTuple - * Obtain a copy of a slot's physical tuple. The copy is + * Obtain a copy of a slot's regular physical tuple. The copy is * palloc'd in the current memory context. * - * This works even if the slot contains a virtual tuple; + * This works even if the slot contains a virtual or minimal tuple; * however the "system columns" of the result will not be meaningful. * -------------------------------- */ @@ -560,7 +635,12 @@ ExecCopySlotTuple(TupleTableSlot *slot) * If we have a physical tuple then just copy it. */ if (slot->tts_tuple) - return heap_copytuple(slot->tts_tuple); + { + if (slot->tts_mintuple) + return heap_tuple_from_minimal_tuple(slot->tts_mintuple); + else + return heap_copytuple(slot->tts_tuple); + } /* * Otherwise we need to build a tuple from the Datum array. 
@@ -570,12 +650,47 @@ ExecCopySlotTuple(TupleTableSlot *slot) slot->tts_isnull); } +/* -------------------------------- + * ExecCopySlotMinimalTuple + * Obtain a copy of a slot's minimal physical tuple. The copy is + * palloc'd in the current memory context. + * -------------------------------- + */ +MinimalTuple +ExecCopySlotMinimalTuple(TupleTableSlot *slot) +{ + /* + * sanity checks + */ + Assert(slot != NULL); + Assert(!slot->tts_isempty); + + /* + * If we have a physical tuple then just copy it. + */ + if (slot->tts_tuple) + { + if (slot->tts_mintuple) + return heap_copy_minimal_tuple(slot->tts_mintuple); + else + return minimal_tuple_from_heap_tuple(slot->tts_tuple); + } + + /* + * Otherwise we need to build a tuple from the Datum array. + */ + return heap_form_minimal_tuple(slot->tts_tupleDescriptor, + slot->tts_values, + slot->tts_isnull); +} + /* -------------------------------- * ExecFetchSlotTuple - * Fetch the slot's physical tuple. + * Fetch the slot's regular physical tuple. * * If the slot contains a virtual tuple, we convert it to physical * form. The slot retains ownership of the physical tuple. + * Likewise, if it contains a minimal tuple we convert to regular form. * * The difference between this and ExecMaterializeSlot() is that this * does not guarantee that the contained tuple is local storage. @@ -592,9 +707,9 @@ ExecFetchSlotTuple(TupleTableSlot *slot) Assert(!slot->tts_isempty); /* - * If we have a physical tuple then just return it. + * If we have a regular physical tuple then just return it. */ - if (slot->tts_tuple) + if (slot->tts_tuple && slot->tts_mintuple == NULL) return slot->tts_tuple; /* @@ -629,10 +744,10 @@ ExecMaterializeSlot(TupleTableSlot *slot) Assert(!slot->tts_isempty); /* - * If we have a physical tuple, and it's locally palloc'd, we have nothing - * to do. + * If we have a regular physical tuple, and it's locally palloc'd, + * we have nothing to do. 
*/ - if (slot->tts_tuple && slot->tts_shouldFree) + if (slot->tts_tuple && slot->tts_shouldFree && slot->tts_mintuple == NULL) return slot->tts_tuple; /* diff --git a/src/backend/executor/nodeFunctionscan.c b/src/backend/executor/nodeFunctionscan.c index 0c77e82169..90ab018874 100644 --- a/src/backend/executor/nodeFunctionscan.c +++ b/src/backend/executor/nodeFunctionscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeFunctionscan.c,v 1.39 2006/06/16 18:42:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeFunctionscan.c,v 1.40 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,8 +48,6 @@ FunctionNext(FunctionScanState *node) EState *estate; ScanDirection direction; Tuplestorestate *tuplestorestate; - bool should_free; - HeapTuple heapTuple; /* * get information from the estate and scan state @@ -86,14 +84,11 @@ FunctionNext(FunctionScanState *node) /* * Get the next tuple from tuplestore. Return NULL if no more tuples. 
*/ - heapTuple = tuplestore_getheaptuple(tuplestorestate, - ScanDirectionIsForward(direction), - &should_free); slot = node->ss.ss_ScanTupleSlot; - if (heapTuple) - return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free); - else - return ExecClearTuple(slot); + (void) tuplestore_gettupleslot(tuplestorestate, + ScanDirectionIsForward(direction), + slot); + return slot; } /* ---------------------------------------------------------------- diff --git a/src/backend/executor/nodeMaterial.c b/src/backend/executor/nodeMaterial.c index eca769a86c..94a0701385 100644 --- a/src/backend/executor/nodeMaterial.c +++ b/src/backend/executor/nodeMaterial.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.54 2006/03/05 15:58:26 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.55 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -44,8 +44,6 @@ ExecMaterial(MaterialState *node) ScanDirection dir; bool forward; Tuplestorestate *tuplestorestate; - HeapTuple heapTuple = NULL; - bool should_free = false; bool eof_tuplestore; TupleTableSlot *slot; @@ -80,27 +78,25 @@ ExecMaterial(MaterialState *node) { /* * When reversing direction at tuplestore EOF, the first - * getheaptuple call will fetch the last-added tuple; but we want + * gettupleslot call will fetch the last-added tuple; but we want * to return the one before that, if possible. So do an extra * fetch. */ - heapTuple = tuplestore_getheaptuple(tuplestorestate, - forward, - &should_free); - if (heapTuple == NULL) + if (!tuplestore_advance(tuplestorestate, forward)) return NULL; /* the tuplestore must be empty */ - if (should_free) - heap_freetuple(heapTuple); } eof_tuplestore = false; } + /* + * If we can fetch another tuple from the tuplestore, return it. 
+ */ + slot = node->ss.ps.ps_ResultTupleSlot; if (!eof_tuplestore) { - heapTuple = tuplestore_getheaptuple(tuplestorestate, - forward, - &should_free); - if (heapTuple == NULL && forward) + if (tuplestore_gettupleslot(tuplestorestate, forward, slot)) + return slot; + if (forward) eof_tuplestore = true; } @@ -128,26 +124,26 @@ ExecMaterial(MaterialState *node) node->eof_underlying = true; return NULL; } - heapTuple = ExecFetchSlotTuple(outerslot); - should_free = false; /* - * Append returned tuple to tuplestore, too. NOTE: because the + * Append returned tuple to tuplestore. NOTE: because the * tuplestore is certainly in EOF state, its read position will move * forward over the added tuple. This is what we want. */ if (tuplestorestate) - tuplestore_puttuple(tuplestorestate, (void *) heapTuple); + tuplestore_puttupleslot(tuplestorestate, outerslot); + + /* + * And return a copy of the tuple. (XXX couldn't we just return + * the outerslot?) + */ + return ExecCopySlot(slot, outerslot); } /* - * Return the obtained tuple, if any. + * Nothing left ... 
*/ - slot = (TupleTableSlot *) node->ss.ps.ps_ResultTupleSlot; - if (heapTuple) - return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free); - else - return ExecClearTuple(slot); + return ExecClearTuple(slot); } /* ---------------------------------------------------------------- diff --git a/src/backend/executor/tstoreReceiver.c b/src/backend/executor/tstoreReceiver.c index dffc8899b2..8ebf5b7fd1 100644 --- a/src/backend/executor/tstoreReceiver.c +++ b/src/backend/executor/tstoreReceiver.c @@ -9,7 +9,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/tstoreReceiver.c,v 1.16 2006/03/05 15:58:27 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/tstoreReceiver.c,v 1.17 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -45,7 +45,7 @@ tstoreReceiveSlot(TupleTableSlot *slot, DestReceiver *self) TStoreState *myState = (TStoreState *) self; MemoryContext oldcxt = MemoryContextSwitchTo(myState->cxt); - tuplestore_puttuple(myState->tstore, ExecFetchSlotTuple(slot)); + tuplestore_puttupleslot(myState->tstore, slot); MemoryContextSwitchTo(oldcxt); } diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 060ce56792..c79d0eae18 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tcop/pquery.c,v 1.101 2006/03/05 15:58:40 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/pquery.c,v 1.102 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -911,21 +911,17 @@ RunFromStore(Portal portal, ScanDirection direction, long count, for (;;) { MemoryContext oldcontext; - HeapTuple tup; - bool should_free; + bool ok; oldcontext = MemoryContextSwitchTo(portal->holdContext); - tup = tuplestore_getheaptuple(portal->holdStore, forward, - &should_free); + ok = 
tuplestore_gettupleslot(portal->holdStore, forward, slot); MemoryContextSwitchTo(oldcontext); - if (tup == NULL) + if (!ok) break; - ExecStoreTuple(tup, slot, InvalidBuffer, should_free); - (*dest->receiveSlot) (slot, dest); ExecClearTuple(slot); diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index 6ff6b72082..a2ed330ccc 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -36,7 +36,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.27 2006/03/05 15:58:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.28 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -195,6 +195,7 @@ struct Tuplestorestate static Tuplestorestate *tuplestore_begin_common(bool randomAccess, bool interXact, int maxKBytes); +static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple); static void dumptuples(Tuplestorestate *state); static unsigned int getlen(Tuplestorestate *state, bool eofOK); static void *copytup_heap(Tuplestorestate *state, void *tup); @@ -304,15 +305,45 @@ tuplestore_ateof(Tuplestorestate *state) * If the read status is currently "AT EOF" then it remains so (the read * pointer advances along with the write pointer); otherwise the read * pointer is unchanged. This is for the convenience of nodeMaterial.c. + * + * tuplestore_puttupleslot() is a convenience routine to collect data from + * a TupleTableSlot without an extra copy operation. 
*/ void -tuplestore_puttuple(Tuplestorestate *state, void *tuple) +tuplestore_puttupleslot(Tuplestorestate *state, + TupleTableSlot *slot) +{ + MinimalTuple tuple; + + /* + * Form a MinimalTuple in working memory + */ + tuple = ExecCopySlotMinimalTuple(slot); + USEMEM(state, GetMemoryChunkSpace(tuple)); + + tuplestore_puttuple_common(state, (void *) tuple); +} + +/* + * "Standard" case to copy from a HeapTuple. This is actually now somewhat + * deprecated, but not worth getting rid of in view of the number of callers. + * (Consider adding something that takes a tupdesc+values/nulls arrays so + * that we can use heap_form_minimal_tuple() and avoid a copy step.) + */ +void +tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple) { /* * Copy the tuple. (Must do this even in WRITEFILE case.) */ tuple = COPYTUP(state, tuple); + tuplestore_puttuple_common(state, (void *) tuple); +} + +static void +tuplestore_puttuple_common(Tuplestorestate *state, void *tuple) +{ switch (state->status) { case TSS_INMEM: @@ -389,7 +420,7 @@ tuplestore_puttuple(Tuplestorestate *state, void *tuple) * Returns NULL if no more tuples. If should_free is set, the * caller must pfree the returned tuple when done with it. */ -void * +static void * tuplestore_gettuple(Tuplestorestate *state, bool forward, bool *should_free) { @@ -525,6 +556,59 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward, } } +/* + * tuplestore_gettupleslot - exported function to fetch a MinimalTuple + * + * If successful, put tuple in slot and return TRUE; else, clear the slot + * and return FALSE. 
+ */ +bool +tuplestore_gettupleslot(Tuplestorestate *state, bool forward, + TupleTableSlot *slot) +{ + MinimalTuple tuple; + bool should_free; + + tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free); + + if (tuple) + { + ExecStoreMinimalTuple(tuple, slot, should_free); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + +/* + * tuplestore_advance - exported function to adjust position without fetching + * + * We could optimize this case to avoid palloc/pfree overhead, but for the + * moment it doesn't seem worthwhile. + */ +bool +tuplestore_advance(Tuplestorestate *state, bool forward) +{ + void *tuple; + bool should_free; + + tuple = tuplestore_gettuple(state, forward, &should_free); + + if (tuple) + { + if (should_free) + pfree(tuple); + return true; + } + else + { + return false; + } +} + /* * dumptuples - remove tuples from memory and write to tape * @@ -672,34 +756,31 @@ getlen(Tuplestorestate *state, bool eofOK) /* * Routines specialized for HeapTuple case + * + * The stored form is actually a MinimalTuple, but for largely historical + * reasons we allow COPYTUP to work from a HeapTuple. + * + * Since MinimalTuple already has length in its first word, we don't need + * to write that separately. */ static void * copytup_heap(Tuplestorestate *state, void *tup) { - HeapTuple tuple = (HeapTuple) tup; + MinimalTuple tuple; - tuple = heap_copytuple(tuple); + tuple = minimal_tuple_from_heap_tuple((HeapTuple) tup); USEMEM(state, GetMemoryChunkSpace(tuple)); return (void *) tuple; } -/* - * We don't bother to write the HeapTupleData part of the tuple. 
- */ - static void writetup_heap(Tuplestorestate *state, void *tup) { - HeapTuple tuple = (HeapTuple) tup; - unsigned int tuplen; + MinimalTuple tuple = (MinimalTuple) tup; + unsigned int tuplen = tuple->t_len; - tuplen = tuple->t_len + sizeof(tuplen); - if (BufFileWrite(state->myfile, (void *) &tuplen, - sizeof(tuplen)) != sizeof(tuplen)) - elog(ERROR, "write failed"); - if (BufFileWrite(state->myfile, (void *) tuple->t_data, - tuple->t_len) != (size_t) tuple->t_len) + if (BufFileWrite(state->myfile, (void *) tuple, tuplen) != (size_t) tuplen) elog(ERROR, "write failed"); if (state->randomAccess) /* need trailing length word? */ if (BufFileWrite(state->myfile, (void *) &tuplen, @@ -707,23 +788,20 @@ writetup_heap(Tuplestorestate *state, void *tup) elog(ERROR, "write failed"); FREEMEM(state, GetMemoryChunkSpace(tuple)); - heap_freetuple(tuple); + heap_free_minimal_tuple(tuple); } static void * readtup_heap(Tuplestorestate *state, unsigned int len) { - unsigned int tuplen = len - sizeof(unsigned int) + HEAPTUPLESIZE; - HeapTuple tuple = (HeapTuple) palloc(tuplen); + MinimalTuple tuple = (MinimalTuple) palloc(len); + unsigned int tuplen; USEMEM(state, GetMemoryChunkSpace(tuple)); - /* reconstruct the HeapTupleData portion */ - tuple->t_len = len - sizeof(unsigned int); - ItemPointerSetInvalid(&(tuple->t_self)); - tuple->t_data = (HeapTupleHeader) (((char *) tuple) + HEAPTUPLESIZE); /* read in the tuple proper */ - if (BufFileRead(state->myfile, (void *) tuple->t_data, - tuple->t_len) != (size_t) tuple->t_len) + tuple->t_len = len; + if (BufFileRead(state->myfile, (void *) ((char *) tuple + sizeof(int)), + len - sizeof(int)) != (size_t) (len - sizeof(int))) elog(ERROR, "unexpected end of data"); if (state->randomAccess) /* need trailing length word? 
*/ if (BufFileRead(state->myfile, (void *) &tuplen, diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index e016a52740..7301be4ae3 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.111 2006/05/10 23:18:39 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.112 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -217,7 +217,13 @@ extern void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull); extern void heap_deformtuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, char *nulls); -extern void heap_freetuple(HeapTuple tuple); +extern void heap_freetuple(HeapTuple htup); +extern MinimalTuple heap_form_minimal_tuple(TupleDesc tupleDescriptor, + Datum *values, bool *isnull); +extern void heap_free_minimal_tuple(MinimalTuple mtup); +extern MinimalTuple heap_copy_minimal_tuple(MinimalTuple mtup); +extern HeapTuple heap_tuple_from_minimal_tuple(MinimalTuple mtup); +extern MinimalTuple minimal_tuple_from_heap_tuple(HeapTuple htup); extern HeapTuple heap_addheader(int natts, bool withoid, Size structlen, void *structure); diff --git a/src/include/access/htup.h b/src/include/access/htup.h index 57db9f8493..170ad657fb 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.82 2006/05/10 23:18:39 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.83 2006/06/27 02:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,7 +31,7 
@@ */ #define MaxTupleAttributeNumber 1664 /* 8 * 208 */ -/*---------- +/* * MaxHeapAttributeNumber limits the number of (user) columns in a table. * This should be somewhat less than MaxTupleAttributeNumber. It must be * at least one less, else we will fail to do UPDATEs on a maximal-width @@ -42,11 +42,10 @@ * In any case, depending on column data types you will likely be running * into the disk-block-based limit on overall tuple size if you have more * than a thousand or so columns. TOAST won't help. - *---------- */ #define MaxHeapAttributeNumber 1600 /* 8 * 200 */ -/*---------- +/* * Heap tuple header. To avoid wasting space, the fields should be * layed out in such a way to avoid structure padding. * @@ -101,7 +100,6 @@ * t_infomask), then it is stored just before the user data, which begins at * the offset shown by t_hoff. Note that t_hoff must be a multiple of * MAXALIGN. - *---------- */ typedef struct HeapTupleFields @@ -141,6 +139,8 @@ typedef struct HeapTupleHeaderData ItemPointerData t_ctid; /* current TID of this or newer tuple */ + /* Fields below here must match MinimalTupleData! */ + int16 t_natts; /* number of attributes */ uint16 t_infomask; /* various flag bits, see below */ @@ -354,6 +354,62 @@ do { \ #define FirstLowInvalidHeapAttributeNumber (-8) +/* + * MinimalTuple is an alternate representation that is used for transient + * tuples inside the executor, in places where transaction status information + * is not required, the tuple rowtype is known, and shaving off a few bytes + * is worthwhile because we need to store many tuples. The representation + * is chosen so that tuple access routines can work with either full or + * minimal tuples via a HeapTupleData pointer structure. The access routines + * see no difference, except that they must not access the transaction status + * or t_ctid fields because those aren't there. + * + * For the most part, MinimalTuples should be accessed via TupleTableSlot + * routines. 
These routines will prevent access to the "system columns" + * and thereby prevent accidental use of the nonexistent fields. + * + * MinimalTupleData contains a length word, some padding, and fields matching + * HeapTupleHeaderData beginning with t_natts. The padding is chosen so that + * offsetof(t_natts) is the same modulo MAXIMUM_ALIGNOF in both structs. + * This makes data alignment rules equivalent in both cases. + * + * When a minimal tuple is accessed via a HeapTupleData pointer, t_data is + * set to point MINIMAL_TUPLE_OFFSET bytes before the actual start of the + * minimal tuple --- that is, where a full tuple matching the minimal tuple's + * data would start. This trick is what makes the structs seem equivalent. + * + * Note that t_hoff is computed the same as in a full tuple, hence it includes + * the MINIMAL_TUPLE_OFFSET distance. t_len does not include that, however. + */ +#define MINIMAL_TUPLE_OFFSET \ + ((offsetof(HeapTupleHeaderData, t_natts) - sizeof(uint32)) / MAXIMUM_ALIGNOF * MAXIMUM_ALIGNOF) +#define MINIMAL_TUPLE_PADDING \ + ((offsetof(HeapTupleHeaderData, t_natts) - sizeof(uint32)) % MAXIMUM_ALIGNOF) + +typedef struct MinimalTupleData +{ + uint32 t_len; /* actual length of minimal tuple */ + + char mt_padding[MINIMAL_TUPLE_PADDING]; + + /* Fields below here must match HeapTupleHeaderData! */ + + int16 t_natts; /* number of attributes */ + + uint16 t_infomask; /* various flag bits, see below */ + + uint8 t_hoff; /* sizeof header incl. bitmap, padding */ + + /* ^ - 27 bytes, measured from where the equivalent full tuple header would start (i.e. including MINIMAL_TUPLE_OFFSET), not from t_len - ^ */ + + bits8 t_bits[1]; /* bitmap of NULLs -- VARIABLE LENGTH */ + + /* MORE DATA FOLLOWS AT END OF STRUCT */ +} MinimalTupleData; + +typedef MinimalTupleData *MinimalTuple; + + /* * HeapTupleData is an in-memory data structure that points to a tuple. * @@ -376,6 +432,11 @@ do { \ * it's difficult to tell apart from case #1. It should be used only in * limited contexts where the code knows that case #1 will never apply.)
* + * * Separately allocated minimal tuple: t_data points MINIMAL_TUPLE_OFFSET + * bytes before the start of a MinimalTuple. As with the previous case, + * this can't be told apart from case #1 by inspection; code setting up + * or destroying this representation has to know what it's doing. + * * t_len should always be valid, except in the pointer-to-nothing case. * t_self and t_tableOid should be valid if the HeapTupleData points to * a disk buffer, or if it represents a copy of a tuple on disk. They diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h index b506651fbb..8531835134 100644 --- a/src/include/executor/tuptable.h +++ b/src/include/executor/tuptable.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.31 2006/06/16 18:42:23 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.32 2006/06/27 02:51:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,7 +23,8 @@ * independent TupleTableSlots. There are several cases we need to handle: * 1. physical tuple in a disk buffer page * 2. physical tuple constructed in palloc'ed memory - * 3. "virtual" tuple consisting of Datum/isnull arrays + * 3. "minimal" physical tuple constructed in palloc'ed memory + * 4. "virtual" tuple consisting of Datum/isnull arrays * * The first two cases are similar in that they both deal with "materialized" * tuples, but resource management is different. For a tuple in a disk page @@ -31,6 +32,11 @@ * to the tuple is dropped; while for a palloc'd tuple we usually want the * tuple pfree'd when the TupleTableSlot's reference is dropped. * + * A "minimal" tuple is handled similarly to a palloc'd regular tuple. + * At present, minimal tuples never are stored in buffers, so there is no + * parallel to case 1. 
Note that a minimal tuple has no "system columns". + * (Actually, it could have an OID, but we have no need to access the OID.) + * * A "virtual" tuple is an optimization used to minimize physical data * copying in a nest of plan nodes. Any pass-by-reference Datums in the * tuple point to storage that is not directly associated with the @@ -83,6 +89,15 @@ * tts_values/tts_isnull are allocated when a descriptor is assigned to the * slot; they are of length equal to the descriptor's natts. * + * tts_mintuple must always be NULL if the slot does not hold a "minimal" + * tuple. When it does, tts_mintuple points to the actual MinimalTupleData + * object (the thing to be pfree'd if tts_shouldFree is true). In this case + * tts_tuple points at tts_minhdr and the fields of that are set correctly + * for access to the minimal tuple; in particular, tts_minhdr.t_data points + * MINIMAL_TUPLE_OFFSET bytes before tts_mintuple. (tts_mintuple is therefore + * redundant, but for code simplicity we store it explicitly anyway.) This + * case otherwise behaves identically to the regular-physical-tuple case. + * * tts_slow/tts_off are saved state for slot_deform_tuple, and should not * be touched by any other code. 
*---------- @@ -100,6 +115,8 @@ typedef struct TupleTableSlot int tts_nvalid; /* # of valid values in tts_values */ Datum *tts_values; /* current per-attribute values */ bool *tts_isnull; /* current per-attribute isnull flags */ + MinimalTuple tts_mintuple; /* set if it's a minimal tuple, else NULL */ + HeapTupleData tts_minhdr; /* workspace if it's a minimal tuple */ long tts_off; /* saved state for slot_deform_tuple */ } TupleTableSlot; @@ -133,10 +150,14 @@ extern TupleTableSlot *ExecStoreTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer, bool shouldFree); +extern TupleTableSlot *ExecStoreMinimalTuple(MinimalTuple mtup, + TupleTableSlot *slot, + bool shouldFree); extern TupleTableSlot *ExecClearTuple(TupleTableSlot *slot); extern TupleTableSlot *ExecStoreVirtualTuple(TupleTableSlot *slot); extern TupleTableSlot *ExecStoreAllNullTuple(TupleTableSlot *slot); extern HeapTuple ExecCopySlotTuple(TupleTableSlot *slot); +extern MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot); extern HeapTuple ExecFetchSlotTuple(TupleTableSlot *slot); extern HeapTuple ExecMaterializeSlot(TupleTableSlot *slot); extern TupleTableSlot *ExecCopySlot(TupleTableSlot *dstslot, diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h index a671bf438f..6227934414 100644 --- a/src/include/utils/tuplestore.h +++ b/src/include/utils/tuplestore.h @@ -14,17 +14,23 @@ * A temporary file is used to handle the data if it exceeds the * space limit specified by the caller. * + * Beginning in Postgres 8.2, what is stored is just MinimalTuples; + * callers cannot expect valid system columns in regurgitated tuples. + * Also, we have changed the API to return tuples in TupleTableSlots, + * so that there is a check to prevent attempted access to system columns. 
+ * * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/tuplestore.h,v 1.17 2006/03/05 15:59:08 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/tuplestore.h,v 1.18 2006/06/27 02:51:40 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef TUPLESTORE_H #define TUPLESTORE_H -#include "access/htup.h" +#include "executor/tuptable.h" + /* Tuplestorestate is an opaque type whose details are not known outside * tuplestore.c. @@ -32,7 +38,7 @@ typedef struct Tuplestorestate Tuplestorestate; /* - * Currently we only need to store HeapTuples, but it would be easy + * Currently we only need to store MinimalTuples, but it would be easy * to support the same behavior for IndexTuples and/or bare Datums. */ @@ -40,17 +46,17 @@ extern Tuplestorestate *tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes); -extern void tuplestore_puttuple(Tuplestorestate *state, void *tuple); +extern void tuplestore_puttupleslot(Tuplestorestate *state, + TupleTableSlot *slot); +extern void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple); /* tuplestore_donestoring() used to be required, but is no longer used */ #define tuplestore_donestoring(state) ((void) 0) /* backwards scan is only allowed if randomAccess was specified 'true' */ -extern void *tuplestore_gettuple(Tuplestorestate *state, bool forward, - bool *should_free); - -#define tuplestore_getheaptuple(state, forward, should_free) \ - ((HeapTuple) tuplestore_gettuple(state, forward, should_free)) +extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, + TupleTableSlot *slot); +extern bool tuplestore_advance(Tuplestorestate *state, bool forward); extern void tuplestore_end(Tuplestorestate *state); -- 2.40.0