From cdd5178c69c4cc80baedba0c7829c63b9f78d0f5 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 27 Jun 2006 16:53:02 +0000 Subject: [PATCH] Extend the MinimalTuple concept to tuplesort.c, thereby reducing the per-tuple space overhead for sorts in memory. I chose to replace the previous patch that tried to write out the bare minimum amount of data when sorting on disk; instead, just dump the MinimalTuples as-is. This wastes 3 to 10 bytes per tuple depending on architecture and null-bitmap length, but the simplification in the writetup/readtup routines seems worth it. --- src/backend/access/nbtree/nbtsort.c | 4 +- src/backend/executor/nodeSort.c | 19 +-- src/backend/utils/sort/tuplesort.c | 232 ++++++++++++++-------------- src/include/utils/tuplesort.h | 36 +++-- 4 files changed, 139 insertions(+), 152 deletions(-) diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 396c81949b..e7293b47b0 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -56,7 +56,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.101 2006/05/08 00:00:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.102 2006/06/27 16:53:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -179,7 +179,7 @@ _bt_spooldestroy(BTSpool *btspool) void _bt_spool(IndexTuple itup, BTSpool *btspool) { - tuplesort_puttuple(btspool->sortstate, (void *) itup); + tuplesort_putindextuple(btspool->sortstate, itup); } /* diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c index 137c4e1363..b586a37a64 100644 --- a/src/backend/executor/nodeSort.c +++ b/src/backend/executor/nodeSort.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeSort.c,v 1.56 2006/03/05 15:58:26 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeSort.c,v 1.57 2006/06/27 16:53:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -41,9 +41,7 @@ ExecSort(SortState *node) EState *estate; ScanDirection dir; Tuplesortstate *tuplesortstate; - HeapTuple heapTuple; TupleTableSlot *slot; - bool should_free; /* * get state info from node @@ -103,8 +101,7 @@ ExecSort(SortState *node) if (TupIsNull(slot)) break; - tuplesort_puttuple(tuplesortstate, - (void *) ExecFetchSlotTuple(slot)); + tuplesort_puttupleslot(tuplesortstate, slot); } /* @@ -131,15 +128,11 @@ ExecSort(SortState *node) * Get the first or next tuple from tuplesort. Returns NULL if no more * tuples. */ - heapTuple = tuplesort_getheaptuple(tuplesortstate, - ScanDirectionIsForward(dir), - &should_free); - slot = node->ss.ps.ps_ResultTupleSlot; - if (heapTuple) - return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free); - else - return ExecClearTuple(slot); + (void) tuplesort_gettupleslot(tuplesortstate, + ScanDirectionIsForward(dir), + slot); + return slot; } /* ---------------------------------------------------------------- diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index ff614dc16f..67c2e9ab5d 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -70,7 +70,7 @@ * that we can access it randomly. When the caller does not need random * access, we return from tuplesort_performsort() as soon as we are down * to one run per logical tape. The final merge is then performed - * on-the-fly as the caller repeatedly calls tuplesort_gettuple; this + * on-the-fly as the caller repeatedly calls tuplesort_getXXX; this * saves one cycle of writing all the data out to disk and reading it in. * * Before Postgres 8.2, we always used a seven-tape polyphase merge, on the @@ -91,7 +91,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.66 2006/05/23 21:37:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.67 2006/06/27 16:53:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -121,7 +121,7 @@ bool trace_sort = false; /* * The objects we actually sort are SortTuple structs. These contain - * a pointer to the tuple proper (might be a HeapTuple or IndexTuple), + * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), * which is a separate palloc chunk --- we assume it is just one chunk and * can be freed by a simple pfree(). SortTuples also contain the tuple's * first key column in Datum/nullflag format, and an index integer. @@ -311,8 +311,8 @@ struct Tuplesortstate bool markpos_eof; /* saved "eof_reached" */ /* - * These variables are specific to the HeapTuple case; they are set by - * tuplesort_begin_heap and used only by the HeapTuple routines. + * These variables are specific to the MinimalTuple case; they are set by + * tuplesort_begin_heap and used only by the MinimalTuple routines. */ TupleDesc tupDesc; ScanKey scanKeys; /* array of length nKeys */ @@ -448,12 +448,11 @@ static Tuplesortstate *qsort_tuplesortstate; * * Initialize for a tuple sort operation. * - * After calling tuplesort_begin, the caller should call tuplesort_puttuple + * After calling tuplesort_begin, the caller should call tuplesort_putXXX * zero or more times, then call tuplesort_performsort when all the tuples * have been supplied. After performsort, retrieve the tuples in sorted - * order by calling tuplesort_gettuple until it returns NULL. (If random + * order by calling tuplesort_getXXX until it returns false/NULL. (If random * access was requested, rescan, markpos, and restorepos can also be called.) - * For Datum sorts, putdatum/getdatum are used instead of puttuple/gettuple. * Call tuplesort_end to terminate the operation and release memory/disk space. * * Each variant of tuplesort_begin has a workMem parameter specifying the @@ -669,9 +668,9 @@ tuplesort_begin_datum(Oid datumType, * * Release resources and clean up. * - * NOTE: after calling this, any tuple pointers returned by tuplesort_gettuple - * or datum pointers returned by tuplesort_getdatum are pointing to garbage. - * Be careful not to attempt to use or free such pointers afterwards! + * NOTE: after calling this, any pointers returned by tuplesort_getXXX are + * pointing to garbage. Be careful not to attempt to use or free such + * pointers afterwards! */ void tuplesort_end(Tuplesortstate *state) @@ -762,19 +761,41 @@ grow_memtuples(Tuplesortstate *state) /* * Accept one tuple while collecting input data for sort. * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) slot); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one index tuple while collecting input data for sort. + * * Note that the input tuple is always copied; the caller need not save it. */ void -tuplesort_puttuple(Tuplesortstate *state, void *tuple) +tuplesort_putindextuple(Tuplesortstate *state, IndexTuple tuple) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; /* * Copy the given tuple into memory we control, and decrease availMem. - * Then call the code shared with the Datum case. + * Then call the common code. */ - COPYTUP(state, &stup, tuple); + COPYTUP(state, &stup, (void *) tuple); puttuple_common(state, &stup); @@ -794,7 +815,7 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) /* * If it's a pass-by-reference value, copy it into memory we control, - * and decrease availMem. Then call the code shared with the tuple case. + * and decrease availMem. Then call the common code. */ if (isNull || state->datumTypeByVal) { @@ -1151,12 +1172,42 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, /* * Fetch the next tuple in either forward or back direction. + * If successful, put tuple in slot and return TRUE; else, clear the slot + * and return FALSE. + */ +bool +tuplesort_gettupleslot(Tuplesortstate *state, bool forward, + TupleTableSlot *slot) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + bool should_free; + + if (!tuplesort_gettuple_common(state, forward, &stup, &should_free)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + if (stup.tuple) + { + ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, should_free); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + +/* + * Fetch the next index tuple in either forward or back direction. * Returns NULL if no more tuples. If *should_free is set, the * caller must pfree the returned tuple when done with it. */ -void * -tuplesort_gettuple(Tuplesortstate *state, bool forward, - bool *should_free) +IndexTuple +tuplesort_getindextuple(Tuplesortstate *state, bool forward, + bool *should_free) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -1166,7 +1217,7 @@ tuplesort_gettuple(Tuplesortstate *state, bool forward, MemoryContextSwitchTo(oldcontext); - return stup.tuple; + return (IndexTuple) stup.tuple; } /* @@ -2265,15 +2316,15 @@ ApplySortFunction(FmgrInfo *sortFunction, SortFunctionKind kind, /* - * Routines specialized for HeapTuple case + * Routines specialized for HeapTuple (actually MinimalTuple) case */ static int comparetup_heap(Tuplesortstate *state, const SortTuple *a, const SortTuple *b) { ScanKey scanKey = state->scanKeys; - HeapTuple ltup; - HeapTuple rtup; + HeapTupleData ltup; + HeapTupleData rtup; TupleDesc tupDesc; int nkey; int32 compare; @@ -2287,8 +2338,10 @@ comparetup_heap(Tuplesortstate *state, const SortTuple *a, const SortTuple *b) return compare; /* Compare additional sort keys */ - ltup = (HeapTuple) a->tuple; - rtup = (HeapTuple) b->tuple; + ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); + rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); tupDesc = state->tupDesc; scanKey++; for (nkey = 1; nkey < state->nKeys; nkey++, scanKey++) @@ -2299,8 +2352,8 @@ comparetup_heap(Tuplesortstate *state, const SortTuple *a, const SortTuple *b) bool isnull1, isnull2; - datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); - datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2); + datum1 = heap_getattr(<up, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); compare = inlineApplySortFunction(&scanKey->sk_func, state->sortFnKinds[nkey], @@ -2316,132 +2369,71 @@ comparetup_heap(Tuplesortstate *state, const SortTuple *a, const SortTuple *b) static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) { - HeapTuple tuple = (HeapTuple) tup; + /* + * We expect the passed "tup" to be a TupleTableSlot, and form a + * MinimalTuple using the exported interface for that. + */ + TupleTableSlot *slot = (TupleTableSlot *) tup; + MinimalTuple tuple; + HeapTupleData htup; /* copy the tuple into sort storage */ - stup->tuple = (void *) heap_copytuple(tuple); - USEMEM(state, GetMemoryChunkSpace(stup->tuple)); + tuple = ExecCopySlotMinimalTuple(slot); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); /* set up first-column key value */ - stup->datum1 = heap_getattr((HeapTuple) stup->tuple, + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, state->scanKeys[0].sk_attno, state->tupDesc, &stup->isnull1); } /* - * When writing HeapTuples to tape, we strip off all tuple identity and - * transaction visibility information, because those fields aren't really - * interesting for in-memory tuples (they may or may not be valid in the - * incoming tuples, depending on the plan that's feeding the sort). We - * only need to store t_natts, t_infomask, the nulls bitmap if any, and - * the user data. - * - * You might think that we could omit storing t_natts, but you'd be wrong: - * the incoming tuple might be a physical disk tuple with fewer columns - * than the table's current logical tupdesc. + * Since MinimalTuple already has length in its first word, we don't need + * to write that separately. */ -typedef struct TapeTupleHeader -{ - unsigned int tuplen; /* required header of a tape item */ - int16 natts; /* number of attributes */ - uint16 infomask; /* various flag bits */ - /* nulls bitmap follows if HEAP_HASNULL, then actual tuple data */ -} TapeTupleHeader; - static void writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup) { - HeapTuple tuple = (HeapTuple) stup->tuple; - HeapTupleHeader t_data = tuple->t_data; - TapeTupleHeader tapehdr; - unsigned int datalen; - unsigned int nullslen; - - Assert(tuple->t_len >= t_data->t_hoff); - datalen = tuple->t_len - t_data->t_hoff; - if (HeapTupleHasNulls(tuple)) - nullslen = BITMAPLEN(t_data->t_natts); - else - nullslen = 0; - tapehdr.tuplen = sizeof(TapeTupleHeader) + nullslen + datalen; - tapehdr.natts = t_data->t_natts; - tapehdr.infomask = t_data->t_infomask; - LogicalTapeWrite(state->tapeset, tapenum, - (void *) &tapehdr, sizeof(tapehdr)); - if (nullslen) - LogicalTapeWrite(state->tapeset, tapenum, - (void *) t_data->t_bits, nullslen); + MinimalTuple tuple = (MinimalTuple) stup->tuple; + unsigned int tuplen = tuple->t_len; + LogicalTapeWrite(state->tapeset, tapenum, - (char *) t_data + t_data->t_hoff, datalen); + (void *) tuple, tuplen); if (state->randomAccess) /* need trailing length word? */ LogicalTapeWrite(state->tapeset, tapenum, - (void *) &tapehdr.tuplen, sizeof(tapehdr.tuplen)); + (void *) &tuplen, sizeof(tuplen)); FREEMEM(state, GetMemoryChunkSpace(tuple)); - heap_freetuple(tuple); + heap_free_minimal_tuple(tuple); } static void readtup_heap(Tuplesortstate *state, SortTuple *stup, int tapenum, unsigned int len) { - TapeTupleHeader tapehdr; - unsigned int datalen; - unsigned int nullslen; - unsigned int hoff; - HeapTuple tuple; - HeapTupleHeader t_data; + MinimalTuple tuple = (MinimalTuple) palloc(len); + unsigned int tuplen; + HeapTupleData htup; - /* read in the rest of the header */ - if (LogicalTapeRead(state->tapeset, tapenum, - (char *) &tapehdr + sizeof(unsigned int), - sizeof(tapehdr) - sizeof(unsigned int)) != - sizeof(tapehdr) - sizeof(unsigned int)) - elog(ERROR, "unexpected end of data"); - /* reconstruct lengths of null bitmap and data part */ - if (tapehdr.infomask & HEAP_HASNULL) - nullslen = BITMAPLEN(tapehdr.natts); - else - nullslen = 0; - datalen = len - sizeof(TapeTupleHeader) - nullslen; - /* determine overhead size of tuple (should match heap_form_tuple) */ - hoff = offsetof(HeapTupleHeaderData, t_bits) + nullslen; - if (tapehdr.infomask & HEAP_HASOID) - hoff += sizeof(Oid); - hoff = MAXALIGN(hoff); - /* Allocate the space in one chunk, like heap_form_tuple */ - tuple = (HeapTuple) palloc(HEAPTUPLESIZE + hoff + datalen); USEMEM(state, GetMemoryChunkSpace(tuple)); - t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); - /* make sure unused header fields are zeroed */ - MemSetAligned(t_data, 0, hoff); - /* reconstruct the HeapTupleData fields */ - tuple->t_len = hoff + datalen; - ItemPointerSetInvalid(&(tuple->t_self)); - tuple->t_tableOid = InvalidOid; - tuple->t_data = t_data; - /* reconstruct the HeapTupleHeaderData fields */ - ItemPointerSetInvalid(&(t_data->t_ctid)); - t_data->t_natts = tapehdr.natts; - t_data->t_infomask = (tapehdr.infomask & ~HEAP_XACT_MASK) - | (HEAP_XMIN_INVALID | HEAP_XMAX_INVALID); - t_data->t_hoff = hoff; - /* read in the null bitmap if any */ - if (nullslen) - if (LogicalTapeRead(state->tapeset, tapenum, - (void *) t_data->t_bits, nullslen) != nullslen) - elog(ERROR, "unexpected end of data"); - /* and the data proper */ + /* read in the tuple proper */ + tuple->t_len = len; if (LogicalTapeRead(state->tapeset, tapenum, - (char *) t_data + hoff, datalen) != datalen) + (void *) ((char *) tuple + sizeof(int)), + len - sizeof(int)) != (size_t) (len - sizeof(int))) elog(ERROR, "unexpected end of data"); if (state->randomAccess) /* need trailing length word? */ - if (LogicalTapeRead(state->tapeset, tapenum, (void *) &tapehdr.tuplen, - sizeof(tapehdr.tuplen)) != sizeof(tapehdr.tuplen)) + if (LogicalTapeRead(state->tapeset, tapenum, (void *) &tuplen, + sizeof(tuplen)) != sizeof(tuplen)) elog(ERROR, "unexpected end of data"); stup->tuple = (void *) tuple; /* set up first-column key value */ - stup->datum1 = heap_getattr(tuple, + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, state->scanKeys[0].sk_attno, state->tupDesc, &stup->isnull1); diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index b93b8a9192..75a12ac229 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -13,29 +13,34 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/tuplesort.h,v 1.20 2006/05/23 21:37:59 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/tuplesort.h,v 1.21 2006/06/27 16:53:02 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef TUPLESORT_H #define TUPLESORT_H -#include "access/htup.h" #include "access/itup.h" +#include "executor/tuptable.h" #include "fmgr.h" -/* Tuplesortstate is an opaque type whose details are not known outside tuplesort.c. */ +/* Tuplesortstate is an opaque type whose details are not known outside + * tuplesort.c. + */ typedef struct Tuplesortstate Tuplesortstate; /* * We provide two different interfaces to what is essentially the same * code: one for sorting HeapTuples and one for sorting IndexTuples. * They differ primarily in the way that the sort key information is - * supplied. Also, tuplesort.c guarantees to preserve all the header - * fields of an IndexTuple, but when sorting HeapTuples only the user data - * is guaranteed preserved, not the "system columns" (tuple identity and - * transaction visibility info). + * supplied. Also, the HeapTuple case actually stores MinimalTuples, + * which means it doesn't preserve the "system columns" (tuple identity and + * transaction visibility info). The IndexTuple case does preserve all + * the header fields of an index entry. In the HeapTuple case we can + * save some cycles by passing and returning the tuples in TupleTableSlots, + * rather than forming actual HeapTuples (which'd have to be converted to + * MinimalTuples). * * Yet a third slightly different interface supports sorting bare Datums. */ @@ -51,21 +56,18 @@ extern Tuplesortstate *tuplesort_begin_datum(Oid datumType, Oid sortOperator, int workMem, bool randomAccess); -extern void tuplesort_puttuple(Tuplesortstate *state, void *tuple); - +extern void tuplesort_puttupleslot(Tuplesortstate *state, + TupleTableSlot *slot); +extern void tuplesort_putindextuple(Tuplesortstate *state, IndexTuple tuple); extern void tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull); extern void tuplesort_performsort(Tuplesortstate *state); -extern void *tuplesort_gettuple(Tuplesortstate *state, bool forward, - bool *should_free); - -#define tuplesort_getheaptuple(state, forward, should_free) \ - ((HeapTuple) tuplesort_gettuple(state, forward, should_free)) -#define tuplesort_getindextuple(state, forward, should_free) \ - ((IndexTuple) tuplesort_gettuple(state, forward, should_free)) - +extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward, + TupleTableSlot *slot); +extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward, + bool *should_free); extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward, Datum *val, bool *isNull); -- 2.40.0