From 05bba3d176e0adc1a032d5c8c6ea2a7622c7dd0d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 29 Oct 2008 00:00:39 +0000 Subject: [PATCH] Be more tense about not creating tuplestores with randomAccess = true unless backwards scan could actually happen. In particular, pass a flag to materialize-mode SRFs that tells them whether they need to require random access. In passing, also suppress unneeded backward-scan overhead for a Portal's holdStore tuplestore. Per my proposal about reducing I/O costs for tuplestores. --- contrib/tablefunc/tablefunc.c | 21 ++++++++++++++------- contrib/xml2/xpath.c | 6 ++++-- src/backend/commands/prepare.c | 6 ++++-- src/backend/executor/execQual.c | 12 ++++++++---- src/backend/executor/nodeFunctionscan.c | 6 ++++-- src/backend/utils/fmgr/README | 6 +++++- src/backend/utils/mmgr/portalmem.c | 18 +++++++++++++----- src/include/executor/executor.h | 5 +++-- src/include/nodes/execnodes.h | 7 +++++-- src/pl/plperl/plperl.c | 5 +++-- src/pl/plpgsql/src/pl_exec.c | 6 ++++-- 11 files changed, 67 insertions(+), 31 deletions(-) diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c index 9d1a86756b..1f4a6dd9dd 100644 --- a/contrib/tablefunc/tablefunc.c +++ b/contrib/tablefunc/tablefunc.c @@ -1,5 +1,5 @@ /* - * $PostgreSQL: pgsql/contrib/tablefunc/tablefunc.c,v 1.54 2008/10/28 22:02:05 tgl Exp $ + * $PostgreSQL: pgsql/contrib/tablefunc/tablefunc.c,v 1.55 2008/10/29 00:00:38 tgl Exp $ * * * tablefunc @@ -51,7 +51,8 @@ static HTAB *load_categories_hash(char *cats_sql, MemoryContext per_query_ctx); static Tuplestorestate *get_crosstab_tuplestore(char *sql, HTAB *crosstab_hash, TupleDesc tupdesc, - MemoryContext per_query_ctx); + MemoryContext per_query_ctx, + bool randomAccess); static void validateConnectbyTupleDesc(TupleDesc tupdesc, bool show_branch, bool show_serial); static bool compatCrosstabTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2); static bool compatConnectbyTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2); @@ -66,6 +67,7 @@ static Tuplestorestate *connectby(char *relname, bool show_branch, bool show_serial, MemoryContext per_query_ctx, + bool randomAccess, AttInMetadata *attinmeta); static Tuplestorestate *build_tuplestore_recursively(char *key_fld, char *parent_key_fld, @@ -745,7 +747,8 @@ crosstab_hash(PG_FUNCTION_ARGS) rsinfo->setResult = get_crosstab_tuplestore(sql, crosstab_hash, tupdesc, - per_query_ctx); + per_query_ctx, + rsinfo->allowedModes & SFRM_Materialize_Random); /* * SFRM_Materialize mode expects us to return a NULL Datum. The actual @@ -852,7 +855,8 @@ static Tuplestorestate * get_crosstab_tuplestore(char *sql, HTAB *crosstab_hash, TupleDesc tupdesc, - MemoryContext per_query_ctx) + MemoryContext per_query_ctx, + bool randomAccess) { Tuplestorestate *tupstore; int num_categories = hash_get_num_entries(crosstab_hash); @@ -863,8 +867,8 @@ get_crosstab_tuplestore(char *sql, int proc; MemoryContext SPIcontext; - /* initialize our tuplestore */ - tupstore = tuplestore_begin_heap(true, false, work_mem); + /* initialize our tuplestore (while still in query context!) */ + tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); /* Connect to SPI manager */ if ((ret = SPI_connect()) < 0) @@ -1113,6 +1117,7 @@ connectby_text(PG_FUNCTION_ARGS) show_branch, show_serial, per_query_ctx, + rsinfo->allowedModes & SFRM_Materialize_Random, attinmeta); rsinfo->setDesc = tupdesc; @@ -1192,6 +1197,7 @@ connectby_text_serial(PG_FUNCTION_ARGS) show_branch, show_serial, per_query_ctx, + rsinfo->allowedModes & SFRM_Materialize_Random, attinmeta); rsinfo->setDesc = tupdesc; @@ -1222,6 +1228,7 @@ connectby(char *relname, bool show_branch, bool show_serial, MemoryContext per_query_ctx, + bool randomAccess, AttInMetadata *attinmeta) { Tuplestorestate *tupstore = NULL; @@ -1239,7 +1246,7 @@ connectby(char *relname, oldcontext = MemoryContextSwitchTo(per_query_ctx); /* initialize our tuplestore */ - tupstore = tuplestore_begin_heap(true, false, work_mem); + tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); MemoryContextSwitchTo(oldcontext); diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index 39908494f3..b2f3342f73 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -1,5 +1,5 @@ /* - * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.20 2008/05/17 01:28:22 adunstan Exp $ + * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.21 2008/10/29 00:00:38 tgl Exp $ * * Parser interface for DOM-based parser (libxml) rather than stream-based SAX-type parser */ @@ -688,7 +688,9 @@ xpath_table(PG_FUNCTION_ARGS) * Create the tuplestore - work_mem is the max in-memory size before a * file is created on disk to hold it. */ - tupstore = tuplestore_begin_heap(true, false, work_mem); + tupstore = + tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random, + false, work_mem); MemoryContextSwitchTo(oldcontext); diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index dee994c550..1d3bb13ff3 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -10,7 +10,7 @@ * Copyright (c) 2002-2008, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.91 2008/08/28 23:09:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.92 2008/10/29 00:00:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -766,7 +766,9 @@ pg_prepared_statement(PG_FUNCTION_ARGS) * We put all the tuples into a tuplestore in one scan of the hashtable. * This avoids any issue of the hashtable possibly changing between calls. */ - tupstore = tuplestore_begin_heap(true, false, work_mem); + tupstore = + tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random, + false, work_mem); /* hash table might be uninitialized */ if (prepared_queries) diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 9ac1065e6a..4de99be626 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.234 2008/10/28 22:02:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.235 2008/10/29 00:00:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1429,6 +1429,7 @@ restart: rsinfo.econtext = econtext; rsinfo.expectedDesc = fcache->funcResultDesc; rsinfo.allowedModes = (int) (SFRM_ValuePerCall | SFRM_Materialize); + /* note we do not set SFRM_Materialize_Random */ rsinfo.returnMode = SFRM_ValuePerCall; /* isDone is filled below */ rsinfo.setResult = NULL; @@ -1702,7 +1703,8 @@ ExecMakeFunctionResultNoSets(FuncExprState *fcache, Tuplestorestate * ExecMakeTableFunctionResult(ExprState *funcexpr, ExprContext *econtext, - TupleDesc expectedDesc) + TupleDesc expectedDesc, + bool randomAccess) { Tuplestorestate *tupstore = NULL; TupleDesc tupdesc = NULL; @@ -1736,6 +1738,8 @@ ExecMakeTableFunctionResult(ExprState *funcexpr, rsinfo.econtext = econtext; rsinfo.expectedDesc = expectedDesc; rsinfo.allowedModes = (int) (SFRM_ValuePerCall | SFRM_Materialize); + if (randomAccess) + rsinfo.allowedModes |= (int) SFRM_Materialize_Random; rsinfo.returnMode = SFRM_ValuePerCall; /* isDone is filled below */ rsinfo.setResult = NULL; @@ -1909,7 +1913,7 @@ ExecMakeTableFunctionResult(ExprState *funcexpr, -1, 0); } - tupstore = tuplestore_begin_heap(true, false, work_mem); + tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); MemoryContextSwitchTo(oldcontext); rsinfo.setResult = tupstore; rsinfo.setDesc = tupdesc; @@ -1976,7 +1980,7 @@ no_function_result: if (rsinfo.setResult == NULL) { MemoryContextSwitchTo(econtext->ecxt_per_query_memory); - tupstore = tuplestore_begin_heap(true, false, work_mem); + tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); rsinfo.setResult = tupstore; if (!returnsSet) { diff --git a/src/backend/executor/nodeFunctionscan.c b/src/backend/executor/nodeFunctionscan.c index 8da9fc620a..7def87a870 100644 --- a/src/backend/executor/nodeFunctionscan.c +++ b/src/backend/executor/nodeFunctionscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeFunctionscan.c,v 1.48 2008/10/28 22:02:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeFunctionscan.c,v 1.49 2008/10/29 00:00:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -64,7 +64,8 @@ FunctionNext(FunctionScanState *node) node->tuplestorestate = tuplestorestate = ExecMakeTableFunctionResult(node->funcexpr, node->ss.ps.ps_ExprContext, - node->tupdesc); + node->tupdesc, + node->eflags & EXEC_FLAG_BACKWARD); } /* @@ -123,6 +124,7 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags) scanstate = makeNode(FunctionScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; + scanstate->eflags = eflags; /* * Miscellaneous initialization diff --git a/src/backend/utils/fmgr/README b/src/backend/utils/fmgr/README index ccf911e62e..7c1ccc5bf8 100644 --- a/src/backend/utils/fmgr/README +++ b/src/backend/utils/fmgr/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/utils/fmgr/README,v 1.14 2008/10/28 22:02:05 tgl Exp $ +$PostgreSQL: pgsql/src/backend/utils/fmgr/README,v 1.15 2008/10/29 00:00:38 tgl Exp $ Function Manager ================ @@ -432,6 +432,10 @@ function is called in). The function stores pointers to the Tuplestore and TupleDesc into ReturnSetInfo, sets returnMode to indicate materialize mode, and returns null. isDone is not used and should be left at ExprSingleResult. +The Tuplestore must be created with randomAccess = true if +SFRM_Materialize_Random is set in allowedModes, but it can (and preferably +should) be created with randomAccess = false if not. + If available, the expected tuple descriptor is passed in ReturnSetInfo; in other contexts the expectedDesc field will be NULL. The function need not pay attention to expectedDesc, but it may be useful in special cases. diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index b98b578ed2..42820ec688 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.111 2008/07/18 20:26:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.112 2008/10/29 00:00:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -354,11 +354,17 @@ PortalCreateHoldStore(Portal portal) ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - /* Create the tuple store, selecting cross-transaction temp files. */ + /* + * Create the tuple store, selecting cross-transaction temp files, and + * enabling random access only if cursor requires scrolling. + * + * XXX: Should maintenance_work_mem be used for the portal size? + */ oldcxt = MemoryContextSwitchTo(portal->holdContext); - /* XXX: Should maintenance_work_mem be used for the portal size? */ - portal->holdStore = tuplestore_begin_heap(true, true, work_mem); + portal->holdStore = + tuplestore_begin_heap(portal->cursorOptions & CURSOR_OPT_SCROLL, + true, work_mem); MemoryContextSwitchTo(oldcxt); } @@ -913,7 +919,9 @@ pg_cursor(PG_FUNCTION_ARGS) * We put all the tuples into a tuplestore in one scan of the hashtable. * This avoids any issue of the hashtable possibly changing between calls. */ - tupstore = tuplestore_begin_heap(true, false, work_mem); + tupstore = + tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random, + false, work_mem); hash_seq_init(&hash_seq, PortalHashTable); while ((hentry = hash_seq_search(&hash_seq)) != NULL) diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 65e7e4041d..ac1b81cd9b 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.150 2008/10/28 22:02:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.151 2008/10/29 00:00:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -178,7 +178,8 @@ extern Datum GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull); extern Tuplestorestate *ExecMakeTableFunctionResult(ExprState *funcexpr, ExprContext *econtext, - TupleDesc expectedDesc); + TupleDesc expectedDesc, + bool randomAccess); extern Datum ExecEvalExprSwitchContext(ExprState *expression, ExprContext *econtext, bool *isNull, ExprDoneCond *isDone); extern ExprState *ExecInitExpr(Expr *node, PlanState *parent); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index ccc5963121..8164de6a08 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.192 2008/10/28 22:02:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.193 2008/10/29 00:00:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -156,7 +156,8 @@ typedef enum typedef enum { SFRM_ValuePerCall = 0x01, /* one value returned per call */ - SFRM_Materialize = 0x02 /* result set instantiated in Tuplestore */ + SFRM_Materialize = 0x02, /* result set instantiated in Tuplestore */ + SFRM_Materialize_Random = 0x04 /* Tuplestore needs randomAccess */ } SetFunctionReturnMode; /* @@ -1180,6 +1181,7 @@ typedef struct SubqueryScanState * Function nodes are used to scan the results of a * function appearing in FROM (typically a function returning set). * + * eflags node's capability flags * tupdesc expected return tuple description * tuplestorestate private state of tuplestore.c * funcexpr state for function expression being evaluated @@ -1188,6 +1190,7 @@ typedef struct SubqueryScanState typedef struct FunctionScanState { ScanState ss; /* its first field is NodeTag */ + int eflags; TupleDesc tupdesc; Tuplestorestate *tuplestorestate; ExprState *funcexpr; diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index 073e41d400..75eaf10134 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -1,7 +1,7 @@ /********************************************************************** * plperl.c - perl as a procedural language for PostgreSQL * - * $PostgreSQL: pgsql/src/pl/plperl/plperl.c,v 1.140 2008/10/09 17:24:05 alvherre Exp $ + * $PostgreSQL: pgsql/src/pl/plperl/plperl.c,v 1.141 2008/10/29 00:00:39 tgl Exp $ * **********************************************************************/ @@ -1922,7 +1922,8 @@ plperl_return_next(SV *sv) current_call_data->ret_tdesc = CreateTupleDescCopy(tupdesc); current_call_data->tuple_store = - tuplestore_begin_heap(true, false, work_mem); + tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); if (prodesc->fn_retistuple) { current_call_data->attinmeta = diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index e2218f8831..9716b81200 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.221 2008/09/24 14:40:00 tgl Exp $ + * $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.222 2008/10/29 00:00:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2357,7 +2357,9 @@ exec_init_tuple_store(PLpgSQL_execstate *estate) estate->tuple_store_cxt = rsi->econtext->ecxt_per_query_memory; oldcxt = MemoryContextSwitchTo(estate->tuple_store_cxt); - estate->tuple_store = tuplestore_begin_heap(true, false, work_mem); + estate->tuple_store = + tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); MemoryContextSwitchTo(oldcxt); estate->rettupdesc = rsi->expectedDesc; -- 2.40.0