diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 2ff4150f60..37b7bbd413 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -6,113 +6,104 @@
 * INTERFACE ROUTINES
 * ExecutorStart()
 * ExecutorRun()
+ * ExecutorFinish()
 * ExecutorEnd()
 *
- * The old ExecutorMain() has been replaced by ExecutorStart(),
- * ExecutorRun() and ExecutorEnd()
- *
- * These three procedures are the external interfaces to the executor.
+ * These four procedures are the external interface to the executor.
 * In each case, the query descriptor is required as an argument.
 *
- * ExecutorStart() must be called at the beginning of execution of any
- * query plan and ExecutorEnd() should always be called at the end of
- * execution of a plan.
+ * ExecutorStart must be called at the beginning of execution of any
+ * query plan and ExecutorEnd must always be called at the end of
+ * execution of a plan (unless it is aborted due to error).
 *
 * ExecutorRun accepts direction and count arguments that specify whether
 * the plan is to be executed forwards, backwards, and for how many tuples.
+ * In some cases ExecutorRun may be called multiple times to process all
+ * the tuples for a plan. It is also acceptable to stop short of executing
+ * the whole plan (but only if it is a SELECT).
+
+ * ExecutorFinish must be called after the final ExecutorRun call and
+ * before ExecutorEnd. This can be omitted only in case of EXPLAIN,
+ * which should also omit ExecutorRun. 
* - * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.323 2009/02/08 18:02:27 tgl Exp $ + * src/backend/executor/execMain.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "access/heapam.h" -#include "access/reloptions.h" +#include "access/htup_details.h" #include "access/sysattr.h" #include "access/transam.h" #include "access/xact.h" -#include "catalog/heap.h" #include "catalog/namespace.h" -#include "catalog/toasting.h" -#include "commands/tablespace.h" +#include "commands/matview.h" #include "commands/trigger.h" #include "executor/execdebug.h" -#include "executor/instrument.h" -#include "executor/nodeSubplan.h" +#include "foreign/fdwapi.h" +#include "mb/pg_wchar.h" #include "miscadmin.h" -#include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" -#include "parser/parse_clause.h" #include "parser/parsetree.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" -#include "storage/smgr.h" +#include "tcop/utility.h" #include "utils/acl.h" -#include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/rls.h" #include "utils/snapmgr.h" #include "utils/tqual.h" -/* Hooks for plugins to get control in ExecutorStart/Run/End() */ -ExecutorStart_hook_type ExecutorStart_hook = NULL; -ExecutorRun_hook_type ExecutorRun_hook = NULL; -ExecutorEnd_hook_type ExecutorEnd_hook = NULL; +/* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */ +ExecutorStart_hook_type ExecutorStart_hook = NULL; +ExecutorRun_hook_type ExecutorRun_hook = NULL; +ExecutorFinish_hook_type ExecutorFinish_hook = NULL; +ExecutorEnd_hook_type ExecutorEnd_hook = NULL; -typedef struct evalPlanQual -{ - Index rti; - EState *estate; - PlanState *planstate; - struct evalPlanQual *next; /* stack of active PlanQual plans */ - struct evalPlanQual *free; /* list of free PlanQual plans */ -} evalPlanQual; +/* Hook for plugin to get control in ExecCheckRTPerms() */ +ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL; /* decls for local routines only used within this module */ static void InitPlan(QueryDesc *queryDesc, int eflags); -static void ExecCheckPlanOutput(Relation resultRel, List *targetList); +static void CheckValidRowMarkRel(Relation rel, RowMarkType markType); +static void ExecPostprocessPlan(EState *estate); static void ExecEndPlan(PlanState *planstate, EState *estate); static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation, + bool sendTuples, long numberTuples, ScanDirection direction, DestReceiver *dest); -static void ExecSelect(TupleTableSlot *slot, - DestReceiver *dest, EState *estate); -static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid, - TupleTableSlot *planSlot, - DestReceiver *dest, EState *estate); -static void ExecDelete(ItemPointer tupleid, - TupleTableSlot *planSlot, - DestReceiver *dest, EState *estate); -static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid, - TupleTableSlot *planSlot, - DestReceiver *dest, EState *estate); -static void ExecProcessReturning(ProjectionInfo *projectReturning, - TupleTableSlot *tupleSlot, - TupleTableSlot *planSlot, - DestReceiver *dest); -static TupleTableSlot *EvalPlanQualNext(EState *estate); -static void EndEvalPlanQual(EState *estate); -static void 
ExecCheckRTPerms(List *rangeTable); -static void ExecCheckRTEPerms(RangeTblEntry *rte); +static bool ExecCheckRTEPerms(RangeTblEntry *rte); +static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid, + Bitmapset *modifiedCols, + AclMode requiredPerms); static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt); -static void EvalPlanQualStart(evalPlanQual *epq, EState *estate, - evalPlanQual *priorepq); -static void EvalPlanQualStop(evalPlanQual *epq); -static void OpenIntoRel(QueryDesc *queryDesc); -static void CloseIntoRel(QueryDesc *queryDesc); -static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); -static void intorel_receive(TupleTableSlot *slot, DestReceiver *self); -static void intorel_shutdown(DestReceiver *self); -static void intorel_destroy(DestReceiver *self); +static char *ExecBuildSlotValueDescription(Oid reloid, + TupleTableSlot *slot, + TupleDesc tupdesc, + Bitmapset *modifiedCols, + int maxfieldlen); +static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate, + Plan *planTree); + +/* + * Note that GetUpdatedColumns() also exists in commands/trigger.c. There does + * not appear to be any good header to put it into, given the structures that + * it uses, so we let them be duplicated. Be sure to update both if one needs + * to be changed, however. + */ +#define GetInsertedColumns(relinfo, estate) \ + (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->insertedCols) +#define GetUpdatedColumns(relinfo, estate) \ + (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols) /* end of local decls */ @@ -123,8 +114,8 @@ static void intorel_destroy(DestReceiver *self); * This routine must be called at the beginning of any execution of any * query plan * - * Takes a QueryDesc previously created by CreateQueryDesc (it's not real - * clear why we bother to separate the two functions, but...). The tupDesc + * Takes a QueryDesc previously created by CreateQueryDesc (which is separate + * only because some places use QueryDescs for utility commands). The tupDesc * field of the QueryDesc is filled in to describe the tuples that will be * returned, and the internal fields (estate and planstate) are set up. * @@ -161,8 +152,20 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) /* * If the transaction is read-only, we need to check if any writes are * planned to non-temporary tables. EXPLAIN is considered read-only. + * + * Don't allow writes in parallel mode. Supporting UPDATE and DELETE + * would require (a) storing the combocid hash in shared memory, rather + * than synchronizing it just once at the start of parallelism, and (b) an + * alternative to heap_update()'s reliance on xmax for mutual exclusion. + * INSERT may have no such troubles, but we forbid it to simplify the + * checks. + * + * We have lower-level defenses in CommandCounterIncrement and elsewhere + * against performing unsafe operations in parallel mode, but this gives a + * more user-friendly error message. 
*/ - if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) + if ((XactReadOnly || IsInParallelMode()) && + !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) ExecCheckXactReadOnly(queryDesc->plannedstmt); /* @@ -174,7 +177,8 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); /* - * Fill in parameters, if any, from queryDesc + * Fill in external parameters, if any, from queryDesc; and allocate + * workspace for internal parameters */ estate->es_param_list_info = queryDesc->params; @@ -188,10 +192,23 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) switch (queryDesc->operation) { case CMD_SELECT: - /* SELECT INTO and SELECT FOR UPDATE/SHARE need to mark tuples */ - if (queryDesc->plannedstmt->intoClause != NULL || - queryDesc->plannedstmt->rowMarks != NIL) + + /* + * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark + * tuples + */ + if (queryDesc->plannedstmt->rowMarks != NIL || + queryDesc->plannedstmt->hasModifyingCTE) estate->es_output_cid = GetCurrentCommandId(true); + + /* + * A SELECT without modifying CTEs can't possibly queue triggers, + * so force skip-triggers mode. This is just a marginal efficiency + * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't + * all that expensive, but we might as well do it. + */ + if (!queryDesc->plannedstmt->hasModifyingCTE) + eflags |= EXEC_FLAG_SKIP_TRIGGERS; break; case CMD_INSERT: @@ -211,13 +228,26 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) */ estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot); estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot); - estate->es_instrument = queryDesc->doInstrument; + estate->es_top_eflags = eflags; + estate->es_instrument = queryDesc->instrument_options; /* * Initialize the plan state tree */ InitPlan(queryDesc, eflags); + /* + * Set up an AFTER-trigger statement context, unless told not to, or + * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called). + */ + if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY))) + AfterTriggerBeginQuery(); + + /* Enter parallel mode, if required by the query. */ + if (queryDesc->plannedstmt->parallelModeNeeded && + !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) + EnterParallelMode(); + MemoryContextSwitchTo(oldcontext); } @@ -235,7 +265,9 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) * we retrieve up to 'count' tuples in the specified direction. * * Note: count = 0 is interpreted as no portal limit, i.e., run to - * completion. + * completion. Also note that the count limit is only applied to + * retrieved tuples, not for instance to those inserted/updated/deleted + * by a ModifyTable plan node. 
* * There is no return value, but output tuples (if any) are sent to * the destination receiver specified in the QueryDesc; and the number @@ -274,13 +306,14 @@ standard_ExecutorRun(QueryDesc *queryDesc, estate = queryDesc->estate; Assert(estate != NULL); + Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); /* * Switch into per-query memory context */ oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); - /* Allow instrumentation of ExecutorRun overall runtime */ + /* Allow instrumentation of Executor overall runtime */ if (queryDesc->totaltime) InstrStartNode(queryDesc->totaltime); @@ -297,7 +330,7 @@ standard_ExecutorRun(QueryDesc *queryDesc, estate->es_lastoid = InvalidOid; sendTuples = (operation == CMD_SELECT || - queryDesc->plannedstmt->returningLists); + queryDesc->plannedstmt->hasReturning); if (sendTuples) (*dest->rStartup) (dest, operation, queryDesc->tupDesc); @@ -309,10 +342,14 @@ standard_ExecutorRun(QueryDesc *queryDesc, ExecutePlan(estate, queryDesc->planstate, operation, + sendTuples, count, direction, dest); + /* Allow nodes to release or shut down resources. */ + (void) ExecShutdownNode(queryDesc->planstate); + /* * shutdown tuple receiver, if we started it */ @@ -325,6 +362,68 @@ standard_ExecutorRun(QueryDesc *queryDesc, MemoryContextSwitchTo(oldcontext); } +/* ---------------------------------------------------------------- + * ExecutorFinish + * + * This routine must be called after the last ExecutorRun call. + * It performs cleanup such as firing AFTER triggers. It is + * separate from ExecutorEnd because EXPLAIN ANALYZE needs to + * include these actions in the total runtime. + * + * We provide a function hook variable that lets loadable plugins + * get control when ExecutorFinish is called. Such a plugin would + * normally call standard_ExecutorFinish(). + * + * ---------------------------------------------------------------- + */ +void +ExecutorFinish(QueryDesc *queryDesc) +{ + if (ExecutorFinish_hook) + (*ExecutorFinish_hook) (queryDesc); + else + standard_ExecutorFinish(queryDesc); +} + +void +standard_ExecutorFinish(QueryDesc *queryDesc) +{ + EState *estate; + MemoryContext oldcontext; + + /* sanity checks */ + Assert(queryDesc != NULL); + + estate = queryDesc->estate; + + Assert(estate != NULL); + Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); + + /* This should be run once and only once per Executor instance */ + Assert(!estate->es_finished); + + /* Switch into per-query memory context */ + oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); + + /* Allow instrumentation of Executor overall runtime */ + if (queryDesc->totaltime) + InstrStartNode(queryDesc->totaltime); + + /* Run ModifyTable nodes to completion */ + ExecPostprocessPlan(estate); + + /* Execute queued AFTER triggers, unless told not to */ + if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS)) + AfterTriggerEndQuery(estate); + + if (queryDesc->totaltime) + InstrStopNode(queryDesc->totaltime, 0); + + MemoryContextSwitchTo(oldcontext); + + estate->es_finished = true; +} + /* ---------------------------------------------------------------- * ExecutorEnd * @@ -359,6 +458,14 @@ standard_ExecutorEnd(QueryDesc *queryDesc) Assert(estate != NULL); + /* + * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This + * Assert is needed because ExecutorFinish is new as of 9.1, and callers + * might forget to call it. 
+ */ + Assert(estate->es_finished || + (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); + /* * Switch into per-query memory context to run ExecEndPlan */ @@ -366,12 +473,6 @@ standard_ExecutorEnd(QueryDesc *queryDesc) ExecEndPlan(queryDesc->planstate, estate); - /* - * Close the SELECT INTO relation if any - */ - if (estate->es_select_into) - CloseIntoRel(queryDesc); - /* do away with our snapshots */ UnregisterSnapshot(estate->es_snapshot); UnregisterSnapshot(estate->es_crosscheck_snapshot); @@ -381,6 +482,11 @@ standard_ExecutorEnd(QueryDesc *queryDesc) */ MemoryContextSwitchTo(oldcontext); + /* Exit parallel mode, if it was required by the query. */ + if (queryDesc->plannedstmt->parallelModeNeeded && + !(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)) + ExitParallelMode(); + /* * Release EState and per-query memory context. This should release * everything the executor has allocated. @@ -425,7 +531,7 @@ ExecutorRewind(QueryDesc *queryDesc) /* * rescan plan */ - ExecReScan(queryDesc->planstate, NULL); + ExecReScan(queryDesc->planstate); MemoryContextSwitchTo(oldcontext); } @@ -434,23 +540,48 @@ ExecutorRewind(QueryDesc *queryDesc) /* * ExecCheckRTPerms * Check access permissions for all relations listed in a range table. + * + * Returns true if permissions are adequate. Otherwise, throws an appropriate + * error if ereport_on_violation is true, or simply returns false otherwise. + * + * Note that this does NOT address row level security policies (aka: RLS). If + * rows will be returned to the user as a result of this permission check + * passing, then RLS also needs to be consulted (and check_enable_rls()). + * + * See rewrite/rowsecurity.c. */ -static void -ExecCheckRTPerms(List *rangeTable) +bool +ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation) { ListCell *l; + bool result = true; foreach(l, rangeTable) { - ExecCheckRTEPerms((RangeTblEntry *) lfirst(l)); + RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); + + result = ExecCheckRTEPerms(rte); + if (!result) + { + Assert(rte->rtekind == RTE_RELATION); + if (ereport_on_violation) + aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS, + get_rel_name(rte->relid)); + return false; + } } + + if (ExecutorCheckPerms_hook) + result = (*ExecutorCheckPerms_hook) (rangeTable, + ereport_on_violation); + return result; } /* * ExecCheckRTEPerms * Check access permissions for a single RTE. */ -static void +static bool ExecCheckRTEPerms(RangeTblEntry *rte) { AclMode requiredPerms; @@ -458,8 +589,6 @@ ExecCheckRTEPerms(RangeTblEntry *rte) AclMode remainingPerms; Oid relOid; Oid userid; - Bitmapset *tmpset; - int col; /* * Only plain-relation RTEs need to be checked here. Function RTEs are @@ -467,14 +596,14 @@ ExecCheckRTEPerms(RangeTblEntry *rte) * Join, subquery, and special RTEs need no checks. */ if (rte->rtekind != RTE_RELATION) - return; + return true; /* * No work if requiredPerms is empty. */ requiredPerms = rte->requiredPerms; if (requiredPerms == 0) - return; + return true; relOid = rte->relid; @@ -482,7 +611,7 @@ ExecCheckRTEPerms(RangeTblEntry *rte) * userid to check as: current user unless we have a setuid indication. * * Note: GetUserId() is presently fast enough that there's no harm in - * calling it separately for each RTE. If that stops being true, we could + * calling it separately for each RTE. If that stops being true, we could * call it once in ExecCheckRTPerms and pass the userid down from there. * But for now, no need for the extra clutter. 
*/ @@ -497,13 +626,14 @@ ExecCheckRTEPerms(RangeTblEntry *rte) remainingPerms = requiredPerms & ~relPerms; if (remainingPerms != 0) { + int col = -1; + /* * If we lack any permissions that exist only as relation permissions, * we can fail straight away. */ if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE)) - aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS, - get_rel_name(relOid)); + return false; /* * Check to see if we have the needed privileges at column level. @@ -523,80 +653,100 @@ ExecCheckRTEPerms(RangeTblEntry *rte) { if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT, ACLMASK_ANY) != ACLCHECK_OK) - aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS, - get_rel_name(relOid)); + return false; } - tmpset = bms_copy(rte->selectedCols); - while ((col = bms_first_member(tmpset)) >= 0) + while ((col = bms_next_member(rte->selectedCols, col)) >= 0) { - /* remove the column number offset */ - col += FirstLowInvalidHeapAttributeNumber; - if (col == InvalidAttrNumber) + /* bit #s are offset by FirstLowInvalidHeapAttributeNumber */ + AttrNumber attno = col + FirstLowInvalidHeapAttributeNumber; + + if (attno == InvalidAttrNumber) { /* Whole-row reference, must have priv on all cols */ if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT, ACLMASK_ALL) != ACLCHECK_OK) - aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS, - get_rel_name(relOid)); + return false; } else { - if (pg_attribute_aclcheck(relOid, col, userid, ACL_SELECT) - != ACLCHECK_OK) - aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS, - get_rel_name(relOid)); + if (pg_attribute_aclcheck(relOid, attno, userid, + ACL_SELECT) != ACLCHECK_OK) + return false; } } - bms_free(tmpset); } /* - * Basically the same for the mod columns, with either INSERT or UPDATE + * Basically the same for the mod columns, for both INSERT and UPDATE * privilege as specified by remainingPerms. */ - remainingPerms &= ~ACL_SELECT; - if (remainingPerms != 0) - { - /* - * When the query doesn't explicitly change any columns, allow - * the query if we have permission on any column of the rel. This - * is to handle SELECT FOR UPDATE as well as possible corner cases - * in INSERT and UPDATE. - */ - if (bms_is_empty(rte->modifiedCols)) - { - if (pg_attribute_aclcheck_all(relOid, userid, remainingPerms, - ACLMASK_ANY) != ACLCHECK_OK) - aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS, - get_rel_name(relOid)); - } + if (remainingPerms & ACL_INSERT && !ExecCheckRTEPermsModified(relOid, + userid, + rte->insertedCols, + ACL_INSERT)) + return false; + + if (remainingPerms & ACL_UPDATE && !ExecCheckRTEPermsModified(relOid, + userid, + rte->updatedCols, + ACL_UPDATE)) + return false; + } + return true; +} - tmpset = bms_copy(rte->modifiedCols); - while ((col = bms_first_member(tmpset)) >= 0) - { - /* remove the column number offset */ - col += FirstLowInvalidHeapAttributeNumber; - if (col == InvalidAttrNumber) - { - /* whole-row reference can't happen here */ - elog(ERROR, "whole-row update is not implemented"); - } - else - { - if (pg_attribute_aclcheck(relOid, col, userid, remainingPerms) - != ACLCHECK_OK) - aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS, - get_rel_name(relOid)); - } - } - bms_free(tmpset); +/* + * ExecCheckRTEPermsModified + * Check INSERT or UPDATE access permissions for a single RTE (these + * are processed uniformly). 
+ */ +static bool +ExecCheckRTEPermsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols, + AclMode requiredPerms) +{ + int col = -1; + + /* + * When the query doesn't explicitly update any columns, allow the query + * if we have permission on any column of the rel. This is to handle + * SELECT FOR UPDATE as well as possible corner cases in UPDATE. + */ + if (bms_is_empty(modifiedCols)) + { + if (pg_attribute_aclcheck_all(relOid, userid, requiredPerms, + ACLMASK_ANY) != ACLCHECK_OK) + return false; + } + + while ((col = bms_next_member(modifiedCols, col)) >= 0) + { + /* bit #s are offset by FirstLowInvalidHeapAttributeNumber */ + AttrNumber attno = col + FirstLowInvalidHeapAttributeNumber; + + if (attno == InvalidAttrNumber) + { + /* whole-row reference can't happen here */ + elog(ERROR, "whole-row update is not implemented"); + } + else + { + if (pg_attribute_aclcheck(relOid, attno, userid, + requiredPerms) != ACLCHECK_OK) + return false; } } + return true; } /* - * Check that the query does not imply any writes to non-temp tables. + * Check that the query does not imply any writes to non-temp tables; + * unless we're in parallel mode, in which case don't even allow writes + * to temp tables. + * + * Note: in a Hot Standby slave this would need to reject writes to temp + * tables just as we do in parallel mode; but an HS slave can't have created + * any temp tables in the first place, so no need to check that. */ static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt) @@ -604,14 +754,9 @@ ExecCheckXactReadOnly(PlannedStmt *plannedstmt) ListCell *l; /* - * CREATE TABLE AS or SELECT INTO? - * - * XXX should we allow this if the destination is temp? + * Fail if write permissions are requested in parallel mode for table + * (temp or non-temp), otherwise fail for any non-temp table. */ - if (plannedstmt->intoClause != NULL) - goto fail; - - /* Fail if write permissions are requested on any non-temp table */ foreach(l, plannedstmt->rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); @@ -625,15 +770,11 @@ ExecCheckXactReadOnly(PlannedStmt *plannedstmt) if (isTempNamespace(get_rel_namespace(rte->relid))) continue; - goto fail; + PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt)); } - return; - -fail: - ereport(ERROR, - (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), - errmsg("transaction is read-only"))); + if (plannedstmt->commandType != CMD_SELECT || plannedstmt->hasModifyingCTE) + PreventCommandIfParallelMode(CreateCommandTag((Node *) plannedstmt)); } @@ -660,15 +801,19 @@ InitPlan(QueryDesc *queryDesc, int eflags) /* * Do permissions checks */ - ExecCheckRTPerms(rangeTable); + ExecCheckRTPerms(rangeTable, true); /* * initialize the node's execution state */ estate->es_range_table = rangeTable; + estate->es_plannedstmt = plannedstmt; /* - * initialize result relation stuff + * initialize result relation stuff, and open/lock the result rels. + * + * We must do this before initializing the plan tree, else we might try to + * do a lock upgrade if a result rel is also a source rel. 
*/ if (plannedstmt->resultRelations) { @@ -691,14 +836,13 @@ InitPlan(QueryDesc *queryDesc, int eflags) InitResultRelInfo(resultRelInfo, resultRelation, resultRelationIndex, - operation, estate->es_instrument); resultRelInfo++; } estate->es_result_relations = resultRelInfos; estate->es_num_result_relations = numResultRelations; - /* Initialize to first or only result rel */ - estate->es_result_relation_info = resultRelInfos; + /* es_result_relation_info is NULL except when within ModifyTable */ + estate->es_result_relation_info = NULL; } else { @@ -711,26 +855,14 @@ InitPlan(QueryDesc *queryDesc, int eflags) } /* - * Detect whether we're doing SELECT INTO. If so, set the es_into_oids - * flag appropriately so that the plan tree will be initialized with the - * correct tuple descriptors. (Other SELECT INTO stuff comes later.) - */ - estate->es_select_into = false; - if (operation == CMD_SELECT && plannedstmt->intoClause != NULL) - { - estate->es_select_into = true; - estate->es_into_oids = interpretOidsOption(plannedstmt->intoClause->options); - } - - /* - * Have to lock relations selected FOR UPDATE/FOR SHARE before we - * initialize the plan tree, else we'd be doing a lock upgrade. While we - * are at it, build the ExecRowMark list. + * Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE + * before we initialize the plan tree, else we'd be risking lock upgrades. + * While we are at it, build the ExecRowMark list. */ estate->es_rowMarks = NIL; foreach(l, plannedstmt->rowMarks) { - RowMarkClause *rc = (RowMarkClause *) lfirst(l); + PlanRowMark *rc = (PlanRowMark *) lfirst(l); Oid relid; Relation relation; ExecRowMark *erm; @@ -739,63 +871,65 @@ InitPlan(QueryDesc *queryDesc, int eflags) if (rc->isParent) continue; + /* get relation's OID (will produce InvalidOid if subquery) */ relid = getrelid(rc->rti, rangeTable); - relation = heap_open(relid, RowShareLock); + + /* + * If you change the conditions under which rel locks are acquired + * here, be sure to adjust ExecOpenScanRelation to match. + */ + switch (rc->markType) + { + case ROW_MARK_EXCLUSIVE: + case ROW_MARK_NOKEYEXCLUSIVE: + case ROW_MARK_SHARE: + case ROW_MARK_KEYSHARE: + relation = heap_open(relid, RowShareLock); + break; + case ROW_MARK_REFERENCE: + relation = heap_open(relid, AccessShareLock); + break; + case ROW_MARK_COPY: + /* no physical table access is required */ + relation = NULL; + break; + default: + elog(ERROR, "unrecognized markType: %d", rc->markType); + relation = NULL; /* keep compiler quiet */ + break; + } + + /* Check that relation is a legal target for marking */ + if (relation) + CheckValidRowMarkRel(relation, rc->markType); + erm = (ExecRowMark *) palloc(sizeof(ExecRowMark)); erm->relation = relation; + erm->relid = relid; erm->rti = rc->rti; erm->prti = rc->prti; - erm->forUpdate = rc->forUpdate; - erm->noWait = rc->noWait; - /* We'll locate the junk attrs below */ - erm->ctidAttNo = InvalidAttrNumber; - erm->toidAttNo = InvalidAttrNumber; + erm->rowmarkId = rc->rowmarkId; + erm->markType = rc->markType; + erm->strength = rc->strength; + erm->waitPolicy = rc->waitPolicy; + erm->ermActive = false; ItemPointerSetInvalid(&(erm->curCtid)); + erm->ermExtra = NULL; estate->es_rowMarks = lappend(estate->es_rowMarks, erm); } /* - * Initialize the executor "tuple" table. We need slots for all the plan - * nodes, plus possibly output slots for the junkfilter(s). At this point - * we aren't sure if we need junkfilters, so just add slots for them - * unconditionally. 
Also, if it's not a SELECT, set up a slot for use for - * trigger output tuples. Also, one for RETURNING-list evaluation. + * Initialize the executor's tuple table to empty. */ - { - int nSlots; - - /* Slots for the main plan tree */ - nSlots = ExecCountSlotsNode(plan); - /* Add slots for subplans and initplans */ - foreach(l, plannedstmt->subplans) - { - Plan *subplan = (Plan *) lfirst(l); - - nSlots += ExecCountSlotsNode(subplan); - } - /* Add slots for junkfilter(s) */ - if (plannedstmt->resultRelations != NIL) - nSlots += list_length(plannedstmt->resultRelations); - else - nSlots += 1; - if (operation != CMD_SELECT) - nSlots++; /* for es_trig_tuple_slot */ - if (plannedstmt->returningLists) - nSlots++; /* for RETURNING projection */ - - estate->es_tupleTable = ExecCreateTupleTable(nSlots); - - if (operation != CMD_SELECT) - estate->es_trig_tuple_slot = - ExecAllocTableSlot(estate->es_tupleTable); - } + estate->es_tupleTable = NIL; + estate->es_trig_tuple_slot = NULL; + estate->es_trig_oldtup_slot = NULL; + estate->es_trig_newtup_slot = NULL; /* mark EvalPlanQual not active */ - estate->es_plannedstmt = plannedstmt; - estate->es_evalPlanQual = NULL; - estate->es_evTupleNull = NULL; - estate->es_evTuple = NULL; - estate->es_useEvalPlan = false; + estate->es_epqTuple = NULL; + estate->es_epqTupleSet = NULL; + estate->es_epqScanDone = NULL; /* * Initialize private state information for each SubPlan. We must do this @@ -815,7 +949,8 @@ InitPlan(QueryDesc *queryDesc, int eflags) * it is a parameterless subplan (not initplan), we suggest that it be * prepared to handle REWIND efficiently; otherwise there is no need. */ - sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY; + sp_eflags = eflags + & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA); if (bms_is_member(i, plannedstmt->rewindPlanIDs)) sp_eflags |= EXEC_FLAG_REWIND; @@ -835,250 +970,64 @@ InitPlan(QueryDesc *queryDesc, int eflags) planstate = ExecInitNode(plan, estate, eflags); /* - * Get the tuple descriptor describing the type of tuples to return. (this - * is especially important if we are creating a relation with "SELECT - * INTO") + * Get the tuple descriptor describing the type of tuples to return. */ tupType = ExecGetResultType(planstate); /* - * Initialize the junk filter if needed. SELECT and INSERT queries need a - * filter if there are any junk attrs in the tlist. UPDATE and - * DELETE always need a filter, since there's always a junk 'ctid' - * attribute present --- no need to look first. - * - * This section of code is also a convenient place to verify that the - * output of an INSERT or UPDATE matches the target table(s). + * Initialize the junk filter if needed. SELECT queries need a filter if + * there are any junk attrs in the top-level tlist. */ + if (operation == CMD_SELECT) { bool junk_filter_needed = false; ListCell *tlist; - switch (operation) + foreach(tlist, plan->targetlist) { - case CMD_SELECT: - case CMD_INSERT: - foreach(tlist, plan->targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(tlist); + TargetEntry *tle = (TargetEntry *) lfirst(tlist); - if (tle->resjunk) - { - junk_filter_needed = true; - break; - } - } - break; - case CMD_UPDATE: - case CMD_DELETE: + if (tle->resjunk) + { junk_filter_needed = true; break; - default: - break; + } } if (junk_filter_needed) { - /* - * If there are multiple result relations, each one needs its own - * junk filter. Note this is only possible for UPDATE/DELETE, so - * we can't be fooled by some needing a filter and some not. 
- */ - if (list_length(plannedstmt->resultRelations) > 1) - { - PlanState **appendplans; - int as_nplans; - ResultRelInfo *resultRelInfo; - - /* Top plan had better be an Append here. */ - Assert(IsA(plan, Append)); - Assert(((Append *) plan)->isTarget); - Assert(IsA(planstate, AppendState)); - appendplans = ((AppendState *) planstate)->appendplans; - as_nplans = ((AppendState *) planstate)->as_nplans; - Assert(as_nplans == estate->es_num_result_relations); - resultRelInfo = estate->es_result_relations; - for (i = 0; i < as_nplans; i++) - { - PlanState *subplan = appendplans[i]; - JunkFilter *j; - - if (operation == CMD_UPDATE) - ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc, - subplan->plan->targetlist); - - j = ExecInitJunkFilter(subplan->plan->targetlist, - resultRelInfo->ri_RelationDesc->rd_att->tdhasoid, - ExecAllocTableSlot(estate->es_tupleTable)); - - /* - * Since it must be UPDATE/DELETE, there had better be a - * "ctid" junk attribute in the tlist ... but ctid could - * be at a different resno for each result relation. We - * look up the ctid resnos now and save them in the - * junkfilters. - */ - j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid"); - if (!AttributeNumberIsValid(j->jf_junkAttNo)) - elog(ERROR, "could not find junk ctid column"); - resultRelInfo->ri_junkFilter = j; - resultRelInfo++; - } - - /* - * Set active junkfilter too; at this point ExecInitAppend has - * already selected an active result relation... - */ - estate->es_junkFilter = - estate->es_result_relation_info->ri_junkFilter; - - /* - * We currently can't support rowmarks in this case, because - * the associated junk CTIDs might have different resnos in - * different subplans. - */ - if (estate->es_rowMarks) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("SELECT FOR UPDATE/SHARE is not supported within a query with multiple result relations"))); - } - else - { - /* Normal case with just one JunkFilter */ - JunkFilter *j; - - if (operation == CMD_INSERT || operation == CMD_UPDATE) - ExecCheckPlanOutput(estate->es_result_relation_info->ri_RelationDesc, - planstate->plan->targetlist); - - j = ExecInitJunkFilter(planstate->plan->targetlist, - tupType->tdhasoid, - ExecAllocTableSlot(estate->es_tupleTable)); - estate->es_junkFilter = j; - if (estate->es_result_relation_info) - estate->es_result_relation_info->ri_junkFilter = j; - - if (operation == CMD_SELECT) - { - /* For SELECT, want to return the cleaned tuple type */ - tupType = j->jf_cleanTupType; - } - else if (operation == CMD_UPDATE || operation == CMD_DELETE) - { - /* For UPDATE/DELETE, find the ctid junk attr now */ - j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid"); - if (!AttributeNumberIsValid(j->jf_junkAttNo)) - elog(ERROR, "could not find junk ctid column"); - } - - /* For SELECT FOR UPDATE/SHARE, find the junk attrs now */ - foreach(l, estate->es_rowMarks) - { - ExecRowMark *erm = (ExecRowMark *) lfirst(l); - char resname[32]; - - /* always need the ctid */ - snprintf(resname, sizeof(resname), "ctid%u", - erm->prti); - erm->ctidAttNo = ExecFindJunkAttribute(j, resname); - if (!AttributeNumberIsValid(erm->ctidAttNo)) - elog(ERROR, "could not find junk \"%s\" column", - resname); - /* if child relation, need tableoid too */ - if (erm->rti != erm->prti) - { - snprintf(resname, sizeof(resname), "tableoid%u", - erm->prti); - erm->toidAttNo = ExecFindJunkAttribute(j, resname); - if (!AttributeNumberIsValid(erm->toidAttNo)) - elog(ERROR, "could not find junk \"%s\" column", - resname); - } - } - } - } - else - { - if 
(operation == CMD_INSERT) - ExecCheckPlanOutput(estate->es_result_relation_info->ri_RelationDesc, - planstate->plan->targetlist); - - estate->es_junkFilter = NULL; - if (estate->es_rowMarks) - elog(ERROR, "SELECT FOR UPDATE/SHARE, but no junk columns"); - } - } - - /* - * Initialize RETURNING projections if needed. - */ - if (plannedstmt->returningLists) - { - TupleTableSlot *slot; - ExprContext *econtext; - ResultRelInfo *resultRelInfo; - - /* - * We set QueryDesc.tupDesc to be the RETURNING rowtype in this case. - * We assume all the sublists will generate the same output tupdesc. - */ - tupType = ExecTypeFromTL((List *) linitial(plannedstmt->returningLists), - false); + JunkFilter *j; - /* Set up a slot for the output of the RETURNING projection(s) */ - slot = ExecAllocTableSlot(estate->es_tupleTable); - ExecSetSlotDescriptor(slot, tupType); - /* Need an econtext too */ - econtext = CreateExprContext(estate); - - /* - * Build a projection for each result rel. Note that any SubPlans in - * the RETURNING lists get attached to the topmost plan node. - */ - Assert(list_length(plannedstmt->returningLists) == estate->es_num_result_relations); - resultRelInfo = estate->es_result_relations; - foreach(l, plannedstmt->returningLists) - { - List *rlist = (List *) lfirst(l); - List *rliststate; + j = ExecInitJunkFilter(planstate->plan->targetlist, + tupType->tdhasoid, + ExecInitExtraTupleSlot(estate)); + estate->es_junkFilter = j; - rliststate = (List *) ExecInitExpr((Expr *) rlist, planstate); - resultRelInfo->ri_projectReturning = - ExecBuildProjectionInfo(rliststate, econtext, slot, - resultRelInfo->ri_RelationDesc->rd_att); - resultRelInfo++; + /* Want to return the cleaned tuple type */ + tupType = j->jf_cleanTupType; } } queryDesc->tupDesc = tupType; queryDesc->planstate = planstate; - - /* - * If doing SELECT INTO, initialize the "into" relation. We must wait - * till now so we have the "clean" result tuple type to create the new - * table from. - * - * If EXPLAIN, skip creating the "into" relation. - */ - if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) - OpenIntoRel(queryDesc); } /* - * Initialize ResultRelInfo data for one result relation + * Check that a proposed result relation is a legal target for the operation + * + * Generally the parser and/or planner should have noticed any such mistake + * already, but let's make sure. + * + * Note: when changing this function, you probably also need to look at + * CheckValidRowMarkRel. */ void -InitResultRelInfo(ResultRelInfo *resultRelInfo, - Relation resultRelationDesc, - Index resultRelationIndex, - CmdType operation, - bool doInstrument) +CheckValidResultRel(Relation resultRel, CmdType operation) { - /* - * Check valid relkind ... parser and/or planner should have noticed this - * already, but let's make sure. 
- */ - switch (resultRelationDesc->rd_rel->relkind) + TriggerDesc *trigDesc = resultRel->trigdesc; + FdwRoutine *fdwroutine; + + switch (resultRel->rd_rel->relkind) { case RELKIND_RELATION: /* OK */ @@ -1087,136 +1036,229 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot change sequence \"%s\"", - RelationGetRelationName(resultRelationDesc)))); + RelationGetRelationName(resultRel)))); break; case RELKIND_TOASTVALUE: ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot change TOAST relation \"%s\"", - RelationGetRelationName(resultRelationDesc)))); + RelationGetRelationName(resultRel)))); break; case RELKIND_VIEW: - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot change view \"%s\"", - RelationGetRelationName(resultRelationDesc)))); + + /* + * Okay only if there's a suitable INSTEAD OF trigger. Messages + * here should match rewriteHandler.c's rewriteTargetView, except + * that we omit errdetail because we haven't got the information + * handy (and given that we really shouldn't get here anyway, it's + * not worth great exertion to get). + */ + switch (operation) + { + case CMD_INSERT: + if (!trigDesc || !trigDesc->trig_insert_instead_row) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot insert into view \"%s\"", + RelationGetRelationName(resultRel)), + errhint("To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule."))); + break; + case CMD_UPDATE: + if (!trigDesc || !trigDesc->trig_update_instead_row) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot update view \"%s\"", + RelationGetRelationName(resultRel)), + errhint("To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule."))); + break; + case CMD_DELETE: + if (!trigDesc || !trigDesc->trig_delete_instead_row) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot delete from view \"%s\"", + RelationGetRelationName(resultRel)), + errhint("To enable deleting from the view, provide an INSTEAD OF DELETE trigger or an unconditional ON DELETE DO INSTEAD rule."))); + break; + default: + elog(ERROR, "unrecognized CmdType: %d", (int) operation); + break; + } + break; + case RELKIND_MATVIEW: + if (!MatViewIncrementalMaintenanceIsEnabled()) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change materialized view \"%s\"", + RelationGetRelationName(resultRel)))); + break; + case RELKIND_FOREIGN_TABLE: + /* Okay only if the FDW supports it */ + fdwroutine = GetFdwRoutineForRelation(resultRel, false); + switch (operation) + { + case CMD_INSERT: + if (fdwroutine->ExecForeignInsert == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot insert into foreign table \"%s\"", + RelationGetRelationName(resultRel)))); + if (fdwroutine->IsForeignRelUpdatable != NULL && + (fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_INSERT)) == 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("foreign table \"%s\" does not allow inserts", + RelationGetRelationName(resultRel)))); + break; + case CMD_UPDATE: + if (fdwroutine->ExecForeignUpdate == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot update foreign table \"%s\"", + RelationGetRelationName(resultRel)))); + if (fdwroutine->IsForeignRelUpdatable != NULL && + 
(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_UPDATE)) == 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("foreign table \"%s\" does not allow updates", + RelationGetRelationName(resultRel)))); + break; + case CMD_DELETE: + if (fdwroutine->ExecForeignDelete == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot delete from foreign table \"%s\"", + RelationGetRelationName(resultRel)))); + if (fdwroutine->IsForeignRelUpdatable != NULL && + (fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_DELETE)) == 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("foreign table \"%s\" does not allow deletes", + RelationGetRelationName(resultRel)))); + break; + default: + elog(ERROR, "unrecognized CmdType: %d", (int) operation); + break; + } break; default: ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot change relation \"%s\"", - RelationGetRelationName(resultRelationDesc)))); + RelationGetRelationName(resultRel)))); break; } +} - /* OK, fill in the node */ - MemSet(resultRelInfo, 0, sizeof(ResultRelInfo)); - resultRelInfo->type = T_ResultRelInfo; - resultRelInfo->ri_RangeTableIndex = resultRelationIndex; - resultRelInfo->ri_RelationDesc = resultRelationDesc; - resultRelInfo->ri_NumIndices = 0; - resultRelInfo->ri_IndexRelationDescs = NULL; - resultRelInfo->ri_IndexRelationInfo = NULL; - /* make a copy so as not to depend on relcache info not changing... */ - resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc); - if (resultRelInfo->ri_TrigDesc) - { - int n = resultRelInfo->ri_TrigDesc->numtriggers; +/* + * Check that a proposed rowmark target relation is a legal target + * + * In most cases parser and/or planner should have noticed this already, but + * they don't cover all cases. 
+ */ +static void +CheckValidRowMarkRel(Relation rel, RowMarkType markType) +{ + FdwRoutine *fdwroutine; - resultRelInfo->ri_TrigFunctions = (FmgrInfo *) - palloc0(n * sizeof(FmgrInfo)); - if (doInstrument) - resultRelInfo->ri_TrigInstrument = InstrAlloc(n); - else - resultRelInfo->ri_TrigInstrument = NULL; - } - else + switch (rel->rd_rel->relkind) { - resultRelInfo->ri_TrigFunctions = NULL; - resultRelInfo->ri_TrigInstrument = NULL; + case RELKIND_RELATION: + /* OK */ + break; + case RELKIND_SEQUENCE: + /* Must disallow this because we don't vacuum sequences */ + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot lock rows in sequence \"%s\"", + RelationGetRelationName(rel)))); + break; + case RELKIND_TOASTVALUE: + /* We could allow this, but there seems no good reason to */ + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot lock rows in TOAST relation \"%s\"", + RelationGetRelationName(rel)))); + break; + case RELKIND_VIEW: + /* Should not get here; planner should have expanded the view */ + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot lock rows in view \"%s\"", + RelationGetRelationName(rel)))); + break; + case RELKIND_MATVIEW: + /* Allow referencing a matview, but not actual locking clauses */ + if (markType != ROW_MARK_REFERENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot lock rows in materialized view \"%s\"", + RelationGetRelationName(rel)))); + break; + case RELKIND_FOREIGN_TABLE: + /* Okay only if the FDW supports it */ + fdwroutine = GetFdwRoutineForRelation(rel, false); + if (fdwroutine->RefetchForeignRow == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot lock rows in foreign table \"%s\"", + RelationGetRelationName(rel)))); + break; + default: + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot lock rows in relation \"%s\"", + RelationGetRelationName(rel)))); + break; } - resultRelInfo->ri_ConstraintExprs = NULL; - resultRelInfo->ri_junkFilter = NULL; - resultRelInfo->ri_projectReturning = NULL; - - /* - * If there are indices on the result relation, open them and save - * descriptors in the result relation info, so that we can add new index - * entries for the tuples we add/update. We need not do this for a - * DELETE, however, since deletion doesn't affect indexes. - */ - if (resultRelationDesc->rd_rel->relhasindex && - operation != CMD_DELETE) - ExecOpenIndices(resultRelInfo); } /* - * Verify that the tuples to be produced by INSERT or UPDATE match the - * target relation's rowtype - * - * We do this to guard against stale plans. If plan invalidation is - * functioning properly then we should never get a failure here, but better - * safe than sorry. Note that this is called after we have obtained lock - * on the target rel, so the rowtype can't change underneath us. + * Initialize ResultRelInfo data for one result relation * - * The plan output is represented by its targetlist, because that makes - * handling the dropped-column case easier. + * Caution: before Postgres 9.1, this function included the relkind checking + * that's now in CheckValidResultRel, and it also did ExecOpenIndices if + * appropriate. Be sure callers cover those needs. 
*/ -static void -ExecCheckPlanOutput(Relation resultRel, List *targetList) +void +InitResultRelInfo(ResultRelInfo *resultRelInfo, + Relation resultRelationDesc, + Index resultRelationIndex, + int instrument_options) { - TupleDesc resultDesc = RelationGetDescr(resultRel); - int attno = 0; - ListCell *lc; - - foreach(lc, targetList) + MemSet(resultRelInfo, 0, sizeof(ResultRelInfo)); + resultRelInfo->type = T_ResultRelInfo; + resultRelInfo->ri_RangeTableIndex = resultRelationIndex; + resultRelInfo->ri_RelationDesc = resultRelationDesc; + resultRelInfo->ri_NumIndices = 0; + resultRelInfo->ri_IndexRelationDescs = NULL; + resultRelInfo->ri_IndexRelationInfo = NULL; + /* make a copy so as not to depend on relcache info not changing... */ + resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc); + if (resultRelInfo->ri_TrigDesc) { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - Form_pg_attribute attr; - - if (tle->resjunk) - continue; /* ignore junk tlist items */ - - if (attno >= resultDesc->natts) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("table row type and query-specified row type do not match"), - errdetail("Query has too many columns."))); - attr = resultDesc->attrs[attno++]; + int n = resultRelInfo->ri_TrigDesc->numtriggers; - if (!attr->attisdropped) - { - /* Normal case: demand type match */ - if (exprType((Node *) tle->expr) != attr->atttypid) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("table row type and query-specified row type do not match"), - errdetail("Table has type %s at ordinal position %d, but query expects %s.", - format_type_be(attr->atttypid), - attno, - format_type_be(exprType((Node *) tle->expr))))); - } - else - { - /* - * For a dropped column, we can't check atttypid (it's likely 0). - * In any case the planner has most likely inserted an INT4 null. - * What we insist on is just *some* NULL constant. - */ - if (!IsA(tle->expr, Const) || - !((Const *) tle->expr)->constisnull) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("table row type and query-specified row type do not match"), - errdetail("Query provides a value for a dropped column at ordinal position %d.", - attno))); - } + resultRelInfo->ri_TrigFunctions = (FmgrInfo *) + palloc0(n * sizeof(FmgrInfo)); + resultRelInfo->ri_TrigWhenExprs = (List **) + palloc0(n * sizeof(List *)); + if (instrument_options) + resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options); + } + else + { + resultRelInfo->ri_TrigFunctions = NULL; + resultRelInfo->ri_TrigWhenExprs = NULL; + resultRelInfo->ri_TrigInstrument = NULL; } - if (attno != resultDesc->natts) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("table row type and query-specified row type do not match"), - errdetail("Query has too few columns."))); + if (resultRelationDesc->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + resultRelInfo->ri_FdwRoutine = GetFdwRoutineForRelation(resultRelationDesc, true); + else + resultRelInfo->ri_FdwRoutine = NULL; + resultRelInfo->ri_FdwState = NULL; + resultRelInfo->ri_ConstraintExprs = NULL; + resultRelInfo->ri_junkFilter = NULL; + resultRelInfo->ri_projectReturning = NULL; } /* @@ -1229,7 +1271,7 @@ ExecCheckPlanOutput(Relation resultRel, List *targetList) * if so it doesn't matter which one we pick.) However, it is sometimes * necessary to fire triggers on other relations; this happens mainly when an * RI update trigger queues additional triggers on other relations, which will - * be processed in the context of the outer query. 
For efficiency's sake, + * be processed in the context of the outer query. For efficiency's sake, * we want to have a ResultRelInfo for those triggers too; that can avoid * repeated re-opening of the relation. (It also provides a way for EXPLAIN * ANALYZE to report the runtimes of such triggers.) So we make additional @@ -1266,33 +1308,36 @@ ExecGetTriggerResultRel(EState *estate, Oid relid) /* * Open the target relation's relcache entry. We assume that an * appropriate lock is still held by the backend from whenever the trigger - * event got queued, so we need take no new lock here. + * event got queued, so we need take no new lock here. Also, we need not + * recheck the relkind, so no need for CheckValidResultRel. */ rel = heap_open(relid, NoLock); /* - * Make the new entry in the right context. Currently, we don't need any - * index information in ResultRelInfos used only for triggers, so tell - * InitResultRelInfo it's a DELETE. + * Make the new entry in the right context. */ oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); rInfo = makeNode(ResultRelInfo); InitResultRelInfo(rInfo, rel, 0, /* dummy rangetable index */ - CMD_DELETE, estate->es_instrument); estate->es_trig_target_relations = lappend(estate->es_trig_target_relations, rInfo); MemoryContextSwitchTo(oldcontext); + /* + * Currently, we don't need any index information in ResultRelInfos used + * only for triggers, so no need to call ExecOpenIndices. + */ + return rInfo; } /* * ExecContextForcesOids * - * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO, + * This is pretty grotty: when doing INSERT, UPDATE, or CREATE TABLE AS, * we need to ensure that result tuples have space for an OID iff they are * going to be stored into a relation that has OIDs. In other contexts * we are free to choose whether to leave space for OIDs in result tuples @@ -1309,41 +1354,86 @@ ExecGetTriggerResultRel(EState *estate, Oid relid) * recognize how far down the requirement really goes, but for now we just * make all plan nodes do the same thing if the top level forces the choice. * - * We assume that estate->es_result_relation_info is already set up to - * describe the target relation. Note that in an UPDATE that spans an - * inheritance tree, some of the target relations may have OIDs and some not. - * We have to make the decisions on a per-relation basis as we initialize - * each of the child plans of the topmost Append plan. + * We assume that if we are generating tuples for INSERT or UPDATE, + * estate->es_result_relation_info is already set up to describe the target + * relation. Note that in an UPDATE that spans an inheritance tree, some of + * the target relations may have OIDs and some not. We have to make the + * decisions on a per-relation basis as we initialize each of the subplans of + * the ModifyTable node, so ModifyTable has to set es_result_relation_info + * while initializing each subplan. * - * SELECT INTO is even uglier, because we don't have the INTO relation's - * descriptor available when this code runs; we have to look aside at a - * flag set by InitPlan(). + * CREATE TABLE AS is even uglier, because we don't have the target relation's + * descriptor available when this code runs; we have to look aside at the + * flags passed to ExecutorStart(). 
*/ bool ExecContextForcesOids(PlanState *planstate, bool *hasoids) { - if (planstate->state->es_select_into) + ResultRelInfo *ri = planstate->state->es_result_relation_info; + + if (ri != NULL) + { + Relation rel = ri->ri_RelationDesc; + + if (rel != NULL) + { + *hasoids = rel->rd_rel->relhasoids; + return true; + } + } + + if (planstate->state->es_top_eflags & EXEC_FLAG_WITH_OIDS) { - *hasoids = planstate->state->es_into_oids; + *hasoids = true; return true; } - else + if (planstate->state->es_top_eflags & EXEC_FLAG_WITHOUT_OIDS) + { + *hasoids = false; + return true; + } + + return false; +} + +/* ---------------------------------------------------------------- + * ExecPostprocessPlan + * + * Give plan nodes a final chance to execute before shutdown + * ---------------------------------------------------------------- + */ +static void +ExecPostprocessPlan(EState *estate) +{ + ListCell *lc; + + /* + * Make sure nodes run forward. + */ + estate->es_direction = ForwardScanDirection; + + /* + * Run any secondary ModifyTable nodes to completion, in case the main + * query did not fetch all rows from them. (We do this to ensure that + * such nodes have predictable results.) + */ + foreach(lc, estate->es_auxmodifytables) { - ResultRelInfo *ri = planstate->state->es_result_relation_info; + PlanState *ps = (PlanState *) lfirst(lc); - if (ri != NULL) + for (;;) { - Relation rel = ri->ri_RelationDesc; + TupleTableSlot *slot; - if (rel != NULL) - { - *hasoids = rel->rd_rel->relhasoids; - return true; - } + /* Reset the per-output-tuple exprcontext each time */ + ResetPerTupleExprContext(estate); + + slot = ExecProcNode(ps); + + if (TupIsNull(slot)) + break; } } - - return false; } /* ---------------------------------------------------------------- @@ -1365,12 +1455,6 @@ ExecEndPlan(PlanState *planstate, EState *estate) int i; ListCell *l; - /* - * shut down any PlanQual processing we were doing - */ - if (estate->es_evalPlanQual != NULL) - EndEvalPlanQual(estate); - /* * shut down the node-type-specific query processing */ @@ -1387,10 +1471,12 @@ ExecEndPlan(PlanState *planstate, EState *estate) } /* - * destroy the executor "tuple" table. + * destroy the executor's tuple table. Actually we only care about + * releasing buffer pins and tupdesc refcounts; there's no need to pfree + * the TupleTableSlots, since the containing memory context is about to go + * away anyway. */ - ExecDropTupleTable(estate->es_tupleTable, true); - estate->es_tupleTable = NULL; + ExecResetTupleTable(estate->es_tupleTable, false); /* * close the result relation(s) if any, but hold locks until xact commit. @@ -1416,20 +1502,22 @@ ExecEndPlan(PlanState *planstate, EState *estate) } /* - * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks + * close any relations selected FOR [KEY] UPDATE/SHARE, again keeping + * locks */ foreach(l, estate->es_rowMarks) { - ExecRowMark *erm = lfirst(l); + ExecRowMark *erm = (ExecRowMark *) lfirst(l); - heap_close(erm->relation, NoLock); + if (erm->relation) + heap_close(erm->relation, NoLock); } } /* ---------------------------------------------------------------- * ExecutePlan * - * Processes the query plan until we have processed 'numberTuples' tuples, + * Processes the query plan until we have retrieved 'numberTuples' tuples, * moving in the specified direction. 
* * Runs to completion if numberTuples is 0 @@ -1442,15 +1530,12 @@ static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation, + bool sendTuples, long numberTuples, ScanDirection direction, DestReceiver *dest) { - JunkFilter *junkfilter; - TupleTableSlot *planSlot; TupleTableSlot *slot; - ItemPointer tupleid = NULL; - ItemPointerData tuple_ctid; long current_tuple_count; /* @@ -1463,25 +1548,6 @@ ExecutePlan(EState *estate, */ estate->es_direction = direction; - /* - * Process BEFORE EACH STATEMENT triggers - */ - switch (operation) - { - case CMD_UPDATE: - ExecBSUpdateTriggers(estate, estate->es_result_relation_info); - break; - case CMD_DELETE: - ExecBSDeleteTriggers(estate, estate->es_result_relation_info); - break; - case CMD_INSERT: - ExecBSInsertTriggers(estate, estate->es_result_relation_info); - break; - default: - /* do nothing */ - break; - } - /* * Loop until we've processed the proper number of tuples from the plan. */ @@ -1493,23 +1559,14 @@ ExecutePlan(EState *estate, /* * Execute the plan and obtain a tuple */ -lnext: ; - if (estate->es_useEvalPlan) - { - planSlot = EvalPlanQualNext(estate); - if (TupIsNull(planSlot)) - planSlot = ExecProcNode(planstate); - } - else - planSlot = ExecProcNode(planstate); + slot = ExecProcNode(planstate); /* * if the tuple is null, then we assume there is nothing more to * process so we just end the loop... */ - if (TupIsNull(planSlot)) + if (TupIsNull(slot)) break; - slot = planSlot; /* * If we have a junk filter, then project a new tuple with the junk @@ -1518,174 +1575,24 @@ lnext: ; * Store this new "clean" tuple in the junkfilter's resultSlot. * (Formerly, we stored it back over the "dirty" tuple, which is WRONG * because that tuple slot has the wrong descriptor.) - * - * But first, extract all the junk information we need. */ - if ((junkfilter = estate->es_junkFilter) != NULL) - { - /* - * Process any FOR UPDATE or FOR SHARE locking requested. - */ - if (estate->es_rowMarks != NIL) - { - ListCell *l; - - lmark: ; - foreach(l, estate->es_rowMarks) - { - ExecRowMark *erm = lfirst(l); - Datum datum; - bool isNull; - HeapTupleData tuple; - Buffer buffer; - ItemPointerData update_ctid; - TransactionId update_xmax; - TupleTableSlot *newSlot; - LockTupleMode lockmode; - HTSU_Result test; - - /* if child rel, must check whether it produced this row */ - if (erm->rti != erm->prti) - { - Oid tableoid; - - datum = ExecGetJunkAttribute(slot, - erm->toidAttNo, - &isNull); - /* shouldn't ever get a null result... */ - if (isNull) - elog(ERROR, "tableoid is NULL"); - tableoid = DatumGetObjectId(datum); - - if (tableoid != RelationGetRelid(erm->relation)) - { - /* this child is inactive right now */ - ItemPointerSetInvalid(&(erm->curCtid)); - continue; - } - } - - /* okay, fetch the tuple by ctid */ - datum = ExecGetJunkAttribute(slot, - erm->ctidAttNo, - &isNull); - /* shouldn't ever get a null result... 
*/ - if (isNull) - elog(ERROR, "ctid is NULL"); - tuple.t_self = *((ItemPointer) DatumGetPointer(datum)); - - if (erm->forUpdate) - lockmode = LockTupleExclusive; - else - lockmode = LockTupleShared; - - test = heap_lock_tuple(erm->relation, &tuple, &buffer, - &update_ctid, &update_xmax, - estate->es_output_cid, - lockmode, erm->noWait); - ReleaseBuffer(buffer); - switch (test) - { - case HeapTupleSelfUpdated: - /* treat it as deleted; do not process */ - goto lnext; - - case HeapTupleMayBeUpdated: - break; - - case HeapTupleUpdated: - if (IsXactIsoLevelSerializable) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent update"))); - if (!ItemPointerEquals(&update_ctid, - &tuple.t_self)) - { - /* updated, so look at updated version */ - newSlot = EvalPlanQual(estate, - erm->rti, - &update_ctid, - update_xmax); - if (!TupIsNull(newSlot)) - { - slot = planSlot = newSlot; - estate->es_useEvalPlan = true; - goto lmark; - } - } - - /* - * if tuple was deleted or PlanQual failed for - * updated tuple - we must not return this tuple! - */ - goto lnext; - - default: - elog(ERROR, "unrecognized heap_lock_tuple status: %u", - test); - } - - /* Remember tuple TID for WHERE CURRENT OF */ - erm->curCtid = tuple.t_self; - } - } - - /* - * extract the 'ctid' junk attribute. - */ - if (operation == CMD_UPDATE || operation == CMD_DELETE) - { - Datum datum; - bool isNull; - - datum = ExecGetJunkAttribute(slot, junkfilter->jf_junkAttNo, - &isNull); - /* shouldn't ever get a null result... */ - if (isNull) - elog(ERROR, "ctid is NULL"); - - tupleid = (ItemPointer) DatumGetPointer(datum); - tuple_ctid = *tupleid; /* make sure we don't free the ctid!! */ - tupleid = &tuple_ctid; - } - - /* - * Create a new "clean" tuple with all junk attributes removed. We - * don't need to do this for DELETE, however (there will in fact - * be no non-junk attributes in a DELETE!) - */ - if (operation != CMD_DELETE) - slot = ExecFilterJunk(junkfilter, slot); - } + if (estate->es_junkFilter != NULL) + slot = ExecFilterJunk(estate->es_junkFilter, slot); /* - * now that we have a tuple, do the appropriate thing with it.. either - * send it to the output destination, add it to a relation someplace, - * delete it from a relation, or modify some of its attributes. + * If we are supposed to send the tuple somewhere, do so. (In + * practice, this is probably always the case at this point.) */ - switch (operation) - { - case CMD_SELECT: - ExecSelect(slot, dest, estate); - break; - - case CMD_INSERT: - ExecInsert(slot, tupleid, planSlot, dest, estate); - break; - - case CMD_DELETE: - ExecDelete(tupleid, planSlot, dest, estate); - break; - - case CMD_UPDATE: - ExecUpdate(slot, tupleid, planSlot, dest, estate); - break; + if (sendTuples) + (*dest->receiveSlot) (slot, dest); - default: - elog(ERROR, "unrecognized operation code: %d", - (int) operation); - break; - } + /* + * Count tuples processed, if this is a SELECT. (For other operation + * types, the ModifyTable plan node must count the appropriate + * events.) + */ + if (operation == CMD_SELECT) + (estate->es_processed)++; /* * check our tuple count.. 
if we've processed the proper number then @@ -1696,596 +1603,526 @@ lnext: ; if (numberTuples && numberTuples == current_tuple_count) break; } - - /* - * Process AFTER EACH STATEMENT triggers - */ - switch (operation) - { - case CMD_UPDATE: - ExecASUpdateTriggers(estate, estate->es_result_relation_info); - break; - case CMD_DELETE: - ExecASDeleteTriggers(estate, estate->es_result_relation_info); - break; - case CMD_INSERT: - ExecASInsertTriggers(estate, estate->es_result_relation_info); - break; - default: - /* do nothing */ - break; - } } -/* ---------------------------------------------------------------- - * ExecSelect - * - * SELECTs are easy.. we just pass the tuple to the appropriate - * output function. - * ---------------------------------------------------------------- - */ -static void -ExecSelect(TupleTableSlot *slot, - DestReceiver *dest, - EState *estate) -{ - (*dest->receiveSlot) (slot, dest); - IncrRetrieved(); - (estate->es_processed)++; -} -/* ---------------------------------------------------------------- - * ExecInsert +/* + * ExecRelCheck --- check that tuple meets constraints for result relation * - * INSERTs are trickier.. we have to insert the tuple into - * the base relation and insert appropriate tuples into the - * index relations. - * ---------------------------------------------------------------- + * Returns NULL if OK, else name of failed check constraint */ -static void -ExecInsert(TupleTableSlot *slot, - ItemPointer tupleid, - TupleTableSlot *planSlot, - DestReceiver *dest, - EState *estate) +static const char * +ExecRelCheck(ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, EState *estate) { - HeapTuple tuple; - ResultRelInfo *resultRelInfo; - Relation resultRelationDesc; - Oid newId; - - /* - * get the heap tuple out of the tuple table slot, making sure we have a - * writable copy - */ - tuple = ExecMaterializeSlot(slot); - - /* - * get information on the (current) result relation - */ - resultRelInfo = estate->es_result_relation_info; - resultRelationDesc = resultRelInfo->ri_RelationDesc; + Relation rel = resultRelInfo->ri_RelationDesc; + int ncheck = rel->rd_att->constr->num_check; + ConstrCheck *check = rel->rd_att->constr->check; + ExprContext *econtext; + MemoryContext oldContext; + List *qual; + int i; /* - * If the result relation has OIDs, force the tuple's OID to zero so that - * heap_insert will assign a fresh OID. Usually the OID already will be - * zero at this point, but there are corner cases where the plan tree can - * return a tuple extracted literally from some table with the same - * rowtype. - * - * XXX if we ever wanted to allow users to assign their own OIDs to new - * rows, this'd be the place to do it. For the moment, we make a point - * of doing this before calling triggers, so that a user-supplied trigger - * could hack the OID if desired. + * If first time through for this result relation, build expression + * nodetrees for rel's constraint expressions. Keep them in the per-query + * memory context so they'll survive throughout the query. 
*/ - if (resultRelationDesc->rd_rel->relhasoids) - HeapTupleSetOid(tuple, InvalidOid); - - /* BEFORE ROW INSERT Triggers */ - if (resultRelInfo->ri_TrigDesc && - resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0) + if (resultRelInfo->ri_ConstraintExprs == NULL) { - HeapTuple newtuple; - - newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple); - - if (newtuple == NULL) /* "do nothing" */ - return; - - if (newtuple != tuple) /* modified by Trigger(s) */ + oldContext = MemoryContextSwitchTo(estate->es_query_cxt); + resultRelInfo->ri_ConstraintExprs = + (List **) palloc(ncheck * sizeof(List *)); + for (i = 0; i < ncheck; i++) { - /* - * Put the modified tuple into a slot for convenience of routines - * below. We assume the tuple was allocated in per-tuple memory - * context, and therefore will go away by itself. The tuple table - * slot should not try to clear it. - */ - TupleTableSlot *newslot = estate->es_trig_tuple_slot; - - if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor) - ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor); - ExecStoreTuple(newtuple, newslot, InvalidBuffer, false); - slot = newslot; - tuple = newtuple; + /* ExecQual wants implicit-AND form */ + qual = make_ands_implicit(stringToNode(check[i].ccbin)); + resultRelInfo->ri_ConstraintExprs[i] = (List *) + ExecPrepareExpr((Expr *) qual, estate); } + MemoryContextSwitchTo(oldContext); } /* - * Check the constraints of the tuple - */ - if (resultRelationDesc->rd_att->constr) - ExecConstraints(resultRelInfo, slot, estate); - - /* - * insert the tuple - * - * Note: heap_insert returns the tid (location) of the new tuple in the - * t_self field. + * We will use the EState's per-tuple context for evaluating constraint + * expressions (creating it if it's not already there). */ - newId = heap_insert(resultRelationDesc, tuple, - estate->es_output_cid, 0, NULL); + econtext = GetPerTupleExprContext(estate); - IncrAppended(); - (estate->es_processed)++; - estate->es_lastoid = newId; - setLastTid(&(tuple->t_self)); + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; - /* - * insert index entries for tuple - */ - if (resultRelInfo->ri_NumIndices > 0) - ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false); + /* And evaluate the constraints */ + for (i = 0; i < ncheck; i++) + { + qual = resultRelInfo->ri_ConstraintExprs[i]; - /* AFTER ROW INSERT Triggers */ - ExecARInsertTriggers(estate, resultRelInfo, tuple); + /* + * NOTE: SQL specifies that a NULL result from a constraint expression + * is not to be treated as a failure. Therefore, tell ExecQual to + * return TRUE for NULL. 
+ */ + if (!ExecQual(qual, econtext, true)) + return check[i].ccname; + } - /* Process RETURNING if present */ - if (resultRelInfo->ri_projectReturning) - ExecProcessReturning(resultRelInfo->ri_projectReturning, - slot, planSlot, dest); -} + /* NULL result means no error */ + return NULL; +} -/* ---------------------------------------------------------------- - * ExecDelete - * - * DELETE is like UPDATE, except that we delete the tuple and no - * index modifications are needed - * ---------------------------------------------------------------- - */ -static void -ExecDelete(ItemPointer tupleid, - TupleTableSlot *planSlot, - DestReceiver *dest, - EState *estate) +void +ExecConstraints(ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, EState *estate) { - ResultRelInfo *resultRelInfo; - Relation resultRelationDesc; - HTSU_Result result; - ItemPointerData update_ctid; - TransactionId update_xmax; + Relation rel = resultRelInfo->ri_RelationDesc; + TupleDesc tupdesc = RelationGetDescr(rel); + TupleConstr *constr = tupdesc->constr; + Bitmapset *modifiedCols; + Bitmapset *insertedCols; + Bitmapset *updatedCols; - /* - * get information on the (current) result relation - */ - resultRelInfo = estate->es_result_relation_info; - resultRelationDesc = resultRelInfo->ri_RelationDesc; + Assert(constr); - /* BEFORE ROW DELETE Triggers */ - if (resultRelInfo->ri_TrigDesc && - resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0) + if (constr->has_not_null) { - bool dodelete; - - dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid); - - if (!dodelete) /* "do nothing" */ - return; - } + int natts = tupdesc->natts; + int attrChk; - /* - * delete the tuple - * - * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that - * the row to be deleted is visible to that snapshot, and throw a can't- - * serialize error if not. This is a special-case behavior needed for - * referential integrity updates in serializable transactions. - */ -ldelete:; - result = heap_delete(resultRelationDesc, tupleid, - &update_ctid, &update_xmax, - estate->es_output_cid, - estate->es_crosscheck_snapshot, - true /* wait for commit */ ); - switch (result) - { - case HeapTupleSelfUpdated: - /* already deleted by self; nothing to do */ - return; + for (attrChk = 1; attrChk <= natts; attrChk++) + { + if (tupdesc->attrs[attrChk - 1]->attnotnull && + slot_attisnull(slot, attrChk)) + { + char *val_desc; - case HeapTupleMayBeUpdated: - break; + insertedCols = GetInsertedColumns(resultRelInfo, estate); + updatedCols = GetUpdatedColumns(resultRelInfo, estate); + modifiedCols = bms_union(insertedCols, updatedCols); + val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), + slot, + tupdesc, + modifiedCols, + 64); - case HeapTupleUpdated: - if (IsXactIsoLevelSerializable) ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent update"))); - else if (!ItemPointerEquals(tupleid, &update_ctid)) - { - TupleTableSlot *epqslot; - - epqslot = EvalPlanQual(estate, - resultRelInfo->ri_RangeTableIndex, - &update_ctid, - update_xmax); - if (!TupIsNull(epqslot)) - { - *tupleid = update_ctid; - goto ldelete; - } + (errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("null value in column \"%s\" violates not-null constraint", + NameStr(tupdesc->attrs[attrChk - 1]->attname)), + val_desc ? 
errdetail("Failing row contains %s.", val_desc) : 0, + errtablecol(rel, attrChk))); } - /* tuple already deleted; nothing to do */ - return; + } + } - default: - elog(ERROR, "unrecognized heap_delete status: %u", result); - return; + if (constr->num_check > 0) + { + const char *failed; + + if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL) + { + char *val_desc; + + insertedCols = GetInsertedColumns(resultRelInfo, estate); + updatedCols = GetUpdatedColumns(resultRelInfo, estate); + modifiedCols = bms_union(insertedCols, updatedCols); + val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), + slot, + tupdesc, + modifiedCols, + 64); + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("new row for relation \"%s\" violates check constraint \"%s\"", + RelationGetRelationName(rel), failed), + val_desc ? errdetail("Failing row contains %s.", val_desc) : 0, + errtableconstraint(rel, failed))); + } } +} - IncrDeleted(); - (estate->es_processed)++; +/* + * ExecWithCheckOptions -- check that tuple satisfies any WITH CHECK OPTIONs + * of the specified kind. + * + * Note that this needs to be called multiple times to ensure that all kinds of + * WITH CHECK OPTIONs are handled (both those from views which have the WITH + * CHECK OPTION set and from row level security policies). See ExecInsert() + * and ExecUpdate(). + */ +void +ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, EState *estate) +{ + Relation rel = resultRelInfo->ri_RelationDesc; + TupleDesc tupdesc = RelationGetDescr(rel); + ExprContext *econtext; + ListCell *l1, + *l2; /* - * Note: Normally one would think that we have to delete index tuples - * associated with the heap tuple now... - * - * ... but in POSTGRES, we have no need to do this because VACUUM will - * take care of it later. We can't delete index tuples immediately - * anyway, since the tuple is still visible to other transactions. + * We will use the EState's per-tuple context for evaluating constraint + * expressions (creating it if it's not already there). */ + econtext = GetPerTupleExprContext(estate); - /* AFTER ROW DELETE Triggers */ - ExecARDeleteTriggers(estate, resultRelInfo, tupleid); + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; - /* Process RETURNING if present */ - if (resultRelInfo->ri_projectReturning) + /* Check each of the constraints */ + forboth(l1, resultRelInfo->ri_WithCheckOptions, + l2, resultRelInfo->ri_WithCheckOptionExprs) { + WithCheckOption *wco = (WithCheckOption *) lfirst(l1); + ExprState *wcoExpr = (ExprState *) lfirst(l2); + /* - * We have to put the target tuple into a slot, which means first we - * gotta fetch it. We can use the trigger tuple slot. + * Skip any WCOs which are not the kind we are looking for at this + * time. 
*/ - TupleTableSlot *slot = estate->es_trig_tuple_slot; - HeapTupleData deltuple; - Buffer delbuffer; - - deltuple.t_self = *tupleid; - if (!heap_fetch(resultRelationDesc, SnapshotAny, - &deltuple, &delbuffer, false, NULL)) - elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING"); + if (wco->kind != kind) + continue; - if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc)) - ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc)); - ExecStoreTuple(&deltuple, slot, InvalidBuffer, false); + /* + * WITH CHECK OPTION checks are intended to ensure that the new tuple + * is visible (in the case of a view) or that it passes the + * 'with-check' policy (in the case of row security). If the qual + * evaluates to NULL or FALSE, then the new tuple won't be included in + * the view or doesn't pass the 'with-check' policy for the table. We + * need ExecQual to return FALSE for NULL to handle the view case (the + * opposite of what we do above for CHECK constraints). + */ + if (!ExecQual((List *) wcoExpr, econtext, false)) + { + char *val_desc; + Bitmapset *modifiedCols; + Bitmapset *insertedCols; + Bitmapset *updatedCols; - ExecProcessReturning(resultRelInfo->ri_projectReturning, - slot, planSlot, dest); + switch (wco->kind) + { + /* + * For WITH CHECK OPTIONs coming from views, we might be + * able to provide the details on the row, depending on + * the permissions on the relation (that is, if the user + * could view it directly anyway). For RLS violations, we + * don't include the data since we don't know if the user + * should be able to view the tuple, as that depends on + * the USING policy. + */ + case WCO_VIEW_CHECK: + insertedCols = GetInsertedColumns(resultRelInfo, estate); + updatedCols = GetUpdatedColumns(resultRelInfo, estate); + modifiedCols = bms_union(insertedCols, updatedCols); + val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), + slot, + tupdesc, + modifiedCols, + 64); - ExecClearTuple(slot); - ReleaseBuffer(delbuffer); + ereport(ERROR, + (errcode(ERRCODE_WITH_CHECK_OPTION_VIOLATION), + errmsg("new row violates WITH CHECK OPTION for \"%s\"", + wco->relname), + val_desc ? errdetail("Failing row contains %s.", + val_desc) : 0)); + break; + case WCO_RLS_INSERT_CHECK: + case WCO_RLS_UPDATE_CHECK: + if (wco->polname != NULL) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("new row violates row level security policy \"%s\" for \"%s\"", + wco->polname, wco->relname))); + else + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("new row violates row level security policy for \"%s\"", + wco->relname))); + break; + case WCO_RLS_CONFLICT_CHECK: + if (wco->polname != NULL) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("new row violates row level security policy \"%s\" (USING expression) for \"%s\"", + wco->polname, wco->relname))); + else + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("new row violates row level security policy (USING expression) for \"%s\"", + wco->relname))); + break; + default: + elog(ERROR, "unrecognized WCO kind: %u", wco->kind); + break; + } + } } } -/* ---------------------------------------------------------------- - * ExecUpdate - * - * note: we can't run UPDATE queries with transactions - * off because UPDATEs are actually INSERTs and our - * scan will mistakenly loop forever, updating the tuple - * it just inserted..
This should be fixed but until it - * is, we don't want to get stuck in an infinite loop - * which corrupts your database.. - * ---------------------------------------------------------------- +/* + * ExecBuildSlotValueDescription -- construct a string representing a tuple + * + * This is intentionally very similar to BuildIndexValueDescription, but + * unlike that function, we truncate long field values (to at most maxfieldlen + * bytes). That seems necessary here since heap field values could be very + * long, whereas index entries typically aren't so wide. + * + * Also, unlike the case with index entries, we need to be prepared to ignore + * dropped columns. We used to use the slot's tuple descriptor to decode the + * data, but the slot's descriptor doesn't identify dropped columns, so we + * now need to be passed the relation's descriptor. + * + * Note that, like BuildIndexValueDescription, if the user does not have + * permission to view any of the columns involved, a NULL is returned. Unlike + * BuildIndexValueDescription, if the user has access to view a subset of the + * column involved, that subset will be returned with a key identifying which + * columns they are. */ -static void -ExecUpdate(TupleTableSlot *slot, - ItemPointer tupleid, - TupleTableSlot *planSlot, - DestReceiver *dest, - EState *estate) +static char * +ExecBuildSlotValueDescription(Oid reloid, + TupleTableSlot *slot, + TupleDesc tupdesc, + Bitmapset *modifiedCols, + int maxfieldlen) { - HeapTuple tuple; - ResultRelInfo *resultRelInfo; - Relation resultRelationDesc; - HTSU_Result result; - ItemPointerData update_ctid; - TransactionId update_xmax; + StringInfoData buf; + StringInfoData collist; + bool write_comma = false; + bool write_comma_collist = false; + int i; + AclResult aclresult; + bool table_perm = false; + bool any_perm = false; /* - * abort the operation if not running transactions + * Check if RLS is enabled and should be active for the relation; if so, + * then don't return anything. Otherwise, go through normal permission + * checks. */ - if (IsBootstrapProcessingMode()) - elog(ERROR, "cannot UPDATE during bootstrap"); + if (check_enable_rls(reloid, InvalidOid, true) == RLS_ENABLED) + return NULL; - /* - * get the heap tuple out of the tuple table slot, making sure we have a - * writable copy - */ - tuple = ExecMaterializeSlot(slot); + initStringInfo(&buf); + + appendStringInfoChar(&buf, '('); /* - * get information on the (current) result relation + * Check if the user has permissions to see the row. Table-level SELECT + * allows access to all columns. If the user does not have table-level + * SELECT then we check each column and include those the user has SELECT + * rights on. Additionally, we always include columns the user provided + * data for. 
*/ - resultRelInfo = estate->es_result_relation_info; - resultRelationDesc = resultRelInfo->ri_RelationDesc; - - /* BEFORE ROW UPDATE Triggers */ - if (resultRelInfo->ri_TrigDesc && - resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0) + aclresult = pg_class_aclcheck(reloid, GetUserId(), ACL_SELECT); + if (aclresult != ACLCHECK_OK) { - HeapTuple newtuple; + /* Set up the buffer for the column list */ + initStringInfo(&collist); + appendStringInfoChar(&collist, '('); + } + else + table_perm = any_perm = true; + + /* Make sure the tuple is fully deconstructed */ + slot_getallattrs(slot); - newtuple = ExecBRUpdateTriggers(estate, resultRelInfo, - tupleid, tuple); + for (i = 0; i < tupdesc->natts; i++) + { + bool column_perm = false; + char *val; + int vallen; - if (newtuple == NULL) /* "do nothing" */ - return; + /* ignore dropped columns */ + if (tupdesc->attrs[i]->attisdropped) + continue; - if (newtuple != tuple) /* modified by Trigger(s) */ + if (!table_perm) { /* - * Put the modified tuple into a slot for convenience of routines - * below. We assume the tuple was allocated in per-tuple memory - * context, and therefore will go away by itself. The tuple table - * slot should not try to clear it. + * No table-level SELECT, so need to make sure they either have + * SELECT rights on the column or that they have provided the data + * for the column. If not, omit this column from the error + * message. */ - TupleTableSlot *newslot = estate->es_trig_tuple_slot; + aclresult = pg_attribute_aclcheck(reloid, tupdesc->attrs[i]->attnum, + GetUserId(), ACL_SELECT); + if (bms_is_member(tupdesc->attrs[i]->attnum - FirstLowInvalidHeapAttributeNumber, + modifiedCols) || aclresult == ACLCHECK_OK) + { + column_perm = any_perm = true; + + if (write_comma_collist) + appendStringInfoString(&collist, ", "); + else + write_comma_collist = true; - if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor) - ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor); - ExecStoreTuple(newtuple, newslot, InvalidBuffer, false); - slot = newslot; - tuple = newtuple; + appendStringInfoString(&collist, NameStr(tupdesc->attrs[i]->attname)); + } } - } - /* - * Check the constraints of the tuple - * - * If we generate a new candidate tuple after EvalPlanQual testing, we - * must loop back here and recheck constraints. (We don't need to redo - * triggers, however. If there are any BEFORE triggers then trigger.c - * will have done heap_lock_tuple to lock the correct tuple, so there's no - * need to do them again.) - */ -lreplace:; - if (resultRelationDesc->rd_att->constr) - ExecConstraints(resultRelInfo, slot, estate); + if (table_perm || column_perm) + { + if (slot->tts_isnull[i]) + val = "null"; + else + { + Oid foutoid; + bool typisvarlena; - /* - * replace the heap tuple - * - * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that - * the row to be updated is visible to that snapshot, and throw a can't- - * serialize error if not. This is a special-case behavior needed for - * referential integrity updates in serializable transactions. 
- */ - result = heap_update(resultRelationDesc, tupleid, tuple, - &update_ctid, &update_xmax, - estate->es_output_cid, - estate->es_crosscheck_snapshot, - true /* wait for commit */ ); - switch (result) - { - case HeapTupleSelfUpdated: - /* already deleted by self; nothing to do */ - return; + getTypeOutputInfo(tupdesc->attrs[i]->atttypid, + &foutoid, &typisvarlena); + val = OidOutputFunctionCall(foutoid, slot->tts_values[i]); + } - case HeapTupleMayBeUpdated: - break; + if (write_comma) + appendStringInfoString(&buf, ", "); + else + write_comma = true; - case HeapTupleUpdated: - if (IsXactIsoLevelSerializable) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent update"))); - else if (!ItemPointerEquals(tupleid, &update_ctid)) + /* truncate if needed */ + vallen = strlen(val); + if (vallen <= maxfieldlen) + appendStringInfoString(&buf, val); + else { - TupleTableSlot *epqslot; - - epqslot = EvalPlanQual(estate, - resultRelInfo->ri_RangeTableIndex, - &update_ctid, - update_xmax); - if (!TupIsNull(epqslot)) - { - *tupleid = update_ctid; - slot = ExecFilterJunk(estate->es_junkFilter, epqslot); - tuple = ExecMaterializeSlot(slot); - goto lreplace; - } + vallen = pg_mbcliplen(val, vallen, maxfieldlen); + appendBinaryStringInfo(&buf, val, vallen); + appendStringInfoString(&buf, "..."); } - /* tuple already deleted; nothing to do */ - return; - - default: - elog(ERROR, "unrecognized heap_update status: %u", result); - return; + } } - IncrReplaced(); - (estate->es_processed)++; + /* If we end up with zero columns being returned, then return NULL. */ + if (!any_perm) + return NULL; - /* - * Note: instead of having to update the old index tuples associated with - * the heap tuple, all we do is form and insert new index tuples. This is - * because UPDATEs are actually DELETEs and INSERTs, and index tuple - * deletion is done later by VACUUM (see notes in ExecDelete). All we do - * here is insert new index tuples. -cim 9/27/89 - */ + appendStringInfoChar(&buf, ')'); - /* - * insert index entries for tuple - * - * Note: heap_update returns the tid (location) of the new tuple in the - * t_self field. - * - * If it's a HOT update, we mustn't insert new index entries. 
- */ - if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple)) - ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false); + if (!table_perm) + { + appendStringInfoString(&collist, ") = "); + appendStringInfoString(&collist, buf.data); - /* AFTER ROW UPDATE Triggers */ - ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple); + return collist.data; + } - /* Process RETURNING if present */ - if (resultRelInfo->ri_projectReturning) - ExecProcessReturning(resultRelInfo->ri_projectReturning, - slot, planSlot, dest); + return buf.data; } + /* - * ExecRelCheck --- check that tuple meets constraints for result relation + * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a + * given ResultRelInfo */ -static const char * -ExecRelCheck(ResultRelInfo *resultRelInfo, - TupleTableSlot *slot, EState *estate) +LockTupleMode +ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo) { - Relation rel = resultRelInfo->ri_RelationDesc; - int ncheck = rel->rd_att->constr->num_check; - ConstrCheck *check = rel->rd_att->constr->check; - ExprContext *econtext; - MemoryContext oldContext; - List *qual; - int i; + Bitmapset *keyCols; + Bitmapset *updatedCols; /* - * If first time through for this result relation, build expression - * nodetrees for rel's constraint expressions. Keep them in the per-query - * memory context so they'll survive throughout the query. + * Compute lock mode to use. If columns that are part of the key have not + * been modified, then we can use a weaker lock, allowing for better + * concurrency. */ - if (resultRelInfo->ri_ConstraintExprs == NULL) - { - oldContext = MemoryContextSwitchTo(estate->es_query_cxt); - resultRelInfo->ri_ConstraintExprs = - (List **) palloc(ncheck * sizeof(List *)); - for (i = 0; i < ncheck; i++) - { - /* ExecQual wants implicit-AND form */ - qual = make_ands_implicit(stringToNode(check[i].ccbin)); - resultRelInfo->ri_ConstraintExprs[i] = (List *) - ExecPrepareExpr((Expr *) qual, estate); - } - MemoryContextSwitchTo(oldContext); - } + updatedCols = GetUpdatedColumns(relinfo, estate); + keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, + INDEX_ATTR_BITMAP_KEY); - /* - * We will use the EState's per-tuple context for evaluating constraint - * expressions (creating it if it's not already there). - */ - econtext = GetPerTupleExprContext(estate); + if (bms_overlap(keyCols, updatedCols)) + return LockTupleExclusive; - /* Arrange for econtext's scan tuple to be the tuple under test */ - econtext->ecxt_scantuple = slot; + return LockTupleNoKeyExclusive; +} - /* And evaluate the constraints */ - for (i = 0; i < ncheck; i++) +/* + * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index + * + * If no such struct, either return NULL or throw error depending on missing_ok + */ +ExecRowMark * +ExecFindRowMark(EState *estate, Index rti, bool missing_ok) +{ + ListCell *lc; + + foreach(lc, estate->es_rowMarks) { - qual = resultRelInfo->ri_ConstraintExprs[i]; + ExecRowMark *erm = (ExecRowMark *) lfirst(lc); - /* - * NOTE: SQL92 specifies that a NULL result from a constraint - * expression is not to be treated as a failure. Therefore, tell - * ExecQual to return TRUE for NULL. 
- */ - if (!ExecQual(qual, econtext, true)) - return check[i].ccname; + if (erm->rti == rti) + return erm; } - - /* NULL result means no error */ + if (!missing_ok) + elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti); return NULL; } -void -ExecConstraints(ResultRelInfo *resultRelInfo, - TupleTableSlot *slot, EState *estate) +/* + * ExecBuildAuxRowMark -- create an ExecAuxRowMark struct + * + * Inputs are the underlying ExecRowMark struct and the targetlist of the + * input plan node (not planstate node!). We need the latter to find out + * the column numbers of the resjunk columns. + */ +ExecAuxRowMark * +ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist) { - Relation rel = resultRelInfo->ri_RelationDesc; - TupleConstr *constr = rel->rd_att->constr; + ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark)); + char resname[32]; - Assert(constr); + aerm->rowmark = erm; - if (constr->has_not_null) + /* Look up the resjunk columns associated with this rowmark */ + if (erm->markType != ROW_MARK_COPY) { - int natts = rel->rd_att->natts; - int attrChk; - - for (attrChk = 1; attrChk <= natts; attrChk++) - { - if (rel->rd_att->attrs[attrChk - 1]->attnotnull && - slot_attisnull(slot, attrChk)) - ereport(ERROR, - (errcode(ERRCODE_NOT_NULL_VIOLATION), - errmsg("null value in column \"%s\" violates not-null constraint", - NameStr(rel->rd_att->attrs[attrChk - 1]->attname)))); - } + /* need ctid for all methods other than COPY */ + snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId); + aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist, + resname); + if (!AttributeNumberIsValid(aerm->ctidAttNo)) + elog(ERROR, "could not find junk %s column", resname); } - - if (constr->num_check > 0) + else { - const char *failed; + /* need wholerow if COPY */ + snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId); + aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist, + resname); + if (!AttributeNumberIsValid(aerm->wholeAttNo)) + elog(ERROR, "could not find junk %s column", resname); + } - if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL) - ereport(ERROR, - (errcode(ERRCODE_CHECK_VIOLATION), - errmsg("new row for relation \"%s\" violates check constraint \"%s\"", - RelationGetRelationName(rel), failed))); + /* if child rel, need tableoid */ + if (erm->rti != erm->prti) + { + snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId); + aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist, + resname); + if (!AttributeNumberIsValid(aerm->toidAttNo)) + elog(ERROR, "could not find junk %s column", resname); } + + return aerm; } + /* - * ExecProcessReturning --- evaluate a RETURNING list and send to dest + * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to + * process the updated version under READ COMMITTED rules. * - * projectReturning: RETURNING projection info for current result rel - * tupleSlot: slot holding tuple actually inserted/updated/deleted - * planSlot: slot holding tuple returned by top plan node - * dest: where to send the output + * See backend/executor/README for some info about how this works. */ -static void -ExecProcessReturning(ProjectionInfo *projectReturning, - TupleTableSlot *tupleSlot, - TupleTableSlot *planSlot, - DestReceiver *dest) -{ - ExprContext *econtext = projectReturning->pi_exprContext; - TupleTableSlot *retSlot; - - /* - * Reset per-tuple memory context to free any expression evaluation - * storage allocated in the previous cycle. 
- */ - ResetExprContext(econtext); - - /* Make tuple and any needed join variables available to ExecProject */ - econtext->ecxt_scantuple = tupleSlot; - econtext->ecxt_outertuple = planSlot; - - /* Compute the RETURNING expressions */ - retSlot = ExecProject(projectReturning, NULL); - - /* Send to dest */ - (*dest->receiveSlot) (retSlot, dest); - ExecClearTuple(retSlot); -} /* * Check a modified tuple to see if we want to process its updated version * under READ COMMITTED rules. * - * See backend/executor/README for some info about how this works. - * - * estate - executor state data + * estate - outer executor state data + * epqstate - state for EvalPlanQual rechecking + * relation - table containing tuple * rti - rangetable index of table containing tuple + * lockmode - requested tuple lock mode * *tid - t_ctid from the outdated tuple (ie, next updated version) * priorXmax - t_xmax from the outdated tuple * @@ -2294,68 +2131,132 @@ ExecProcessReturning(ProjectionInfo *projectReturning, * * Returns a slot containing the new candidate update/delete tuple, or * NULL if we determine we shouldn't process the row. + * + * Note: properly, lockmode should be declared as enum LockTupleMode, + * but we use "int" to avoid having to include heapam.h in executor.h. */ TupleTableSlot * -EvalPlanQual(EState *estate, Index rti, +EvalPlanQual(EState *estate, EPQState *epqstate, + Relation relation, Index rti, int lockmode, ItemPointer tid, TransactionId priorXmax) { - evalPlanQual *epq; - EState *epqstate; - Relation relation; - HeapTupleData tuple; - HeapTuple copyTuple = NULL; - SnapshotData SnapshotDirty; - bool endNode; + TupleTableSlot *slot; + HeapTuple copyTuple; - Assert(rti != 0); + Assert(rti > 0); /* - * find relation containing target tuple + * Get and lock the updated version of the row; if fail, return NULL. */ - if (estate->es_result_relation_info != NULL && - estate->es_result_relation_info->ri_RangeTableIndex == rti) - relation = estate->es_result_relation_info->ri_RelationDesc; - else - { - ListCell *l; + copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock, + tid, priorXmax); - relation = NULL; - foreach(l, estate->es_rowMarks) - { - ExecRowMark *erm = lfirst(l); + if (copyTuple == NULL) + return NULL; - if (erm->rti == rti) - { - relation = erm->relation; - break; - } - } - if (relation == NULL) - elog(ERROR, "could not find RowMark for RT index %u", rti); - } + /* + * For UPDATE/DELETE we have to return tid of actual row we're executing + * PQ for. + */ + *tid = copyTuple->t_self; /* - * fetch tid tuple - * - * Loop here to deal with updated or busy tuples + * Need to run a recheck subquery. Initialize or reinitialize EPQ state. */ - InitDirtySnapshot(SnapshotDirty); - tuple.t_self = *tid; - for (;;) - { - Buffer buffer; + EvalPlanQualBegin(epqstate, estate); - if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL)) - { - /* - * If xmin isn't what we're expecting, the slot must have been - * recycled and reused for an unrelated tuple. This implies that - * the latest version of the row was deleted, so we need do - * nothing. (Should be safe to examine xmin without getting - * buffer's content lock, since xmin never changes in an existing - * tuple.) 
- */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), + /* + * Free old test tuple, if any, and store new tuple where relation's scan + * node will see it + */ + EvalPlanQualSetTuple(epqstate, rti, copyTuple); + + /* + * Fetch any non-locked source rows + */ + EvalPlanQualFetchRowMarks(epqstate); + + /* + * Run the EPQ query. We assume it will return at most one tuple. + */ + slot = EvalPlanQualNext(epqstate); + + /* + * If we got a tuple, force the slot to materialize the tuple so that it + * is not dependent on any local state in the EPQ query (in particular, + * it's highly likely that the slot contains references to any pass-by-ref + * datums that may be present in copyTuple). As with the next step, this + * is to guard against early re-use of the EPQ query. + */ + if (!TupIsNull(slot)) + (void) ExecMaterializeSlot(slot); + + /* + * Clear out the test tuple. This is needed in case the EPQ query is + * re-used to test a tuple for a different relation. (Not clear that can + * really happen, but let's be safe.) + */ + EvalPlanQualSetTuple(epqstate, rti, NULL); + + return slot; +} + +/* + * Fetch a copy of the newest version of an outdated tuple + * + * estate - executor state data + * relation - table containing tuple + * lockmode - requested tuple lock mode + * wait_policy - requested lock wait policy + * *tid - t_ctid from the outdated tuple (ie, next updated version) + * priorXmax - t_xmax from the outdated tuple + * + * Returns a palloc'd copy of the newest tuple version, or NULL if we find + * that there is no newest version (ie, the row was deleted not updated). + * We also return NULL if the tuple is locked and the wait policy is to skip + * such tuples. + * + * If successful, we have locked the newest tuple version, so caller does not + * need to worry about it changing anymore. + * + * Note: properly, lockmode should be declared as enum LockTupleMode, + * but we use "int" to avoid having to include heapam.h in executor.h. + */ +HeapTuple +EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, + LockWaitPolicy wait_policy, + ItemPointer tid, TransactionId priorXmax) +{ + HeapTuple copyTuple = NULL; + HeapTupleData tuple; + SnapshotData SnapshotDirty; + + /* + * fetch target tuple + * + * Loop here to deal with updated or busy tuples + */ + InitDirtySnapshot(SnapshotDirty); + tuple.t_self = *tid; + for (;;) + { + Buffer buffer; + + if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL)) + { + HTSU_Result test; + HeapUpdateFailureData hufd; + + /* + * If xmin isn't what we're expecting, the slot must have been + * recycled and reused for an unrelated tuple. This implies that + * the latest version of the row was deleted, so we need do + * nothing. (Should be safe to examine xmin without getting + * buffer's content lock. We assume reading a TransactionId to be + * atomic, and Xmin never changes in an existing tuple, except to + * invalid or frozen, and neither of those can match priorXmax.) + */ + if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), priorXmax)) { ReleaseBuffer(buffer); @@ -2368,24 +2269,42 @@ EvalPlanQual(EState *estate, Index rti, /* * If tuple is being updated by other transaction then we have to - * wait for its commit/abort. + * wait for its commit/abort, or die trying. 
*/ if (TransactionIdIsValid(SnapshotDirty.xmax)) { ReleaseBuffer(buffer); - XactLockTableWait(SnapshotDirty.xmax); + switch (wait_policy) + { + case LockWaitBlock: + XactLockTableWait(SnapshotDirty.xmax, + relation, &tuple.t_self, + XLTW_FetchUpdated); + break; + case LockWaitSkip: + if (!ConditionalXactLockTableWait(SnapshotDirty.xmax)) + return NULL; /* skip instead of waiting */ + break; + case LockWaitError: + if (!ConditionalXactLockTableWait(SnapshotDirty.xmax)) + ereport(ERROR, + (errcode(ERRCODE_LOCK_NOT_AVAILABLE), + errmsg("could not obtain lock on row in relation \"%s\"", + RelationGetRelationName(relation)))); + break; + } continue; /* loop back to repeat heap_fetch */ } /* * If tuple was inserted by our own transaction, we have to check * cmin against es_output_cid: cmin >= current CID means our - * command cannot see the tuple, so we should ignore it. Without - * this we are open to the "Halloween problem" of indefinitely - * re-updating the same tuple. (We need not check cmax because - * HeapTupleSatisfiesDirty will consider a tuple deleted by our - * transaction dead, regardless of cmax.) We just checked that - * priorXmax == xmin, so we can test that variable instead of + * command cannot see the tuple, so we should ignore it. Otherwise + * heap_lock_tuple() will throw an error, and so would any later + * attempt to update or delete the tuple. (We need not check cmax + * because HeapTupleSatisfiesDirty will consider a tuple deleted + * by our transaction dead, regardless of cmax.) We just checked + * that priorXmax == xmin, so we can test that variable instead of * doing HeapTupleHeaderGetXmin again. */ if (TransactionIdIsCurrentTransactionId(priorXmax) && @@ -2395,6 +2314,75 @@ EvalPlanQual(EState *estate, Index rti, return NULL; } + /* + * This is a live tuple, so now try to lock it. + */ + test = heap_lock_tuple(relation, &tuple, + estate->es_output_cid, + lockmode, wait_policy, + false, &buffer, &hufd); + /* We now have two pins on the buffer, get rid of one */ + ReleaseBuffer(buffer); + + switch (test) + { + case HeapTupleSelfUpdated: + + /* + * The target tuple was already updated or deleted by the + * current command, or by a later command in the current + * transaction. We *must* ignore the tuple in the former + * case, so as to avoid the "Halloween problem" of + * repeated update attempts. In the latter case it might + * be sensible to fetch the updated tuple instead, but + * doing so would require changing heap_update and + * heap_delete to not complain about updating "invisible" + * tuples, which seems pretty scary (heap_lock_tuple will + * not complain, but few callers expect + * HeapTupleInvisible, and we're not one of them). So for + * now, treat the tuple as deleted and do not process. 
+ */ + ReleaseBuffer(buffer); + return NULL; + + case HeapTupleMayBeUpdated: + /* successfully locked */ + break; + + case HeapTupleUpdated: + ReleaseBuffer(buffer); + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + + /* Should not encounter speculative tuple on recheck */ + Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data)); + if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self)) + { + /* it was updated, so look at the updated version */ + tuple.t_self = hufd.ctid; + /* updated row should have xmin matching this xmax */ + priorXmax = hufd.xmax; + continue; + } + /* tuple was deleted, so give up */ + return NULL; + + case HeapTupleWouldBlock: + ReleaseBuffer(buffer); + return NULL; + + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + + default: + ReleaseBuffer(buffer); + elog(ERROR, "unrecognized heap_lock_tuple status: %u", + test); + return NULL; /* keep compiler quiet */ + } + /* * We got tuple - now copy it for use by recheck query. */ @@ -2430,7 +2418,7 @@ EvalPlanQual(EState *estate, Index rti, * mean that the row was updated or deleted by either a committed xact * or our own xact. If it was deleted, we can ignore it; if it was * updated then chain up to the next version and repeat the whole - * test. + * process. * * As above, it should be safe to examine xmax and t_ctid without the * buffer content lock, because they can't be changing. @@ -2445,336 +2433,461 @@ EvalPlanQual(EState *estate, Index rti, /* updated, so look at the updated row */ tuple.t_self = tuple.t_data->t_ctid; /* updated row should have xmin matching this xmax */ - priorXmax = HeapTupleHeaderGetXmax(tuple.t_data); + priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data); ReleaseBuffer(buffer); /* loop back to fetch next in chain */ } /* - * For UPDATE/DELETE we have to return tid of actual row we're executing - * PQ for. + * Return the copied tuple */ - *tid = tuple.t_self; + return copyTuple; +} - /* - * Need to run a recheck subquery. Find or create a PQ stack entry. - */ - epq = estate->es_evalPlanQual; - endNode = true; +/* + * EvalPlanQualInit -- initialize during creation of a plan state node + * that might need to invoke EPQ processing. + * + * Note: subplan/auxrowmarks can be NULL/NIL if they will be set later + * with EvalPlanQualSetPlan. + */ +void +EvalPlanQualInit(EPQState *epqstate, EState *estate, + Plan *subplan, List *auxrowmarks, int epqParam) +{ + /* Mark the EPQ state inactive */ + epqstate->estate = NULL; + epqstate->planstate = NULL; + epqstate->origslot = NULL; + /* ... and remember data that EvalPlanQualBegin will need */ + epqstate->plan = subplan; + epqstate->arowMarks = auxrowmarks; + epqstate->epqParam = epqParam; +} - if (epq != NULL && epq->rti == 0) - { - /* Top PQ stack entry is idle, so re-use it */ - Assert(!(estate->es_useEvalPlan) && epq->next == NULL); - epq->rti = rti; - endNode = false; - } +/* + * EvalPlanQualSetPlan -- set or change subplan of an EPQState. + * + * We need this so that ModifyTable can deal with multiple subplans. 
+ */ +void +EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks) +{ + /* If we have a live EPQ query, shut it down */ + EvalPlanQualEnd(epqstate); + /* And set/change the plan pointer */ + epqstate->plan = subplan; + /* The rowmarks depend on the plan, too */ + epqstate->arowMarks = auxrowmarks; +} - /* - * If this is request for another RTE - Ra, - then we have to check wasn't - * PlanQual requested for Ra already and if so then Ra' row was updated - * again and we have to re-start old execution for Ra and forget all what - * we done after Ra was suspended. Cool? -:)) - */ - if (epq != NULL && epq->rti != rti && - epq->estate->es_evTuple[rti - 1] != NULL) - { - do - { - evalPlanQual *oldepq; - - /* stop execution */ - EvalPlanQualStop(epq); - /* pop previous PlanQual from the stack */ - oldepq = epq->next; - Assert(oldepq && oldepq->rti != 0); - /* push current PQ to freePQ stack */ - oldepq->free = epq; - epq = oldepq; - estate->es_evalPlanQual = epq; - } while (epq->rti != rti); - } +/* + * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL + * + * NB: passed tuple must be palloc'd; it may get freed later + */ +void +EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple) +{ + EState *estate = epqstate->estate; + + Assert(rti > 0); /* - * If we are requested for another RTE then we have to suspend execution - * of current PlanQual and start execution for new one. + * free old test tuple, if any, and store new tuple where relation's scan + * node will see it */ - if (epq == NULL || epq->rti != rti) + if (estate->es_epqTuple[rti - 1] != NULL) + heap_freetuple(estate->es_epqTuple[rti - 1]); + estate->es_epqTuple[rti - 1] = tuple; + estate->es_epqTupleSet[rti - 1] = true; +} + +/* + * Fetch back the current test tuple (if any) for the specified RTI + */ +HeapTuple +EvalPlanQualGetTuple(EPQState *epqstate, Index rti) +{ + EState *estate = epqstate->estate; + + Assert(rti > 0); + + return estate->es_epqTuple[rti - 1]; +} + +/* + * Fetch the current row values for any non-locked relations that need + * to be scanned by an EvalPlanQual operation. origslot must have been set + * to contain the current result row (top-level row) that we need to recheck. + */ +void +EvalPlanQualFetchRowMarks(EPQState *epqstate) +{ + ListCell *l; + + Assert(epqstate->origslot != NULL); + + foreach(l, epqstate->arowMarks) { - /* try to reuse plan used previously */ - evalPlanQual *newepq = (epq != NULL) ? 
epq->free : NULL; + ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(l); + ExecRowMark *erm = aerm->rowmark; + Datum datum; + bool isNull; + HeapTupleData tuple; + + if (RowMarkRequiresRowShareLock(erm->markType)) + elog(ERROR, "EvalPlanQual doesn't support locking rowmarks"); + + /* clear any leftover test tuple for this rel */ + EvalPlanQualSetTuple(epqstate, erm->rti, NULL); - if (newepq == NULL) /* first call or freePQ stack is empty */ + /* if child rel, must check whether it produced this row */ + if (erm->rti != erm->prti) { - newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual)); - newepq->free = NULL; - newepq->estate = NULL; - newepq->planstate = NULL; + Oid tableoid; + + datum = ExecGetJunkAttribute(epqstate->origslot, + aerm->toidAttNo, + &isNull); + /* non-locked rels could be on the inside of outer joins */ + if (isNull) + continue; + tableoid = DatumGetObjectId(datum); + + Assert(OidIsValid(erm->relid)); + if (tableoid != erm->relid) + { + /* this child is inactive right now */ + continue; + } } - else + + if (erm->markType == ROW_MARK_REFERENCE) { - /* recycle previously used PlanQual */ - Assert(newepq->estate == NULL); - epq->free = NULL; - } - /* push current PQ to the stack */ - newepq->next = epq; - epq = newepq; - estate->es_evalPlanQual = epq; - epq->rti = rti; - endNode = false; - } + HeapTuple copyTuple; - Assert(epq->rti == rti); + Assert(erm->relation != NULL); - /* - * Ok - we're requested for the same RTE. Unfortunately we still have to - * end and restart execution of the plan, because ExecReScan wouldn't - * ensure that upper plan nodes would reset themselves. We could make - * that work if insertion of the target tuple were integrated with the - * Param mechanism somehow, so that the upper plan nodes know that their - * children's outputs have changed. - * - * Note that the stack of free evalPlanQual nodes is quite useless at the - * moment, since it only saves us from pallocing/releasing the - * evalPlanQual nodes themselves. But it will be useful once we implement - * ReScan instead of end/restart for re-using PlanQual nodes. - */ - if (endNode) - { - /* stop execution */ - EvalPlanQualStop(epq); - } + /* fetch the tuple's ctid */ + datum = ExecGetJunkAttribute(epqstate->origslot, + aerm->ctidAttNo, + &isNull); + /* non-locked rels could be on the inside of outer joins */ + if (isNull) + continue; - /* - * Initialize new recheck query. - * - * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to - * instead copy down changeable state from the top plan (including - * es_result_relation_info, es_junkFilter) and reset locally changeable - * state in the epq (including es_param_exec_vals, es_evTupleNull). 
- */ - EvalPlanQualStart(epq, estate, epq->next); + /* fetch requests on foreign tables must be passed to their FDW */ + if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + FdwRoutine *fdwroutine; + bool updated = false; - /* - * free old RTE' tuple, if any, and store target tuple where relation's - * scan node will see it - */ - epqstate = epq->estate; - if (epqstate->es_evTuple[rti - 1] != NULL) - heap_freetuple(epqstate->es_evTuple[rti - 1]); - epqstate->es_evTuple[rti - 1] = copyTuple; + fdwroutine = GetFdwRoutineForRelation(erm->relation, false); + /* this should have been checked already, but let's be safe */ + if (fdwroutine->RefetchForeignRow == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot lock rows in foreign table \"%s\"", + RelationGetRelationName(erm->relation)))); + copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate, + erm, + datum, + &updated); + if (copyTuple == NULL) + elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); - return EvalPlanQualNext(estate); + /* + * Ideally we'd insist on updated == false here, but that + * assumes that FDWs can track that exactly, which they might + * not be able to. So just ignore the flag. + */ + } + else + { + /* ordinary table, fetch the tuple */ + Buffer buffer; + + tuple.t_self = *((ItemPointer) DatumGetPointer(datum)); + if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer, + false, NULL)) + elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); + + /* successful, copy tuple */ + copyTuple = heap_copytuple(&tuple); + ReleaseBuffer(buffer); + } + + /* store tuple */ + EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple); + } + else + { + HeapTupleHeader td; + + Assert(erm->markType == ROW_MARK_COPY); + + /* fetch the whole-row Var for the relation */ + datum = ExecGetJunkAttribute(epqstate->origslot, + aerm->wholeAttNo, + &isNull); + /* non-locked rels could be on the inside of outer joins */ + if (isNull) + continue; + td = DatumGetHeapTupleHeader(datum); + + /* build a temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(td); + tuple.t_data = td; + /* relation might be a foreign table, if so provide tableoid */ + tuple.t_tableOid = erm->relid; + /* also copy t_ctid in case there's valid data there */ + tuple.t_self = td->t_ctid; + + /* copy and store tuple */ + EvalPlanQualSetTuple(epqstate, erm->rti, + heap_copytuple(&tuple)); + } + } } -static TupleTableSlot * -EvalPlanQualNext(EState *estate) +/* + * Fetch the next row (if any) from EvalPlanQual testing + * + * (In practice, there should never be more than one row...) + */ +TupleTableSlot * +EvalPlanQualNext(EPQState *epqstate) { - evalPlanQual *epq = estate->es_evalPlanQual; MemoryContext oldcontext; TupleTableSlot *slot; - Assert(epq->rti != 0); - -lpqnext:; - oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt); - slot = ExecProcNode(epq->planstate); + oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt); + slot = ExecProcNode(epqstate->planstate); MemoryContextSwitchTo(oldcontext); - /* - * No more tuples for this PQ. Continue previous one. 
- */ - if (TupIsNull(slot)) - { - evalPlanQual *oldepq; - - /* stop execution */ - EvalPlanQualStop(epq); - /* pop old PQ from the stack */ - oldepq = epq->next; - if (oldepq == NULL) - { - /* this is the first (oldest) PQ - mark as free */ - epq->rti = 0; - estate->es_useEvalPlan = false; - /* and continue Query execution */ - return NULL; - } - Assert(oldepq->rti != 0); - /* push current PQ to freePQ stack */ - oldepq->free = epq; - epq = oldepq; - estate->es_evalPlanQual = epq; - goto lpqnext; - } - return slot; } -static void -EndEvalPlanQual(EState *estate) +/* + * Initialize or reset an EvalPlanQual state tree + */ +void +EvalPlanQualBegin(EPQState *epqstate, EState *parentestate) { - evalPlanQual *epq = estate->es_evalPlanQual; + EState *estate = epqstate->estate; - if (epq->rti == 0) /* plans already shutdowned */ + if (estate == NULL) { - Assert(epq->next == NULL); - return; + /* First time through, so create a child EState */ + EvalPlanQualStart(epqstate, parentestate, epqstate->plan); } - - for (;;) + else { - evalPlanQual *oldepq; + /* + * We already have a suitable child EPQ tree, so just reset it. + */ + int rtsize = list_length(parentestate->es_range_table); + PlanState *planstate = epqstate->planstate; + + MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool)); - /* stop execution */ - EvalPlanQualStop(epq); - /* pop old PQ from the stack */ - oldepq = epq->next; - if (oldepq == NULL) + /* Recopy current values of parent parameters */ + if (parentestate->es_plannedstmt->nParamExec > 0) { - /* this is the first (oldest) PQ - mark as free */ - epq->rti = 0; - estate->es_useEvalPlan = false; - break; + int i = parentestate->es_plannedstmt->nParamExec; + + while (--i >= 0) + { + /* copy value if any, but not execPlan link */ + estate->es_param_exec_vals[i].value = + parentestate->es_param_exec_vals[i].value; + estate->es_param_exec_vals[i].isnull = + parentestate->es_param_exec_vals[i].isnull; + } } - Assert(oldepq->rti != 0); - /* push current PQ to freePQ stack */ - oldepq->free = epq; - epq = oldepq; - estate->es_evalPlanQual = epq; + + /* + * Mark child plan tree as needing rescan at all scan nodes. The + * first ExecProcNode will take care of actually doing the rescan. + */ + planstate->chgParam = bms_add_member(planstate->chgParam, + epqstate->epqParam); } } /* - * Start execution of one level of PlanQual. + * Start execution of an EvalPlanQual plan tree. * * This is a cut-down version of ExecutorStart(): we copy some state from * the top-level estate rather than initializing it fresh. */ static void -EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq) +EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree) { - EState *epqstate; + EState *estate; int rtsize; MemoryContext oldcontext; ListCell *l; - rtsize = list_length(estate->es_range_table); + rtsize = list_length(parentestate->es_range_table); - epq->estate = epqstate = CreateExecutorState(); + epqstate->estate = estate = CreateExecutorState(); - oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt); + oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); /* - * The epqstates share the top query's copy of unchanging state such as + * Child EPQ EStates share the parent's copy of unchanging state such as * the snapshot, rangetable, result-rel info, and external Param info. * They need their own copies of local state, including a tuple table, * es_param_exec_vals, etc. 
 
 /*
- * Start execution of one level of PlanQual.
+ * Start execution of an EvalPlanQual plan tree.
  *
  * This is a cut-down version of ExecutorStart(): we copy some state from
  * the top-level estate rather than initializing it fresh.
  */
 static void
-EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
+EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
 {
-	EState	   *epqstate;
+	EState	   *estate;
 	int			rtsize;
 	MemoryContext oldcontext;
 	ListCell   *l;
 
-	rtsize = list_length(estate->es_range_table);
+	rtsize = list_length(parentestate->es_range_table);
 
-	epq->estate = epqstate = CreateExecutorState();
+	epqstate->estate = estate = CreateExecutorState();
 
-	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
+	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 
 	/*
-	 * The epqstates share the top query's copy of unchanging state such as
+	 * Child EPQ EStates share the parent's copy of unchanging state such as
 	 * the snapshot, rangetable, result-rel info, and external Param info.
 	 * They need their own copies of local state, including a tuple table,
 	 * es_param_exec_vals, etc.
-	 */
-	epqstate->es_direction = ForwardScanDirection;
-	epqstate->es_snapshot = estate->es_snapshot;
-	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
-	epqstate->es_range_table = estate->es_range_table;
-	epqstate->es_output_cid = estate->es_output_cid;
-	epqstate->es_result_relations = estate->es_result_relations;
-	epqstate->es_num_result_relations = estate->es_num_result_relations;
-	epqstate->es_result_relation_info = estate->es_result_relation_info;
-	epqstate->es_junkFilter = estate->es_junkFilter;
+	 *
+	 * The ResultRelInfo array management is trickier than it looks.  We
+	 * create a fresh array for the child but copy all the content from the
+	 * parent.  This is because it's okay for the child to share any
+	 * per-relation state the parent has already created --- but if the child
+	 * sets up any ResultRelInfo fields, such as its own junkfilter, that
+	 * state must *not* propagate back to the parent.  (For one thing, the
+	 * pointed-to data is in a memory context that won't last long enough.)
+	 */
+	estate->es_direction = ForwardScanDirection;
+	estate->es_snapshot = parentestate->es_snapshot;
+	estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
+	estate->es_range_table = parentestate->es_range_table;
+	estate->es_plannedstmt = parentestate->es_plannedstmt;
+	estate->es_junkFilter = parentestate->es_junkFilter;
+	estate->es_output_cid = parentestate->es_output_cid;
+	if (parentestate->es_num_result_relations > 0)
+	{
+		int			numResultRelations = parentestate->es_num_result_relations;
+		ResultRelInfo *resultRelInfos;
+
+		resultRelInfos = (ResultRelInfo *)
+			palloc(numResultRelations * sizeof(ResultRelInfo));
+		memcpy(resultRelInfos, parentestate->es_result_relations,
+			   numResultRelations * sizeof(ResultRelInfo));
+		estate->es_result_relations = resultRelInfos;
+		estate->es_num_result_relations = numResultRelations;
+	}
+	/* es_result_relation_info must NOT be copied */
 	/* es_trig_target_relations must NOT be copied */
-	epqstate->es_param_list_info = estate->es_param_list_info;
-	if (estate->es_plannedstmt->nParamExec > 0)
-		epqstate->es_param_exec_vals = (ParamExecData *)
-			palloc0(estate->es_plannedstmt->nParamExec * sizeof(ParamExecData));
-	epqstate->es_rowMarks = estate->es_rowMarks;
-	epqstate->es_instrument = estate->es_instrument;
-	epqstate->es_select_into = estate->es_select_into;
-	epqstate->es_into_oids = estate->es_into_oids;
-	epqstate->es_plannedstmt = estate->es_plannedstmt;
-
-	/*
-	 * Each epqstate must have its own es_evTupleNull state, but all the stack
-	 * entries share es_evTuple state.  This allows sub-rechecks to inherit
-	 * the value being examined by an outer recheck.
-	 */
-	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
-	if (priorepq == NULL)
-		/* first PQ stack entry */
-		epqstate->es_evTuple = (HeapTuple *)
-			palloc0(rtsize * sizeof(HeapTuple));
+	estate->es_rowMarks = parentestate->es_rowMarks;
+	estate->es_top_eflags = parentestate->es_top_eflags;
+	estate->es_instrument = parentestate->es_instrument;
+	/* es_auxmodifytables must NOT be copied */
+
+	/*
+	 * The external param list is simply shared from parent.  The internal
+	 * param workspace has to be local state, but we copy the initial values
+	 * from the parent, so as to have access to any param values that were
+	 * already set from other parts of the parent's plan tree.
+	 */
+	estate->es_param_list_info = parentestate->es_param_list_info;
+	if (parentestate->es_plannedstmt->nParamExec > 0)
+	{
+		int			i = parentestate->es_plannedstmt->nParamExec;
+
+		estate->es_param_exec_vals = (ParamExecData *)
+			palloc0(i * sizeof(ParamExecData));
+		while (--i >= 0)
+		{
+			/* copy value if any, but not execPlan link */
+			estate->es_param_exec_vals[i].value =
+				parentestate->es_param_exec_vals[i].value;
+			estate->es_param_exec_vals[i].isnull =
+				parentestate->es_param_exec_vals[i].isnull;
+		}
+	}
+
+	/*
+	 * Each EState must have its own es_epqScanDone state, but if we have
+	 * nested EPQ checks they should share es_epqTuple arrays.  This allows
+	 * sub-rechecks to inherit the values being examined by an outer recheck.
+	 */
+	estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
+	if (parentestate->es_epqTuple != NULL)
+	{
+		estate->es_epqTuple = parentestate->es_epqTuple;
+		estate->es_epqTupleSet = parentestate->es_epqTupleSet;
+	}
 	else
-		/* later stack entries share the same storage */
-		epqstate->es_evTuple = priorepq->estate->es_evTuple;
+	{
+		estate->es_epqTuple = (HeapTuple *)
+			palloc0(rtsize * sizeof(HeapTuple));
+		estate->es_epqTupleSet = (bool *)
+			palloc0(rtsize * sizeof(bool));
+	}
 
 	/*
-	 * Create sub-tuple-table; we needn't redo the CountSlots work though.
+	 * Each estate also has its own tuple table.
 	 */
-	epqstate->es_tupleTable =
-		ExecCreateTupleTable(estate->es_tupleTable->size);
+	estate->es_tupleTable = NIL;
 
 	/*
 	 * Initialize private state information for each SubPlan.  We must do this
 	 * before running ExecInitNode on the main query tree, since
-	 * ExecInitSubPlan expects to be able to find these entries.
+	 * ExecInitSubPlan expects to be able to find these entries.  Some of the
+	 * SubPlans might not be used in the part of the plan tree we intend to
+	 * run, but since it's not easy to tell which, we just initialize them
+	 * all.
 	 */
-	Assert(epqstate->es_subplanstates == NIL);
-	foreach(l, estate->es_plannedstmt->subplans)
+	Assert(estate->es_subplanstates == NIL);
+	foreach(l, parentestate->es_plannedstmt->subplans)
 	{
 		Plan	   *subplan = (Plan *) lfirst(l);
 		PlanState  *subplanstate;
 
-		subplanstate = ExecInitNode(subplan, epqstate, 0);
-
-		epqstate->es_subplanstates = lappend(epqstate->es_subplanstates,
-											 subplanstate);
+		subplanstate = ExecInitNode(subplan, estate, 0);
+		estate->es_subplanstates = lappend(estate->es_subplanstates,
+										   subplanstate);
 	}
 
 	/*
-	 * Initialize the private state information for all the nodes in the query
-	 * tree.  This opens files, allocates storage and leaves us ready to start
-	 * processing tuples.
+	 * Initialize the private state information for all the nodes in the part
+	 * of the plan tree we need to run.  This opens files, allocates storage
+	 * and leaves us ready to start processing tuples.
 	 */
-	epq->planstate = ExecInitNode(estate->es_plannedstmt->planTree, epqstate, 0);
+	epqstate->planstate = ExecInitNode(planTree, estate, 0);
 
 	MemoryContextSwitchTo(oldcontext);
 }
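The es_epqTuple / es_epqTupleSet / es_epqScanDone arrays wired up here are consumed on the scan side. An abridged version of the corresponding logic in ExecScanFetch (execScan.c) — trimmed of the access-method recheck callback, so a sketch rather than the literal function — shows how a scan node returns the stored test tuple exactly once during an EPQ recheck:

    if (estate->es_epqTuple != NULL)
    {
        /* inside an EPQ recheck: return the test tuple, at most once */
        Index       scanrelid = ((Scan *) node->ps.plan)->scanrelid;

        if (estate->es_epqTupleSet[scanrelid - 1])
        {
            TupleTableSlot *slot = node->ss_ScanTupleSlot;

            if (estate->es_epqScanDone[scanrelid - 1])
                return ExecClearTuple(slot);    /* already returned it */
            estate->es_epqScanDone[scanrelid - 1] = true;

            if (estate->es_epqTuple[scanrelid - 1] == NULL)
                return ExecClearTuple(slot);    /* no test tuple stored */

            /* store the test tuple in the node's scan slot and return it */
            ExecStoreTuple(estate->es_epqTuple[scanrelid - 1],
                           slot, InvalidBuffer, false);
            return slot;
        }
    }
    /* otherwise fall through to the node's normal access method */

This is why EvalPlanQualBegin only has to zero es_epqScanDone and poke chgParam: the scan nodes themselves decide, per range-table entry, whether to read from the heap or from the stashed test tuples.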
 
 /*
- * End execution of one level of PlanQual.
+ * EvalPlanQualEnd -- shut down at termination of parent plan state node,
+ * or if we are done with the current EPQ child.
  *
  * This is a cut-down version of ExecutorEnd(); basically we want to do most
  * of the normal cleanup, but *not* close result relations (which we are
- * just sharing from the outer query). We do, however, have to close any
+ * just sharing from the outer query).  We do, however, have to close any
  * trigger target relations that got opened, since those are not shared.
+ * (There probably shouldn't be any of the latter, but just in case...)
  */
-static void
-EvalPlanQualStop(evalPlanQual *epq)
+void
+EvalPlanQualEnd(EPQState *epqstate)
 {
-	EState	   *epqstate = epq->estate;
+	EState	   *estate = epqstate->estate;
 	MemoryContext oldcontext;
 	ListCell   *l;
 
-	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
+	if (estate == NULL)
+		return;					/* idle, so nothing to do */
+
+	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 
-	ExecEndNode(epq->planstate);
+	ExecEndNode(epqstate->planstate);
 
-	foreach(l, epqstate->es_subplanstates)
+	foreach(l, estate->es_subplanstates)
 	{
 		PlanState  *subplanstate = (PlanState *) lfirst(l);
 
 		ExecEndNode(subplanstate);
 	}
 
-	ExecDropTupleTable(epqstate->es_tupleTable, true);
-	epqstate->es_tupleTable = NULL;
-
-	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
-	{
-		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
-		epqstate->es_evTuple[epq->rti - 1] = NULL;
-	}
+	/* throw away the per-estate tuple table */
+	ExecResetTupleTable(estate->es_tupleTable, false);
 
-	foreach(l, epqstate->es_trig_target_relations)
+	/* close any trigger target relations attached to this EState */
+	foreach(l, estate->es_trig_target_relations)
 	{
 		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);
 
@@ -2785,300 +2898,10 @@ EvalPlanQualStop(evalPlanQual *epq)
 
 	MemoryContextSwitchTo(oldcontext);
 
-	FreeExecutorState(epqstate);
-
-	epq->estate = NULL;
-	epq->planstate = NULL;
-}
-
-/*
- * ExecGetActivePlanTree --- get the active PlanState tree from a QueryDesc
- *
- * Ordinarily this is just the one mentioned in the QueryDesc, but if we
- * are looking at a row returned by the EvalPlanQual machinery, we need
- * to look at the subsidiary state instead.
- */
-PlanState *
-ExecGetActivePlanTree(QueryDesc *queryDesc)
-{
-	EState	   *estate = queryDesc->estate;
-
-	if (estate && estate->es_useEvalPlan && estate->es_evalPlanQual != NULL)
-		return estate->es_evalPlanQual->planstate;
-	else
-		return queryDesc->planstate;
-}
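On the shutdown side, each node that owns an EPQState simply pairs its ExecEnd routine with EvalPlanQualEnd; thanks to the estate == NULL test above, the call is safe even when no recheck ever started. A sketch modeled on ExecEndModifyTable and ExecEndLockRows, with MyNodeState again an illustrative stand-in:

    static void
    my_node_end(MyNodeState *mystate)
    {
        /* release the EPQ child EState and planstate, if any were created */
        EvalPlanQualEnd(&mystate->epqstate);

        /* ... then the node's ordinary cleanup ... */
    }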
-
-
-/*
- * Support for SELECT INTO (a/k/a CREATE TABLE AS)
- *
- * We implement SELECT INTO by diverting SELECT's normal output with
- * a specialized DestReceiver type.
- */
-
-typedef struct
-{
-	DestReceiver pub;			/* publicly-known function pointers */
-	EState	   *estate;			/* EState we are working with */
-	Relation	rel;			/* Relation to write to */
-	int			hi_options;		/* heap_insert performance options */
-	BulkInsertState bistate;	/* bulk insert state */
-} DR_intorel;
-
-/*
- * OpenIntoRel --- actually create the SELECT INTO target relation
- *
- * This also replaces QueryDesc->dest with the special DestReceiver for
- * SELECT INTO.  We assume that the correct result tuple type has already
- * been placed in queryDesc->tupDesc.
- */
-static void
-OpenIntoRel(QueryDesc *queryDesc)
-{
-	IntoClause *into = queryDesc->plannedstmt->intoClause;
-	EState	   *estate = queryDesc->estate;
-	Relation	intoRelationDesc;
-	char	   *intoName;
-	Oid			namespaceId;
-	Oid			tablespaceId;
-	Datum		reloptions;
-	AclResult	aclresult;
-	Oid			intoRelationId;
-	TupleDesc	tupdesc;
-	DR_intorel *myState;
-	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
-
-	Assert(into);
-
-	/*
-	 * Check consistency of arguments
-	 */
-	if (into->onCommit != ONCOMMIT_NOOP && !into->rel->istemp)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
-				 errmsg("ON COMMIT can only be used on temporary tables")));
-
-	/*
-	 * Find namespace to create in, check its permissions
-	 */
-	intoName = into->rel->relname;
-	namespaceId = RangeVarGetCreationNamespace(into->rel);
-
-	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
-									  ACL_CREATE);
-	if (aclresult != ACLCHECK_OK)
-		aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
-					   get_namespace_name(namespaceId));
-
-	/*
-	 * Select tablespace to use.  If not specified, use default tablespace
-	 * (which may in turn default to database's default).
-	 */
-	if (into->tableSpaceName)
-	{
-		tablespaceId = get_tablespace_oid(into->tableSpaceName);
-		if (!OidIsValid(tablespaceId))
-			ereport(ERROR,
-					(errcode(ERRCODE_UNDEFINED_OBJECT),
-					 errmsg("tablespace \"%s\" does not exist",
-							into->tableSpaceName)));
-	}
-	else
-	{
-		tablespaceId = GetDefaultTablespace(into->rel->istemp);
-		/* note InvalidOid is OK in this case */
-	}
-
-	/* Check permissions except when using the database's default space */
-	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
-	{
-		AclResult	aclresult;
-
-		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
-										   ACL_CREATE);
-
-		if (aclresult != ACLCHECK_OK)
-			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
-						   get_tablespace_name(tablespaceId));
-	}
-
-	/* Parse and validate any reloptions */
-	reloptions = transformRelOptions((Datum) 0,
-									 into->options,
-									 NULL,
-									 validnsps,
-									 true,
-									 false);
-	(void) heap_reloptions(RELKIND_RELATION, reloptions, true);
-
-	/* Copy the tupdesc because heap_create_with_catalog modifies it */
-	tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);
-
-	/* Now we can actually create the new relation */
-	intoRelationId = heap_create_with_catalog(intoName,
-											  namespaceId,
-											  tablespaceId,
-											  InvalidOid,
-											  GetUserId(),
-											  tupdesc,
-											  NIL,
-											  RELKIND_RELATION,
-											  false,
-											  true,
-											  0,
-											  into->onCommit,
-											  reloptions,
-											  allowSystemTableMods);
-
-	FreeTupleDesc(tupdesc);
-
-	/*
-	 * Advance command counter so that the newly-created relation's catalog
-	 * tuples will be visible to heap_open.
-	 */
-	CommandCounterIncrement();
-
-	/*
-	 * If necessary, create a TOAST table for the INTO relation.  Note that
-	 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
-	 * the TOAST table will be visible for insertion.
-	 */
-	reloptions = transformRelOptions((Datum) 0,
-									 into->options,
-									 "toast",
-									 validnsps,
-									 true,
-									 false);
-
-	(void) heap_reloptions(RELKIND_TOASTVALUE, reloptions, true);
-
-	AlterTableCreateToastTable(intoRelationId, reloptions);
-
-	/*
-	 * And open the constructed table for writing.
-	 */
-	intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
-
-	/*
-	 * Now replace the query's DestReceiver with one for SELECT INTO
-	 */
-	queryDesc->dest = CreateDestReceiver(DestIntoRel);
-	myState = (DR_intorel *) queryDesc->dest;
-	Assert(myState->pub.mydest == DestIntoRel);
-	myState->estate = estate;
-	myState->rel = intoRelationDesc;
-
-	/*
-	 * We can skip WAL-logging the insertions, unless PITR is in use.  We
-	 * can skip the FSM in any case.
-	 */
-	myState->hi_options = HEAP_INSERT_SKIP_FSM |
-		(XLogArchivingActive() ? 0 : HEAP_INSERT_SKIP_WAL);
-	myState->bistate = GetBulkInsertState();
-
-	/* Not using WAL requires rd_targblock be initially invalid */
-	Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
-}
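The tail of the removed OpenIntoRel shows the same bulk-load recipe COPY uses: skip the FSM, skip WAL when no archiving is configured, and fsync the heap before commit instead. Reduced to a hypothetical helper (the function name is invented; it assumes, as here, that the relation was created in the current transaction):

    static void
    bulk_load(Relation rel, CommandId cid, HeapTuple *tuples, int ntuples)
    {
        BulkInsertState bistate = GetBulkInsertState();
        int         hi_options = HEAP_INSERT_SKIP_FSM |
            (XLogArchivingActive() ? 0 : HEAP_INSERT_SKIP_WAL);
        int         i;

        for (i = 0; i < ntuples; i++)
            heap_insert(rel, tuples[i], cid, hi_options, bistate);

        FreeBulkInsertState(bistate);

        /* skipped WAL means the data must hit disk before commit */
        if (hi_options & HEAP_INSERT_SKIP_WAL)
            heap_sync(rel);
    }

Skipping WAL is only safe because a crash before commit leaves the whole new relation invisible; the heap_sync at the end (mirrored in CloseIntoRel below) guarantees the pages are durable once the creating transaction commits.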
-
-/*
- * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
- */
-static void
-CloseIntoRel(QueryDesc *queryDesc)
-{
-	DR_intorel *myState = (DR_intorel *) queryDesc->dest;
-
-	/* OpenIntoRel might never have gotten called */
-	if (myState && myState->pub.mydest == DestIntoRel && myState->rel)
-	{
-		FreeBulkInsertState(myState->bistate);
-
-		/* If we skipped using WAL, must heap_sync before commit */
-		if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
-			heap_sync(myState->rel);
-
-		/* close rel, but keep lock until commit */
-		heap_close(myState->rel, NoLock);
-
-		myState->rel = NULL;
-	}
-}
-
-/*
- * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
- */
-DestReceiver *
-CreateIntoRelDestReceiver(void)
-{
-	DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
-
-	self->pub.receiveSlot = intorel_receive;
-	self->pub.rStartup = intorel_startup;
-	self->pub.rShutdown = intorel_shutdown;
-	self->pub.rDestroy = intorel_destroy;
-	self->pub.mydest = DestIntoRel;
-
-	/* private fields will be set by OpenIntoRel */
-
-	return (DestReceiver *) self;
-}
-
-/*
- * intorel_startup --- executor startup
- */
-static void
-intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
-{
-	/* no-op */
-}
-
-/*
- * intorel_receive --- receive one tuple
- */
-static void
-intorel_receive(TupleTableSlot *slot, DestReceiver *self)
-{
-	DR_intorel *myState = (DR_intorel *) self;
-	HeapTuple	tuple;
-
-	/*
-	 * get the heap tuple out of the tuple table slot, making sure we have a
-	 * writable copy
-	 */
-	tuple = ExecMaterializeSlot(slot);
-
-	/*
-	 * force assignment of new OID (see comments in ExecInsert)
-	 */
-	if (myState->rel->rd_rel->relhasoids)
-		HeapTupleSetOid(tuple, InvalidOid);
-
-	heap_insert(myState->rel,
-				tuple,
-				myState->estate->es_output_cid,
-				myState->hi_options,
-				myState->bistate);
-
-	/* We know this is a newly created relation, so there are no indexes */
-
-	IncrAppended();
-}
-
-/*
- * intorel_shutdown --- executor end
- */
-static void
-intorel_shutdown(DestReceiver *self)
-{
-	/* no-op */
-}
+	FreeExecutorState(estate);
 
-/*
- * intorel_destroy --- release DestReceiver object
- */
-static void
-intorel_destroy(DestReceiver *self)
-{
-	pfree(self);
+	/* Mark EPQState idle */
+	epqstate->estate = NULL;
+	epqstate->planstate = NULL;
+	epqstate->origslot = NULL;
 }
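Although the intorel_* functions are removed here, they remain a good illustration of the general DestReceiver contract: four callbacks plus a mydest tag, wired up by a small factory function. A minimal skeleton of a custom receiver in the same style — the counter behavior and all names are invented for illustration, and the receiveSlot signature matches the era of the code above:

    typedef struct
    {
        DestReceiver pub;       /* public part must come first */
        long        tuples;     /* private state: rows seen */
    } DR_counter;

    static void
    counter_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
    {
        /* no-op */
    }

    static void
    counter_receive(TupleTableSlot *slot, DestReceiver *self)
    {
        /* count the row; a real receiver would consume the slot here */
        ((DR_counter *) self)->tuples++;
    }

    static void
    counter_shutdown(DestReceiver *self)
    {
        elog(DEBUG1, "saw %ld tuples", ((DR_counter *) self)->tuples);
    }

    static void
    counter_destroy(DestReceiver *self)
    {
        pfree(self);
    }

    DestReceiver *
    CreateCounterDestReceiver(void)
    {
        DR_counter *self = (DR_counter *) palloc0(sizeof(DR_counter));

        self->pub.receiveSlot = counter_receive;
        self->pub.rStartup = counter_startup;
        self->pub.rShutdown = counter_shutdown;
        self->pub.rDestroy = counter_destroy;
        self->pub.mydest = DestNone;    /* no dedicated CommandDest value */

        return (DestReceiver *) self;
    }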