Fix state reversal after partition tuple routing
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Mon, 19 Mar 2018 20:43:55 +0000 (17:43 -0300)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Mon, 19 Mar 2018 20:43:55 +0000 (17:43 -0300)
We make some changes to ModifyTableState and the EState it uses whenever
we route tuples to partitions; but we weren't restoring properly in all
cases, possibly causing crashes when partitions with different tuple
descriptors are targeted by tuples inserted in the same command.
Refactor some code, creating ExecPrepareTupleRouting, to encapsulate the
needed state changing logic, and have it invoked one level above its
current place (ie. put it in ExecModifyTable instead of ExecInsert);
this makes it all more readable.

Add a test case to exercise this.

We don't support having views as partitions; and since only views can
have INSTEAD OF triggers, there is no point in testing for INSTEAD OF
when processing insertions into a partitioned table.  Remove code that
appears to support this (but which is actually never relevant.)

In passing, fix location of some very confusing comments in
ModifyTableState.

Reported-by: Amit Langote
Author: Etsuro Fujita, Amit Langote
Discussion: https://postgr/es/m/0473bf5c-57b1-f1f7-3d58-455c2230bc5f@lab.ntt.co.jp

src/backend/commands/copy.c
src/backend/executor/nodeModifyTable.c
src/include/nodes/execnodes.h
src/test/regress/expected/insert.out
src/test/regress/sql/insert.sql

index ef301b6328addad48f15bd445edcfb9ce8d8cbfa..5d46c9889dd93f08f98bc5e81e62b8c8a89f95eb 100644 (file)
@@ -2656,13 +2656,12 @@ CopyFrom(CopyState cstate)
                        if (cstate->transition_capture != NULL)
                        {
                                if (resultRelInfo->ri_TrigDesc &&
-                                       (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
-                                        resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
+                                       resultRelInfo->ri_TrigDesc->trig_insert_before_row)
                                {
                                        /*
-                                        * If there are any BEFORE or INSTEAD triggers on the
-                                        * partition, we'll have to be ready to convert their
-                                        * result back to tuplestore format.
+                                        * If there are any BEFORE triggers on the partition,
+                                        * we'll have to be ready to convert their result back to
+                                        * tuplestore format.
                                         */
                                        cstate->transition_capture->tcs_original_insert_tuple = NULL;
                                        cstate->transition_capture->tcs_map =
@@ -2803,12 +2802,13 @@ CopyFrom(CopyState cstate)
                         * tuples inserted by an INSERT command.
                         */
                        processed++;
+               }
 
-                       if (saved_resultRelInfo)
-                       {
-                               resultRelInfo = saved_resultRelInfo;
-                               estate->es_result_relation_info = resultRelInfo;
-                       }
+               /* Restore the saved ResultRelInfo */
+               if (saved_resultRelInfo)
+               {
+                       resultRelInfo = saved_resultRelInfo;
+                       estate->es_result_relation_info = resultRelInfo;
                }
        }
 
index 474259337439fc8f6a0a3766ccf57f44fe074950..f42d33a969460e5629979c004bf31e2134e7e594 100644 (file)
@@ -62,6 +62,10 @@ static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
                                         EState *estate,
                                         bool canSetTag,
                                         TupleTableSlot **returning);
+static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
+                                               EState *estate,
+                                               ResultRelInfo *targetRelInfo,
+                                               TupleTableSlot *slot);
 
 /*
  * Verify that the tuples to be produced by INSERT or UPDATE match the
@@ -259,7 +263,6 @@ ExecInsert(ModifyTableState *mtstate,
 {
        HeapTuple       tuple;
        ResultRelInfo *resultRelInfo;
-       ResultRelInfo *saved_resultRelInfo = NULL;
        Relation        resultRelationDesc;
        Oid                     newId;
        List       *recheckIndexes = NIL;
@@ -275,100 +278,6 @@ ExecInsert(ModifyTableState *mtstate,
         * get information on the (current) result relation
         */
        resultRelInfo = estate->es_result_relation_info;
-
-       /* Determine the partition to heap_insert the tuple into */
-       if (mtstate->mt_partition_dispatch_info)
-       {
-               int                     leaf_part_index;
-               TupleConversionMap *map;
-
-               /*
-                * Away we go ... If we end up not finding a partition after all,
-                * ExecFindPartition() does not return and errors out instead.
-                * Otherwise, the returned value is to be used as an index into arrays
-                * mt_partitions[] and mt_partition_tupconv_maps[] that will get us
-                * the ResultRelInfo and TupleConversionMap for the partition,
-                * respectively.
-                */
-               leaf_part_index = ExecFindPartition(resultRelInfo,
-                                                                                       mtstate->mt_partition_dispatch_info,
-                                                                                       slot,
-                                                                                       estate);
-               Assert(leaf_part_index >= 0 &&
-                          leaf_part_index < mtstate->mt_num_partitions);
-
-               /*
-                * Save the old ResultRelInfo and switch to the one corresponding to
-                * the selected partition.
-                */
-               saved_resultRelInfo = resultRelInfo;
-               resultRelInfo = mtstate->mt_partitions + leaf_part_index;
-
-               /* We do not yet have a way to insert into a foreign partition */
-               if (resultRelInfo->ri_FdwRoutine)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                        errmsg("cannot route inserted tuples to a foreign table")));
-
-               /* For ExecInsertIndexTuples() to work on the partition's indexes */
-               estate->es_result_relation_info = resultRelInfo;
-
-               /*
-                * If we're capturing transition tuples, we might need to convert from
-                * the partition rowtype to parent rowtype.
-                */
-               if (mtstate->mt_transition_capture != NULL)
-               {
-                       if (resultRelInfo->ri_TrigDesc &&
-                               (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
-                                resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
-                       {
-                               /*
-                                * If there are any BEFORE or INSTEAD triggers on the
-                                * partition, we'll have to be ready to convert their result
-                                * back to tuplestore format.
-                                */
-                               mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
-                               mtstate->mt_transition_capture->tcs_map =
-                                       mtstate->mt_transition_tupconv_maps[leaf_part_index];
-                       }
-                       else
-                       {
-                               /*
-                                * Otherwise, just remember the original unconverted tuple, to
-                                * avoid a needless round trip conversion.
-                                */
-                               mtstate->mt_transition_capture->tcs_original_insert_tuple = tuple;
-                               mtstate->mt_transition_capture->tcs_map = NULL;
-                       }
-               }
-               if (mtstate->mt_oc_transition_capture != NULL)
-                       mtstate->mt_oc_transition_capture->tcs_map =
-                               mtstate->mt_transition_tupconv_maps[leaf_part_index];
-
-               /*
-                * We might need to convert from the parent rowtype to the partition
-                * rowtype.
-                */
-               map = mtstate->mt_partition_tupconv_maps[leaf_part_index];
-               if (map)
-               {
-                       Relation        partrel = resultRelInfo->ri_RelationDesc;
-
-                       tuple = do_convert_tuple(tuple, map);
-
-                       /*
-                        * We must use the partition's tuple descriptor from this point
-                        * on, until we're finished dealing with the partition. Use the
-                        * dedicated slot for that.
-                        */
-                       slot = mtstate->mt_partition_tuple_slot;
-                       Assert(slot != NULL);
-                       ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
-                       ExecStoreTuple(tuple, slot, InvalidBuffer, true);
-               }
-       }
-
        resultRelationDesc = resultRelInfo->ri_RelationDesc;
 
        /*
@@ -477,7 +386,7 @@ ExecInsert(ModifyTableState *mtstate,
                 * No need though if the tuple has been routed, and a BR trigger
                 * doesn't exist.
                 */
-               if (saved_resultRelInfo != NULL &&
+               if (resultRelInfo->ri_PartitionRoot != NULL &&
                        !(resultRelInfo->ri_TrigDesc &&
                          resultRelInfo->ri_TrigDesc->trig_insert_before_row))
                        check_partition_constr = false;
@@ -645,9 +554,6 @@ ExecInsert(ModifyTableState *mtstate,
        if (resultRelInfo->ri_projectReturning)
                result = ExecProcessReturning(resultRelInfo, slot, planSlot);
 
-       if (saved_resultRelInfo)
-               estate->es_result_relation_info = saved_resultRelInfo;
-
        return result;
 }
 
@@ -1545,6 +1451,111 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
        }
 }
 
+/*
+ * ExecPrepareTupleRouting --- prepare for routing one tuple
+ *
+ * Determine the partition in which the tuple in slot is to be inserted,
+ * and modify mtstate and estate to prepare for it.
+ *
+ * Caller must revert the estate changes after executing the insertion!
+ * In mtstate, transition capture changes may also need to be reverted.
+ *
+ * Returns a slot holding the tuple of the partition rowtype.
+ */
+static TupleTableSlot *
+ExecPrepareTupleRouting(ModifyTableState *mtstate,
+                                               EState *estate,
+                                               ResultRelInfo *targetRelInfo,
+                                               TupleTableSlot *slot)
+{
+       int                     partidx;
+       ResultRelInfo *partrel;
+       HeapTuple       tuple;
+       TupleConversionMap *map;
+
+       /*
+        * Determine the target partition.  If ExecFindPartition does not find
+        * a partition after all, it doesn't return here; otherwise, the returned
+        * value is to be used as an index into the arrays for the resultRelInfo
+        * and TupleConversionMap for the partition.
+        */
+       partidx = ExecFindPartition(targetRelInfo,
+                                                               mtstate->mt_partition_dispatch_info,
+                                                               slot,
+                                                               estate);
+       Assert(partidx >= 0 && partidx < mtstate->mt_num_partitions);
+
+       /* Get the ResultRelInfo corresponding to the selected partition. */
+       partrel = &mtstate->mt_partitions[partidx];
+
+       /* We do not yet have a way to insert into a foreign partition */
+       if (partrel->ri_FdwRoutine)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("cannot route inserted tuples to a foreign table")));
+
+       /*
+        * Make it look like we are inserting into the partition.
+        */
+       estate->es_result_relation_info = partrel;
+
+       /* Get the heap tuple out of the given slot. */
+       tuple = ExecMaterializeSlot(slot);
+
+       /*
+        * If we're capturing transition tuples, we might need to convert from the
+        * partition rowtype to parent rowtype.
+        */
+       if (mtstate->mt_transition_capture != NULL)
+       {
+               if (partrel->ri_TrigDesc &&
+                       partrel->ri_TrigDesc->trig_insert_before_row)
+               {
+                       /*
+                        * If there are any BEFORE triggers on the partition, we'll have
+                        * to be ready to convert their result back to tuplestore format.
+                        */
+                       mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
+                       mtstate->mt_transition_capture->tcs_map =
+                               mtstate->mt_transition_tupconv_maps[partidx];
+               }
+               else
+               {
+                       /*
+                        * Otherwise, just remember the original unconverted tuple, to
+                        * avoid a needless round trip conversion.
+                        */
+                       mtstate->mt_transition_capture->tcs_original_insert_tuple = tuple;
+                       mtstate->mt_transition_capture->tcs_map = NULL;
+               }
+       }
+       if (mtstate->mt_oc_transition_capture != NULL)
+       {
+               mtstate->mt_oc_transition_capture->tcs_map =
+                       mtstate->mt_transition_tupconv_maps[partidx];
+       }
+
+       /*
+        * Convert the tuple, if necessary.
+        */
+       map = mtstate->mt_partition_tupconv_maps[partidx];
+       if (map)
+       {
+               tuple = do_convert_tuple(tuple, map);
+
+               /*
+                * We must use the partition's tuple descriptor from this point on,
+                * until we're finished dealing with the partition.  Use the
+                * dedicated slot for that.
+                */
+               slot = mtstate->mt_partition_tuple_slot;
+               ExecSetSlotDescriptor(slot, RelationGetDescr(partrel->ri_RelationDesc));
+               ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+       }
+
+       return slot;
+}
+
 /* ----------------------------------------------------------------
  *        ExecModifyTable
  *
@@ -1763,9 +1774,16 @@ ExecModifyTable(PlanState *pstate)
                switch (operation)
                {
                        case CMD_INSERT:
+                               /* Prepare for tuple routing, if needed. */
+                               if (node->mt_partition_dispatch_info)
+                                       slot = ExecPrepareTupleRouting(node, estate,
+                                                                                                  resultRelInfo, slot);
                                slot = ExecInsert(node, slot, planSlot,
                                                                  node->mt_arbiterindexes, node->mt_onconflict,
                                                                  estate, node->canSetTag);
+                               /* Revert ExecPrepareTupleRouting's node change. */
+                               if (node->mt_partition_dispatch_info)
+                                       estate->es_result_relation_info = resultRelInfo;
                                break;
                        case CMD_UPDATE:
                                slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot,
index fc7f6e20f2b8a36e7a092f7421801299673156c8..4420e541f0f4cdf36ca7e5b0a83aea36b98e3f83 100644 (file)
@@ -981,15 +981,24 @@ typedef struct ModifyTableState
        int                     mt_num_partitions;      /* Number of members in the following
                                                                         * arrays */
        ResultRelInfo *mt_partitions;   /* Per partition result relation */
-       TupleConversionMap **mt_partition_tupconv_maps;
+
        /* Per partition tuple conversion map */
+       TupleConversionMap **mt_partition_tupconv_maps;
+
+       /*
+        * Used to manipulate any given leaf partition's rowtype after that
+        * partition is chosen for insertion by tuple-routing.
+        */
        TupleTableSlot *mt_partition_tuple_slot;
-       struct TransitionCaptureState *mt_transition_capture;
+
        /* controls transition table population for specified operation */
-       struct TransitionCaptureState *mt_oc_transition_capture;
+       struct TransitionCaptureState *mt_transition_capture;
+
        /* controls transition table population for INSERT...ON CONFLICT UPDATE */
-       TupleConversionMap **mt_transition_tupconv_maps;
+       struct TransitionCaptureState *mt_oc_transition_capture;
+
        /* Per plan/partition tuple conversion */
+       TupleConversionMap **mt_transition_tupconv_maps;
 } ModifyTableState;
 
 /* ----------------
index e2f63933ececa155e99248c7d6845a0291063575..d6c4394a7597c89948871d638c917a7e522f4c50 100644 (file)
@@ -554,6 +554,32 @@ drop role regress_coldesc_role;
 drop table inserttest3;
 drop table brtrigpartcon;
 drop function brtrigpartcon1trigf();
+-- check that "do nothing" BR triggers work with tuple-routing (this checks
+-- that estate->es_result_relation_info is appropriately set/reset for each
+-- routed tuple)
+create table donothingbrtrig_test (a int, b text) partition by list (a);
+create table donothingbrtrig_test1 (b text, a int);
+create table donothingbrtrig_test2 (c text, b text, a int);
+alter table donothingbrtrig_test2 drop column c;
+create or replace function donothingbrtrig_func() returns trigger as $$begin raise notice 'b: %', new.b; return NULL; end$$ language plpgsql;
+create trigger donothingbrtrig1 before insert on donothingbrtrig_test1 for each row execute procedure donothingbrtrig_func();
+create trigger donothingbrtrig2 before insert on donothingbrtrig_test2 for each row execute procedure donothingbrtrig_func();
+alter table donothingbrtrig_test attach partition donothingbrtrig_test1 for values in (1);
+alter table donothingbrtrig_test attach partition donothingbrtrig_test2 for values in (2);
+insert into donothingbrtrig_test values (1, 'foo'), (2, 'bar');
+NOTICE:  b: foo
+NOTICE:  b: bar
+copy donothingbrtrig_test from stdout;
+NOTICE:  b: baz
+NOTICE:  b: qux
+select tableoid::regclass, * from donothingbrtrig_test;
+ tableoid | a | b 
+----------+---+---
+(0 rows)
+
+-- cleanup
+drop table donothingbrtrig_test;
+drop function donothingbrtrig_func();
 -- check multi-column range partitioning with minvalue/maxvalue constraints
 create table mcrparted (a text, b int) partition by range(a, b);
 create table mcrparted1_lt_b partition of mcrparted for values from (minvalue, minvalue) to ('b', minvalue);
index 3e869acba805bae11e5c52a0e0d24a0d5102dabf..4123d39c7b145b22eda238332094a8b96e15a649 100644 (file)
@@ -378,6 +378,29 @@ drop table inserttest3;
 drop table brtrigpartcon;
 drop function brtrigpartcon1trigf();
 
+-- check that "do nothing" BR triggers work with tuple-routing (this checks
+-- that estate->es_result_relation_info is appropriately set/reset for each
+-- routed tuple)
+create table donothingbrtrig_test (a int, b text) partition by list (a);
+create table donothingbrtrig_test1 (b text, a int);
+create table donothingbrtrig_test2 (c text, b text, a int);
+alter table donothingbrtrig_test2 drop column c;
+create or replace function donothingbrtrig_func() returns trigger as $$begin raise notice 'b: %', new.b; return NULL; end$$ language plpgsql;
+create trigger donothingbrtrig1 before insert on donothingbrtrig_test1 for each row execute procedure donothingbrtrig_func();
+create trigger donothingbrtrig2 before insert on donothingbrtrig_test2 for each row execute procedure donothingbrtrig_func();
+alter table donothingbrtrig_test attach partition donothingbrtrig_test1 for values in (1);
+alter table donothingbrtrig_test attach partition donothingbrtrig_test2 for values in (2);
+insert into donothingbrtrig_test values (1, 'foo'), (2, 'bar');
+copy donothingbrtrig_test from stdout;
+1      baz
+2      qux
+\.
+select tableoid::regclass, * from donothingbrtrig_test;
+
+-- cleanup
+drop table donothingbrtrig_test;
+drop function donothingbrtrig_func();
+
 -- check multi-column range partitioning with minvalue/maxvalue constraints
 create table mcrparted (a text, b int) partition by range(a, b);
 create table mcrparted1_lt_b partition of mcrparted for values from (minvalue, minvalue) to ('b', minvalue);