return generate_partition_qual(rel, recurse);
}
-/* Turn an array of OIDs with N elements into a list */
-#define OID_ARRAY_TO_LIST(arr, N, list) \
+/*
+ * Append OIDs of rel's partitions to the list 'partoids' and for each OID,
+ * append pointer rel to the list 'parents'.
+ */
+#define APPEND_REL_PARTITION_OIDS(rel, partoids, parents) \
do\
{\
int i;\
- for (i = 0; i < (N); i++)\
- (list) = lappend_oid((list), (arr)[i]);\
+ for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
+ {\
+ (partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
+ (parents) = lappend((parents), (rel));\
+ }\
} while(0)
/*
RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
int *num_parted, List **leaf_part_oids)
{
- PartitionDesc rootpartdesc = RelationGetPartitionDesc(rel);
PartitionDispatchData **pd;
List *all_parts = NIL,
- *parted_rels;
- ListCell *lc;
+ *all_parents = NIL,
+ *parted_rels,
+ *parted_rel_parents;
+ ListCell *lc1,
+ *lc2;
int i,
k,
offset;
*/
*num_parted = 1;
parted_rels = list_make1(rel);
- OID_ARRAY_TO_LIST(rootpartdesc->oids, rootpartdesc->nparts, all_parts);
- foreach(lc, all_parts)
+ /* Root partitioned table has no parent, so NULL for parent */
+ parted_rel_parents = list_make1(NULL);
+ APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents);
+ forboth(lc1, all_parts, lc2, all_parents)
{
- Relation partrel = heap_open(lfirst_oid(lc), lockmode);
+ Relation partrel = heap_open(lfirst_oid(lc1), lockmode);
+ Relation parent = lfirst(lc2);
PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
/*
{
(*num_parted)++;
parted_rels = lappend(parted_rels, partrel);
- OID_ARRAY_TO_LIST(partdesc->oids, partdesc->nparts, all_parts);
+ parted_rel_parents = lappend(parted_rel_parents, parent);
+ APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents);
}
else
heap_close(partrel, NoLock);
sizeof(PartitionDispatchData *));
*leaf_part_oids = NIL;
i = k = offset = 0;
- foreach(lc, parted_rels)
+ forboth(lc1, parted_rels, lc2, parted_rel_parents)
{
- Relation partrel = lfirst(lc);
+ Relation partrel = lfirst(lc1);
+ Relation parent = lfirst(lc2);
PartitionKey partkey = RelationGetPartitionKey(partrel);
+ TupleDesc tupdesc = RelationGetDescr(partrel);
PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
int j,
m;
pd[i]->key = partkey;
pd[i]->keystate = NIL;
pd[i]->partdesc = partdesc;
+ if (parent != NULL)
+ {
+ /*
+ * For every partitioned table other than root, we must store
+ * a tuple table slot initialized with its tuple descriptor and
+ * a tuple conversion map to convert a tuple from its parent's
+ * rowtype to its own. That is to make sure that we are looking
+ * at the correct row using the correct tuple descriptor when
+ * computing its partition key for tuple routing.
+ */
+ pd[i]->tupslot = MakeSingleTupleTableSlot(tupdesc);
+ pd[i]->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
+ tupdesc,
+ gettext_noop("could not convert row type"));
+ }
+ else
+ {
+ /* Not required for the root partitioned table */
+ pd[i]->tupslot = NULL;
+ pd[i]->tupmap = NULL;
+ }
pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
/*
{
PartitionKey key = parent->key;
PartitionDesc partdesc = parent->partdesc;
+ TupleTableSlot *myslot = parent->tupslot;
+ TupleConversionMap *map = parent->tupmap;
/* Quick exit */
if (partdesc->nparts == 0)
return -1;
}
+ if (myslot != NULL)
+ {
+ HeapTuple tuple = ExecFetchSlotTuple(slot);
+
+ ExecClearTuple(myslot);
+ Assert(map != NULL);
+ tuple = do_convert_tuple(tuple, map);
+ ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
+ slot = myslot;
+ }
+
/* Extract partition key from tuple */
FormPartitionKeyDatum(parent, slot, estate, values, isnull);
/* Triggers might need a slot as well */
estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
+ /*
+ * Initialize a dedicated slot to manipulate tuples of any given
+ * partition's rowtype.
+ */
+ if (cstate->partition_dispatch_info)
+ estate->es_partition_tuple_slot = ExecInitExtraTupleSlot(estate);
+ else
+ estate->es_partition_tuple_slot = NULL;
+
/*
* It's more efficient to prepare a bunch of tuples for insertion, and
* insert them in one heap_multi_insert() call, than call heap_insert()
for (;;)
{
- TupleTableSlot *slot;
+ TupleTableSlot *slot,
+ *oldslot = NULL;
bool skip_tuple;
Oid loaded_oid = InvalidOid;
map = cstate->partition_tupconv_maps[leaf_part_index];
if (map)
{
+ Relation partrel = resultRelInfo->ri_RelationDesc;
+
tuple = do_convert_tuple(tuple, map);
+
+ /*
+ * We must use the partition's tuple descriptor from this
+ * point on. Use a dedicated slot from this point on until
+ * we're finished dealing with the partition.
+ */
+ oldslot = slot;
+ slot = estate->es_partition_tuple_slot;
+ Assert(slot != NULL);
+ ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
ExecStoreTuple(tuple, slot, InvalidBuffer, true);
}
{
resultRelInfo = saved_resultRelInfo;
estate->es_result_relation_info = resultRelInfo;
+
+ /* Switch back to the slot corresponding to the root table */
+ Assert(oldslot != NULL);
+ slot = oldslot;
}
}
}
* Remember cstate->partition_dispatch_info[0] corresponds to the root
* partitioned table, which we must not try to close, because it is
* the main target table of COPY that will be closed eventually by
- * DoCopy().
+ * DoCopy(). Also, tupslot is NULL for the root partitioned table.
*/
for (i = 1; i < cstate->num_dispatch; i++)
{
PartitionDispatch pd = cstate->partition_dispatch_info[i];
heap_close(pd->reldesc, NoLock);
+ ExecDropSingleTupleTableSlot(pd->tupslot);
}
for (i = 0; i < cstate->num_partitions; i++)
{
Relation resultRelationDesc;
Oid newId;
List *recheckIndexes = NIL;
+ TupleTableSlot *oldslot = NULL;
/*
* get the heap tuple out of the tuple table slot, making sure we have a
map = mtstate->mt_partition_tupconv_maps[leaf_part_index];
if (map)
{
+ Relation partrel = resultRelInfo->ri_RelationDesc;
+
tuple = do_convert_tuple(tuple, map);
+
+ /*
+ * We must use the partition's tuple descriptor from this
+ * point on, until we're finished dealing with the partition.
+ * Use the dedicated slot for that.
+ */
+ oldslot = slot;
+ slot = estate->es_partition_tuple_slot;
+ Assert(slot != NULL);
+ ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
ExecStoreTuple(tuple, slot, InvalidBuffer, true);
}
}
{
resultRelInfo = saved_resultRelInfo;
estate->es_result_relation_info = resultRelInfo;
+
+ /* Switch back to the slot corresponding to the root table */
+ Assert(oldslot != NULL);
+ slot = oldslot;
}
/*
mtstate->mt_partitions = partitions;
mtstate->mt_num_partitions = num_partitions;
mtstate->mt_partition_tupconv_maps = partition_tupconv_maps;
+
+ /*
+ * Initialize a dedicated slot to manipulate tuples of any given
+ * partition's rowtype.
+ */
+ estate->es_partition_tuple_slot = ExecInitExtraTupleSlot(estate);
}
+ else
+ estate->es_partition_tuple_slot = NULL;
/*
* Initialize any WITH CHECK OPTION constraints if needed.
* Remember node->mt_partition_dispatch_info[0] corresponds to the root
* partitioned table, which we must not try to close, because it is the
* main target table of the query that will be closed by ExecEndPlan().
+ * Also, tupslot is NULL for the root partitioned table.
*/
for (i = 1; i < node->mt_num_dispatch; i++)
{
PartitionDispatch pd = node->mt_partition_dispatch_info[i];
heap_close(pd->reldesc, NoLock);
+ ExecDropSingleTupleTableSlot(pd->tupslot);
}
for (i = 0; i < node->mt_num_partitions; i++)
{
* key Partition key information of the table
* keystate Execution state required for expressions in the partition key
* partdesc Partition descriptor of the table
+ * tupslot A standalone TupleTableSlot initialized with this table's tuple
+ * descriptor
+ * tupmap TupleConversionMap to convert from the parent's rowtype to
+ * this table's rowtype (when extracting the partition key of a
+ * tuple just before routing it through this table)
* indexes Array with partdesc->nparts members (for details on what
* individual members represent, see how they are set in
* RelationGetPartitionDispatchInfo())
PartitionKey key;
List *keystate; /* list of ExprState */
PartitionDesc partdesc;
+ TupleTableSlot *tupslot;
+ TupleConversionMap *tupmap;
int *indexes;
} PartitionDispatchData;
TupleTableSlot *es_trig_oldtup_slot; /* for TriggerEnabled */
TupleTableSlot *es_trig_newtup_slot; /* for TriggerEnabled */
+ /* Slot used to manipulate a tuple after it is routed to a partition */
+ TupleTableSlot *es_partition_tuple_slot;
+
/* Parameter info: */
ParamListInfo es_param_list_info; /* values of external params */
ParamExecData *es_param_exec_vals; /* values of internal params */
drop cascades to table part_ee_ff
drop cascades to table part_ee_ff1
drop cascades to table part_ee_ff2
+-- more tests for certain multi-level partitioning scenarios
+create table p (a int, b int) partition by range (a, b);
+create table p1 (b int, a int not null) partition by range (b);
+create table p11 (like p1);
+alter table p11 drop a;
+alter table p11 add a int;
+alter table p11 drop a;
+alter table p11 add a int not null;
+-- attnum for key attribute 'a' is different in p, p1, and p11
+select attrelid::regclass, attname, attnum
+from pg_attribute
+where attname = 'a'
+ and (attrelid = 'p'::regclass
+ or attrelid = 'p1'::regclass
+ or attrelid = 'p11'::regclass);
+ attrelid | attname | attnum
+----------+---------+--------
+ p | a | 1
+ p1 | a | 2
+ p11 | a | 4
+(3 rows)
+
+alter table p1 attach partition p11 for values from (2) to (5);
+alter table p attach partition p1 for values from (1, 2) to (1, 10);
+-- check that "(1, 2)" is correctly routed to p11.
+insert into p values (1, 2);
+select tableoid::regclass, * from p;
+ tableoid | a | b
+----------+---+---
+ p11 | 1 | 2
+(1 row)
+
+-- cleanup
+drop table p cascade;
+NOTICE: drop cascades to 2 other objects
+DETAIL: drop cascades to table p1
+drop cascades to table p11
-- cleanup
drop table range_parted cascade;
drop table list_parted cascade;
+
+-- more tests for certain multi-level partitioning scenarios
+create table p (a int, b int) partition by range (a, b);
+create table p1 (b int, a int not null) partition by range (b);
+create table p11 (like p1);
+alter table p11 drop a;
+alter table p11 add a int;
+alter table p11 drop a;
+alter table p11 add a int not null;
+-- attnum for key attribute 'a' is different in p, p1, and p11
+select attrelid::regclass, attname, attnum
+from pg_attribute
+where attname = 'a'
+ and (attrelid = 'p'::regclass
+ or attrelid = 'p1'::regclass
+ or attrelid = 'p11'::regclass);
+
+alter table p1 attach partition p11 for values from (2) to (5);
+alter table p attach partition p1 for values from (1, 2) to (1, 10);
+
+-- check that "(1, 2)" is correctly routed to p11.
+insert into p values (1, 2);
+select tableoid::regclass, * from p;
+
+-- cleanup
+drop table p cascade;