1 /*-------------------------------------------------------------------------
4 * Support routines for partitioning.
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/executor/execPartition.c
12 *-------------------------------------------------------------------------
17 #include "catalog/pg_inherits_fn.h"
18 #include "executor/execPartition.h"
19 #include "executor/executor.h"
20 #include "mb/pg_wchar.h"
21 #include "miscadmin.h"
22 #include "utils/lsyscache.h"
23 #include "utils/rls.h"
24 #include "utils/ruleutils.h"
/*
 * Forward declarations for the file-local (static) helpers that are defined
 * further down in this file.
 *
 * NOTE(review): the embedded line numbering jumps here (30 -> 35), so the
 * tails of the last two prototypes are not visible in this listing.
 */
26 static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
27 int *num_parted, List **leaf_part_oids);
28 static void get_partition_dispatch_recurse(Relation rel, Relation parent,
29 List **pds, List **leaf_part_oids);
30 static void FormPartitionKeyDatum(PartitionDispatch pd,
35 static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
41 * ExecSetupPartitionTupleRouting - set up information needed during
42 * tuple routing for partitioned tables
45 * 'pd' receives an array of PartitionDispatch objects with one entry for
46 * every partitioned table in the partition tree
47 * 'partitions' receives an array of ResultRelInfo* objects with one entry for
48 * every leaf partition in the partition tree
49 * 'tup_conv_maps' receives an array of TupleConversionMap objects with one
50 * entry for every leaf partition (required to convert input tuple based
51 * on the root table's rowtype to a leaf partition's rowtype after tuple
53 * 'partition_tuple_slot' receives a standalone TupleTableSlot to be used
54 * to manipulate any given leaf partition's rowtype after that partition
55 * is chosen by tuple-routing.
56 * 'num_parted' receives the number of partitioned tables in the partition
57 * tree (= the number of entries in the 'pd' output array)
58 * 'num_partitions' receives the number of leaf partitions in the partition
59 * tree (= the number of entries in the 'partitions' and 'tup_conv_maps'
62 * Note that all the relations in the partition tree are locked using the
63 * RowExclusiveLock mode upon return from this function.
/*
 * ExecSetupPartitionTupleRouting
 *
 * Builds the per-partition state used for tuple routing below 'rel':
 *   - locks the inheritance tree with RowExclusiveLock through
 *     find_all_inheritors(), then obtains the PartitionDispatch array and
 *     the list of leaf partition OIDs from RelationGetPartitionDispatchInfo();
 *   - allocates the output arrays for partitions (palloc) and tup_conv_maps
 *     (palloc0), one pointer per leaf partition;
 *   - creates the standalone slot handed back in partition_tuple_slot;
 *   - for each leaf OID: opens the relation, builds its tuple conversion
 *     map, initializes the ResultRelInfo and validates it for CMD_INSERT,
 *     opens its indexes when needed, and appends it to
 *     estate->es_leaf_result_relations.
 *
 * NOTE(review): this listing skips source lines (the embedded numbering goes
 * 66 -> 69 and 75 -> 79), so part of the parameter list and several local
 * declarations are not shown; 'estate', 'leaf_parts', 'cell', 'partrel' and
 * 'i' are used below but declared on lines missing from this view.
 */
66 ExecSetupPartitionTupleRouting(Relation rel,
69 PartitionDispatch **pd,
70 ResultRelInfo ***partitions,
71 TupleConversionMap ***tup_conv_maps,
72 TupleTableSlot **partition_tuple_slot,
73 int *num_parted, int *num_partitions)
75 TupleDesc tupDesc = RelationGetDescr(rel);
79 ResultRelInfo *leaf_part_rri;
82 * Get the information about the partition tree after locking all the
/* Return value is discarded: the call is made only for its locking effect. */
85 (void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
86 *pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts);
87 *num_partitions = list_length(leaf_parts);
88 *partitions = (ResultRelInfo **) palloc(*num_partitions *
89 sizeof(ResultRelInfo *));
90 *tup_conv_maps = (TupleConversionMap **) palloc0(*num_partitions *
91 sizeof(TupleConversionMap *));
94 * Initialize an empty slot that will be used to manipulate tuples of any
95 * given partition's rowtype. It is attached to the caller-specified node
96 * (such as ModifyTableState) and released when the node finishes
99 *partition_tuple_slot = MakeTupleTableSlot();
/*
 * A single contiguous array holds all the ResultRelInfo structs;
 * leaf_part_rri walks through it (see the post-increment at the end of the
 * loop body), while the partitions array stores pointers into it.
 */
101 leaf_part_rri = (ResultRelInfo *) palloc0(*num_partitions *
102 sizeof(ResultRelInfo));
104 foreach(cell, leaf_parts)
107 TupleDesc part_tupdesc;
110 * We locked all the partitions above including the leaf partitions.
111 * Note that each of the relations in *partitions are eventually
112 * closed by the caller.
/* NoLock: the lock was already taken by find_all_inheritors() above. */
114 partrel = heap_open(lfirst_oid(cell), NoLock);
115 part_tupdesc = RelationGetDescr(partrel);
118 * Save a tuple conversion map to convert a tuple routed to this
119 * partition from the parent's type to the partition's.
121 (*tup_conv_maps)[i] = convert_tuples_by_name(tupDesc, part_tupdesc,
122 gettext_noop("could not convert row type"));
/* NOTE(review): the middle arguments of this call are on missing lines. */
124 InitResultRelInfo(leaf_part_rri,
128 estate->es_instrument);
131 * Verify result relation is a valid target for INSERT.
133 CheckValidResultRel(leaf_part_rri, CMD_INSERT);
136 * Open partition indices (remember we do not support ON CONFLICT in
137 * case of partitioned tables, so we do not need support information
138 * for speculative insertion)
/*
 * Indexes are opened only when the relation reports having some and
 * ri_IndexRelationDescs has not been set yet.
 */
140 if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex &&
141 leaf_part_rri->ri_IndexRelationDescs == NULL)
142 ExecOpenIndices(leaf_part_rri, false);
144 estate->es_leaf_result_relations =
145 lappend(estate->es_leaf_result_relations, leaf_part_rri);
/* Record this entry's address, then step to the next array element. */
147 (*partitions)[i] = leaf_part_rri++;
153 * ExecFindPartition -- Find a leaf partition in the partition tree rooted
154 * at parent, for the heap tuple contained in *slot
156 * estate must be non-NULL; we'll need it to compute any expressions in the
159 * If no leaf partition is found, this routine errors out with the appropriate
160 * error message, else it returns the leaf partition sequence number
161 * as an index into the array of (ResultRelInfos of) all leaf partitions in
162 * the partition tree.
/*
 * ExecFindPartition
 *
 * Walks the PartitionDispatch array 'pd' one level at a time to locate the
 * leaf partition for the tuple in 'slot'.  At each level the tuple is
 * converted to the current parent's row type when that parent has both a
 * slot and a conversion map, the partition key is computed with
 * FormPartitionKeyDatum(), and get_partition_for_tuple() selects an index.
 * A non-negative parent->indexes[] entry is taken as the result; otherwise
 * the negated entry selects the next PartitionDispatch in 'pd'.  When no
 * partition matches, an ERRCODE_CHECK_VIOLATION error is reported.
 *
 * The per-tuple expression context's ecxt_scantuple is saved on entry and
 * put back at the end of the function.
 *
 * NOTE(review): the return type, the loop header, several declarations
 * ('result', 'cur_index', 'rel', 'val_desc'), braces and the return
 * statement are on lines missing from this listing.
 */
165 ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
166 TupleTableSlot *slot, EState *estate)
169 Datum values[PARTITION_MAX_KEYS];
170 bool isnull[PARTITION_MAX_KEYS];
172 PartitionDispatch parent;
173 ExprContext *ecxt = GetPerTupleExprContext(estate);
/* Remembered so it can be restored once routing is finished. */
174 TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
177 * First check the root table's partition constraint, if any. No point in
178 * routing the tuple if it doesn't belong in the root table itself.
180 if (resultRelInfo->ri_PartitionCheck)
181 ExecPartitionCheck(resultRelInfo, slot, estate);
183 /* start with the root partitioned table */
187 PartitionDesc partdesc;
188 TupleTableSlot *myslot = parent->tupslot;
189 TupleConversionMap *map = parent->tupmap;
192 rel = parent->reldesc;
193 partdesc = RelationGetPartitionDesc(rel);
196 * Convert the tuple to this parent's layout so that we can do certain
197 * things we do below.
/* Both are needed: a conversion map and a slot to hold the converted tuple. */
199 if (myslot != NULL && map != NULL)
201 HeapTuple tuple = ExecFetchSlotTuple(slot);
203 ExecClearTuple(myslot);
204 tuple = do_convert_tuple(tuple, map);
205 ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
/* NOTE(review): the statements run for a parent with no partitions are not visible. */
210 if (partdesc->nparts == 0)
217 * Extract partition key from tuple. Expression evaluation machinery
218 * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
219 * point to the correct tuple slot. The slot might have changed from
220 * what was used for the parent table if the table of the current
221 * partitioning level has different tuple descriptor from the parent.
222 * So update ecxt_scantuple accordingly.
224 ecxt->ecxt_scantuple = slot;
225 FormPartitionKeyDatum(parent, slot, estate, values, isnull);
226 cur_index = get_partition_for_tuple(rel, values, isnull);
229 * cur_index < 0 means we failed to find a partition of this parent.
230 * cur_index >= 0 means we either found the leaf partition, or the
231 * next parent to find a partition of.
238 else if (parent->indexes[cur_index] >= 0)
240 result = parent->indexes[cur_index];
/* Negative entry: negate it to index the next PartitionDispatch in pd. */
244 parent = pd[-parent->indexes[cur_index]];
247 /* A partition was not found. */
252 val_desc = ExecBuildSlotPartitionKeyDescription(rel,
254 Assert(OidIsValid(RelationGetRelid(rel)));
255 (errcode(ERRCODE_CHECK_VIOLATION),
257 errmsg("no partition of relation \"%s\" found for row",
258 RelationGetRelationName(rel)),
/* The detail line is attached only when val_desc is non-NULL. */
259 val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
262 ecxt->ecxt_scantuple = ecxt_scantuple_old;
267 * RelationGetPartitionDispatchInfo
268 * Returns information necessary to route tuples down a partition tree
270 * The number of elements in the returned array (that is, the number of
271 * PartitionDispatch objects for the partitioned tables in the partition tree)
272 * is returned in *num_parted and a list of the OIDs of all the leaf
273 * partitions of rel is returned in *leaf_part_oids.
275 * All the relations in the partition tree (including 'rel') must have been
276 * locked (using at least the AccessShareLock) by the caller.
/*
 * RelationGetPartitionDispatchInfo
 *
 * Collects the PartitionDispatch objects for 'rel' and its partitioned
 * descendants by calling get_partition_dispatch_recurse() with a NULL
 * parent, then copies the resulting list into a palloc'd array of
 * pointers.  *num_parted is set to the length of that list, and
 * *leaf_part_oids (reset to NIL first) receives the leaf OIDs gathered by
 * the recursion.  'rel' must be a partitioned table; this is asserted.
 *
 * NOTE(review): the declarations of 'pdlist', 'lc' and 'i', the header of
 * the list-walking loop, and the return statement are on lines missing
 * from this listing.
 */
278 static PartitionDispatch *
279 RelationGetPartitionDispatchInfo(Relation rel,
280 int *num_parted, List **leaf_part_oids)
283 PartitionDispatchData **pd;
287 Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
290 *leaf_part_oids = NIL;
292 get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids);
293 *num_parted = list_length(pdlist);
294 pd = (PartitionDispatchData **) palloc(*num_parted *
295 sizeof(PartitionDispatchData *));
/* Presumably the body of a foreach over pdlist (loop header not visible). */
299 pd[i++] = lfirst(lc);
306 * get_partition_dispatch_recurse
307 * Recursively expand partition tree rooted at rel
309 * As the partition tree is expanded in a depth-first manner, we maintain two
310 * global lists: of PartitionDispatch objects corresponding to partitioned
311 * tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
313 * Note that the order of OIDs of leaf partitions in leaf_part_oids matches
314 * the order in which the planner's expand_partitioned_rtentry() processes
315 * them. It's not necessarily the case that the offsets match up exactly,
316 * because constraint exclusion might prune away some partitions on the
317 * planner side, whereas we'll always have the complete list; but unpruned
318 * partitions will appear in the same order in the plan as they are returned
/*
 * get_partition_dispatch_recurse
 *
 * Appends a freshly allocated PartitionDispatch for 'rel' to *pds, then
 * visits every partition OID in rel's partition descriptor.  An OID whose
 * relkind is not RELKIND_PARTITIONED_TABLE is appended to *leaf_part_oids;
 * any other is opened with NoLock and handled by a recursive call that
 * passes 'rel' as the parent.  pd->indexes[] gets one entry per partition,
 * encoded as described in the comment text below.
 *
 * NOTE(review): the return type, the declaration of 'i', some field
 * assignments (the embedded numbering goes 335 -> 339) and the condition
 * separating the root from non-root setup are on lines missing from this
 * listing.
 */
322 get_partition_dispatch_recurse(Relation rel, Relation parent,
323 List **pds, List **leaf_part_oids)
325 TupleDesc tupdesc = RelationGetDescr(rel);
326 PartitionDesc partdesc = RelationGetPartitionDesc(rel);
327 PartitionKey partkey = RelationGetPartitionKey(rel);
328 PartitionDispatch pd;
333 /* Build a PartitionDispatch for this table and add it to *pds. */
334 pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
335 *pds = lappend(*pds, pd);
339 pd->partdesc = partdesc;
343 * For every partitioned table other than the root, we must store a
344 * tuple table slot initialized with its tuple descriptor and a tuple
345 * conversion map to convert a tuple from its parent's rowtype to its
346 * own. That is to make sure that we are looking at the correct row
347 * using the correct tuple descriptor when computing its partition key
/*
 * These two statements dereference 'parent', so they presumably run only
 * when parent is non-NULL (the guarding condition is not visible here).
 */
350 pd->tupslot = MakeSingleTupleTableSlot(tupdesc);
351 pd->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
353 gettext_noop("could not convert row type"));
357 /* Not required for the root partitioned table */
363 * Go look at each partition of this table. If it's a leaf partition,
364 * simply add its OID to *leaf_part_oids. If it's a partitioned table,
365 * recursively call get_partition_dispatch_recurse(), so that its
366 * partitions are processed as well and a corresponding PartitionDispatch
367 * object gets added to *pds.
369 * About the values in pd->indexes: for a leaf partition, it contains the
370 * leaf partition's position in the global list *leaf_part_oids minus 1,
371 * whereas for a partitioned table partition, it contains the partition's
372 * position in the global list *pds multiplied by -1. The latter is
373 * multiplied by -1 to distinguish partitioned tables from leaf partitions
374 * when going through the values in pd->indexes. So, for example, when
375 * using it during tuple-routing, encountering a value >= 0 means we found
376 * a leaf partition. It is immediately returned as the index in the array
377 * of ResultRelInfos of all the leaf partitions, using which we insert the
378 * tuple into that leaf partition. A negative value means we found a
379 * partitioned table. The value multiplied by -1 is returned as the index
380 * in the array of PartitionDispatch objects of all partitioned tables in
381 * the tree. This value is used to continue the search in the next level
382 * of the partition tree.
384 pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
385 for (i = 0; i < partdesc->nparts; i++)
387 Oid partrelid = partdesc->oids[i];
389 if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
391 *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
/* The OID was just appended, so length - 1 is its zero-based position. */
392 pd->indexes[i] = list_length(*leaf_part_oids) - 1;
397 * We assume all tables in the partition tree were already locked
400 Relation partrel = heap_open(partrelid, NoLock);
/*
 * Must be computed before recursing: the recursive call appends the
 * child's PartitionDispatch to *pds first thing, so the current list
 * length is the position the child will occupy.
 */
402 pd->indexes[i] = -list_length(*pds);
403 get_partition_dispatch_recurse(partrel, rel, pds, leaf_part_oids);
409 * FormPartitionKeyDatum
410 * Construct values[] and isnull[] arrays for the partition key
413 * pd Partition dispatch object of the partitioned table
414 * slot Heap tuple from which to extract partition key
415 * estate executor state for evaluating any partition key
416 * expressions (must be non-NULL)
417 * values Array of partition key Datums (output area)
418 * isnull Array of is-null indicators (output area)
420 * The ecxt_scantuple slot of estate's per-tuple expr context must point to
421 * the heap tuple passed in.
/*
 * FormPartitionKeyDatum
 *
 * Produces one value per partition key column (pd->key->partnatts of them).
 * A plain column is read from 'slot' with slot_getattr(); an expression
 * column is evaluated from the next entry of pd->keystate in the per-tuple
 * expression context.  pd->keystate is built on first use with
 * ExecPrepareExprList().  An error is raised if the expression states run
 * out early or if any are left over after the last key column.
 *
 * NOTE(review): the remaining parameters, the declarations of 'i', 'datum'
 * and 'isNull', the test that separates plain columns from expressions, and
 * the stores into the output arrays are on lines missing from this listing.
 */
425 FormPartitionKeyDatum(PartitionDispatch pd,
426 TupleTableSlot *slot,
431 ListCell *partexpr_item;
/* Entered only while keystate is still NIL, i.e. before it has been built. */
434 if (pd->key->partexprs != NIL && pd->keystate == NIL)
436 /* Check caller has set up context correctly */
437 Assert(estate != NULL &&
438 GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
440 /* First time through, set up expression evaluation state */
441 pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
/* Expression states are consumed in order, one per expression key column. */
444 partexpr_item = list_head(pd->keystate);
445 for (i = 0; i < pd->key->partnatts; i++)
447 AttrNumber keycol = pd->key->partattrs[i];
453 /* Plain column; get the value directly from the heap tuple */
454 datum = slot_getattr(slot, keycol, &isNull);
458 /* Expression; need to evaluate it */
459 if (partexpr_item == NULL)
460 elog(ERROR, "wrong number of partition key expressions");
461 datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
462 GetPerTupleExprContext(estate),
464 partexpr_item = lnext(partexpr_item);
/* Leftover states mean the list holds more expressions than the key used. */
470 if (partexpr_item != NULL)
471 elog(ERROR, "wrong number of partition key expressions");
475 * ExecBuildSlotPartitionKeyDescription
477 * This works very much like BuildIndexValueDescription() and is currently
478 * used for building error messages when ExecFindPartition() fails to find
479 * a partition for a row.
/*
 * ExecBuildSlotPartitionKeyDescription
 *
 * Builds a string of the form "(columns) = (values)" describing the
 * partition key of a row, for use in error messages.  Access checks come
 * first: whether row-level security is enabled for the relation, then
 * table-level SELECT privilege, and failing that, SELECT privilege on each
 * key column (an expression column, reported as InvalidAttrNumber, fails
 * this check).  Each value is rendered with its type's output function;
 * a value longer than maxfieldlen is clipped with pg_mbcliplen() and
 * followed by "...".
 *
 * NOTE(review): the remaining parameters, the declarations of 'buf', 'i',
 * 'aclresult', 'val' and related locals, the statements executed when an
 * access check fails, any NULL handling, and the end of the function are
 * on lines missing from this listing.
 */
482 ExecBuildSlotPartitionKeyDescription(Relation rel,
488 PartitionKey key = RelationGetPartitionKey(rel);
489 int partnatts = get_partition_natts(key);
491 Oid relid = RelationGetRelid(rel);
/* NOTE(review): the action taken when RLS is enabled is not visible here. */
494 if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
497 /* If the user has table-level access, just go build the description. */
498 aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
499 if (aclresult != ACLCHECK_OK)
502 * Step through the columns of the partition key and make sure the
503 * user has SELECT rights on all of them.
505 for (i = 0; i < partnatts; i++)
507 AttrNumber attnum = get_partition_col_attnum(key, i);
510 * If this partition key column is an expression, we return no
511 * detail rather than try to figure out what column(s) the
512 * expression includes and if the user has SELECT rights on them.
514 if (attnum == InvalidAttrNumber ||
515 pg_attribute_aclcheck(relid, attnum, GetUserId(),
516 ACL_SELECT) != ACLCHECK_OK)
/* Left-hand side: the key column list, followed by the opening of the value list. */
521 initStringInfo(&buf);
522 appendStringInfo(&buf, "(%s) = (",
523 pg_get_partkeydef_columns(relid, true));
525 for (i = 0; i < partnatts; i++)
/* Look up the output function for this key column's type and call it. */
537 getTypeOutputInfo(get_partition_col_typid(key, i),
538 &foutoid, &typisvarlena);
539 val = OidOutputFunctionCall(foutoid, values[i]);
/* Separator between values; presumably skipped for the first one (guard not visible). */
543 appendStringInfoString(&buf, ", ");
545 /* truncate if needed */
546 vallen = strlen(val);
547 if (vallen <= maxfieldlen)
548 appendStringInfoString(&buf, val);
/* Too long: append only the clipped length, then an ellipsis. */
551 vallen = pg_mbcliplen(val, vallen, maxfieldlen);
552 appendBinaryStringInfo(&buf, val, vallen);
553 appendStringInfoString(&buf, "...");
557 appendStringInfoChar(&buf, ')');