]> granicus.if.org Git - postgresql/commitdiff
Fix multi-column range partitioning constraints.
authorRobert Haas <rhaas@postgresql.org>
Sat, 13 May 2017 15:35:30 +0000 (11:35 -0400)
committerRobert Haas <rhaas@postgresql.org>
Sat, 13 May 2017 15:36:41 +0000 (11:36 -0400)
The old logic was just plain wrong.

Report by Olaf Gawenda.  Patch by Amit Langote, reviewed by
Beena Emerson and by me.  Minor adjustments by me also.

src/backend/catalog/partition.c
src/include/nodes/pg_list.h
src/test/regress/expected/inherit.out
src/test/regress/expected/insert.out
src/test/regress/sql/inherit.sql
src/test/regress/sql/insert.sql

index 8641ae16a265e82a2f4bb4f8593a62ad74e36cfa..9ef966607f8de4f974379d94b12bb01c60904e13 100644 (file)
@@ -118,10 +118,18 @@ static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
                                                   void *arg);
 
-static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
-static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
 static Oid get_partition_operator(PartitionKey key, int col,
                                           StrategyNumber strategy, bool *need_relabel);
+static Expr *make_partition_op_expr(PartitionKey key, int keynum,
+                                          uint16 strategy, Expr *arg1, Expr *arg2);
+static void get_range_key_properties(PartitionKey key, int keynum,
+                                                PartitionRangeDatum *ldatum,
+                                                PartitionRangeDatum *udatum,
+                                                ListCell **partexprs_item,
+                                                Expr **keyCol,
+                                                Const **lower_val, Const **upper_val);
+static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
+static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
 static List *generate_partition_qual(Relation rel);
 
 static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
@@ -1145,6 +1153,123 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 
 /* Module-local functions */
 
+/*
+ * get_partition_operator
+ *
+ * Return oid of the operator of given strategy for a given partition key
+ * column.
+ */
+static Oid
+get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
+                                          bool *need_relabel)
+{
+       Oid                     operoid;
+
+       /*
+        * First check if there exists an operator of the given strategy, with
+        * this column's type as both its lefttype and righttype, in the
+        * partitioning operator family specified for the column.
+        */
+       operoid = get_opfamily_member(key->partopfamily[col],
+                                                                 key->parttypid[col],
+                                                                 key->parttypid[col],
+                                                                 strategy);
+
+       /*
+        * If one doesn't exist, we must resort to using an operator in the same
+        * opreator family but with the operator class declared input type.  It is
+        * OK to do so, because the column's type is known to be binary-coercible
+        * with the operator class input type (otherwise, the operator class in
+        * question would not have been accepted as the partitioning operator
+        * class).  We must however inform the caller to wrap the non-Const
+        * expression with a RelabelType node to denote the implicit coercion. It
+        * ensures that the resulting expression structurally matches similarly
+        * processed expressions within the optimizer.
+        */
+       if (!OidIsValid(operoid))
+       {
+               operoid = get_opfamily_member(key->partopfamily[col],
+                                                                         key->partopcintype[col],
+                                                                         key->partopcintype[col],
+                                                                         strategy);
+               *need_relabel = true;
+       }
+       else
+               *need_relabel = false;
+
+       if (!OidIsValid(operoid))
+               elog(ERROR, "could not find operator for partitioning");
+
+       return operoid;
+}
+
+/*
+ * make_partition_op_expr
+ *             Returns an Expr for the given partition key column with arg1 and
+ *             arg2 as its leftop and rightop, respectively
+ */
+static Expr *
+make_partition_op_expr(PartitionKey key, int keynum,
+                                          uint16 strategy, Expr *arg1, Expr *arg2)
+{
+       Oid                     operoid;
+       bool            need_relabel = false;
+       Expr       *result = NULL;
+
+       /* Get the correct btree operator for this partitioning column */
+       operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
+
+       /*
+        * Chosen operator may be such that the non-Const operand needs to be
+        * coerced, so apply the same; see the comment in
+        * get_partition_operator().
+        */
+       if (!IsA(arg1, Const) &&
+               (need_relabel ||
+                key->partcollation[keynum] != key->parttypcoll[keynum]))
+               arg1 = (Expr *) makeRelabelType(arg1,
+                                                                               key->partopcintype[keynum],
+                                                                               -1,
+                                                                               key->partcollation[keynum],
+                                                                               COERCE_EXPLICIT_CAST);
+
+       /* Generate the actual expression */
+       switch (key->strategy)
+       {
+               case PARTITION_STRATEGY_LIST:
+                       {
+                               ScalarArrayOpExpr *saopexpr;
+
+                               /* Build leftop = ANY (rightop) */
+                               saopexpr = makeNode(ScalarArrayOpExpr);
+                               saopexpr->opno = operoid;
+                               saopexpr->opfuncid = get_opcode(operoid);
+                               saopexpr->useOr = true;
+                               saopexpr->inputcollid = key->partcollation[0];
+                               saopexpr->args = list_make2(arg1, arg2);
+                               saopexpr->location = -1;
+
+                               result = (Expr *) saopexpr;
+                               break;
+                       }
+
+               case PARTITION_STRATEGY_RANGE:
+                       result = make_opclause(operoid,
+                                                                  BOOLOID,
+                                                                  false,
+                                                                  arg1, arg2,
+                                                                  InvalidOid,
+                                                                  key->partcollation[keynum]);
+                       break;
+
+               default:
+                       elog(ERROR, "invalid partitioning strategy");
+                       break;
+       }
+
+       return result;
+}
+
 /*
  * get_qual_for_list
  *
@@ -1155,14 +1280,12 @@ get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
 {
        List       *result;
        ArrayExpr  *arr;
-       ScalarArrayOpExpr *opexpr;
+       Expr       *opexpr;
        ListCell   *cell,
                           *prev,
                           *next;
        Expr       *keyCol;
-       Oid                     operoid;
-       bool            need_relabel,
-                               list_has_null = false;
+       bool            list_has_null = false;
        NullTest   *nulltest1 = NULL,
                           *nulltest2 = NULL;
 
@@ -1233,24 +1356,9 @@ get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
        arr->multidims = false;
        arr->location = -1;
 
-       /* Get the correct btree equality operator */
-       operoid = get_partition_operator(key, 0, BTEqualStrategyNumber,
-                                                                        &need_relabel);
-       if (need_relabel || key->partcollation[0] != key->parttypcoll[0])
-               keyCol = (Expr *) makeRelabelType(keyCol,
-                                                                                 key->partopcintype[0],
-                                                                                 -1,
-                                                                                 key->partcollation[0],
-                                                                                 COERCE_EXPLICIT_CAST);
-
-       /* Build leftop = ANY (rightop) */
-       opexpr = makeNode(ScalarArrayOpExpr);
-       opexpr->opno = operoid;
-       opexpr->opfuncid = get_opcode(operoid);
-       opexpr->useOr = true;
-       opexpr->inputcollid = key->partcollation[0];
-       opexpr->args = list_make2(keyCol, arr);
-       opexpr->location = -1;
+       /* Generate the main expression, i.e., keyCol = ANY (arr) */
+       opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
+                                                                       keyCol, (Expr *) arr);
 
        if (nulltest1)
                result = list_make2(nulltest1, opexpr);
@@ -1267,10 +1375,91 @@ get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
        return result;
 }
 
+/*
+ * get_range_key_properties
+ *             Returns range partition key information for a given column
+ *
+ * On return, *partexprs_item points to the cell containing the next
+ * expression in the key->partexprs list, or NULL.
+ */
+static void
+get_range_key_properties(PartitionKey key, int keynum,
+                                                PartitionRangeDatum *ldatum,
+                                                PartitionRangeDatum *udatum,
+                                                ListCell **partexprs_item,
+                                                Expr **keyCol,
+                                                Const **lower_val, Const **upper_val)
+{
+       /* Partition key expression for this column */
+       if (key->partattrs[keynum] != 0)
+       {
+               *keyCol = (Expr *) makeVar(1,
+                                                                  key->partattrs[keynum],
+                                                                  key->parttypid[keynum],
+                                                                  key->parttypmod[keynum],
+                                                                  key->parttypcoll[keynum],
+                                                                  0);
+       }
+       else
+       {
+               *keyCol = copyObject(lfirst(*partexprs_item));
+               *partexprs_item = lnext(*partexprs_item);
+       }
+
+       if (!ldatum->infinite)
+               *lower_val = (Const *) ldatum->value;
+       else
+               *lower_val = NULL;
+
+       if (!udatum->infinite)
+               *upper_val = (Const *) udatum->value;
+       else
+               *upper_val = NULL;
+}
+
 /*
  * get_qual_for_range
  *
- * Get a list of OpExpr's to use as a range partition's constraint.
+ * Get a list of expressions to use as a range partition's constraint.
+ * If there are multiple expressions, they are to be considered implicitly
+ * ANDed.
+ *
+ * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
+ * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
+ * generate an expression tree of the following form:
+ *
+ *     (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
+ *             AND
+ *     (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
+ *
+ * If, say, b were an expression key instead of a simple column, we also
+ * append (b IS NOT NULL) to the AND's argument list.
+ *
+ * It is often the case that a prefix of lower and upper bound tuples contains
+ * the same values, for example, (al = au), in which case, we will emit an
+ * expression tree of the following form:
+ *
+ *     (a = al)
+ *             AND
+ *     (b > bl OR (b = bl AND c >= cl))
+ *             AND
+ *     (b < bu) OR (b = bu AND c < cu))
+ *
+ * If cu happens to be UNBOUNDED, we need not emit any expression for it, so
+ * the last line would be:
+ *
+ *     (b < bu) OR (b = bu), which is simplified to (b <= bu)
+ *
+ * In most common cases with only one partition column, say a, the following
+ * expression tree will be generated: a >= al AND a < au
+ *
+ * If all values of both lower and upper bounds are UNBOUNDED, the partition
+ * does not really have a constraint, except the IS NOT NULL constraint for
+ * any expression keys.
+ *
+ * If we end up with an empty result list, we append return a single-member
+ * list containing a constant-true expression in that case, because callers
+ * expect a non-empty list.
  */
 static List *
 get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
@@ -1278,56 +1467,49 @@ get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
        List       *result = NIL;
        ListCell   *cell1,
                           *cell2,
-                          *partexprs_item;
-       int                     i;
+                          *partexprs_item,
+                          *partexprs_item_saved;
+       int                     i,
+                               j;
+       PartitionRangeDatum *ldatum,
+                          *udatum;
+       Expr       *keyCol;
+       Const      *lower_val,
+                          *upper_val;
+       NullTest   *nulltest;
+       List       *lower_or_arms,
+                          *upper_or_arms;
+       int                     num_or_arms,
+                               current_or_arm;
+       ListCell   *lower_or_start_datum,
+                          *upper_or_start_datum;
+       bool            need_next_lower_arm,
+                               need_next_upper_arm;
+
+       lower_or_start_datum = list_head(spec->lowerdatums);
+       upper_or_start_datum = list_head(spec->upperdatums);
+       num_or_arms = key->partnatts;
 
        /*
-        * Iterate over columns of the key, emitting an OpExpr for each using the
-        * corresponding lower and upper datums as constant operands.
+        * A range-partitioned table does not allow partition keys to be null. For
+        * simple columns, their NOT NULL constraint suffices for the enforcement
+        * of non-nullability.  But for the expression keys, which are still
+        * nullable, we must emit a IS NOT NULL expression.  Collect them in
+        * result first.
         */
-       i = 0;
        partexprs_item = list_head(key->partexprs);
-       forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
+       for (i = 0; i < key->partnatts; i++)
        {
-               PartitionRangeDatum *ldatum = lfirst(cell1),
-                                  *udatum = lfirst(cell2);
-               Expr       *keyCol;
-               Const      *lower_val = NULL,
-                                  *upper_val = NULL;
-               EState     *estate;
-               MemoryContext oldcxt;
-               Expr       *test_expr;
-               ExprState  *test_exprstate;
-               Datum           test_result;
-               bool            isNull;
-               bool            need_relabel = false;
-               Oid                     operoid;
-               NullTest   *nulltest;
-
-               /* Left operand */
-               if (key->partattrs[i] != 0)
-               {
-                       keyCol = (Expr *) makeVar(1,
-                                                                         key->partattrs[i],
-                                                                         key->parttypid[i],
-                                                                         key->parttypmod[i],
-                                                                         key->parttypcoll[i],
-                                                                         0);
-               }
-               else
+               if (key->partattrs[i] == 0)
                {
-                       keyCol = copyObject(lfirst(partexprs_item));
+                       Expr       *keyCol;
+
+                       if (partexprs_item == NULL)
+                               elog(ERROR, "wrong number of partition key expressions");
+                       keyCol = lfirst(partexprs_item);
                        partexprs_item = lnext(partexprs_item);
-               }
+                       Assert(!IsA(keyCol, Var));
 
-               /*
-                * Emit a IS NOT NULL expression for non-Var keys, because whereas
-                * simple attributes are covered by NOT NULL constraints, expression
-                * keys are still nullable which is not acceptable in case of range
-                * partitioning.
-                */
-               if (!IsA(keyCol, Var))
-               {
                        nulltest = makeNode(NullTest);
                        nulltest->arg = keyCol;
                        nulltest->nulltesttype = IS_NOT_NULL;
@@ -1335,182 +1517,221 @@ get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
                        nulltest->location = -1;
                        result = lappend(result, nulltest);
                }
+       }
+
+       /*
+        * Iterate over the key columns and check if the corresponding lower and
+        * upper datums are equal using the btree equality operator for the
+        * column's type.  If equal, we emit single keyCol = common_value
+        * expression.  Starting from the first column for which the corresponding
+        * lower and upper bound datums are not equal, we generate OR expressions
+        * as shown in the function's header comment.
+        */
+       i = 0;
+       partexprs_item = list_head(key->partexprs);
+       forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
+       {
+               EState     *estate;
+               MemoryContext oldcxt;
+               Expr       *test_expr;
+               ExprState  *test_exprstate;
+               Datum           test_result;
+               bool            isNull;
+
+               ldatum = lfirst(cell1);
+               udatum = lfirst(cell2);
 
                /*
-                * Stop at this column if either of lower or upper datum is infinite,
-                * but do emit an OpExpr for the non-infinite datum.
+                * Since get_range_key_properties() modifies partexprs_item, and we
+                * might need to start over from the previous expression in the later
+                * part of this functiom, save away the current value.
                 */
-               if (!ldatum->infinite)
-                       lower_val = (Const *) ldatum->value;
-               if (!udatum->infinite)
-                       upper_val = (Const *) udatum->value;
+               partexprs_item_saved = partexprs_item;
+
+               get_range_key_properties(key, i, ldatum, udatum,
+                                                                &partexprs_item,
+                                                                &keyCol,
+                                                                &lower_val, &upper_val);
 
                /*
-                * If lower_val and upper_val are both finite and happen to be equal,
-                * emit only (keyCol = lower_val) for this column, because all rows in
-                * this partition could only ever contain this value (ie, lower_val)
-                * in the current partitioning column.  We must consider further
-                * columns because the above condition does not fully constrain the
-                * rows of this partition.
+                * If either or both of lower_val and upper_val is NULL, they are
+                * unequal, because being NULL means the column is unbounded in the
+                * respective direction.
                 */
-               if (lower_val && upper_val)
-               {
-                       /* Get the correct btree equality operator for the test */
-                       operoid = get_partition_operator(key, i, BTEqualStrategyNumber,
-                                                                                        &need_relabel);
-
-                       /* Create the test expression */
-                       estate = CreateExecutorState();
-                       oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
-                       test_expr = make_opclause(operoid,
-                                                                         BOOLOID,
-                                                                         false,
-                                                                         (Expr *) lower_val,
-                                                                         (Expr *) upper_val,
-                                                                         InvalidOid,
-                                                                         key->partcollation[i]);
-                       fix_opfuncids((Node *) test_expr);
-                       test_exprstate = ExecInitExpr(test_expr, NULL);
-                       test_result = ExecEvalExprSwitchContext(test_exprstate,
+               if (!lower_val || !upper_val)
+                       break;
+
+               /* Create the test expression */
+               estate = CreateExecutorState();
+               oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
+               test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
+                                                                                  (Expr *) lower_val,
+                                                                                  (Expr *) upper_val);
+               fix_opfuncids((Node *) test_expr);
+               test_exprstate = ExecInitExpr(test_expr, NULL);
+               test_result = ExecEvalExprSwitchContext(test_exprstate,
                                                                                          GetPerTupleExprContext(estate),
-                                                                                                       &isNull);
-                       MemoryContextSwitchTo(oldcxt);
-                       FreeExecutorState(estate);
+                                                                                               &isNull);
+               MemoryContextSwitchTo(oldcxt);
+               FreeExecutorState(estate);
 
-                       if (DatumGetBool(test_result))
-                       {
-                               /* This can never be, but it's better to make sure */
-                               if (i == key->partnatts - 1)
-                                       elog(ERROR, "invalid range bound specification");
-
-                               if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
-                                       keyCol = (Expr *) makeRelabelType(keyCol,
-                                                                                                         key->partopcintype[i],
-                                                                                                         -1,
-                                                                                                         key->partcollation[i],
-                                                                                                         COERCE_EXPLICIT_CAST);
-                               result = lappend(result,
-                                                                make_opclause(operoid,
-                                                                                          BOOLOID,
-                                                                                          false,
-                                                                                          keyCol,
-                                                                                          (Expr *) lower_val,
-                                                                                          InvalidOid,
-                                                                                          key->partcollation[i]));
-
-                               /* Go over to consider the next column. */
-                               i++;
-                               continue;
-                       }
-               }
+               /* If not equal, go generate the OR expressions */
+               if (!DatumGetBool(test_result))
+                       break;
 
                /*
-                * We can say here that lower_val != upper_val.  Emit expressions
-                * (keyCol >= lower_val) and (keyCol < upper_val), then stop.
+                * The bounds for the last key column can't be equal, because such a
+                * range partition would never be allowed to be defined (it would have
+                * an empty range otherwise).
                 */
-               if (lower_val)
-               {
-                       operoid = get_partition_operator(key, i,
-                                                                                        BTGreaterEqualStrategyNumber,
-                                                                                        &need_relabel);
-
-                       if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
-                               keyCol = (Expr *) makeRelabelType(keyCol,
-                                                                                                 key->partopcintype[i],
-                                                                                                 -1,
-                                                                                                 key->partcollation[i],
-                                                                                                 COERCE_EXPLICIT_CAST);
-                       result = lappend(result,
-                                                        make_opclause(operoid,
-                                                                                  BOOLOID,
-                                                                                  false,
-                                                                                  keyCol,
-                                                                                  (Expr *) lower_val,
-                                                                                  InvalidOid,
-                                                                                  key->partcollation[i]));
-               }
+               if (i == key->partnatts - 1)
+                       elog(ERROR, "invalid range bound specification");
 
-               if (upper_val)
+               /* Equal, so generate keyCol = lower_val expression */
+               result = lappend(result,
+                                                make_partition_op_expr(key, i, BTEqualStrategyNumber,
+                                                                                               keyCol, (Expr *) lower_val));
+
+               i++;
+       }
+
+       /* First pair of lower_val and upper_val that are not equal. */
+       lower_or_start_datum = cell1;
+       upper_or_start_datum = cell2;
+
+       /* OR will have as many arms as there are key columns left. */
+       num_or_arms = key->partnatts - i;
+       current_or_arm = 0;
+       lower_or_arms = upper_or_arms = NIL;
+       need_next_lower_arm = need_next_upper_arm = true;
+       while (current_or_arm < num_or_arms)
+       {
+               List       *lower_or_arm_args = NIL,
+                                  *upper_or_arm_args = NIL;
+
+               j = i;
+               partexprs_item = partexprs_item_saved;
+
+               for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
                {
-                       operoid = get_partition_operator(key, i,
-                                                                                        BTLessStrategyNumber,
-                                                                                        &need_relabel);
-
-                       if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
-                               keyCol = (Expr *) makeRelabelType(keyCol,
-                                                                                                 key->partopcintype[i],
-                                                                                                 -1,
-                                                                                                 key->partcollation[i],
-                                                                                                 COERCE_EXPLICIT_CAST);
-
-                       result = lappend(result,
-                                                        make_opclause(operoid,
-                                                                                  BOOLOID,
-                                                                                  false,
-                                                                                  keyCol,
-                                                                                  (Expr *) upper_val,
-                                                                                  InvalidOid,
-                                                                                  key->partcollation[i]));
+                       PartitionRangeDatum *ldatum_next = NULL,
+                                          *udatum_next = NULL;
+
+                       ldatum = lfirst(cell1);
+                       if (lnext(cell1))
+                               ldatum_next = lfirst(lnext(cell1));
+                       udatum = lfirst(cell2);
+                       if (lnext(cell2))
+                               udatum_next = lfirst(lnext(cell2));
+                       get_range_key_properties(key, j, ldatum, udatum,
+                                                                        &partexprs_item,
+                                                                        &keyCol,
+                                                                        &lower_val, &upper_val);
+
+                       if (need_next_lower_arm && lower_val)
+                       {
+                               uint16          strategy;
+
+                               /*
+                                * For the non-last columns of this arm, use the EQ operator.
+                                * For the last or the last finite-valued column, use GE.
+                                */
+                               if (j - i < current_or_arm)
+                                       strategy = BTEqualStrategyNumber;
+                               else if ((ldatum_next && ldatum_next->infinite) ||
+                                                j == key->partnatts - 1)
+                                       strategy = BTGreaterEqualStrategyNumber;
+                               else
+                                       strategy = BTGreaterStrategyNumber;
+
+                               lower_or_arm_args = lappend(lower_or_arm_args,
+                                                                                       make_partition_op_expr(key, j,
+                                                                                                                                  strategy,
+                                                                                                                                  keyCol,
+                                                                                                               (Expr *) lower_val));
+                       }
+
+                       if (need_next_upper_arm && upper_val)
+                       {
+                               uint16          strategy;
+
+                               /*
+                                * For the non-last columns of this arm, use the EQ operator.
+                                * For the last finite-valued column, use LE.
+                                */
+                               if (j - i < current_or_arm)
+                                       strategy = BTEqualStrategyNumber;
+                               else if (udatum_next && udatum_next->infinite)
+                                       strategy = BTLessEqualStrategyNumber;
+                               else
+                                       strategy = BTLessStrategyNumber;
+
+                               upper_or_arm_args = lappend(upper_or_arm_args,
+                                                                                       make_partition_op_expr(key, j,
+                                                                                                                                  strategy,
+                                                                                                                                  keyCol,
+                                                                                                               (Expr *) upper_val));
+
+                       }
+
+                       /*
+                        * Did we generate enough of OR's arguments?  First arm considers
+                        * the first of the remaining columns, second arm considers first
+                        * two of the remaining columns, and so on.
+                        */
+                       ++j;
+                       if (j - i > current_or_arm)
+                       {
+                               /*
+                                * We need not emit the next arm if the new column that will
+                                * be considered is unbounded.
+                                */
+                               need_next_lower_arm = ldatum_next && !ldatum_next->infinite;
+                               need_next_upper_arm = udatum_next && !udatum_next->infinite;
+                               break;
+                       }
                }
 
-               /*
-                * We can stop at this column, because we would not have checked the
-                * next column when routing a given row into this partition.
-                */
-               break;
-       }
+               if (lower_or_arm_args != NIL)
+                       lower_or_arms = lappend(lower_or_arms,
+                                                                       list_length(lower_or_arm_args) > 1
+                                                         ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
+                                                                       : linitial(lower_or_arm_args));
 
-       return result;
-}
+               if (upper_or_arm_args != NIL)
+                       upper_or_arms = lappend(upper_or_arms,
+                                                                       list_length(upper_or_arm_args) > 1
+                                                         ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
+                                                                       : linitial(upper_or_arm_args));
 
-/*
- * get_partition_operator
- *
- * Return oid of the operator of given strategy for a given partition key
- * column.
- */
-static Oid
-get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
-                                          bool *need_relabel)
-{
-       Oid                     operoid;
+               /* If no work to do in the next iteration, break away. */
+               if (!need_next_lower_arm && !need_next_upper_arm)
+                       break;
 
-       /*
-        * First check if there exists an operator of the given strategy, with
-        * this column's type as both its lefttype and righttype, in the
-        * partitioning operator family specified for the column.
-        */
-       operoid = get_opfamily_member(key->partopfamily[col],
-                                                                 key->parttypid[col],
-                                                                 key->parttypid[col],
-                                                                 strategy);
+               ++current_or_arm;
+       }
 
        /*
-        * If one doesn't exist, we must resort to using an operator in the same
-        * opreator family but with the operator class declared input type.  It is
-        * OK to do so, because the column's type is known to be binary-coercible
-        * with the operator class input type (otherwise, the operator class in
-        * question would not have been accepted as the partitioning operator
-        * class).  We must however inform the caller to wrap the non-Const
-        * expression with a RelabelType node to denote the implicit coercion. It
-        * ensures that the resulting expression structurally matches similarly
-        * processed expressions within the optimizer.
+        * Generate the OR expressions for each of lower and upper bounds (if
+        * required), and append to the list of implicitly ANDed list of
+        * expressions.
         */
-       if (!OidIsValid(operoid))
-       {
-               operoid = get_opfamily_member(key->partopfamily[col],
-                                                                         key->partopcintype[col],
-                                                                         key->partopcintype[col],
-                                                                         strategy);
-               *need_relabel = true;
-       }
-       else
-               *need_relabel = false;
+       if (lower_or_arms != NIL)
+               result = lappend(result,
+                                                list_length(lower_or_arms) > 1
+                                                ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
+                                                : linitial(lower_or_arms));
+       if (upper_or_arms != NIL)
+               result = lappend(result,
+                                                list_length(upper_or_arms) > 1
+                                                ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
+                                                : linitial(upper_or_arms));
+
+       /* As noted above, caller expects the list to be non-empty. */
+       if (result == NULL)
+               result = list_make1(makeBoolConst(true, false));
 
-       if (!OidIsValid(operoid))
-               elog(ERROR, "could not find operator for partitioning");
-
-       return operoid;
+       return result;
 }
 
 /*
index 9df7fb30d302a0f682b71c20aa51665b0c4bd558..3313b051ceceef6e3ef637ef1945563746fe6368 100644 (file)
@@ -182,6 +182,20 @@ list_length(const List *l)
                 (cell1) != NULL && (cell2) != NULL;                                            \
                 (cell1) = lnext(cell1), (cell2) = lnext(cell2))
 
+/*
+ * for_both_cell -
+ *       a convenience macro which loops through two lists starting from the
+ *       specified cells of each. This macro loops through both lists at the same
+ *       time, stopping when either list runs out of elements.  Depending on the
+ *       requirements of the call site, it may also be wise to assert that the
+ *       lengths of the two lists are equal, and initcell1 and initcell2 are at
+ *       the same position in the respective lists.
+ */
+#define for_both_cell(cell1, initcell1, cell2, initcell2)      \
+       for ((cell1) = (initcell1), (cell2) = (initcell2);              \
+                (cell1) != NULL && (cell2) != NULL;                            \
+                (cell1) = lnext(cell1), (cell2) = lnext(cell2))
+
 /*
  * forthree -
  *       the same for three lists
index 6163ed811769e525744c251bf08d0ba835150643..af7090ba0d61af84e5f4166c55caddbaddfb3344 100644 (file)
@@ -1828,3 +1828,93 @@ explain (costs off) select * from range_list_parted where a >= 30;
 
 drop table list_parted;
 drop table range_list_parted;
+-- check that constraint exclusion is able to cope with the partition
+-- constraint emitted for multi-column range partitioned tables
+create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c);
+create table mcrparted0 partition of mcrparted for values from (unbounded, unbounded, unbounded) to (1, 1, 1);
+create table mcrparted1 partition of mcrparted for values from (1, 1, 1) to (10, 5, 10);
+create table mcrparted2 partition of mcrparted for values from (10, 5, 10) to (10, 10, 10);
+create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10);
+create table mcrparted4 partition of mcrparted for values from (20, 10, 10) to (20, 20, 20);
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (unbounded, unbounded, unbounded);
+explain (costs off) select * from mcrparted where a = 0;       -- scans mcrparted0
+          QUERY PLAN          
+------------------------------
+ Append
+   ->  Seq Scan on mcrparted0
+         Filter: (a = 0)
+(3 rows)
+
+explain (costs off) select * from mcrparted where a = 10 and abs(b) < 5;       -- scans mcrparted1
+                 QUERY PLAN                  
+---------------------------------------------
+ Append
+   ->  Seq Scan on mcrparted1
+         Filter: ((a = 10) AND (abs(b) < 5))
+(3 rows)
+
+explain (costs off) select * from mcrparted where a = 10 and abs(b) = 5;       -- scans mcrparted1, mcrparted2
+                 QUERY PLAN                  
+---------------------------------------------
+ Append
+   ->  Seq Scan on mcrparted1
+         Filter: ((a = 10) AND (abs(b) = 5))
+   ->  Seq Scan on mcrparted2
+         Filter: ((a = 10) AND (abs(b) = 5))
+(5 rows)
+
+explain (costs off) select * from mcrparted where abs(b) = 5;  -- scans all partitions
+          QUERY PLAN          
+------------------------------
+ Append
+   ->  Seq Scan on mcrparted0
+         Filter: (abs(b) = 5)
+   ->  Seq Scan on mcrparted1
+         Filter: (abs(b) = 5)
+   ->  Seq Scan on mcrparted2
+         Filter: (abs(b) = 5)
+   ->  Seq Scan on mcrparted3
+         Filter: (abs(b) = 5)
+   ->  Seq Scan on mcrparted5
+         Filter: (abs(b) = 5)
+(11 rows)
+
+explain (costs off) select * from mcrparted where a > -1;      -- scans all partitions
+             QUERY PLAN              
+-------------------------------------
+ Append
+   ->  Seq Scan on mcrparted0
+         Filter: (a > '-1'::integer)
+   ->  Seq Scan on mcrparted1
+         Filter: (a > '-1'::integer)
+   ->  Seq Scan on mcrparted2
+         Filter: (a > '-1'::integer)
+   ->  Seq Scan on mcrparted3
+         Filter: (a > '-1'::integer)
+   ->  Seq Scan on mcrparted4
+         Filter: (a > '-1'::integer)
+   ->  Seq Scan on mcrparted5
+         Filter: (a > '-1'::integer)
+(13 rows)
+
+explain (costs off) select * from mcrparted where a = 20 and abs(b) = 10 and c > 10;   -- scans mcrparted4
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Append
+   ->  Seq Scan on mcrparted4
+         Filter: ((c > 10) AND (a = 20) AND (abs(b) = 10))
+(3 rows)
+
+explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mcrparted3, mcrparte4, mcrparte5
+               QUERY PLAN                
+-----------------------------------------
+ Append
+   ->  Seq Scan on mcrparted3
+         Filter: ((c > 20) AND (a = 20))
+   ->  Seq Scan on mcrparted4
+         Filter: ((c > 20) AND (a = 20))
+   ->  Seq Scan on mcrparted5
+         Filter: ((c > 20) AND (a = 20))
+(7 rows)
+
+drop table mcrparted;
index 6f34b1c640684274be4743c0b424df4fdacd2fe1..02429a37e3c06fbf0396c1561775d91a58478d15 100644 (file)
@@ -435,3 +435,62 @@ revoke all on key_desc from someone_else;
 revoke all on key_desc_1 from someone_else;
 drop role someone_else;
 drop table key_desc, key_desc_1;
+-- check multi-column range partitioning expression enforces the same
+-- constraint as what tuple-routing would determine it to be
+create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c);
+create table mcrparted0 partition of mcrparted for values from (unbounded, unbounded, unbounded) to (1, unbounded, unbounded);
+create table mcrparted1 partition of mcrparted for values from (2, 1, unbounded) to (10, 5, 10);
+create table mcrparted2 partition of mcrparted for values from (10, 6, unbounded) to (10, unbounded, unbounded);
+create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10);
+create table mcrparted4 partition of mcrparted for values from (21, unbounded, unbounded) to (30, 20, unbounded);
+create table mcrparted5 partition of mcrparted for values from (30, 21, 20) to (unbounded, unbounded, unbounded);
+-- routed to mcrparted0
+insert into mcrparted values (0, 1, 1);
+insert into mcrparted0 values (0, 1, 1);
+-- routed to mcparted1
+insert into mcrparted values (9, 1000, 1);
+insert into mcrparted1 values (9, 1000, 1);
+insert into mcrparted values (10, 5, -1);
+insert into mcrparted1 values (10, 5, -1);
+insert into mcrparted values (2, 1, 0);
+insert into mcrparted1 values (2, 1, 0);
+-- routed to mcparted2
+insert into mcrparted values (10, 6, 1000);
+insert into mcrparted2 values (10, 6, 1000);
+insert into mcrparted values (10, 1000, 1000);
+insert into mcrparted2 values (10, 1000, 1000);
+-- no partition exists, nor does mcrparted3 accept it
+insert into mcrparted values (11, 1, -1);
+ERROR:  no partition of relation "mcrparted" found for row
+DETAIL:  Partition key of the failing row contains (a, abs(b), c) = (11, 1, -1).
+insert into mcrparted3 values (11, 1, -1);
+ERROR:  new row for relation "mcrparted3" violates partition constraint
+DETAIL:  Failing row contains (11, 1, -1).
+-- routed to mcrparted5
+insert into mcrparted values (30, 21, 20);
+insert into mcrparted5 values (30, 21, 20);
+insert into mcrparted4 values (30, 21, 20);    -- error
+ERROR:  new row for relation "mcrparted4" violates partition constraint
+DETAIL:  Failing row contains (30, 21, 20).
+-- check rows
+select tableoid::regclass::text, * from mcrparted order by 1;
+  tableoid  | a  |  b   |  c   
+------------+----+------+------
+ mcrparted0 |  0 |    1 |    1
+ mcrparted0 |  0 |    1 |    1
+ mcrparted1 |  9 | 1000 |    1
+ mcrparted1 |  9 | 1000 |    1
+ mcrparted1 | 10 |    5 |   -1
+ mcrparted1 | 10 |    5 |   -1
+ mcrparted1 |  2 |    1 |    0
+ mcrparted1 |  2 |    1 |    0
+ mcrparted2 | 10 |    6 | 1000
+ mcrparted2 | 10 |    6 | 1000
+ mcrparted2 | 10 | 1000 | 1000
+ mcrparted2 | 10 | 1000 | 1000
+ mcrparted5 | 30 |   21 |   20
+ mcrparted5 | 30 |   21 |   20
+(14 rows)
+
+-- cleanup
+drop table mcrparted;
index d43b75c4a70e4a0c2e9c9efab3f833358a31da2f..7f34f43ec0c2cd05dd6b3beadced9d8317bb3b26 100644 (file)
@@ -643,3 +643,21 @@ explain (costs off) select * from range_list_parted where a >= 30;
 
 drop table list_parted;
 drop table range_list_parted;
+
+-- check that constraint exclusion is able to cope with the partition
+-- constraint emitted for multi-column range partitioned tables
+create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c);
+create table mcrparted0 partition of mcrparted for values from (unbounded, unbounded, unbounded) to (1, 1, 1);
+create table mcrparted1 partition of mcrparted for values from (1, 1, 1) to (10, 5, 10);
+create table mcrparted2 partition of mcrparted for values from (10, 5, 10) to (10, 10, 10);
+create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10);
+create table mcrparted4 partition of mcrparted for values from (20, 10, 10) to (20, 20, 20);
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (unbounded, unbounded, unbounded);
+explain (costs off) select * from mcrparted where a = 0;       -- scans mcrparted0
+explain (costs off) select * from mcrparted where a = 10 and abs(b) < 5;       -- scans mcrparted1
+explain (costs off) select * from mcrparted where a = 10 and abs(b) = 5;       -- scans mcrparted1, mcrparted2
+explain (costs off) select * from mcrparted where abs(b) = 5;  -- scans all partitions
+explain (costs off) select * from mcrparted where a > -1;      -- scans all partitions
+explain (costs off) select * from mcrparted where a = 20 and abs(b) = 10 and c > 10;   -- scans mcrparted4
+explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mcrparted3, mcrparte4, mcrparte5
+drop table mcrparted;
index 020854c960ee5056afee388b4817aa322babe8ba..db8967bad7ddffc6480752cf25589b5235bef552 100644 (file)
@@ -289,3 +289,46 @@ revoke all on key_desc from someone_else;
 revoke all on key_desc_1 from someone_else;
 drop role someone_else;
 drop table key_desc, key_desc_1;
+
+-- check multi-column range partitioning expression enforces the same
+-- constraint as what tuple-routing would determine it to be
+create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c);
+create table mcrparted0 partition of mcrparted for values from (unbounded, unbounded, unbounded) to (1, unbounded, unbounded);
+create table mcrparted1 partition of mcrparted for values from (2, 1, unbounded) to (10, 5, 10);
+create table mcrparted2 partition of mcrparted for values from (10, 6, unbounded) to (10, unbounded, unbounded);
+create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10);
+create table mcrparted4 partition of mcrparted for values from (21, unbounded, unbounded) to (30, 20, unbounded);
+create table mcrparted5 partition of mcrparted for values from (30, 21, 20) to (unbounded, unbounded, unbounded);
+
+-- routed to mcrparted0
+insert into mcrparted values (0, 1, 1);
+insert into mcrparted0 values (0, 1, 1);
+
+-- routed to mcparted1
+insert into mcrparted values (9, 1000, 1);
+insert into mcrparted1 values (9, 1000, 1);
+insert into mcrparted values (10, 5, -1);
+insert into mcrparted1 values (10, 5, -1);
+insert into mcrparted values (2, 1, 0);
+insert into mcrparted1 values (2, 1, 0);
+
+-- routed to mcparted2
+insert into mcrparted values (10, 6, 1000);
+insert into mcrparted2 values (10, 6, 1000);
+insert into mcrparted values (10, 1000, 1000);
+insert into mcrparted2 values (10, 1000, 1000);
+
+-- no partition exists, nor does mcrparted3 accept it
+insert into mcrparted values (11, 1, -1);
+insert into mcrparted3 values (11, 1, -1);
+
+-- routed to mcrparted5
+insert into mcrparted values (30, 21, 20);
+insert into mcrparted5 values (30, 21, 20);
+insert into mcrparted4 values (30, 21, 20);    -- error
+
+-- check rows
+select tableoid::regclass::text, * from mcrparted order by 1;
+
+-- cleanup
+drop table mcrparted;