]> granicus.if.org Git - postgresql/blob - src/backend/catalog/partition.c
b434c384c97aa33fd32ac97b44db3b9a49f2975c
[postgresql] / src / backend / catalog / partition.c
1 /*-------------------------------------------------------------------------
2  *
3  * partition.c
4  *                Partitioning related data structures and functions.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *                src/backend/catalog/partition.c
12  *
13  *-------------------------------------------------------------------------
14 */
15
16 #include "postgres.h"
17
18 #include "access/heapam.h"
19 #include "access/htup_details.h"
20 #include "access/nbtree.h"
21 #include "access/sysattr.h"
22 #include "catalog/dependency.h"
23 #include "catalog/indexing.h"
24 #include "catalog/objectaddress.h"
25 #include "catalog/partition.h"
26 #include "catalog/pg_collation.h"
27 #include "catalog/pg_inherits.h"
28 #include "catalog/pg_inherits_fn.h"
29 #include "catalog/pg_opclass.h"
30 #include "catalog/pg_type.h"
31 #include "executor/executor.h"
32 #include "miscadmin.h"
33 #include "nodes/makefuncs.h"
34 #include "nodes/nodeFuncs.h"
35 #include "nodes/parsenodes.h"
36 #include "optimizer/clauses.h"
37 #include "optimizer/planmain.h"
38 #include "optimizer/var.h"
39 #include "rewrite/rewriteManip.h"
40 #include "storage/lmgr.h"
41 #include "utils/array.h"
42 #include "utils/builtins.h"
43 #include "utils/datum.h"
44 #include "utils/memutils.h"
45 #include "utils/fmgroids.h"
46 #include "utils/inval.h"
47 #include "utils/lsyscache.h"
48 #include "utils/rel.h"
49 #include "utils/ruleutils.h"
50 #include "utils/syscache.h"
51
52 /*
53  * Information about bounds of a partitioned relation
54  *
55  * A list partition datum that is known to be NULL is never put into the
56  * datums array. Instead, it is tracked using has_null and null_index fields.
57  *
58  * In the case of range partitioning, ndatums will typically be far less than
59  * 2 * nparts, because a partition's upper bound and the next partition's lower
60  * bound are the same in most common cases, and we only store one of them.
61  *
62  * In the case of list partitioning, the indexes array stores one entry for
63  * every datum, which is the index of the partition that accepts a given datum.
64  * In case of range partitioning, it stores one entry per distinct range
65  * datum, which is the index of the partition for which a given datum
66  * is an upper bound.
67  */
68
69 /* Ternary value to represent what's contained in a range bound datum */
70 typedef enum RangeDatumContent
71 {
72         RANGE_DATUM_FINITE = 0,         /* actual datum stored elsewhere */
73         RANGE_DATUM_NEG_INF,            /* negative infinity */
74         RANGE_DATUM_POS_INF                     /* positive infinity */
75 } RangeDatumContent;
76
77 typedef struct PartitionBoundInfoData
78 {
79         char            strategy;               /* list or range bounds? */
80         int                     ndatums;                /* Length of the datums following array */
81         Datum     **datums;                     /* Array of datum-tuples with key->partnatts
82                                                                  * datums each */
83         RangeDatumContent **content;/* what's contained in each range bound datum?
84                                                                  * (see the above enum); NULL for list
85                                                                  * partitioned tables */
86         int                *indexes;            /* Partition indexes; one entry per member of
87                                                                  * the datums array (plus one if range
88                                                                  * partitioned table) */
89         bool            has_null;               /* Is there a null-accepting partition? false
90                                                                  * for range partitioned tables */
91         int                     null_index;             /* Index of the null-accepting partition; -1
92                                                                  * for range partitioned tables */
93 } PartitionBoundInfoData;
94
95 /*
96  * When qsort'ing partition bounds after reading from the catalog, each bound
97  * is represented with one of the following structs.
98  */
99
100 /* One value coming from some (index'th) list partition */
101 typedef struct PartitionListValue
102 {
103         int                     index;
104         Datum           value;
105 } PartitionListValue;
106
107 /* One bound of a range partition */
108 typedef struct PartitionRangeBound
109 {
110         int                     index;
111         Datum      *datums;                     /* range bound datums */
112         RangeDatumContent *content; /* what's contained in each datum? */
113         bool            lower;                  /* this is the lower (vs upper) bound */
114 } PartitionRangeBound;
115
116 static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
117                                                            void *arg);
118 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
119                                                    void *arg);
120
121 static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
122 static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
123 static Oid get_partition_operator(PartitionKey key, int col,
124                                            StrategyNumber strategy, bool *need_relabel);
125 static List *generate_partition_qual(Relation rel);
126
127 static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
128                                          List *datums, bool lower);
129 static int32 partition_rbound_cmp(PartitionKey key,
130                                          Datum *datums1, RangeDatumContent *content1, bool lower1,
131                                          PartitionRangeBound *b2);
132 static int32 partition_rbound_datum_cmp(PartitionKey key,
133                                                    Datum *rb_datums, RangeDatumContent *rb_content,
134                                                    Datum *tuple_datums);
135
136 static int32 partition_bound_cmp(PartitionKey key,
137                                         PartitionBoundInfo boundinfo,
138                                         int offset, void *probe, bool probe_is_bound);
139 static int partition_bound_bsearch(PartitionKey key,
140                                                 PartitionBoundInfo boundinfo,
141                                                 void *probe, bool probe_is_bound, bool *is_equal);
142
143 /*
144  * RelationBuildPartitionDesc
145  *              Form rel's partition descriptor
146  *
147  * Not flushed from the cache by RelationClearRelation() unless changed because
148  * of addition or removal of partition.
149  */
150 void
151 RelationBuildPartitionDesc(Relation rel)
152 {
153         List       *inhoids,
154                            *partoids;
155         Oid                *oids = NULL;
156         List       *boundspecs = NIL;
157         ListCell   *cell;
158         int                     i,
159                                 nparts;
160         PartitionKey key = RelationGetPartitionKey(rel);
161         PartitionDesc result;
162         MemoryContext oldcxt;
163
164         int                     ndatums = 0;
165
166         /* List partitioning specific */
167         PartitionListValue **all_values = NULL;
168         bool            found_null = false;
169         int                     null_index = -1;
170
171         /* Range partitioning specific */
172         PartitionRangeBound **rbounds = NULL;
173
174         /*
175          * The following could happen in situations where rel has a pg_class entry
176          * but not the pg_partitioned_table entry yet.
177          */
178         if (key == NULL)
179                 return;
180
181         /* Get partition oids from pg_inherits */
182         inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
183
184         /* Collect bound spec nodes in a list */
185         i = 0;
186         partoids = NIL;
187         foreach(cell, inhoids)
188         {
189                 Oid                     inhrelid = lfirst_oid(cell);
190                 HeapTuple       tuple;
191                 Datum           datum;
192                 bool            isnull;
193                 Node       *boundspec;
194
195                 tuple = SearchSysCache1(RELOID, inhrelid);
196                 if (!HeapTupleIsValid(tuple))
197                         elog(ERROR, "cache lookup failed for relation %u", inhrelid);
198
199                 /*
200                  * It is possible that the pg_class tuple of a partition has not been
201                  * updated yet to set its relpartbound field.  The only case where
202                  * this happens is when we open the parent relation to check using its
203                  * partition descriptor that a new partition's bound does not overlap
204                  * some existing partition.
205                  */
206                 if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
207                 {
208                         ReleaseSysCache(tuple);
209                         continue;
210                 }
211
212                 datum = SysCacheGetAttr(RELOID, tuple,
213                                                                 Anum_pg_class_relpartbound,
214                                                                 &isnull);
215                 Assert(!isnull);
216                 boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
217                 boundspecs = lappend(boundspecs, boundspec);
218                 partoids = lappend_oid(partoids, inhrelid);
219                 ReleaseSysCache(tuple);
220         }
221
222         nparts = list_length(partoids);
223
224         if (nparts > 0)
225         {
226                 oids = (Oid *) palloc(nparts * sizeof(Oid));
227                 i = 0;
228                 foreach(cell, partoids)
229                         oids[i++] = lfirst_oid(cell);
230
231                 /* Convert from node to the internal representation */
232                 if (key->strategy == PARTITION_STRATEGY_LIST)
233                 {
234                         List       *non_null_values = NIL;
235
236                         /*
237                          * Create a unified list of non-null values across all partitions.
238                          */
239                         i = 0;
240                         found_null = false;
241                         null_index = -1;
242                         foreach(cell, boundspecs)
243                         {
244                                 ListCell   *c;
245                                 PartitionBoundSpec *spec = lfirst(cell);
246
247                                 if (spec->strategy != PARTITION_STRATEGY_LIST)
248                                         elog(ERROR, "invalid strategy in partition bound spec");
249
250                                 foreach(c, spec->listdatums)
251                                 {
252                                         Const      *val = lfirst(c);
253                                         PartitionListValue *list_value = NULL;
254
255                                         if (!val->constisnull)
256                                         {
257                                                 list_value = (PartitionListValue *)
258                                                         palloc0(sizeof(PartitionListValue));
259                                                 list_value->index = i;
260                                                 list_value->value = val->constvalue;
261                                         }
262                                         else
263                                         {
264                                                 /*
265                                                  * Never put a null into the values array, flag
266                                                  * instead for the code further down below where we
267                                                  * construct the actual relcache struct.
268                                                  */
269                                                 if (found_null)
270                                                         elog(ERROR, "found null more than once");
271                                                 found_null = true;
272                                                 null_index = i;
273                                         }
274
275                                         if (list_value)
276                                                 non_null_values = lappend(non_null_values,
277                                                                                                   list_value);
278                                 }
279
280                                 i++;
281                         }
282
283                         ndatums = list_length(non_null_values);
284
285                         /*
286                          * Collect all list values in one array. Alongside the value, we
287                          * also save the index of partition the value comes from.
288                          */
289                         all_values = (PartitionListValue **) palloc(ndatums *
290                                                                                            sizeof(PartitionListValue *));
291                         i = 0;
292                         foreach(cell, non_null_values)
293                         {
294                                 PartitionListValue *src = lfirst(cell);
295
296                                 all_values[i] = (PartitionListValue *)
297                                         palloc(sizeof(PartitionListValue));
298                                 all_values[i]->value = src->value;
299                                 all_values[i]->index = src->index;
300                                 i++;
301                         }
302
303                         qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
304                                           qsort_partition_list_value_cmp, (void *) key);
305                 }
306                 else if (key->strategy == PARTITION_STRATEGY_RANGE)
307                 {
308                         int                     j,
309                                                 k;
310                         PartitionRangeBound **all_bounds,
311                                            *prev;
312                         bool       *distinct_indexes;
313
314                         all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
315                                                                                           sizeof(PartitionRangeBound *));
316                         distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool));
317
318                         /*
319                          * Create a unified list of range bounds across all the
320                          * partitions.
321                          */
322                         i = j = 0;
323                         foreach(cell, boundspecs)
324                         {
325                                 PartitionBoundSpec *spec = lfirst(cell);
326                                 PartitionRangeBound *lower,
327                                                    *upper;
328
329                                 if (spec->strategy != PARTITION_STRATEGY_RANGE)
330                                         elog(ERROR, "invalid strategy in partition bound spec");
331
332                                 lower = make_one_range_bound(key, i, spec->lowerdatums,
333                                                                                          true);
334                                 upper = make_one_range_bound(key, i, spec->upperdatums,
335                                                                                          false);
336                                 all_bounds[j] = lower;
337                                 all_bounds[j + 1] = upper;
338                                 j += 2;
339                                 i++;
340                         }
341                         Assert(j == 2 * nparts);
342
343                         /* Sort all the bounds in ascending order */
344                         qsort_arg(all_bounds, 2 * nparts,
345                                           sizeof(PartitionRangeBound *),
346                                           qsort_partition_rbound_cmp,
347                                           (void *) key);
348
349                         /*
350                          * Count the number of distinct bounds to allocate an array of
351                          * that size.
352                          */
353                         ndatums = 0;
354                         prev = NULL;
355                         for (i = 0; i < 2 * nparts; i++)
356                         {
357                                 PartitionRangeBound *cur = all_bounds[i];
358                                 bool            is_distinct = false;
359                                 int                     j;
360
361                                 /* Is current bound is distinct from the previous? */
362                                 for (j = 0; j < key->partnatts; j++)
363                                 {
364                                         Datum           cmpval;
365
366                                         if (prev == NULL)
367                                         {
368                                                 is_distinct = true;
369                                                 break;
370                                         }
371
372                                         /*
373                                          * If either of them has infinite element, we can't equate
374                                          * them.  Even when both are infinite, they'd have
375                                          * opposite signs, because only one of cur and prev is a
376                                          * lower bound).
377                                          */
378                                         if (cur->content[j] != RANGE_DATUM_FINITE ||
379                                                 prev->content[j] != RANGE_DATUM_FINITE)
380                                         {
381                                                 is_distinct = true;
382                                                 break;
383                                         }
384                                         cmpval = FunctionCall2Coll(&key->partsupfunc[j],
385                                                                                            key->partcollation[j],
386                                                                                            cur->datums[j],
387                                                                                            prev->datums[j]);
388                                         if (DatumGetInt32(cmpval) != 0)
389                                         {
390                                                 is_distinct = true;
391                                                 break;
392                                         }
393                                 }
394
395                                 /*
396                                  * Count the current bound if it is distinct from the previous
397                                  * one.  Also, store if the index i contains a distinct bound
398                                  * that we'd like put in the relcache array.
399                                  */
400                                 if (is_distinct)
401                                 {
402                                         distinct_indexes[i] = true;
403                                         ndatums++;
404                                 }
405                                 else
406                                         distinct_indexes[i] = false;
407
408                                 prev = cur;
409                         }
410
411                         /*
412                          * Finally save them in an array from where they will be copied
413                          * into the relcache.
414                          */
415                         rbounds = (PartitionRangeBound **) palloc(ndatums *
416                                                                                           sizeof(PartitionRangeBound *));
417                         k = 0;
418                         for (i = 0; i < 2 * nparts; i++)
419                         {
420                                 if (distinct_indexes[i])
421                                         rbounds[k++] = all_bounds[i];
422                         }
423                         Assert(k == ndatums);
424                 }
425                 else
426                         elog(ERROR, "unexpected partition strategy: %d",
427                                  (int) key->strategy);
428         }
429
430         /* Now build the actual relcache partition descriptor */
431         rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
432                                                                                   RelationGetRelationName(rel),
433                                                                                   ALLOCSET_DEFAULT_SIZES);
434         oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
435
436         result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
437         result->nparts = nparts;
438         if (nparts > 0)
439         {
440                 PartitionBoundInfo boundinfo;
441                 int                *mapping;
442                 int                     next_index = 0;
443
444                 result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
445
446                 boundinfo = (PartitionBoundInfoData *)
447                         palloc0(sizeof(PartitionBoundInfoData));
448                 boundinfo->strategy = key->strategy;
449                 boundinfo->ndatums = ndatums;
450                 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
451
452                 /* Initialize mapping array with invalid values */
453                 mapping = (int *) palloc(sizeof(int) * nparts);
454                 for (i = 0; i < nparts; i++)
455                         mapping[i] = -1;
456
457                 switch (key->strategy)
458                 {
459                         case PARTITION_STRATEGY_LIST:
460                                 {
461                                         boundinfo->has_null = found_null;
462                                         boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
463
464                                         /*
465                                          * Copy values.  Indexes of individual values are mapped
466                                          * to canonical values so that they match for any two list
467                                          * partitioned tables with same number of partitions and
468                                          * same lists per partition.  One way to canonicalize is
469                                          * to assign the index in all_values[] of the smallest
470                                          * value of each partition, as the index of all of the
471                                          * partition's values.
472                                          */
473                                         for (i = 0; i < ndatums; i++)
474                                         {
475                                                 boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
476                                                 boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
477                                                                                                                 key->parttypbyval[0],
478                                                                                                                  key->parttyplen[0]);
479
480                                                 /* If the old index has no mapping, assign one */
481                                                 if (mapping[all_values[i]->index] == -1)
482                                                         mapping[all_values[i]->index] = next_index++;
483
484                                                 boundinfo->indexes[i] = mapping[all_values[i]->index];
485                                         }
486
487                                         /*
488                                          * If null-accepting partition has no mapped index yet,
489                                          * assign one.  This could happen if such partition
490                                          * accepts only null and hence not covered in the above
491                                          * loop which only handled non-null values.
492                                          */
493                                         if (found_null)
494                                         {
495                                                 Assert(null_index >= 0);
496                                                 if (mapping[null_index] == -1)
497                                                         mapping[null_index] = next_index++;
498                                         }
499
500                                         /* All partition must now have a valid mapping */
501                                         Assert(next_index == nparts);
502
503                                         if (found_null)
504                                                 boundinfo->null_index = mapping[null_index];
505                                         else
506                                                 boundinfo->null_index = -1;
507                                         break;
508                                 }
509
510                         case PARTITION_STRATEGY_RANGE:
511                                 {
512                                         boundinfo->content = (RangeDatumContent **) palloc(ndatums *
513                                                                                                 sizeof(RangeDatumContent *));
514                                         boundinfo->indexes = (int *) palloc((ndatums + 1) *
515                                                                                                                 sizeof(int));
516
517                                         for (i = 0; i < ndatums; i++)
518                                         {
519                                                 int                     j;
520
521                                                 boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
522                                                                                                                           sizeof(Datum));
523                                                 boundinfo->content[i] = (RangeDatumContent *)
524                                                         palloc(key->partnatts *
525                                                                    sizeof(RangeDatumContent));
526                                                 for (j = 0; j < key->partnatts; j++)
527                                                 {
528                                                         if (rbounds[i]->content[j] == RANGE_DATUM_FINITE)
529                                                                 boundinfo->datums[i][j] =
530                                                                         datumCopy(rbounds[i]->datums[j],
531                                                                                           key->parttypbyval[j],
532                                                                                           key->parttyplen[j]);
533                                                         /* Remember, we are storing the tri-state value. */
534                                                         boundinfo->content[i][j] = rbounds[i]->content[j];
535                                                 }
536
537                                                 /*
538                                                  * There is no mapping for invalid indexes.
539                                                  *
540                                                  * Any lower bounds in the rbounds array have invalid
541                                                  * indexes assigned, because the values between the
542                                                  * previous bound (if there is one) and this (lower)
543                                                  * bound are not part of the range of any existing
544                                                  * partition.
545                                                  */
546                                                 if (rbounds[i]->lower)
547                                                         boundinfo->indexes[i] = -1;
548                                                 else
549                                                 {
550                                                         int                     orig_index = rbounds[i]->index;
551
552                                                         /* If the old index is has no mapping, assign one */
553                                                         if (mapping[orig_index] == -1)
554                                                                 mapping[orig_index] = next_index++;
555
556                                                         boundinfo->indexes[i] = mapping[orig_index];
557                                                 }
558                                         }
559                                         boundinfo->indexes[i] = -1;
560                                         break;
561                                 }
562
563                         default:
564                                 elog(ERROR, "unexpected partition strategy: %d",
565                                          (int) key->strategy);
566                 }
567
568                 result->boundinfo = boundinfo;
569
570                 /*
571                  * Now assign OIDs from the original array into mapped indexes of the
572                  * result array.  Order of OIDs in the former is defined by the
573                  * catalog scan that retrived them, whereas that in the latter is
574                  * defined by canonicalized representation of the list values or the
575                  * range bounds.
576                  */
577                 for (i = 0; i < nparts; i++)
578                         result->oids[mapping[i]] = oids[i];
579                 pfree(mapping);
580         }
581
582         MemoryContextSwitchTo(oldcxt);
583         rel->rd_partdesc = result;
584 }
585
586 /*
587  * Are two partition bound collections logically equal?
588  *
589  * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
590  * This is also useful when b1 and b2 are bound collections of two separate
591  * relations, respectively, because PartitionBoundInfo is a canonical
592  * representation of partition bounds.
593  */
594 bool
595 partition_bounds_equal(PartitionKey key,
596                                            PartitionBoundInfo b1, PartitionBoundInfo b2)
597 {
598         int                     i;
599
600         if (b1->strategy != b2->strategy)
601                 return false;
602
603         if (b1->ndatums != b2->ndatums)
604                 return false;
605
606         if (b1->has_null != b2->has_null)
607                 return false;
608
609         if (b1->null_index != b2->null_index)
610                 return false;
611
612         for (i = 0; i < b1->ndatums; i++)
613         {
614                 int                     j;
615
616                 for (j = 0; j < key->partnatts; j++)
617                 {
618                         /* For range partitions, the bounds might not be finite. */
619                         if (b1->content != NULL)
620                         {
621                                 /*
622                                  * A finite bound always differs from an infinite bound, and
623                                  * different kinds of infinities differ from each other.
624                                  */
625                                 if (b1->content[i][j] != b2->content[i][j])
626                                         return false;
627
628                                 /* Non-finite bounds are equal without further examination. */
629                                 if (b1->content[i][j] != RANGE_DATUM_FINITE)
630                                         continue;
631                         }
632
633                         /*
634                          * Compare the actual values. Note that it would be both incorrect
635                          * and unsafe to invoke the comparison operator derived from the
636                          * partitioning specification here.  It would be incorrect because
637                          * we want the relcache entry to be updated for ANY change to the
638                          * partition bounds, not just those that the partitioning operator
639                          * thinks are significant.  It would be unsafe because we might
640                          * reach this code in the context of an aborted transaction, and
641                          * an arbitrary partitioning operator might not be safe in that
642                          * context.  datumIsEqual() should be simple enough to be safe.
643                          */
644                         if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
645                                                           key->parttypbyval[j],
646                                                           key->parttyplen[j]))
647                                 return false;
648                 }
649
650                 if (b1->indexes[i] != b2->indexes[i])
651                         return false;
652         }
653
654         /* There are ndatums+1 indexes in case of range partitions */
655         if (key->strategy == PARTITION_STRATEGY_RANGE &&
656                 b1->indexes[i] != b2->indexes[i])
657                 return false;
658
659         return true;
660 }
661
662 /*
663  * check_new_partition_bound
664  *
665  * Checks if the new partition's bound overlaps any of the existing partitions
666  * of parent.  Also performs additional checks as necessary per strategy.
667  */
668 void
669 check_new_partition_bound(char *relname, Relation parent, Node *bound)
670 {
671         PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
672         PartitionKey key = RelationGetPartitionKey(parent);
673         PartitionDesc partdesc = RelationGetPartitionDesc(parent);
674         ParseState *pstate = make_parsestate(NULL);
675         int                     with = -1;
676         bool            overlap = false;
677
678         switch (key->strategy)
679         {
680                 case PARTITION_STRATEGY_LIST:
681                         {
682                                 Assert(spec->strategy == PARTITION_STRATEGY_LIST);
683
684                                 if (partdesc->nparts > 0)
685                                 {
686                                         PartitionBoundInfo boundinfo = partdesc->boundinfo;
687                                         ListCell   *cell;
688
689                                         Assert(boundinfo &&
690                                                    boundinfo->strategy == PARTITION_STRATEGY_LIST &&
691                                                    (boundinfo->ndatums > 0 || boundinfo->has_null));
692
693                                         foreach(cell, spec->listdatums)
694                                         {
695                                                 Const      *val = lfirst(cell);
696
697                                                 if (!val->constisnull)
698                                                 {
699                                                         int                     offset;
700                                                         bool            equal;
701
702                                                         offset = partition_bound_bsearch(key, boundinfo,
703                                                                                                                          &val->constvalue,
704                                                                                                                          true, &equal);
705                                                         if (offset >= 0 && equal)
706                                                         {
707                                                                 overlap = true;
708                                                                 with = boundinfo->indexes[offset];
709                                                                 break;
710                                                         }
711                                                 }
712                                                 else if (boundinfo->has_null)
713                                                 {
714                                                         overlap = true;
715                                                         with = boundinfo->null_index;
716                                                         break;
717                                                 }
718                                         }
719                                 }
720
721                                 break;
722                         }
723
724                 case PARTITION_STRATEGY_RANGE:
725                         {
726                                 PartitionRangeBound *lower,
727                                                    *upper;
728
729                                 Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
730                                 lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
731                                 upper = make_one_range_bound(key, -1, spec->upperdatums, false);
732
733                                 /*
734                                  * First check if the resulting range would be empty with
735                                  * specified lower and upper bounds
736                                  */
737                                 if (partition_rbound_cmp(key, lower->datums, lower->content, true,
738                                                                                  upper) >= 0)
739                                         ereport(ERROR,
740                                                         (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
741                                         errmsg("cannot create range partition with empty range"),
742                                                          parser_errposition(pstate, spec->location)));
743
744                                 if (partdesc->nparts > 0)
745                                 {
746                                         PartitionBoundInfo boundinfo = partdesc->boundinfo;
747                                         int                     off1,
748                                                                 off2;
749                                         bool            equal = false;
750
751                                         Assert(boundinfo && boundinfo->ndatums > 0 &&
752                                                    boundinfo->strategy == PARTITION_STRATEGY_RANGE);
753
754                                         /*
755                                          * Firstly, find the greatest range bound that is less
756                                          * than or equal to the new lower bound.
757                                          */
758                                         off1 = partition_bound_bsearch(key, boundinfo, lower, true,
759                                                                                                    &equal);
760
761                                         /*
762                                          * off1 == -1 means that all existing bounds are greater
763                                          * than the new lower bound.  In that case and the case
764                                          * where no partition is defined between the bounds at
765                                          * off1 and off1 + 1, we have a "gap" in the range that
766                                          * could be occupied by the new partition.  We confirm if
767                                          * so by checking whether the new upper bound is confined
768                                          * within the gap.
769                                          */
770                                         if (!equal && boundinfo->indexes[off1 + 1] < 0)
771                                         {
772                                                 off2 = partition_bound_bsearch(key, boundinfo, upper,
773                                                                                                            true, &equal);
774
775                                                 /*
776                                                  * If the new upper bound is returned to be equal to
777                                                  * the bound at off2, the latter must be the upper
778                                                  * bound of some partition with which the new
779                                                  * partition clearly overlaps.
780                                                  *
781                                                  * Also, if bound at off2 is not same as the one
782                                                  * returned for the new lower bound (IOW, off1 !=
783                                                  * off2), then the new partition overlaps at least one
784                                                  * partition.
785                                                  */
786                                                 if (equal || off1 != off2)
787                                                 {
788                                                         overlap = true;
789
790                                                         /*
791                                                          * The bound at off2 could be the lower bound of
792                                                          * the partition with which the new partition
793                                                          * overlaps.  In that case, use the upper bound
794                                                          * (that is, the bound at off2 + 1) to get the
795                                                          * index of that partition.
796                                                          */
797                                                         if (boundinfo->indexes[off2] < 0)
798                                                                 with = boundinfo->indexes[off2 + 1];
799                                                         else
800                                                                 with = boundinfo->indexes[off2];
801                                                 }
802                                         }
803                                         else
804                                         {
805                                                 /*
806                                                  * Equal has been set to true and there is no "gap"
807                                                  * between the bound at off1 and that at off1 + 1, so
808                                                  * the new partition will overlap some partition. In
809                                                  * the former case, the new lower bound is found to be
810                                                  * equal to the bound at off1, which could only ever
811                                                  * be true if the latter is the lower bound of some
812                                                  * partition.  It's clear in such a case that the new
813                                                  * partition overlaps that partition, whose index we
814                                                  * get using its upper bound (that is, using the bound
815                                                  * at off1 + 1).
816                                                  */
817                                                 overlap = true;
818                                                 with = boundinfo->indexes[off1 + 1];
819                                         }
820                                 }
821
822                                 break;
823                         }
824
825                 default:
826                         elog(ERROR, "unexpected partition strategy: %d",
827                                  (int) key->strategy);
828         }
829
830         if (overlap)
831         {
832                 Assert(with >= 0);
833                 ereport(ERROR,
834                                 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
835                                  errmsg("partition \"%s\" would overlap partition \"%s\"",
836                                                 relname, get_rel_name(partdesc->oids[with])),
837                                  parser_errposition(pstate, spec->location)));
838         }
839 }
840
841 /*
842  * get_partition_parent
843  *
844  * Returns inheritance parent of a partition by scanning pg_inherits
845  *
846  * Note: Because this function assumes that the relation whose OID is passed
847  * as an argument will have precisely one parent, it should only be called
848  * when it is known that the relation is a partition.
849  */
850 Oid
851 get_partition_parent(Oid relid)
852 {
853         Form_pg_inherits form;
854         Relation        catalogRelation;
855         SysScanDesc scan;
856         ScanKeyData key[2];
857         HeapTuple       tuple;
858         Oid                     result;
859
860         catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
861
862         ScanKeyInit(&key[0],
863                                 Anum_pg_inherits_inhrelid,
864                                 BTEqualStrategyNumber, F_OIDEQ,
865                                 ObjectIdGetDatum(relid));
866         ScanKeyInit(&key[1],
867                                 Anum_pg_inherits_inhseqno,
868                                 BTEqualStrategyNumber, F_INT4EQ,
869                                 Int32GetDatum(1));
870
871         scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
872                                                           NULL, 2, key);
873
874         tuple = systable_getnext(scan);
875         Assert(HeapTupleIsValid(tuple));
876
877         form = (Form_pg_inherits) GETSTRUCT(tuple);
878         result = form->inhparent;
879
880         systable_endscan(scan);
881         heap_close(catalogRelation, AccessShareLock);
882
883         return result;
884 }
885
886 /*
887  * get_qual_from_partbound
888  *              Given a parser node for partition bound, return the list of executable
889  *              expressions as partition constraint
890  */
891 List *
892 get_qual_from_partbound(Relation rel, Relation parent, Node *bound)
893 {
894         PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
895         PartitionKey key = RelationGetPartitionKey(parent);
896         List       *my_qual = NIL;
897
898         Assert(key != NULL);
899
900         switch (key->strategy)
901         {
902                 case PARTITION_STRATEGY_LIST:
903                         Assert(spec->strategy == PARTITION_STRATEGY_LIST);
904                         my_qual = get_qual_for_list(key, spec);
905                         break;
906
907                 case PARTITION_STRATEGY_RANGE:
908                         Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
909                         my_qual = get_qual_for_range(key, spec);
910                         break;
911
912                 default:
913                         elog(ERROR, "unexpected partition strategy: %d",
914                                  (int) key->strategy);
915         }
916
917         return my_qual;
918 }
919
920 /*
921  * map_partition_varattnos - maps varattno of any Vars in expr from the
922  * parent attno to partition attno.
923  *
924  * We must allow for cases where physical attnos of a partition can be
925  * different from the parent's.
926  *
927  * Note: this will work on any node tree, so really the argument and result
928  * should be declared "Node *".  But a substantial majority of the callers
929  * are working on Lists, so it's less messy to do the casts internally.
930  */
931 List *
932 map_partition_varattnos(List *expr, int target_varno,
933                                                 Relation partrel, Relation parent)
934 {
935         AttrNumber *part_attnos;
936         bool            found_whole_row;
937
938         if (expr == NIL)
939                 return NIL;
940
941         part_attnos = convert_tuples_by_name_map(RelationGetDescr(partrel),
942                                                                                          RelationGetDescr(parent),
943                                                                  gettext_noop("could not convert row type"));
944         expr = (List *) map_variable_attnos((Node *) expr,
945                                                                                 target_varno, 0,
946                                                                                 part_attnos,
947                                                                                 RelationGetDescr(parent)->natts,
948                                                                                 &found_whole_row);
949         /* There can never be a whole-row reference here */
950         if (found_whole_row)
951                 elog(ERROR, "unexpected whole-row reference found in partition key");
952
953         return expr;
954 }
955
956 /*
957  * RelationGetPartitionQual
958  *
959  * Returns a list of partition quals
960  */
961 List *
962 RelationGetPartitionQual(Relation rel)
963 {
964         /* Quick exit */
965         if (!rel->rd_rel->relispartition)
966                 return NIL;
967
968         return generate_partition_qual(rel);
969 }
970
971 /*
972  * Append OIDs of rel's partitions to the list 'partoids' and for each OID,
973  * append pointer rel to the list 'parents'.
974  */
975 #define APPEND_REL_PARTITION_OIDS(rel, partoids, parents) \
976         do\
977         {\
978                 int             i;\
979                 for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
980                 {\
981                         (partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
982                         (parents) = lappend((parents), (rel));\
983                 }\
984         } while(0)
985
986 /*
987  * RelationGetPartitionDispatchInfo
988  *              Returns information necessary to route tuples down a partition tree
989  *
990  * All the partitions will be locked with lockmode, unless it is NoLock.
991  * A list of the OIDs of all the leaf partition of rel is returned in
992  * *leaf_part_oids.
993  */
994 PartitionDispatch *
995 RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
996                                                                  int *num_parted, List **leaf_part_oids)
997 {
998         PartitionDispatchData **pd;
999         List       *all_parts = NIL,
1000                            *all_parents = NIL,
1001                            *parted_rels,
1002                            *parted_rel_parents;
1003         ListCell   *lc1,
1004                            *lc2;
1005         int                     i,
1006                                 k,
1007                                 offset;
1008
1009         /*
1010          * Lock partitions and make a list of the partitioned ones to prepare
1011          * their PartitionDispatch objects below.
1012          *
1013          * Cannot use find_all_inheritors() here, because then the order of OIDs
1014          * in parted_rels list would be unknown, which does not help, because we
1015          * we assign indexes within individual PartitionDispatch in an order that
1016          * is predetermined (determined by the order of OIDs in individual
1017          * partition descriptors).
1018          */
1019         *num_parted = 1;
1020         parted_rels = list_make1(rel);
1021         /* Root partitioned table has no parent, so NULL for parent */
1022         parted_rel_parents = list_make1(NULL);
1023         APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents);
1024         forboth(lc1, all_parts, lc2, all_parents)
1025         {
1026                 Relation        partrel = heap_open(lfirst_oid(lc1), lockmode);
1027                 Relation        parent = lfirst(lc2);
1028                 PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
1029
1030                 /*
1031                  * If this partition is a partitioned table, add its children to the
1032                  * end of the list, so that they are processed as well.
1033                  */
1034                 if (partdesc)
1035                 {
1036                         (*num_parted)++;
1037                         parted_rels = lappend(parted_rels, partrel);
1038                         parted_rel_parents = lappend(parted_rel_parents, parent);
1039                         APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents);
1040                 }
1041                 else
1042                         heap_close(partrel, NoLock);
1043
1044                 /*
1045                  * We keep the partitioned ones open until we're done using the
1046                  * information being collected here (for example, see
1047                  * ExecEndModifyTable).
1048                  */
1049         }
1050
1051         /*
1052          * We want to create two arrays - one for leaf partitions and another for
1053          * partitioned tables (including the root table and internal partitions).
1054          * While we only create the latter here, leaf partition array of suitable
1055          * objects (such as, ResultRelInfo) is created by the caller using the
1056          * list of OIDs we return.  Indexes into these arrays get assigned in a
1057          * breadth-first manner, whereby partitions of any given level are placed
1058          * consecutively in the respective arrays.
1059          */
1060         pd = (PartitionDispatchData **) palloc(*num_parted *
1061                                                                                    sizeof(PartitionDispatchData *));
1062         *leaf_part_oids = NIL;
1063         i = k = offset = 0;
1064         forboth(lc1, parted_rels, lc2, parted_rel_parents)
1065         {
1066                 Relation        partrel = lfirst(lc1);
1067                 Relation        parent = lfirst(lc2);
1068                 PartitionKey partkey = RelationGetPartitionKey(partrel);
1069                 TupleDesc       tupdesc = RelationGetDescr(partrel);
1070                 PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
1071                 int                     j,
1072                                         m;
1073
1074                 pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
1075                 pd[i]->reldesc = partrel;
1076                 pd[i]->key = partkey;
1077                 pd[i]->keystate = NIL;
1078                 pd[i]->partdesc = partdesc;
1079                 if (parent != NULL)
1080                 {
1081                         /*
1082                          * For every partitioned table other than root, we must store a
1083                          * tuple table slot initialized with its tuple descriptor and a
1084                          * tuple conversion map to convert a tuple from its parent's
1085                          * rowtype to its own. That is to make sure that we are looking at
1086                          * the correct row using the correct tuple descriptor when
1087                          * computing its partition key for tuple routing.
1088                          */
1089                         pd[i]->tupslot = MakeSingleTupleTableSlot(tupdesc);
1090                         pd[i]->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
1091                                                                                                    tupdesc,
1092                                                                  gettext_noop("could not convert row type"));
1093                 }
1094                 else
1095                 {
1096                         /* Not required for the root partitioned table */
1097                         pd[i]->tupslot = NULL;
1098                         pd[i]->tupmap = NULL;
1099                 }
1100                 pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
1101
1102                 /*
1103                  * Indexes corresponding to the internal partitions are multiplied by
1104                  * -1 to distinguish them from those of leaf partitions.  Encountering
1105                  * an index >= 0 means we found a leaf partition, which is immediately
1106                  * returned as the partition we are looking for.  A negative index
1107                  * means we found a partitioned table, whose PartitionDispatch object
1108                  * is located at the above index multiplied back by -1.  Using the
1109                  * PartitionDispatch object, search is continued further down the
1110                  * partition tree.
1111                  */
1112                 m = 0;
1113                 for (j = 0; j < partdesc->nparts; j++)
1114                 {
1115                         Oid                     partrelid = partdesc->oids[j];
1116
1117                         if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
1118                         {
1119                                 *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
1120                                 pd[i]->indexes[j] = k++;
1121                         }
1122                         else
1123                         {
1124                                 /*
1125                                  * offset denotes the number of partitioned tables of upper
1126                                  * levels including those of the current level.  Any partition
1127                                  * of this table must belong to the next level and hence will
1128                                  * be placed after the last partitioned table of this level.
1129                                  */
1130                                 pd[i]->indexes[j] = -(1 + offset + m);
1131                                 m++;
1132                         }
1133                 }
1134                 i++;
1135
1136                 /*
1137                  * This counts the number of partitioned tables at upper levels
1138                  * including those of the current level.
1139                  */
1140                 offset += m;
1141         }
1142
1143         return pd;
1144 }
1145
1146 /* Module-local functions */
1147
1148 /*
1149  * get_qual_for_list
1150  *
1151  * Returns a list of expressions to use as a list partition's constraint.
1152  */
1153 static List *
1154 get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
1155 {
1156         List       *result;
1157         ArrayExpr  *arr;
1158         ScalarArrayOpExpr *opexpr;
1159         ListCell   *cell,
1160                            *prev,
1161                            *next;
1162         Node       *keyCol;
1163         Oid                     operoid;
1164         bool            need_relabel,
1165                                 list_has_null = false;
1166         NullTest   *nulltest1 = NULL,
1167                            *nulltest2 = NULL;
1168
1169         /* Left operand is either a simple Var or arbitrary expression */
1170         if (key->partattrs[0] != 0)
1171                 keyCol = (Node *) makeVar(1,
1172                                                                   key->partattrs[0],
1173                                                                   key->parttypid[0],
1174                                                                   key->parttypmod[0],
1175                                                                   key->parttypcoll[0],
1176                                                                   0);
1177         else
1178                 keyCol = (Node *) copyObject(linitial(key->partexprs));
1179
1180         /*
1181          * We must remove any NULL value in the list; we handle it separately
1182          * below.
1183          */
1184         prev = NULL;
1185         for (cell = list_head(spec->listdatums); cell; cell = next)
1186         {
1187                 Const      *val = (Const *) lfirst(cell);
1188
1189                 next = lnext(cell);
1190
1191                 if (val->constisnull)
1192                 {
1193                         list_has_null = true;
1194                         spec->listdatums = list_delete_cell(spec->listdatums,
1195                                                                                                 cell, prev);
1196                 }
1197                 else
1198                         prev = cell;
1199         }
1200
1201         if (!list_has_null)
1202         {
1203                 /*
1204                  * Gin up a col IS NOT NULL test that will be AND'd with other
1205                  * expressions
1206                  */
1207                 nulltest1 = makeNode(NullTest);
1208                 nulltest1->arg = (Expr *) keyCol;
1209                 nulltest1->nulltesttype = IS_NOT_NULL;
1210                 nulltest1->argisrow = false;
1211                 nulltest1->location = -1;
1212         }
1213         else
1214         {
1215                 /*
1216                  * Gin up a col IS NULL test that will be OR'd with other expressions
1217                  */
1218                 nulltest2 = makeNode(NullTest);
1219                 nulltest2->arg = (Expr *) keyCol;
1220                 nulltest2->nulltesttype = IS_NULL;
1221                 nulltest2->argisrow = false;
1222                 nulltest2->location = -1;
1223         }
1224
1225         /* Right operand is an ArrayExpr containing this partition's values */
1226         arr = makeNode(ArrayExpr);
1227         arr->array_typeid = !type_is_array(key->parttypid[0])
1228                 ? get_array_type(key->parttypid[0])
1229                 : key->parttypid[0];
1230         arr->array_collid = key->parttypcoll[0];
1231         arr->element_typeid = key->parttypid[0];
1232         arr->elements = spec->listdatums;
1233         arr->multidims = false;
1234         arr->location = -1;
1235
1236         /* Get the correct btree equality operator */
1237         operoid = get_partition_operator(key, 0, BTEqualStrategyNumber,
1238                                                                          &need_relabel);
1239         if (need_relabel || key->partcollation[0] != key->parttypcoll[0])
1240                 keyCol = (Node *) makeRelabelType((Expr *) keyCol,
1241                                                                                   key->partopcintype[0],
1242                                                                                   -1,
1243                                                                                   key->partcollation[0],
1244                                                                                   COERCE_EXPLICIT_CAST);
1245
1246         /* Build leftop = ANY (rightop) */
1247         opexpr = makeNode(ScalarArrayOpExpr);
1248         opexpr->opno = operoid;
1249         opexpr->opfuncid = get_opcode(operoid);
1250         opexpr->useOr = true;
1251         opexpr->inputcollid = key->partcollation[0];
1252         opexpr->args = list_make2(keyCol, arr);
1253         opexpr->location = -1;
1254
1255         if (nulltest1)
1256                 result = list_make2(nulltest1, opexpr);
1257         else if (nulltest2)
1258         {
1259                 Expr       *or;
1260
1261                 or = makeBoolExpr(OR_EXPR, list_make2(nulltest2, opexpr), -1);
1262                 result = list_make1(or);
1263         }
1264         else
1265                 result = list_make1(opexpr);
1266
1267         return result;
1268 }
1269
1270 /*
1271  * get_qual_for_range
1272  *
1273  * Get a list of OpExpr's to use as a range partition's constraint.
1274  */
1275 static List *
1276 get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
1277 {
1278         List       *result = NIL;
1279         ListCell   *cell1,
1280                            *cell2,
1281                            *partexprs_item;
1282         int                     i;
1283
1284         /*
1285          * Iterate over columns of the key, emitting an OpExpr for each using the
1286          * corresponding lower and upper datums as constant operands.
1287          */
1288         i = 0;
1289         partexprs_item = list_head(key->partexprs);
1290         forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
1291         {
1292                 PartitionRangeDatum *ldatum = lfirst(cell1),
1293                                    *udatum = lfirst(cell2);
1294                 Node       *keyCol;
1295                 Const      *lower_val = NULL,
1296                                    *upper_val = NULL;
1297                 EState     *estate;
1298                 MemoryContext oldcxt;
1299                 Expr       *test_expr;
1300                 ExprState  *test_exprstate;
1301                 Datum           test_result;
1302                 bool            isNull;
1303                 bool            need_relabel = false;
1304                 Oid                     operoid;
1305                 NullTest   *nulltest;
1306
1307                 /* Left operand */
1308                 if (key->partattrs[i] != 0)
1309                 {
1310                         keyCol = (Node *) makeVar(1,
1311                                                                           key->partattrs[i],
1312                                                                           key->parttypid[i],
1313                                                                           key->parttypmod[i],
1314                                                                           key->parttypcoll[i],
1315                                                                           0);
1316                 }
1317                 else
1318                 {
1319                         keyCol = (Node *) copyObject(lfirst(partexprs_item));
1320                         partexprs_item = lnext(partexprs_item);
1321                 }
1322
1323                 /*
1324                  * Emit a IS NOT NULL expression for non-Var keys, because whereas
1325                  * simple attributes are covered by NOT NULL constraints, expression
1326                  * keys are still nullable which is not acceptable in case of range
1327                  * partitioning.
1328                  */
1329                 if (!IsA(keyCol, Var))
1330                 {
1331                         nulltest = makeNode(NullTest);
1332                         nulltest->arg = (Expr *) keyCol;
1333                         nulltest->nulltesttype = IS_NOT_NULL;
1334                         nulltest->argisrow = false;
1335                         nulltest->location = -1;
1336                         result = lappend(result, nulltest);
1337                 }
1338
1339                 /*
1340                  * Stop at this column if either of lower or upper datum is infinite,
1341                  * but do emit an OpExpr for the non-infinite datum.
1342                  */
1343                 if (!ldatum->infinite)
1344                         lower_val = (Const *) ldatum->value;
1345                 if (!udatum->infinite)
1346                         upper_val = (Const *) udatum->value;
1347
1348                 /*
1349                  * If lower_val and upper_val are both finite and happen to be equal,
1350                  * emit only (keyCol = lower_val) for this column, because all rows in
1351                  * this partition could only ever contain this value (ie, lower_val)
1352                  * in the current partitioning column.  We must consider further
1353                  * columns because the above condition does not fully constrain the
1354                  * rows of this partition.
1355                  */
1356                 if (lower_val && upper_val)
1357                 {
1358                         /* Get the correct btree equality operator for the test */
1359                         operoid = get_partition_operator(key, i, BTEqualStrategyNumber,
1360                                                                                          &need_relabel);
1361
1362                         /* Create the test expression */
1363                         estate = CreateExecutorState();
1364                         oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
1365                         test_expr = make_opclause(operoid,
1366                                                                           BOOLOID,
1367                                                                           false,
1368                                                                           (Expr *) lower_val,
1369                                                                           (Expr *) upper_val,
1370                                                                           InvalidOid,
1371                                                                           key->partcollation[i]);
1372                         fix_opfuncids((Node *) test_expr);
1373                         test_exprstate = ExecInitExpr(test_expr, NULL);
1374                         test_result = ExecEvalExprSwitchContext(test_exprstate,
1375                                                                                           GetPerTupleExprContext(estate),
1376                                                                                                         &isNull);
1377                         MemoryContextSwitchTo(oldcxt);
1378                         FreeExecutorState(estate);
1379
1380                         if (DatumGetBool(test_result))
1381                         {
1382                                 /* This can never be, but it's better to make sure */
1383                                 if (i == key->partnatts - 1)
1384                                         elog(ERROR, "invalid range bound specification");
1385
1386                                 if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
1387                                         keyCol = (Node *) makeRelabelType((Expr *) keyCol,
1388                                                                                                           key->partopcintype[i],
1389                                                                                                           -1,
1390                                                                                                           key->partcollation[i],
1391                                                                                                           COERCE_EXPLICIT_CAST);
1392                                 result = lappend(result,
1393                                                                  make_opclause(operoid,
1394                                                                                            BOOLOID,
1395                                                                                            false,
1396                                                                                            (Expr *) keyCol,
1397                                                                                            (Expr *) lower_val,
1398                                                                                            InvalidOid,
1399                                                                                            key->partcollation[i]));
1400
1401                                 /* Go over to consider the next column. */
1402                                 i++;
1403                                 continue;
1404                         }
1405                 }
1406
1407                 /*
1408                  * We can say here that lower_val != upper_val.  Emit expressions
1409                  * (keyCol >= lower_val) and (keyCol < upper_val), then stop.
1410                  */
1411                 if (lower_val)
1412                 {
1413                         operoid = get_partition_operator(key, i,
1414                                                                                          BTGreaterEqualStrategyNumber,
1415                                                                                          &need_relabel);
1416
1417                         if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
1418                                 keyCol = (Node *) makeRelabelType((Expr *) keyCol,
1419                                                                                                   key->partopcintype[i],
1420                                                                                                   -1,
1421                                                                                                   key->partcollation[i],
1422                                                                                                   COERCE_EXPLICIT_CAST);
1423                         result = lappend(result,
1424                                                          make_opclause(operoid,
1425                                                                                    BOOLOID,
1426                                                                                    false,
1427                                                                                    (Expr *) keyCol,
1428                                                                                    (Expr *) lower_val,
1429                                                                                    InvalidOid,
1430                                                                                    key->partcollation[i]));
1431                 }
1432
1433                 if (upper_val)
1434                 {
1435                         operoid = get_partition_operator(key, i,
1436                                                                                          BTLessStrategyNumber,
1437                                                                                          &need_relabel);
1438
1439                         if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
1440                                 keyCol = (Node *) makeRelabelType((Expr *) keyCol,
1441                                                                                                   key->partopcintype[i],
1442                                                                                                   -1,
1443                                                                                                   key->partcollation[i],
1444                                                                                                   COERCE_EXPLICIT_CAST);
1445
1446                         result = lappend(result,
1447                                                          make_opclause(operoid,
1448                                                                                    BOOLOID,
1449                                                                                    false,
1450                                                                                    (Expr *) keyCol,
1451                                                                                    (Expr *) upper_val,
1452                                                                                    InvalidOid,
1453                                                                                    key->partcollation[i]));
1454                 }
1455
1456                 /*
1457                  * We can stop at this column, because we would not have checked the
1458                  * next column when routing a given row into this partition.
1459                  */
1460                 break;
1461         }
1462
1463         return result;
1464 }
1465
1466 /*
1467  * get_partition_operator
1468  *
1469  * Return oid of the operator of given strategy for a given partition key
1470  * column.
1471  */
1472 static Oid
1473 get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
1474                                            bool *need_relabel)
1475 {
1476         Oid                     operoid;
1477
1478         /*
1479          * First check if there exists an operator of the given strategy, with
1480          * this column's type as both its lefttype and righttype, in the
1481          * partitioning operator family specified for the column.
1482          */
1483         operoid = get_opfamily_member(key->partopfamily[col],
1484                                                                   key->parttypid[col],
1485                                                                   key->parttypid[col],
1486                                                                   strategy);
1487
1488         /*
1489          * If one doesn't exist, we must resort to using an operator in the same
1490          * opreator family but with the operator class declared input type.  It is
1491          * OK to do so, because the column's type is known to be binary-coercible
1492          * with the operator class input type (otherwise, the operator class in
1493          * question would not have been accepted as the partitioning operator
1494          * class).  We must however inform the caller to wrap the non-Const
1495          * expression with a RelabelType node to denote the implicit coercion. It
1496          * ensures that the resulting expression structurally matches similarly
1497          * processed expressions within the optimizer.
1498          */
1499         if (!OidIsValid(operoid))
1500         {
1501                 operoid = get_opfamily_member(key->partopfamily[col],
1502                                                                           key->partopcintype[col],
1503                                                                           key->partopcintype[col],
1504                                                                           strategy);
1505                 *need_relabel = true;
1506         }
1507         else
1508                 *need_relabel = false;
1509
1510         if (!OidIsValid(operoid))
1511                 elog(ERROR, "could not find operator for partitioning");
1512
1513         return operoid;
1514 }
1515
1516 /*
1517  * generate_partition_qual
1518  *
1519  * Generate partition predicate from rel's partition bound expression
1520  *
1521  * Result expression tree is stored CacheMemoryContext to ensure it survives
1522  * as long as the relcache entry. But we should be running in a less long-lived
1523  * working context. To avoid leaking cache memory if this routine fails partway
1524  * through, we build in working memory and then copy the completed structure
1525  * into cache memory.
1526  */
1527 static List *
1528 generate_partition_qual(Relation rel)
1529 {
1530         HeapTuple       tuple;
1531         MemoryContext oldcxt;
1532         Datum           boundDatum;
1533         bool            isnull;
1534         Node       *bound;
1535         List       *my_qual = NIL,
1536                            *result = NIL;
1537         Relation        parent;
1538
1539         /* Guard against stack overflow due to overly deep partition tree */
1540         check_stack_depth();
1541
1542         /* Quick copy */
1543         if (rel->rd_partcheck != NIL)
1544                 return copyObject(rel->rd_partcheck);
1545
1546         /* Grab at least an AccessShareLock on the parent table */
1547         parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
1548                                            AccessShareLock);
1549
1550         /* Get pg_class.relpartbound */
1551         tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
1552         if (!HeapTupleIsValid(tuple))
1553                 elog(ERROR, "cache lookup failed for relation %u",
1554                          RelationGetRelid(rel));
1555
1556         boundDatum = SysCacheGetAttr(RELOID, tuple,
1557                                                                  Anum_pg_class_relpartbound,
1558                                                                  &isnull);
1559         if (isnull)                                     /* should not happen */
1560                 elog(ERROR, "relation \"%s\" has relpartbound = null",
1561                          RelationGetRelationName(rel));
1562         bound = stringToNode(TextDatumGetCString(boundDatum));
1563         ReleaseSysCache(tuple);
1564
1565         my_qual = get_qual_from_partbound(rel, parent, bound);
1566
1567         /* Add the parent's quals to the list (if any) */
1568         if (parent->rd_rel->relispartition)
1569                 result = list_concat(generate_partition_qual(parent), my_qual);
1570         else
1571                 result = my_qual;
1572
1573         /*
1574          * Change Vars to have partition's attnos instead of the parent's. We do
1575          * this after we concatenate the parent's quals, because we want every Var
1576          * in it to bear this relation's attnos. It's safe to assume varno = 1
1577          * here.
1578          */
1579         result = map_partition_varattnos(result, 1, rel, parent);
1580
1581         /* Save a copy in the relcache */
1582         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
1583         rel->rd_partcheck = copyObject(result);
1584         MemoryContextSwitchTo(oldcxt);
1585
1586         /* Keep the parent locked until commit */
1587         heap_close(parent, NoLock);
1588
1589         return result;
1590 }
1591
1592 /* ----------------
1593  *              FormPartitionKeyDatum
1594  *                      Construct values[] and isnull[] arrays for the partition key
1595  *                      of a tuple.
1596  *
1597  *      pd                              Partition dispatch object of the partitioned table
1598  *      slot                    Heap tuple from which to extract partition key
1599  *      estate                  executor state for evaluating any partition key
1600  *                                      expressions (must be non-NULL)
1601  *      values                  Array of partition key Datums (output area)
1602  *      isnull                  Array of is-null indicators (output area)
1603  *
1604  * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1605  * the heap tuple passed in.
1606  * ----------------
1607  */
1608 void
1609 FormPartitionKeyDatum(PartitionDispatch pd,
1610                                           TupleTableSlot *slot,
1611                                           EState *estate,
1612                                           Datum *values,
1613                                           bool *isnull)
1614 {
1615         ListCell   *partexpr_item;
1616         int                     i;
1617
1618         if (pd->key->partexprs != NIL && pd->keystate == NIL)
1619         {
1620                 /* Check caller has set up context correctly */
1621                 Assert(estate != NULL &&
1622                            GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1623
1624                 /* First time through, set up expression evaluation state */
1625                 pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1626         }
1627
1628         partexpr_item = list_head(pd->keystate);
1629         for (i = 0; i < pd->key->partnatts; i++)
1630         {
1631                 AttrNumber      keycol = pd->key->partattrs[i];
1632                 Datum           datum;
1633                 bool            isNull;
1634
1635                 if (keycol != 0)
1636                 {
1637                         /* Plain column; get the value directly from the heap tuple */
1638                         datum = slot_getattr(slot, keycol, &isNull);
1639                 }
1640                 else
1641                 {
1642                         /* Expression; need to evaluate it */
1643                         if (partexpr_item == NULL)
1644                                 elog(ERROR, "wrong number of partition key expressions");
1645                         datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1646                                                                                           GetPerTupleExprContext(estate),
1647                                                                                           &isNull);
1648                         partexpr_item = lnext(partexpr_item);
1649                 }
1650                 values[i] = datum;
1651                 isnull[i] = isNull;
1652         }
1653
1654         if (partexpr_item != NULL)
1655                 elog(ERROR, "wrong number of partition key expressions");
1656 }
1657
1658 /*
1659  * get_partition_for_tuple
1660  *              Finds a leaf partition for tuple contained in *slot
1661  *
1662  * Returned value is the sequence number of the leaf partition thus found,
1663  * or -1 if no leaf partition is found for the tuple.  *failed_at is set
1664  * to the OID of the partitioned table whose partition was not found in
1665  * the latter case.
1666  */
1667 int
1668 get_partition_for_tuple(PartitionDispatch *pd,
1669                                                 TupleTableSlot *slot,
1670                                                 EState *estate,
1671                                                 PartitionDispatchData **failed_at,
1672                                                 TupleTableSlot **failed_slot)
1673 {
1674         PartitionDispatch parent;
1675         Datum           values[PARTITION_MAX_KEYS];
1676         bool            isnull[PARTITION_MAX_KEYS];
1677         int                     cur_offset,
1678                                 cur_index;
1679         int                     i,
1680                                 result;
1681         ExprContext *ecxt = GetPerTupleExprContext(estate);
1682         TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
1683
1684         /* start with the root partitioned table */
1685         parent = pd[0];
1686         while (true)
1687         {
1688                 PartitionKey key = parent->key;
1689                 PartitionDesc partdesc = parent->partdesc;
1690                 TupleTableSlot *myslot = parent->tupslot;
1691                 TupleConversionMap *map = parent->tupmap;
1692
1693                 if (myslot != NULL && map != NULL)
1694                 {
1695                         HeapTuple       tuple = ExecFetchSlotTuple(slot);
1696
1697                         ExecClearTuple(myslot);
1698                         tuple = do_convert_tuple(tuple, map);
1699                         ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
1700                         slot = myslot;
1701                 }
1702
1703                 /* Quick exit */
1704                 if (partdesc->nparts == 0)
1705                 {
1706                         *failed_at = parent;
1707                         *failed_slot = slot;
1708                         return -1;
1709                 }
1710
1711                 /*
1712                  * Extract partition key from tuple. Expression evaluation machinery
1713                  * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
1714                  * point to the correct tuple slot.  The slot might have changed from
1715                  * what was used for the parent table if the table of the current
1716                  * partitioning level has different tuple descriptor from the parent.
1717                  * So update ecxt_scantuple accordingly.
1718                  */
1719                 ecxt->ecxt_scantuple = slot;
1720                 FormPartitionKeyDatum(parent, slot, estate, values, isnull);
1721
1722                 if (key->strategy == PARTITION_STRATEGY_RANGE)
1723                 {
1724                         /* Disallow nulls in the range partition key of the tuple */
1725                         for (i = 0; i < key->partnatts; i++)
1726                                 if (isnull[i])
1727                                         ereport(ERROR,
1728                                                         (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1729                                                 errmsg("range partition key of row contains null")));
1730                 }
1731
1732                 if (partdesc->boundinfo->has_null && isnull[0])
1733                         /* Tuple maps to the null-accepting list partition */
1734                         cur_index = partdesc->boundinfo->null_index;
1735                 else
1736                 {
1737                         /* Else bsearch in partdesc->boundinfo */
1738                         bool            equal = false;
1739
1740                         cur_offset = partition_bound_bsearch(key, partdesc->boundinfo,
1741                                                                                                  values, false, &equal);
1742                         switch (key->strategy)
1743                         {
1744                                 case PARTITION_STRATEGY_LIST:
1745                                         if (cur_offset >= 0 && equal)
1746                                                 cur_index = partdesc->boundinfo->indexes[cur_offset];
1747                                         else
1748                                                 cur_index = -1;
1749                                         break;
1750
1751                                 case PARTITION_STRATEGY_RANGE:
1752
1753                                         /*
1754                                          * Offset returned is such that the bound at offset is
1755                                          * found to be less or equal with the tuple. So, the bound
1756                                          * at offset+1 would be the upper bound.
1757                                          */
1758                                         cur_index = partdesc->boundinfo->indexes[cur_offset + 1];
1759                                         break;
1760
1761                                 default:
1762                                         elog(ERROR, "unexpected partition strategy: %d",
1763                                                  (int) key->strategy);
1764                         }
1765                 }
1766
1767                 /*
1768                  * cur_index < 0 means we failed to find a partition of this parent.
1769                  * cur_index >= 0 means we either found the leaf partition, or the
1770                  * next parent to find a partition of.
1771                  */
1772                 if (cur_index < 0)
1773                 {
1774                         result = -1;
1775                         *failed_at = parent;
1776                         *failed_slot = slot;
1777                         break;
1778                 }
1779                 else if (parent->indexes[cur_index] >= 0)
1780                 {
1781                         result = parent->indexes[cur_index];
1782                         break;
1783                 }
1784                 else
1785                         parent = pd[-parent->indexes[cur_index]];
1786         }
1787
1788         ecxt->ecxt_scantuple = ecxt_scantuple_old;
1789         return result;
1790 }
1791
1792 /*
1793  * qsort_partition_list_value_cmp
1794  *
1795  * Compare two list partition bound datums
1796  */
1797 static int32
1798 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
1799 {
1800         Datum           val1 = (*(const PartitionListValue **) a)->value,
1801                                 val2 = (*(const PartitionListValue **) b)->value;
1802         PartitionKey key = (PartitionKey) arg;
1803
1804         return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1805                                                                                    key->partcollation[0],
1806                                                                                    val1, val2));
1807 }
1808
1809 /*
1810  * make_one_range_bound
1811  *
1812  * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
1813  * and a flag telling whether the bound is lower or not.  Made into a function
1814  * because there are multiple sites that want to use this facility.
1815  */
1816 static PartitionRangeBound *
1817 make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
1818 {
1819         PartitionRangeBound *bound;
1820         ListCell   *cell;
1821         int                     i;
1822
1823         bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
1824         bound->index = index;
1825         bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
1826         bound->content = (RangeDatumContent *) palloc0(key->partnatts *
1827                                                                                                    sizeof(RangeDatumContent));
1828         bound->lower = lower;
1829
1830         i = 0;
1831         foreach(cell, datums)
1832         {
1833                 PartitionRangeDatum *datum = lfirst(cell);
1834
1835                 /* What's contained in this range datum? */
1836                 bound->content[i] = !datum->infinite
1837                         ? RANGE_DATUM_FINITE
1838                         : (lower ? RANGE_DATUM_NEG_INF
1839                            : RANGE_DATUM_POS_INF);
1840
1841                 if (bound->content[i] == RANGE_DATUM_FINITE)
1842                 {
1843                         Const      *val = (Const *) datum->value;
1844
1845                         if (val->constisnull)
1846                                 elog(ERROR, "invalid range bound datum");
1847                         bound->datums[i] = val->constvalue;
1848                 }
1849
1850                 i++;
1851         }
1852
1853         return bound;
1854 }
1855
1856 /* Used when sorting range bounds across all range partitions */
1857 static int32
1858 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
1859 {
1860         PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
1861         PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
1862         PartitionKey key = (PartitionKey) arg;
1863
1864         return partition_rbound_cmp(key, b1->datums, b1->content, b1->lower, b2);
1865 }
1866
1867 /*
1868  * partition_rbound_cmp
1869  *
1870  * Return for two range bounds whether the 1st one (specified in datum1,
1871  * content1, and lower1) is <=, =, >= the bound specified in *b2
1872  */
1873 static int32
1874 partition_rbound_cmp(PartitionKey key,
1875                                          Datum *datums1, RangeDatumContent *content1, bool lower1,
1876                                          PartitionRangeBound *b2)
1877 {
1878         int32           cmpval = 0;             /* placate compiler */
1879         int                     i;
1880         Datum      *datums2 = b2->datums;
1881         RangeDatumContent *content2 = b2->content;
1882         bool            lower2 = b2->lower;
1883
1884         for (i = 0; i < key->partnatts; i++)
1885         {
1886                 /*
1887                  * First, handle cases involving infinity, which don't require
1888                  * invoking the comparison proc.
1889                  */
1890                 if (content1[i] != RANGE_DATUM_FINITE &&
1891                         content2[i] != RANGE_DATUM_FINITE)
1892
1893                         /*
1894                          * Both are infinity, so they are equal unless one is negative
1895                          * infinity and other positive (or vice versa)
1896                          */
1897                         return content1[i] == content2[i] ? 0
1898                                 : (content1[i] < content2[i] ? -1 : 1);
1899                 else if (content1[i] != RANGE_DATUM_FINITE)
1900                         return content1[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
1901                 else if (content2[i] != RANGE_DATUM_FINITE)
1902                         return content2[i] == RANGE_DATUM_NEG_INF ? 1 : -1;
1903
1904                 cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
1905                                                                                                  key->partcollation[i],
1906                                                                                                  datums1[i],
1907                                                                                                  datums2[i]));
1908                 if (cmpval != 0)
1909                         break;
1910         }
1911
1912         /*
1913          * If the comparison is anything other than equal, we're done. If they
1914          * compare equal though, we still have to consider whether the boundaries
1915          * are inclusive or exclusive.  Exclusive one is considered smaller of the
1916          * two.
1917          */
1918         if (cmpval == 0 && lower1 != lower2)
1919                 cmpval = lower1 ? 1 : -1;
1920
1921         return cmpval;
1922 }
1923
1924 /*
1925  * partition_rbound_datum_cmp
1926  *
1927  * Return whether range bound (specified in rb_datums, rb_content, and
1928  * rb_lower) <=, =, >= partition key of tuple (tuple_datums)
1929  */
1930 static int32
1931 partition_rbound_datum_cmp(PartitionKey key,
1932                                                    Datum *rb_datums, RangeDatumContent *rb_content,
1933                                                    Datum *tuple_datums)
1934 {
1935         int                     i;
1936         int32           cmpval = -1;
1937
1938         for (i = 0; i < key->partnatts; i++)
1939         {
1940                 if (rb_content[i] != RANGE_DATUM_FINITE)
1941                         return rb_content[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
1942
1943                 cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
1944                                                                                                  key->partcollation[i],
1945                                                                                                  rb_datums[i],
1946                                                                                                  tuple_datums[i]));
1947                 if (cmpval != 0)
1948                         break;
1949         }
1950
1951         return cmpval;
1952 }
1953
1954 /*
1955  * partition_bound_cmp
1956  *
1957  * Return whether the bound at offset in boundinfo is <=, =, >= the argument
1958  * specified in *probe.
1959  */
1960 static int32
1961 partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
1962                                         int offset, void *probe, bool probe_is_bound)
1963 {
1964         Datum      *bound_datums = boundinfo->datums[offset];
1965         int32           cmpval = -1;
1966
1967         switch (key->strategy)
1968         {
1969                 case PARTITION_STRATEGY_LIST:
1970                         cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1971                                                                                                          key->partcollation[0],
1972                                                                                                          bound_datums[0],
1973                                                                                                          *(Datum *) probe));
1974                         break;
1975
1976                 case PARTITION_STRATEGY_RANGE:
1977                         {
1978                                 RangeDatumContent *content = boundinfo->content[offset];
1979
1980                                 if (probe_is_bound)
1981                                 {
1982                                         /*
1983                                          * We need to pass whether the existing bound is a lower
1984                                          * bound, so that two equal-valued lower and upper bounds
1985                                          * are not regarded equal.
1986                                          */
1987                                         bool            lower = boundinfo->indexes[offset] < 0;
1988
1989                                         cmpval = partition_rbound_cmp(key,
1990                                                                                                 bound_datums, content, lower,
1991                                                                                           (PartitionRangeBound *) probe);
1992                                 }
1993                                 else
1994                                         cmpval = partition_rbound_datum_cmp(key,
1995                                                                                                                 bound_datums, content,
1996                                                                                                                 (Datum *) probe);
1997                                 break;
1998                         }
1999
2000                 default:
2001                         elog(ERROR, "unexpected partition strategy: %d",
2002                                  (int) key->strategy);
2003         }
2004
2005         return cmpval;
2006 }
2007
2008 /*
2009  * Binary search on a collection of partition bounds. Returns greatest
2010  * bound in array boundinfo->datums which is less than or equal to *probe
2011  * If all bounds in the array are greater than *probe, -1 is returned.
2012  *
2013  * *probe could either be a partition bound or a Datum array representing
2014  * the partition key of a tuple being routed; probe_is_bound tells which.
2015  * We pass that down to the comparison function so that it can interpret the
2016  * contents of *probe accordingly.
2017  *
2018  * *is_equal is set to whether the bound at the returned index is equal with
2019  * *probe.
2020  */
2021 static int
2022 partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
2023                                                 void *probe, bool probe_is_bound, bool *is_equal)
2024 {
2025         int                     lo,
2026                                 hi,
2027                                 mid;
2028
2029         lo = -1;
2030         hi = boundinfo->ndatums - 1;
2031         while (lo < hi)
2032         {
2033                 int32           cmpval;
2034
2035                 mid = (lo + hi + 1) / 2;
2036                 cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
2037                                                                          probe_is_bound);
2038                 if (cmpval <= 0)
2039                 {
2040                         lo = mid;
2041                         *is_equal = (cmpval == 0);
2042
2043                         if (*is_equal)
2044                                 break;
2045                 }
2046                 else
2047                         hi = mid - 1;
2048         }
2049
2050         return lo;
2051 }