]> granicus.if.org Git - postgresql/commitdiff
Speed up CREATE INDEX CONCURRENTLY's TID sort.
authorRobert Haas <rhaas@postgresql.org>
Wed, 16 Dec 2015 20:23:45 +0000 (15:23 -0500)
committerRobert Haas <rhaas@postgresql.org>
Wed, 16 Dec 2015 20:23:45 +0000 (15:23 -0500)
Encode TIDs as 64-bit integers to speed up comparisons.  This seems to
speed things up on all platforms, but is even more beneficial when
8-byte integers are passed by value.

Peter Geoghegan.  Design suggestions and review by Tom Lane.  Review
also by Simon Riggs and by me.

src/backend/catalog/index.c

index e59b163173c611f0605ed8e255debcfec0225f5b..c10be3d6996aa74d27881827643bdc2a814f0a63 100644 (file)
@@ -109,6 +109,8 @@ static void index_update_stats(Relation rel,
 static void IndexCheckExclusion(Relation heapRelation,
                                        Relation indexRelation,
                                        IndexInfo *indexInfo);
+static inline int64 itemptr_encode(ItemPointer itemptr);
+static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
 static void validate_index_heapscan(Relation heapRelation,
                                                Relation indexRelation,
@@ -2832,7 +2834,13 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
        ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
        ivinfo.strategy = NULL;
 
-       state.tuplesort = tuplesort_begin_datum(TIDOID, TIDLessOperator,
+       /*
+        * Encode TIDs as int8 values for the sort, rather than directly sorting
+        * item pointers.  This can be significantly faster, primarily because TID
+        * is a pass-by-reference type on all platforms, whereas int8 is
+        * pass-by-value on most platforms.
+        */
+       state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
                                                                                        InvalidOid, false,
                                                                                        maintenance_work_mem,
                                                                                        false);
@@ -2871,6 +2879,46 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
        heap_close(heapRelation, NoLock);
 }
 
+/*
+ * itemptr_encode - Encode ItemPointer as int64/int8
+ *
+ * This representation must produce values encoded as int64 that sort in the
+ * same order as their corresponding original TID values would (using the
+ * default int8 opclass to produce a result equivalent to the default TID
+ * opclass).
+ *
+ * As noted in validate_index(), this can be significantly faster.
+ */
+static inline int64
+itemptr_encode(ItemPointer itemptr)
+{
+       BlockNumber             block = ItemPointerGetBlockNumber(itemptr);
+       OffsetNumber    offset = ItemPointerGetOffsetNumber(itemptr);
+       int64                   encoded;
+
+       /*
+        * Use the 16 least significant bits for the offset.  32 adjacent bits are
+        * used for the block number.  Since remaining bits are unused, there
+        * cannot be negative encoded values (We assume a two's complement
+        * representation).
+        */
+       encoded = ((uint64) block << 16) | (uint16) offset;
+
+       return encoded;
+}
+
+/*
+ * itemptr_decode - Decode int64/int8 representation back to ItemPointer
+ */
+static inline void
+itemptr_decode(ItemPointer itemptr, int64 encoded)
+{
+       BlockNumber             block = (BlockNumber) (encoded >> 16);
+       OffsetNumber    offset = (OffsetNumber) (encoded & 0xFFFF);
+
+       ItemPointerSet(itemptr, block, offset);
+}
+
 /*
  * validate_index_callback - bulkdelete callback to collect the index TIDs
  */
@@ -2878,8 +2926,9 @@ static bool
 validate_index_callback(ItemPointer itemptr, void *opaque)
 {
        v_i_state  *state = (v_i_state *) opaque;
+       int64           encoded = itemptr_encode(itemptr);
 
-       tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
+       tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
        state->itups += 1;
        return false;                           /* never actually delete anything */
 }
@@ -2911,6 +2960,7 @@ validate_index_heapscan(Relation heapRelation,
 
        /* state variables for the merge */
        ItemPointer indexcursor = NULL;
+       ItemPointerData         decoded;
        bool            tuplesort_empty = false;
 
        /*
@@ -3020,13 +3070,26 @@ validate_index_heapscan(Relation heapRelation,
                                 */
                                if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
                                        in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
-                               pfree(indexcursor);
                        }
 
                        tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
                                                                                                  &ts_val, &ts_isnull);
                        Assert(tuplesort_empty || !ts_isnull);
-                       indexcursor = (ItemPointer) DatumGetPointer(ts_val);
+                       if (!tuplesort_empty)
+                       {
+                               itemptr_decode(&decoded, DatumGetInt64(ts_val));
+                               indexcursor = &decoded;
+
+                               /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
+#ifndef USE_FLOAT8_BYVAL
+                               pfree(DatumGetPointer(ts_val));
+#endif
+                       }
+                       else
+                       {
+                               /* Be tidy */
+                               indexcursor = NULL;
+                       }
                }
 
                /*