From ab5b4e2f9ede250bd77d09aa8ec60bfeb982fb01 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 21 Jul 2009 19:53:12 +0000 Subject: [PATCH] Speed up AllocSetFreeIndex, which is a significant cost in palloc and pfree, by using a lookup table instead of a naive shift-and-count loop. Based on code originally posted by Sean Eron Anderson at http://graphics.stanford.edu/%7eseander/bithacks.html. Greg Stark did the research and benchmarking to show that this is what we should use. Jeremy Kerr first noticed that this is a hotspot that could be optimized, though we ended up not using his suggestion of platform-specific bit-searching code. --- src/backend/utils/mmgr/aset.c | 41 +++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index f11e15d57c..520c9acbcc 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mmgr/aset.c,v 1.79 2009/06/11 14:49:06 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mmgr/aset.c,v 1.80 2009/07/21 19:53:12 tgl Exp $ * * NOTE: * This is a new (Feb. 05, 1999) implementation of the allocation set @@ -238,6 +238,17 @@ static MemoryContextMethods AllocSetMethods = { #endif }; +/* + * Table for AllocSetFreeIndex + */ +#define LT16(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n + +static const unsigned char LogTable256[256] = +{ + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + LT16(5), LT16(6), LT16(6), LT16(7), LT16(7), LT16(7), LT16(7), + LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8) +}; /* ---------- * Debug macros @@ -266,18 +277,30 @@ static MemoryContextMethods AllocSetMethods = { static inline int AllocSetFreeIndex(Size size) { - int idx = 0; + int idx; + unsigned int t, + tsize; - if (size > 0) + if (size > (1 << ALLOC_MINBITS)) { - size = (size - 1) >> ALLOC_MINBITS; - while (size != 0) - { - idx++; - size >>= 1; - } + tsize = (size - 1) >> ALLOC_MINBITS; + + /* + * At this point we need to obtain log2(tsize)+1, ie, the number + * of not-all-zero bits at the right. We used to do this with a + * shift-and-count loop, but this function is enough of a hotspot + * to justify micro-optimization effort. The best approach seems + * to be to use a lookup table. Note that this code assumes that + * ALLOCSET_NUM_FREELISTS <= 17, since we only cope with two bytes + * of the tsize value. + */ + t = tsize >> 8; + idx = t ? LogTable256[t] + 8 : LogTable256[tsize]; + Assert(idx < ALLOCSET_NUM_FREELISTS); } + else + idx = 0; return idx; } -- 2.40.0