+# PGAC_C_BUILTIN_POPCOUNT
+# -------------------------
+# Check if the C compiler understands __builtin_popcount() when the probe
+# is compiled with -mpopcnt appended to CFLAGS.  If so, define
+# HAVE__BUILTIN_POPCOUNT and set CFLAGS_POPCNT to -mpopcnt.  CFLAGS itself
+# is restored to its previous value once the probe has run.
+AC_DEFUN([PGAC_C_BUILTIN_POPCOUNT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcount])])dnl
+AC_CACHE_CHECK([for __builtin_popcount], [Ac_cachevar],
+[pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -mpopcnt"
+AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static int x = __builtin_popcount(255);])],
+[Ac_cachevar=yes],
+[Ac_cachevar=no])
+CFLAGS="$pgac_save_CFLAGS"])
+if test x"$Ac_cachevar" = x"yes"; then
+ CFLAGS_POPCNT="-mpopcnt"
+AC_DEFINE(HAVE__BUILTIN_POPCOUNT, 1,
+ [Define to 1 if your compiler understands __builtin_popcount.])
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_C_BUILTIN_POPCOUNT
+
+
+
+# PGAC_C_BUILTIN_POPCOUNTL
+# -------------------------
+# Check if the C compiler understands __builtin_popcountl() when the probe
+# is compiled with -mpopcnt appended to CFLAGS.  If so, define
+# HAVE__BUILTIN_POPCOUNTL and set CFLAGS_POPCNT to -mpopcnt.  CFLAGS itself
+# is restored to its previous value once the probe has run.
+AC_DEFUN([PGAC_C_BUILTIN_POPCOUNTL],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcountl])])dnl
+AC_CACHE_CHECK([for __builtin_popcountl], [Ac_cachevar],
+[pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -mpopcnt"
+AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static int x = __builtin_popcountl(255);])],
+[Ac_cachevar=yes],
+[Ac_cachevar=no])
+CFLAGS="$pgac_save_CFLAGS"])
+if test x"$Ac_cachevar" = x"yes"; then
+ CFLAGS_POPCNT="-mpopcnt"
+AC_DEFINE(HAVE__BUILTIN_POPCOUNTL, 1,
+ [Define to 1 if your compiler understands __builtin_popcountl.])
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_C_BUILTIN_POPCOUNTL
+
+
+
+# PGAC_C_BUILTIN_CTZ
+# -------------------------
+# Check if the C compiler understands __builtin_ctz(),
+# and define HAVE__BUILTIN_CTZ if so.
+# The probe is compiled with the current CFLAGS unchanged, and its result
+# is cached in pgac_cv__builtin_ctz.
+AC_DEFUN([PGAC_C_BUILTIN_CTZ],
+[AC_CACHE_CHECK(for __builtin_ctz, pgac_cv__builtin_ctz,
+[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static int x = __builtin_ctz(256);]
+)],
+[pgac_cv__builtin_ctz=yes],
+[pgac_cv__builtin_ctz=no])])
+if test x"$pgac_cv__builtin_ctz" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_CTZ, 1,
+ [Define to 1 if your compiler understands __builtin_ctz.])
+fi])# PGAC_C_BUILTIN_CTZ
+
+
+
+# PGAC_C_BUILTIN_CTZL
+# -------------------------
+# Check if the C compiler understands __builtin_ctzl(),
+# and define HAVE__BUILTIN_CTZL if so.
+# The probe is compiled with the current CFLAGS unchanged, and its result
+# is cached in pgac_cv__builtin_ctzl.
+AC_DEFUN([PGAC_C_BUILTIN_CTZL],
+[AC_CACHE_CHECK(for __builtin_ctzl, pgac_cv__builtin_ctzl,
+[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static int x = __builtin_ctzl(256);]
+)],
+[pgac_cv__builtin_ctzl=yes],
+[pgac_cv__builtin_ctzl=no])])
+if test x"$pgac_cv__builtin_ctzl" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_CTZL, 1,
+ [Define to 1 if your compiler understands __builtin_ctzl.])
+fi])# PGAC_C_BUILTIN_CTZL
+
+
+
+# PGAC_C_BUILTIN_CLZ
+# -------------------------
+# Check if the C compiler understands __builtin_clz(),
+# and define HAVE__BUILTIN_CLZ if so.
+# The probe is compiled with the current CFLAGS unchanged, and its result
+# is cached in pgac_cv__builtin_clz.
+AC_DEFUN([PGAC_C_BUILTIN_CLZ],
+[AC_CACHE_CHECK(for __builtin_clz, pgac_cv__builtin_clz,
+[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static int x = __builtin_clz(256);]
+)],
+[pgac_cv__builtin_clz=yes],
+[pgac_cv__builtin_clz=no])])
+if test x"$pgac_cv__builtin_clz" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_CLZ, 1,
+ [Define to 1 if your compiler understands __builtin_clz.])
+fi])# PGAC_C_BUILTIN_CLZ
+
+
+
+# PGAC_C_BUILTIN_CLZL
+# -------------------------
+# Check if the C compiler understands __builtin_clzl(),
+# and define HAVE__BUILTIN_CLZL if so.
+# The probe is compiled with the current CFLAGS unchanged, and its result
+# is cached in pgac_cv__builtin_clzl.
+AC_DEFUN([PGAC_C_BUILTIN_CLZL],
+[AC_CACHE_CHECK(for __builtin_clzl, pgac_cv__builtin_clzl,
+[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static int x = __builtin_clzl(256);]
+)],
+[pgac_cv__builtin_clzl=yes],
+[pgac_cv__builtin_clzl=no])])
+if test x"$pgac_cv__builtin_clzl" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_CLZL, 1,
+ [Define to 1 if your compiler understands __builtin_clzl.])
+fi])# PGAC_C_BUILTIN_CLZL
+
+
+
# PGAC_C_BUILTIN_UNREACHABLE
# --------------------------
# Check if the C compiler understands __builtin_unreachable(),
CFLAGS_SSE42
have_win32_dbghelp
LIBOBJS
+CFLAGS_POPCNT
UUID_LIBS
LDAP_LIBS_BE
LDAP_LIBS_FE
$as_echo "#define HAVE__BUILTIN_CONSTANT_P 1" >>confdefs.h
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
+$as_echo_n "checking for __builtin_popcount... " >&6; }
+if ${pgac_cv_popcount+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -mpopcnt"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static int x = __builtin_popcount(255);
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv_popcount=yes
+else
+ pgac_cv_popcount=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcount" >&5
+$as_echo "$pgac_cv_popcount" >&6; }
+if test x"$pgac_cv_popcount" = x"yes"; then
+ CFLAGS_POPCNT="-mpopcnt"
+
+$as_echo "#define HAVE__BUILTIN_POPCOUNT 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcountl" >&5
+$as_echo_n "checking for __builtin_popcountl... " >&6; }
+if ${pgac_cv_popcountl+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -mpopcnt"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static int x = __builtin_popcountl(255);
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv_popcountl=yes
+else
+ pgac_cv_popcountl=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcountl" >&5
+$as_echo "$pgac_cv_popcountl" >&6; }
+if test x"$pgac_cv_popcountl" = x"yes"; then
+ CFLAGS_POPCNT="-mpopcnt"
+
+$as_echo "#define HAVE__BUILTIN_POPCOUNTL 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5
+$as_echo_n "checking for __builtin_ctz... " >&6; }
+if ${pgac_cv__builtin_ctz+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static int x = __builtin_ctz(256);
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv__builtin_ctz=yes
+else
+ pgac_cv__builtin_ctz=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctz" >&5
+$as_echo "$pgac_cv__builtin_ctz" >&6; }
+if test x"$pgac_cv__builtin_ctz" = xyes ; then
+
+$as_echo "#define HAVE__BUILTIN_CTZ 1" >>confdefs.h
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctzl" >&5
+$as_echo_n "checking for __builtin_ctzl... " >&6; }
+if ${pgac_cv__builtin_ctzl+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static int x = __builtin_ctzl(256);
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv__builtin_ctzl=yes
+else
+ pgac_cv__builtin_ctzl=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctzl" >&5
+$as_echo "$pgac_cv__builtin_ctzl" >&6; }
+if test x"$pgac_cv__builtin_ctzl" = xyes ; then
+
+$as_echo "#define HAVE__BUILTIN_CTZL 1" >>confdefs.h
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
+$as_echo_n "checking for __builtin_clz... " >&6; }
+if ${pgac_cv__builtin_clz+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static int x = __builtin_clz(256);
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv__builtin_clz=yes
+else
+ pgac_cv__builtin_clz=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5
+$as_echo "$pgac_cv__builtin_clz" >&6; }
+if test x"$pgac_cv__builtin_clz" = xyes ; then
+
+$as_echo "#define HAVE__BUILTIN_CLZ 1" >>confdefs.h
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzl" >&5
+$as_echo_n "checking for __builtin_clzl... " >&6; }
+if ${pgac_cv__builtin_clzl+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static int x = __builtin_clzl(256);
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv__builtin_clzl=yes
+else
+ pgac_cv__builtin_clzl=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clzl" >&5
+$as_echo "$pgac_cv__builtin_clzl" >&6; }
+if test x"$pgac_cv__builtin_clzl" = xyes ; then
+
+$as_echo "#define HAVE__BUILTIN_CLZL 1" >>confdefs.h
+
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5
$as_echo_n "checking for __builtin_unreachable... " >&6; }
fi
+
+
# MSVC doesn't cope well with defining restrict to __restrict, the
# spelling it understands, because it conflicts with
# __declspec(restrict). Therefore we define pg_restrict to the
PGAC_C_BUILTIN_BSWAP32
PGAC_C_BUILTIN_BSWAP64
PGAC_C_BUILTIN_CONSTANT_P
+PGAC_C_BUILTIN_POPCOUNT
+PGAC_C_BUILTIN_POPCOUNTL
+PGAC_C_BUILTIN_CTZ
+PGAC_C_BUILTIN_CTZL
+PGAC_C_BUILTIN_CLZ
+PGAC_C_BUILTIN_CLZL
PGAC_C_BUILTIN_UNREACHABLE
PGAC_C_COMPUTED_GOTO
PGAC_STRUCT_TIMEZONE
PGAC_TYPE_LOCALE_T
+AC_SUBST(CFLAGS_POPCNT)
+
# MSVC doesn't cope well with defining restrict to __restrict, the
# spelling it understands, because it conflicts with
# __declspec(restrict). Therefore we define pg_restrict to the
CFLAGS = @CFLAGS@
CFLAGS_VECTOR = @CFLAGS_VECTOR@
CFLAGS_SSE42 = @CFLAGS_SSE42@
+CFLAGS_POPCNT = @CFLAGS_POPCNT@
CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@
PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
CXXFLAGS = @CXXFLAGS@
#include "access/visibilitymap.h"
#include "access/xlog.h"
#include "miscadmin.h"
+#include "port/pg_bitutils.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/inval.h"
-
/*#define TRACE_VISIBILITYMAP */
/*
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
-/* tables for fast counting of set bits for visible and frozen */
-static const uint8 number_of_ones_for_visible[256] = {
- 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
- 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
-};
-static const uint8 number_of_ones_for_frozen[256] = {
- 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
- 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
- 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
- 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
- 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
-};
+/*
+ * Masks for counting subsets of bits in the visibility map.  Each heap block
+ * owns a pair of adjacent bits; the constants are wrapped in UINT64CONST so
+ * that they are typed as 64-bit values even on compilers that do not widen
+ * unsuffixed integer literals automatically.
+ */
+#define VISIBLE_MASK64 UINT64CONST(0x5555555555555555) /* The lower bit of each bit pair */
+#define FROZEN_MASK64 UINT64CONST(0xaaaaaaaaaaaaaaaa) /* The upper bit of each bit pair */
/* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
{
BlockNumber mapBlock;
+ BlockNumber nvisible = 0;
+ BlockNumber nfrozen = 0;
/* all_visible must be specified */
Assert(all_visible);
- *all_visible = 0;
- if (all_frozen)
- *all_frozen = 0;
-
for (mapBlock = 0;; mapBlock++)
{
Buffer mapBuffer;
- unsigned char *map;
+ uint64 *map;
int i;
/*
* immediately stale anyway if anyone is concurrently setting or
* clearing bits, and we only really need an approximate value.
*/
- map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
+ map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
- for (i = 0; i < MAPSIZE; i++)
+ StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0,
+ "unsupported MAPSIZE");
+ if (all_frozen == NULL)
+ {
+ for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
+ nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
+ }
+ else
{
- *all_visible += number_of_ones_for_visible[map[i]];
- if (all_frozen)
- *all_frozen += number_of_ones_for_frozen[map[i]];
+ for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
+ {
+ nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
+ nfrozen += pg_popcount64(map[i] & FROZEN_MASK64);
+ }
}
ReleaseBuffer(mapBuffer);
}
+
+ *all_visible = nvisible;
+ if (all_frozen)
+ *all_frozen = nfrozen;
}
/*
#include "access/hash.h"
#include "lib/bloomfilter.h"
+#include "port/pg_bitutils.h"
#define MAX_HASH_FUNCS 10
bloom_prop_bits_set(bloom_filter *filter)
{
int bitset_bytes = filter->m / BITS_PER_BYTE;
- uint64 bits_set = 0;
- int i;
-
- for (i = 0; i < bitset_bytes; i++)
- {
- unsigned char byte = filter->bitset[i];
-
- while (byte)
- {
- bits_set++;
- byte &= (byte - 1);
- }
- }
+ uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
return bits_set / (double) filter->m;
}
#include "access/hash.h"
#include "nodes/pg_list.h"
+#include "port/pg_bitutils.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
+/* Set the bitwise macro version we must use based on the bitmapword size */
+#if BITS_PER_BITMAPWORD == 32
-/*
- * Lookup tables to avoid need for bit-by-bit groveling
- *
- * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
- * in a nonzero byte value x. The entry for x=0 is never used.
- *
- * leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a
- * nonzero byte value x. The entry for x=0 is never used.
- *
- * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
- *
- * We could make these tables larger and reduce the number of iterations
- * in the functions that use them, but bytewise shifts and masks are
- * especially fast on many machines, so working a byte at a time seems best.
- */
+#define bmw_popcount(w) pg_popcount32(w)
+#define bmw_rightmost_one(w) pg_rightmost_one32(w)
+#define bmw_leftmost_one(w) pg_leftmost_one32(w)
+
+#elif BITS_PER_BITMAPWORD == 64
+
+#define bmw_popcount(w) pg_popcount64(w)
+#define bmw_rightmost_one(w) pg_rightmost_one64(w)
+#define bmw_leftmost_one(w) pg_leftmost_one64(w)
+
+#else
+#error "invalid BITS_PER_BITMAPWORD"
+#endif
-static const uint8 rightmost_one_pos[256] = {
- 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-
-static const uint8 leftmost_one_pos[256] = {
- 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
-};
-
-static const uint8 number_of_ones[256] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
/*
if (result >= 0 || HAS_MULTIPLE_ONES(w))
elog(ERROR, "bitmapset has multiple members");
result = wordnum * BITS_PER_BITMAPWORD;
- while ((w & 255) == 0)
- {
- w >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[w & 255];
+ result += bmw_rightmost_one(w);
}
}
if (result < 0)
if (result >= 0 || HAS_MULTIPLE_ONES(w))
return false;
result = wordnum * BITS_PER_BITMAPWORD;
- while ((w & 255) == 0)
- {
- w >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[w & 255];
+ result += bmw_rightmost_one(w);
}
}
if (result < 0)
{
bitmapword w = a->words[wordnum];
- /* we assume here that bitmapword is an unsigned type */
- while (w != 0)
- {
- result += number_of_ones[w & 255];
- w >>= 8;
- }
+ /* No need to count the bits in a zero word */
+ if (w != 0)
+ result += bmw_popcount(w);
}
return result;
}
a->words[wordnum] &= ~w;
result = wordnum * BITS_PER_BITMAPWORD;
- while ((w & 255) == 0)
- {
- w >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[w & 255];
+ result += bmw_rightmost_one(w);
return result;
}
}
int result;
result = wordnum * BITS_PER_BITMAPWORD;
- while ((w & 255) == 0)
- {
- w >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[w & 255];
+ result += bmw_rightmost_one(w);
return result;
}
if (w != 0)
{
int result;
- int shift = BITS_PER_BITMAPWORD - 8;
result = wordnum * BITS_PER_BITMAPWORD;
-
- while ((w >> shift) == 0)
- shift -= 8;
-
- result += shift + leftmost_one_pos[(w >> shift) & 255];
+ result += bmw_leftmost_one(w);
return result;
}
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
#undef HAVE__BUILTIN_OP_OVERFLOW
+/* Define to 1 if your compiler understands __builtin_popcount. */
+#undef HAVE__BUILTIN_POPCOUNT
+
+/* Define to 1 if your compiler understands __builtin_popcountl. */
+#undef HAVE__BUILTIN_POPCOUNTL
+
+/* Define to 1 if your compiler understands __builtin_ctz. */
+#undef HAVE__BUILTIN_CTZ
+
+/* Define to 1 if your compiler understands __builtin_ctzl. */
+#undef HAVE__BUILTIN_CTZL
+
+/* Define to 1 if your compiler understands __builtin_clz. */
+#undef HAVE__BUILTIN_CLZ
+
+/* Define to 1 if your compiler understands __builtin_clzl. */
+#undef HAVE__BUILTIN_CLZL
+
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
#undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
/* #undef HAVE__BUILTIN_OP_OVERFLOW */
+/* Define to 1 if your compiler understands __builtin_popcount. */
+/* #undef HAVE__BUILTIN_POPCOUNT */
+
+/* Define to 1 if your compiler understands __builtin_popcountl. */
+/* #undef HAVE__BUILTIN_POPCOUNTL */
+
+/* Define to 1 if your compiler understands __builtin_ctz. */
+/* #undef HAVE__BUILTIN_CTZ */
+
+/* Define to 1 if your compiler understands __builtin_ctzl. */
+/* #undef HAVE__BUILTIN_CTZL */
+
+/* Define to 1 if your compiler understands __builtin_clz. */
+/* #undef HAVE__BUILTIN_CLZ */
+
+/* Define to 1 if your compiler understands __builtin_clzl. */
+/* #undef HAVE__BUILTIN_CLZL */
+
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
/* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * pg_bitutils.h
+ * miscellaneous functions for bit-wise operations.
+ *
+ *
+ * Portions Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * src/include/port/pg_bitutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PG_BITUTILS_H
+#define PG_BITUTILS_H
+
+/*
+ * Bit-operation entry points.  These are function pointers rather than
+ * plain functions: pg_bitutils.c gives each one an initial target and may
+ * repoint it at a different implementation on the first call.
+ */
+
+/* Number of 1-bits in word. */
+extern int (*pg_popcount32) (uint32 word);
+extern int (*pg_popcount64) (uint64 word);
+/* 0-based position of the least significant 1-bit; word must not be 0. */
+extern int (*pg_rightmost_one32) (uint32 word);
+extern int (*pg_rightmost_one64) (uint64 word);
+/* 0-based position of the most significant 1-bit; word must not be 0. */
+extern int (*pg_leftmost_one32) (uint32 word);
+extern int (*pg_leftmost_one64) (uint64 word);
+
+/* Number of 1-bits in the first "bytes" bytes of buf. */
+extern uint64 pg_popcount(const char *buf, int bytes);
+
+#endif /* PG_BITUTILS_H */
LIBS += $(PTHREAD_LIBS)
OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
- noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
+ noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \
pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \
qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \
tar.o thread.o
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
+# pg_bitutils.c needs CFLAGS_POPCNT.  A target-specific variable applies only
+# to the exact target named, so list every object built from that source:
+# the plain, shared-library and server variants.
+pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT)
+pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_POPCNT)
+pg_bitutils_srv.o: CFLAGS+=$(CFLAGS_POPCNT)
+
#
# Shared library versions of object files
#
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * pg_bitutils.c
+ * miscellaneous functions for bit-wise operations.
+ *
+ * Portions Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/port/pg_bitutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#ifdef HAVE__GET_CPUID
+#include <cpuid.h>
+#endif
+
+#ifdef HAVE__CPUID
+#include <intrin.h>
+#endif
+
+#include "port/pg_bitutils.h"
+
+#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_POPCOUNT) || defined(HAVE__BUILTIN_POPCOUNTL))
+static bool pg_popcount_available(void);
+#endif
+
+#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID)
+static int pg_popcount32_choose(uint32 word);
+static int pg_popcount32_sse42(uint32 word);
+#endif
+static int pg_popcount32_slow(uint32 word);
+
+#if defined(HAVE__BUILTIN_POPCOUNTL) && defined(HAVE__GET_CPUID)
+static int pg_popcount64_choose(uint64 word);
+static int pg_popcount64_sse42(uint64 word);
+#endif
+static int pg_popcount64_slow(uint64 word);
+
+#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CTZL) || defined(HAVE__BUILTIN_CLZ) || defined(HAVE__BUILTIN_CLZL))
+static bool pg_lzcnt_available(void);
+#endif
+
+#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID)
+static int pg_rightmost_one32_choose(uint32 word);
+static int pg_rightmost_one32_abm(uint32 word);
+#endif
+static int pg_rightmost_one32_slow(uint32 word);
+
+#if defined(HAVE__BUILTIN_CTZL) && defined(HAVE__GET_CPUID)
+static int pg_rightmost_one64_choose(uint64 word);
+static int pg_rightmost_one64_abm(uint64 word);
+#endif
+static int pg_rightmost_one64_slow(uint64 word);
+
+#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID)
+static int pg_leftmost_one32_choose(uint32 word);
+static int pg_leftmost_one32_abm(uint32 word);
+#endif
+static int pg_leftmost_one32_slow(uint32 word);
+
+#if defined(HAVE__BUILTIN_CLZL) && defined(HAVE__GET_CPUID)
+static int pg_leftmost_one64_choose(uint64 word);
+static int pg_leftmost_one64_abm(uint64 word);
+#endif
+static int pg_leftmost_one64_slow(uint64 word);
+
+/*
+ * Definitions of the public function pointers.  When both the relevant
+ * compiler builtin and __get_cpuid are available, a pointer starts out
+ * aimed at its _choose function, which selects an implementation on the
+ * first call; otherwise it is bound directly to the portable _slow version.
+ */
+#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID)
+int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
+#else
+int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
+#endif
+
+#if defined(HAVE__BUILTIN_POPCOUNTL) && defined(HAVE__GET_CPUID)
+int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
+#else
+int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
+#endif
+
+#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID)
+int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose;
+#else
+int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow;
+#endif
+
+#if defined(HAVE__BUILTIN_CTZL) && defined(HAVE__GET_CPUID)
+int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose;
+#else
+int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow;
+#endif
+
+#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID)
+int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose;
+#else
+int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow;
+#endif
+
+#if defined(HAVE__BUILTIN_CLZL) && defined(HAVE__GET_CPUID)
+int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose;
+#else
+int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow;
+#endif
+
+
+/* Array marking the number of 1-bits for each value of 0-255. */
+static const uint8 number_of_ones[256] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+/*
+ * Array marking the position of the right-most set bit for each value of
+ * 1-255. We count the right-most position as the 0th bit, and the
+ * left-most the 7th bit. The 0th index of the array must not be used.
+ */
+static const uint8 rightmost_one_pos[256] = {
+ 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/*
+ * Array marking the position of the left-most set bit for each value of
+ * 1-255. We count the right-most position as the 0th bit, and the
+ * left-most the 7th bit. The 0th index of the array must not be used.
+ */
+static const uint8 leftmost_one_pos[256] = {
+ 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+};
+
+#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_POPCOUNT) || defined(HAVE__BUILTIN_POPCOUNTL))
+
+/*
+ * pg_popcount_available
+ *		Query CPUID leaf 1 and report whether bit 23 of the third register
+ *		word returned (the POPCNT flag) is set.
+ */
+static bool
+pg_popcount_available(void)
+{
+	unsigned int regs[4] = {0, 0, 0, 0};
+	unsigned int popcnt_flag = 1 << 23; /* POPCNT */
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(regs, 1);
+#else
+#error cpuid instruction not available
+#endif
+
+	if ((regs[2] & popcnt_flag) == 0)
+		return false;
+
+	return true;
+}
+#endif
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT)
+
+/*
+ * pg_popcount32_choose
+ *		Initial target of the pg_popcount32 pointer.  Probes the CPU, rebinds
+ *		the pointer to the selected implementation so later calls go there
+ *		directly, and answers the current call through the rebound pointer.
+ */
+static int
+pg_popcount32_choose(uint32 word)
+{
+	pg_popcount32 = pg_popcount_available() ?
+		pg_popcount32_sse42 : pg_popcount32_slow;
+
+	return pg_popcount32(word);
+}
+
+/* Count the 1-bits in word by way of the compiler's __builtin_popcount. */
+static int
+pg_popcount32_sse42(uint32 word)
+{
+	int nbits = __builtin_popcount(word);
+
+	return nbits;
+}
+#endif
+
+/*
+ * pg_popcount32_slow
+ *		Portable bit count: add up the per-byte counts from number_of_ones[],
+ *		consuming word one byte at a time from the low end until nothing
+ *		but zero bits remain.
+ */
+static int
+pg_popcount32_slow(uint32 word)
+{
+	int nbits;
+
+	for (nbits = 0; word != 0; word >>= 8)
+		nbits += number_of_ones[word & 0xFF];
+
+	return nbits;
+}
+
+/*
+ * pg_popcount
+ *		Returns the number of 1-bits in the first "bytes" bytes of buf.
+ *
+ * When buf is suitably aligned, whole words (64-bit if pointers are at least
+ * 8 bytes wide, otherwise 32-bit) are counted through pg_popcount64 or
+ * pg_popcount32.  An unaligned buffer, and any tail left after the word
+ * loop, is counted one byte at a time using number_of_ones[].  The buffer is
+ * only read, so const-qualified pointers are used throughout.  A zero or
+ * negative "bytes" yields 0.
+ */
+uint64
+pg_popcount(const char *buf, int bytes)
+{
+	uint64 popcnt = 0;
+
+#if SIZEOF_VOID_P >= 8
+	/* Process in 64-bit chunks if the buffer is aligned. */
+	if (buf == (const char *) TYPEALIGN(8, buf))
+	{
+		const uint64 *words = (const uint64 *) buf;
+
+		while (bytes >= 8)
+		{
+			popcnt += pg_popcount64(*words++);
+			bytes -= 8;
+		}
+
+		buf = (const char *) words;
+	}
+#else
+	/* Process in 32-bit chunks if the buffer is aligned. */
+	if (buf == (const char *) TYPEALIGN(4, buf))
+	{
+		const uint32 *words = (const uint32 *) buf;
+
+		while (bytes >= 4)
+		{
+			popcnt += pg_popcount32(*words++);
+			bytes -= 4;
+		}
+
+		buf = (const char *) words;
+	}
+#endif
+
+	/*
+	 * Process any remaining bytes.  Testing "> 0" rather than "!= 0" keeps a
+	 * negative count from walking off the end of the buffer.
+	 */
+	while (bytes-- > 0)
+		popcnt += number_of_ones[(unsigned char) *buf++];
+
+	return popcnt;
+}
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNTL)
+
+/*
+ * pg_popcount64_choose
+ *		Initial target of the pg_popcount64 pointer.  Probes the CPU, rebinds
+ *		the pointer to the selected implementation so later calls go there
+ *		directly, and answers the current call through the rebound pointer.
+ */
+static int
+pg_popcount64_choose(uint64 word)
+{
+	pg_popcount64 = pg_popcount_available() ?
+		pg_popcount64_sse42 : pg_popcount64_slow;
+
+	return pg_popcount64(word);
+}
+
+/*
+ * pg_popcount64_sse42
+ *		Count the 1-bits in a 64-bit word using __builtin_popcountl.
+ *
+ * __builtin_popcountl operates on unsigned long, which is only 32 bits wide
+ * on some platforms.  Handing it a uint64 there would silently discard the
+ * upper half, so in that case the two 32-bit halves are counted separately.
+ * The sizeof test is a compile-time constant, so only one path survives.
+ */
+static int
+pg_popcount64_sse42(uint64 word)
+{
+	if (sizeof(unsigned long) >= sizeof(uint64))
+		return __builtin_popcountl((unsigned long) word);
+
+	return __builtin_popcountl((unsigned long) (word >> 32)) +
+		__builtin_popcountl((unsigned long) (word & 0xFFFFFFFF));
+}
+
+#endif
+
+/*
+ * pg_popcount64_slow
+ *		Portable bit count: add up the per-byte counts from number_of_ones[],
+ *		consuming word one byte at a time from the low end until nothing
+ *		but zero bits remain.
+ */
+static int
+pg_popcount64_slow(uint64 word)
+{
+	int nbits;
+
+	for (nbits = 0; word != 0; word >>= 8)
+		nbits += number_of_ones[word & 0xFF];
+
+	return nbits;
+}
+
+#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CTZL) || defined(HAVE__BUILTIN_CLZ) || defined(HAVE__BUILTIN_CLZL))
+
+/*
+ * pg_lzcnt_available
+ *		Query CPUID leaf 0x80000001 and report whether bit 5 of the third
+ *		register word returned (the LZCNT flag) is set.
+ */
+static bool
+pg_lzcnt_available(void)
+{
+	unsigned int regs[4] = {0, 0, 0, 0};
+	unsigned int lzcnt_flag = 1 << 5; /* LZCNT */
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(0x80000001, &regs[0], &regs[1], &regs[2], &regs[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(regs, 0x80000001);
+#else
+#error cpuid instruction not available
+#endif
+
+	if ((regs[2] & lzcnt_flag) == 0)
+		return false;
+
+	return true;
+}
+#endif
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ)
+/*
+ * pg_rightmost_one32_choose
+ *		Initial target of the pg_rightmost_one32 pointer.  Probes the CPU,
+ *		rebinds the pointer to the selected implementation so later calls go
+ *		there directly, and answers the current call through it.
+ */
+static int
+pg_rightmost_one32_choose(uint32 word)
+{
+	pg_rightmost_one32 = pg_lzcnt_available() ?
+		pg_rightmost_one32_abm : pg_rightmost_one32_slow;
+
+	return pg_rightmost_one32(word);
+}
+
+/* Position of the lowest 1-bit in word, obtained from __builtin_ctz. */
+static int
+pg_rightmost_one32_abm(uint32 word)
+{
+	int trailing_zeros = __builtin_ctz(word);
+
+	return trailing_zeros;
+}
+
+#endif
+
+/*
+ * pg_rightmost_one32_slow
+ *		Portable lookup of the 0-based position of the least significant
+ *		1-bit in word, which equals the number of trailing 0-bits.
+ *		word must not be 0.
+ */
+static int
+pg_rightmost_one32_slow(uint32 word)
+{
+	int skipped = 0;
+
+	Assert(word != 0);
+
+	/* Drop all-zero low bytes, remembering how many bits they covered */
+	for (; (word & 0xFF) == 0; word >>= 8)
+		skipped += 8;
+
+	return skipped + rightmost_one_pos[word & 0xFF];
+}
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZL)
+/*
+ * pg_rightmost_one64_choose
+ *		Initial target of the pg_rightmost_one64 pointer.  Probes the CPU,
+ *		rebinds the pointer to the selected implementation so later calls go
+ *		there directly, and answers the current call through it.
+ */
+static int
+pg_rightmost_one64_choose(uint64 word)
+{
+	pg_rightmost_one64 = pg_lzcnt_available() ?
+		pg_rightmost_one64_abm : pg_rightmost_one64_slow;
+
+	return pg_rightmost_one64(word);
+}
+
+/*
+ * pg_rightmost_one64_abm
+ *		Position of the lowest 1-bit in a 64-bit word via __builtin_ctzl.
+ *		word must not be 0.
+ *
+ * __builtin_ctzl operates on unsigned long.  Where that type is only 32 bits
+ * wide, passing the uint64 directly would drop the upper half (and hand the
+ * builtin a zero, which is undefined, whenever the lower half is empty), so
+ * the halves are examined separately.  The sizeof test is a compile-time
+ * constant, so only one path survives.
+ */
+static int
+pg_rightmost_one64_abm(uint64 word)
+{
+	unsigned long low;
+
+	if (sizeof(unsigned long) >= sizeof(uint64))
+		return __builtin_ctzl((unsigned long) word);
+
+	low = (unsigned long) (word & 0xFFFFFFFF);
+	if (low != 0)
+		return __builtin_ctzl(low);
+
+	/* Lower half is empty, so the answer lies in the upper half */
+	return 32 + __builtin_ctzl((unsigned long) (word >> 32));
+}
+#endif
+
+/*
+ * pg_rightmost_one64_slow
+ *		Portable lookup of the 0-based position of the least significant
+ *		1-bit in word, which equals the number of trailing 0-bits.
+ *		word must not be 0.
+ */
+static int
+pg_rightmost_one64_slow(uint64 word)
+{
+	int skipped = 0;
+
+	Assert(word != 0);
+
+	/* Drop all-zero low bytes, remembering how many bits they covered */
+	for (; (word & 0xFF) == 0; word >>= 8)
+		skipped += 8;
+
+	return skipped + rightmost_one_pos[word & 0xFF];
+}
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ)
+/*
+ * pg_leftmost_one32_choose
+ *		Initial target of the pg_leftmost_one32 pointer.  Probes the CPU,
+ *		rebinds the pointer to the selected implementation so later calls go
+ *		there directly, and answers the current call through it.
+ */
+static int
+pg_leftmost_one32_choose(uint32 word)
+{
+	pg_leftmost_one32 = pg_lzcnt_available() ?
+		pg_leftmost_one32_abm : pg_leftmost_one32_slow;
+
+	return pg_leftmost_one32(word);
+}
+
+/* Position of the highest 1-bit in word: 31 minus the leading-zero count. */
+static int
+pg_leftmost_one32_abm(uint32 word)
+{
+	int leading_zeros = __builtin_clz(word);
+
+	return 31 - leading_zeros;
+}
+#endif
+
+/*
+ * pg_leftmost_one32_slow
+ *		Portable lookup of the 0-based position, counted from the least
+ *		significant end, of the most significant 1-bit in word.
+ *		word must not be 0.
+ */
+static int
+pg_leftmost_one32_slow(uint32 word)
+{
+	int shift;
+
+	Assert(word != 0);
+
+	/* Step down from the top byte until a nonzero byte is in reach */
+	for (shift = 32 - 8; (word >> shift) == 0; shift -= 8)
+		;
+
+	return shift + leftmost_one_pos[(word >> shift) & 0xFF];
+}
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZL)
+/*
+ * pg_leftmost_one64_choose
+ *		Initial target of the pg_leftmost_one64 pointer.  Probes the CPU,
+ *		rebinds the pointer to the selected implementation so later calls go
+ *		there directly, and answers the current call through it.
+ */
+static int
+pg_leftmost_one64_choose(uint64 word)
+{
+	pg_leftmost_one64 = pg_lzcnt_available() ?
+		pg_leftmost_one64_abm : pg_leftmost_one64_slow;
+
+	return pg_leftmost_one64(word);
+}
+
+/*
+ * pg_leftmost_one64_abm
+ *		Position of the highest 1-bit in a 64-bit word via __builtin_clzl.
+ *		word must not be 0.
+ *
+ * __builtin_clzl operates on unsigned long.  Where that type is only 32 bits
+ * wide, "63 - clzl(word)" would be computed from a truncated value, so the
+ * halves are examined separately: a bit found in the upper half sits at
+ * 32 + (31 - clz), i.e. 63 - clz, and one in the lower half at 31 - clz.
+ * The sizeof test is a compile-time constant, so only one path survives.
+ */
+static int
+pg_leftmost_one64_abm(uint64 word)
+{
+	unsigned long high;
+
+	if (sizeof(unsigned long) >= sizeof(uint64))
+		return 63 - __builtin_clzl((unsigned long) word);
+
+	high = (unsigned long) (word >> 32);
+	if (high != 0)
+		return 63 - __builtin_clzl(high);
+
+	/* Upper half is empty, so the answer lies in the lower half */
+	return 31 - __builtin_clzl((unsigned long) (word & 0xFFFFFFFF));
+}
+#endif
+
+/*
+ * pg_leftmost_one64_slow
+ *		Portable lookup of the 0-based position, counted from the least
+ *		significant end, of the most significant 1-bit in word.
+ *		word must not be 0.
+ */
+static int
+pg_leftmost_one64_slow(uint64 word)
+{
+	int shift;
+
+	Assert(word != 0);
+
+	/* Step down from the top byte until a nonzero byte is in reach */
+	for (shift = 64 - 8; (word >> shift) == 0; shift -= 8)
+		;
+
+	return shift + leftmost_one_pos[(word >> shift) & 0xFF];
+}
push(@pgportfiles, 'pg_crc32c_sse42_choose.c');
push(@pgportfiles, 'pg_crc32c_sse42.c');
push(@pgportfiles, 'pg_crc32c_sb8.c');
+ push(@pgportfiles, 'pg_bitutils.c');
}
else
{