Commit 02a6a54e authored by Tom Lane

Make use of compiler builtins and/or assembly for CLZ, CTZ, POPCNT.

Test for the compiler builtins __builtin_clz, __builtin_ctz, and
__builtin_popcount, and make use of these in preference to
handwritten C code if they're available.  Create src/port
infrastructure for "leftmost one", "rightmost one", and "popcount"
so as to centralize these decisions.

On x86_64, __builtin_popcount generally won't make use of the POPCNT
opcode because that's not universally supported yet.  Provide code
that checks CPUID and then calls POPCNT via asm() if available.
This requires indirecting through a function pointer, which is
an annoying amount of overhead for a one-instruction operation,
but it's probably not worth working harder than this for our
current use-cases.

I'm not sure we've found all the existing places that could profit
from this new infrastructure; but we at least touched all the
ones that used copied-and-pasted versions of the bitmapset.c code,
and got rid of multiple copies of the associated constant arrays.

While at it, replace c-compiler.m4's one-per-builtin-function
macros with a single one that can handle all the cases we need
to worry about so far.  Also, because I'm paranoid, make those
checks into AC_LINK checks rather than just AC_COMPILE; the
former coding failed to verify that libgcc has support for the
builtin, in cases where it's not inline code.

David Rowley, Thomas Munro, Alvaro Herrera, Tom Lane

Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
parent 72880ac1
......@@ -273,60 +273,6 @@ AC_DEFINE(HAVE__BUILTIN_TYPES_COMPATIBLE_P, 1,
fi])# PGAC_C_TYPES_COMPATIBLE
# PGAC_C_BUILTIN_BSWAP16
# -------------------------
# Check if the C compiler understands __builtin_bswap16(),
# and define HAVE__BUILTIN_BSWAP16 if so.
AC_DEFUN([PGAC_C_BUILTIN_BSWAP16],
[AC_CACHE_CHECK(for __builtin_bswap16, pgac_cv__builtin_bswap16,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static unsigned long int x = __builtin_bswap16(0xaabb);]
)],
[pgac_cv__builtin_bswap16=yes],
[pgac_cv__builtin_bswap16=no])])
if test x"$pgac_cv__builtin_bswap16" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_BSWAP16, 1,
[Define to 1 if your compiler understands __builtin_bswap16.])
fi])# PGAC_C_BUILTIN_BSWAP16
# PGAC_C_BUILTIN_BSWAP32
# -------------------------
# Check if the C compiler understands __builtin_bswap32(),
# and define HAVE__BUILTIN_BSWAP32 if so.
AC_DEFUN([PGAC_C_BUILTIN_BSWAP32],
[AC_CACHE_CHECK(for __builtin_bswap32, pgac_cv__builtin_bswap32,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static unsigned long int x = __builtin_bswap32(0xaabbccdd);]
)],
[pgac_cv__builtin_bswap32=yes],
[pgac_cv__builtin_bswap32=no])])
if test x"$pgac_cv__builtin_bswap32" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_BSWAP32, 1,
[Define to 1 if your compiler understands __builtin_bswap32.])
fi])# PGAC_C_BUILTIN_BSWAP32
# PGAC_C_BUILTIN_BSWAP64
# -------------------------
# Check if the C compiler understands __builtin_bswap64(),
# and define HAVE__BUILTIN_BSWAP64 if so.
AC_DEFUN([PGAC_C_BUILTIN_BSWAP64],
[AC_CACHE_CHECK(for __builtin_bswap64, pgac_cv__builtin_bswap64,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static unsigned long int x = __builtin_bswap64(0xaabbccddeeff0011);]
)],
[pgac_cv__builtin_bswap64=yes],
[pgac_cv__builtin_bswap64=no])])
if test x"$pgac_cv__builtin_bswap64" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_BSWAP64, 1,
[Define to 1 if your compiler understands __builtin_bswap64.])
fi])# PGAC_C_BUILTIN_BSWAP64
# PGAC_C_BUILTIN_CONSTANT_P
# -------------------------
# Check if the C compiler understands __builtin_constant_p(),
......@@ -423,6 +369,33 @@ fi])# PGAC_C_COMPUTED_GOTO
# PGAC_CHECK_BUILTIN_FUNC
# -----------------------
# This is similar to AC_CHECK_FUNCS(), except that it also works for compiler
# builtin functions, which AC_CHECK_FUNCS() usually fails to detect.
# The first argument is the function name, eg [__builtin_clzl], and the
# second is its argument list, eg [unsigned long x]. The current coding
# works only for a single argument named x; we might generalize that later.
# It's assumed that the function's result type is coercible to int.
# The probe is a link test rather than a compile-only test, so it also
# fails when the call compiles but cannot be resolved at link time.
# The outcome is cached in a variable named "pgac_cv" followed by the
# function name.
# On success, we define "HAVEfuncname" (there's usually more than enough
# underscores already, so we don't add another one); the symbol is passed
# through AS_TR_CPP, so it comes out as eg HAVE__BUILTIN_CLZL.
AC_DEFUN([PGAC_CHECK_BUILTIN_FUNC],
[AC_CACHE_CHECK(for $1, pgac_cv$1,
[AC_LINK_IFELSE([AC_LANG_PROGRAM([
int
call$1($2)
{
return $1(x);
}], [])],
[pgac_cv$1=yes],
[pgac_cv$1=no])])
if test x"${pgac_cv$1}" = xyes ; then
AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE$1]), 1,
[Define to 1 if your compiler understands $1.])
fi])# PGAC_CHECK_BUILTIN_FUNC
# PGAC_PROG_VARCC_VARFLAGS_OPT
# -----------------------
# Given a compiler, variable name and a string, check if the compiler
......
......@@ -13960,78 +13960,6 @@ if test x"$pgac_cv__types_compatible" = xyes ; then
$as_echo "#define HAVE__BUILTIN_TYPES_COMPATIBLE_P 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap16" >&5
$as_echo_n "checking for __builtin_bswap16... " >&6; }
if ${pgac_cv__builtin_bswap16+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static unsigned long int x = __builtin_bswap16(0xaabb);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_bswap16=yes
else
pgac_cv__builtin_bswap16=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap16" >&5
$as_echo "$pgac_cv__builtin_bswap16" >&6; }
if test x"$pgac_cv__builtin_bswap16" = xyes ; then
$as_echo "#define HAVE__BUILTIN_BSWAP16 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap32" >&5
$as_echo_n "checking for __builtin_bswap32... " >&6; }
if ${pgac_cv__builtin_bswap32+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static unsigned long int x = __builtin_bswap32(0xaabbccdd);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_bswap32=yes
else
pgac_cv__builtin_bswap32=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap32" >&5
$as_echo "$pgac_cv__builtin_bswap32" >&6; }
if test x"$pgac_cv__builtin_bswap32" = xyes ; then
$as_echo "#define HAVE__BUILTIN_BSWAP32 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap64" >&5
$as_echo_n "checking for __builtin_bswap64... " >&6; }
if ${pgac_cv__builtin_bswap64+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static unsigned long int x = __builtin_bswap64(0xaabbccddeeff0011);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_bswap64=yes
else
pgac_cv__builtin_bswap64=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap64" >&5
$as_echo "$pgac_cv__builtin_bswap64" >&6; }
if test x"$pgac_cv__builtin_bswap64" = xyes ; then
$as_echo "#define HAVE__BUILTIN_BSWAP64 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5
$as_echo_n "checking for __builtin_constant_p... " >&6; }
......@@ -14687,12 +14615,49 @@ fi
fi
# On PPC, check if assembler supports LWARX instruction's mutex hint bit
case $host_cpu in
x86_64)
# On x86_64, check if we can compile a popcntq instruction
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports x86_64 popcntq" >&5
$as_echo_n "checking whether assembler supports x86_64 popcntq... " >&6; }
if ${pgac_cv_have_x86_64_popcntq+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main ()
{
long long x = 1; long long r;
__asm__ __volatile__ (" popcntq %1,%0\n" : "=q"(r) : "rm"(x));
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv_have_x86_64_popcntq=yes
else
pgac_cv_have_x86_64_popcntq=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_x86_64_popcntq" >&5
$as_echo "$pgac_cv_have_x86_64_popcntq" >&6; }
if test x"$pgac_cv_have_x86_64_popcntq" = xyes ; then
$as_echo "#define HAVE_X86_64_POPCNTQ 1" >>confdefs.h
fi
;;
ppc*|powerpc*)
# On PPC, check if assembler supports LWARX instruction's mutex hint bit
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports lwarx hint bit" >&5
$as_echo_n "checking whether assembler supports lwarx hint bit... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
if ${pgac_cv_have_ppc_mutex_hint+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
......@@ -14710,7 +14675,8 @@ else
pgac_cv_have_ppc_mutex_hint=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_ppc_mutex_hint" >&5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_ppc_mutex_hint" >&5
$as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
......@@ -15223,6 +15189,237 @@ fi
done
# These typically are compiler builtins, for which AC_CHECK_FUNCS fails.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap16" >&5
$as_echo_n "checking for __builtin_bswap16... " >&6; }
if ${pgac_cv__builtin_bswap16+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
call__builtin_bswap16(int x)
{
return __builtin_bswap16(x);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__builtin_bswap16=yes
else
pgac_cv__builtin_bswap16=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap16" >&5
$as_echo "$pgac_cv__builtin_bswap16" >&6; }
if test x"${pgac_cv__builtin_bswap16}" = xyes ; then
cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_BSWAP16 1
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap32" >&5
$as_echo_n "checking for __builtin_bswap32... " >&6; }
if ${pgac_cv__builtin_bswap32+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
call__builtin_bswap32(int x)
{
return __builtin_bswap32(x);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__builtin_bswap32=yes
else
pgac_cv__builtin_bswap32=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap32" >&5
$as_echo "$pgac_cv__builtin_bswap32" >&6; }
if test x"${pgac_cv__builtin_bswap32}" = xyes ; then
cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_BSWAP32 1
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap64" >&5
$as_echo_n "checking for __builtin_bswap64... " >&6; }
if ${pgac_cv__builtin_bswap64+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
call__builtin_bswap64(long int x)
{
return __builtin_bswap64(x);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__builtin_bswap64=yes
else
pgac_cv__builtin_bswap64=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap64" >&5
$as_echo "$pgac_cv__builtin_bswap64" >&6; }
if test x"${pgac_cv__builtin_bswap64}" = xyes ; then
cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_BSWAP64 1
_ACEOF
fi
# We assume that we needn't test all widths of these explicitly:
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
$as_echo_n "checking for __builtin_clz... " >&6; }
if ${pgac_cv__builtin_clz+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
call__builtin_clz(unsigned int x)
{
return __builtin_clz(x);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__builtin_clz=yes
else
pgac_cv__builtin_clz=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5
$as_echo "$pgac_cv__builtin_clz" >&6; }
if test x"${pgac_cv__builtin_clz}" = xyes ; then
cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_CLZ 1
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5
$as_echo_n "checking for __builtin_ctz... " >&6; }
if ${pgac_cv__builtin_ctz+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
call__builtin_ctz(unsigned int x)
{
return __builtin_ctz(x);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__builtin_ctz=yes
else
pgac_cv__builtin_ctz=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctz" >&5
$as_echo "$pgac_cv__builtin_ctz" >&6; }
if test x"${pgac_cv__builtin_ctz}" = xyes ; then
cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_CTZ 1
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
$as_echo_n "checking for __builtin_popcount... " >&6; }
if ${pgac_cv__builtin_popcount+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
call__builtin_popcount(unsigned int x)
{
return __builtin_popcount(x);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__builtin_popcount=yes
else
pgac_cv__builtin_popcount=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5
$as_echo "$pgac_cv__builtin_popcount" >&6; }
if test x"${pgac_cv__builtin_popcount}" = xyes ; then
cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_POPCOUNT 1
_ACEOF
fi
ac_fn_c_check_func "$LINENO" "fseeko" "ac_cv_func_fseeko"
if test "x$ac_cv_func_fseeko" = xyes; then :
$as_echo "#define HAVE_FSEEKO 1" >>confdefs.h
......
......@@ -1485,9 +1485,6 @@ PGAC_C_FUNCNAME_SUPPORT
PGAC_C_STATIC_ASSERT
PGAC_C_TYPEOF
PGAC_C_TYPES_COMPATIBLE
PGAC_C_BUILTIN_BSWAP16
PGAC_C_BUILTIN_BSWAP32
PGAC_C_BUILTIN_BSWAP64
PGAC_C_BUILTIN_CONSTANT_P
PGAC_C_BUILTIN_UNREACHABLE
PGAC_C_COMPUTED_GOTO
......@@ -1542,16 +1539,29 @@ Use --without-zlib to disable zlib support.])],
[#include <zlib.h>])
fi
# On PPC, check if assembler supports LWARX instruction's mutex hint bit
case $host_cpu in
x86_64)
# On x86_64, check if we can compile a popcntq instruction
AC_CACHE_CHECK([whether assembler supports x86_64 popcntq],
[pgac_cv_have_x86_64_popcntq],
[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
[long long x = 1; long long r;
__asm__ __volatile__ (" popcntq %1,%0\n" : "=q"(r) : "rm"(x));])],
[pgac_cv_have_x86_64_popcntq=yes],
[pgac_cv_have_x86_64_popcntq=no])])
if test x"$pgac_cv_have_x86_64_popcntq" = xyes ; then
AC_DEFINE(HAVE_X86_64_POPCNTQ, 1, [Define to 1 if the assembler supports X86_64's POPCNTQ instruction.])
fi
;;
ppc*|powerpc*)
AC_MSG_CHECKING([whether assembler supports lwarx hint bit])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
# On PPC, check if assembler supports LWARX instruction's mutex hint bit
AC_CACHE_CHECK([whether assembler supports lwarx hint bit],
[pgac_cv_have_ppc_mutex_hint],
[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
[int a = 0; int *p = &a; int r;
__asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));])],
[pgac_cv_have_ppc_mutex_hint=yes],
[pgac_cv_have_ppc_mutex_hint=no])
AC_MSG_RESULT([$pgac_cv_have_ppc_mutex_hint])
[pgac_cv_have_ppc_mutex_hint=no])])
if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
fi
......@@ -1631,6 +1641,15 @@ AC_CHECK_FUNCS(m4_normalize([
wcstombs_l
]))
# These typically are compiler builtins, for which AC_CHECK_FUNCS fails.
PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap16], [int x])
PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap32], [int x])
PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap64], [long int x])
# We assume that we needn't test all widths of these explicitly:
PGAC_CHECK_BUILTIN_FUNC([__builtin_clz], [unsigned int x])
PGAC_CHECK_BUILTIN_FUNC([__builtin_ctz], [unsigned int x])
PGAC_CHECK_BUILTIN_FUNC([__builtin_popcount], [unsigned int x])
AC_REPLACE_FUNCS(fseeko)
case $host_os in
# NetBSD uses a custom fseeko/ftello built on fsetpos/fgetpos
......
......@@ -5,6 +5,7 @@
#include "access/gist.h"
#include "access/stratnum.h"
#include "port/pg_bitutils.h"
#include "_int.h"
......@@ -19,27 +20,6 @@ PG_FUNCTION_INFO_V1(g_intbig_penalty);
PG_FUNCTION_INFO_V1(g_intbig_picksplit);
PG_FUNCTION_INFO_V1(g_intbig_union);
PG_FUNCTION_INFO_V1(g_intbig_same);
/* Number of one-bits in an unsigned byte */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
PG_FUNCTION_INFO_V1(_intbig_in);
PG_FUNCTION_INFO_V1(_intbig_out);
......@@ -207,12 +187,7 @@ g_intbig_compress(PG_FUNCTION_ARGS)
static int32
sizebitvec(BITVECP sign)
{
int32 size = 0,
i;
LOOPBYTE
size += number_of_ones[(unsigned char) sign[i]];
return size;
return pg_popcount(sign, SIGLEN);
}
static int
......@@ -225,7 +200,8 @@ hemdistsign(BITVECP a, BITVECP b)
LOOPBYTE
{
diff = (unsigned char) (a[i] ^ b[i]);
dist += number_of_ones[diff];
/* Using the popcount functions here isn't likely to win */
dist += pg_number_of_ones[diff];
}
return dist;
}
......
......@@ -9,6 +9,8 @@
#include "access/gist.h"
#include "access/stratnum.h"
#include "port/pg_bitutils.h"
#include "crc32.h"
#include "ltree.h"
......@@ -23,26 +25,6 @@ PG_FUNCTION_INFO_V1(_ltree_consistent);
#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
/* Number of one-bits in an unsigned byte */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
......@@ -209,12 +191,7 @@ _ltree_union(PG_FUNCTION_ARGS)
static int32
sizebitvec(BITVECP sign)
{
int32 size = 0,
i;
ALOOPBYTE
size += number_of_ones[(unsigned char) sign[i]];
return size;
return pg_popcount((const char *) sign, ASIGLEN);
}
static int
......@@ -227,7 +204,8 @@ hemdistsign(BITVECP a, BITVECP b)
ALOOPBYTE
{
diff = (unsigned char) (a[i] ^ b[i]);
dist += number_of_ones[diff];
/* Using the popcount functions here isn't likely to win */
dist += pg_number_of_ones[diff];
}
return dist;
}
......
......@@ -7,6 +7,7 @@
#include "access/stratnum.h"
#include "fmgr.h"
#include "port/pg_bitutils.h"
typedef struct
......@@ -39,26 +40,6 @@ PG_FUNCTION_INFO_V1(gtrgm_same);
PG_FUNCTION_INFO_V1(gtrgm_penalty);
PG_FUNCTION_INFO_V1(gtrgm_picksplit);
/* Number of one-bits in an unsigned byte */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
Datum
gtrgm_in(PG_FUNCTION_ARGS)
......@@ -634,12 +615,7 @@ gtrgm_same(PG_FUNCTION_ARGS)
static int32
sizebitvec(BITVECP sign)
{
int32 size = 0,
i;
LOOPBYTE
size += number_of_ones[(unsigned char) sign[i]];
return size;
return pg_popcount(sign, SIGLEN);
}
static int
......@@ -652,7 +628,8 @@ hemdistsign(BITVECP a, BITVECP b)
LOOPBYTE
{
diff = (unsigned char) (a[i] ^ b[i]);
dist += number_of_ones[diff];
/* Using the popcount functions here isn't likely to win */
dist += pg_number_of_ones[diff];
}
return dist;
}
......
......@@ -89,6 +89,7 @@
#include "access/visibilitymap.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
......@@ -115,43 +116,11 @@
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
/* tables for fast counting of set bits for visible and frozen */
static const uint8 number_of_ones_for_visible[256] = {
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
};
static const uint8 number_of_ones_for_frozen[256] = {
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
};
/*
 * Masks for counting subsets of bits in the visibility map.
 *
 * Each mask keeps exactly one bit out of every adjacent pair of bits in a
 * 64-bit word, so a population count of (word & mask) counts only the bits
 * of that kind.
 */
/* 0x55... is binary 0101...: the lower bit of each bit pair */
#define VISIBLE_MASK64 UINT64CONST(0x5555555555555555)
/* 0xaa... is binary 1010...: the upper bit of each bit pair */
#define FROZEN_MASK64 UINT64CONST(0xaaaaaaaaaaaaaaaa)
/* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
......@@ -408,18 +377,16 @@ void
visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
{
BlockNumber mapBlock;
BlockNumber nvisible = 0;
BlockNumber nfrozen = 0;
/* all_visible must be specified */
Assert(all_visible);
*all_visible = 0;
if (all_frozen)
*all_frozen = 0;
for (mapBlock = 0;; mapBlock++)
{
Buffer mapBuffer;
unsigned char *map;
uint64 *map;
int i;
/*
......@@ -436,17 +403,30 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro
* immediately stale anyway if anyone is concurrently setting or
* clearing bits, and we only really need an approximate value.
*/
map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
for (i = 0; i < MAPSIZE; i++)
StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0,
"unsupported MAPSIZE");
if (all_frozen == NULL)
{
*all_visible += number_of_ones_for_visible[map[i]];
if (all_frozen)
*all_frozen += number_of_ones_for_frozen[map[i]];
for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
}
else
{
for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
{
nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
nfrozen += pg_popcount64(map[i] & FROZEN_MASK64);
}
}
ReleaseBuffer(mapBuffer);
}
*all_visible = nvisible;
if (all_frozen)
*all_frozen = nfrozen;
}
/*
......
......@@ -37,6 +37,7 @@
#include "access/hash.h"
#include "lib/bloomfilter.h"
#include "port/pg_bitutils.h"
#define MAX_HASH_FUNCS 10
......@@ -187,19 +188,7 @@ double
bloom_prop_bits_set(bloom_filter *filter)
{
int bitset_bytes = filter->m / BITS_PER_BYTE;
uint64 bits_set = 0;
int i;
for (i = 0; i < bitset_bytes; i++)
{
unsigned char byte = filter->bitset[i];
while (byte)
{
bits_set++;
byte &= (byte - 1);
}
}
uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
return bits_set / (double) filter->m;
}
......
......@@ -22,6 +22,7 @@
#include "access/hash.h"
#include "nodes/pg_list.h"
#include "port/pg_bitutils.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
......@@ -51,79 +52,18 @@
#define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
/*
* Lookup tables to avoid need for bit-by-bit groveling
*
* rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
* in a nonzero byte value x. The entry for x=0 is never used.
*
* leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a
* nonzero byte value x. The entry for x=0 is never used.
*
* number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
*
* We could make these tables larger and reduce the number of iterations
* in the functions that use them, but bytewise shifts and masks are
* especially fast on many machines, so working a byte at a time seems best.
*/
static const uint8 rightmost_one_pos[256] = {
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
static const uint8 leftmost_one_pos[256] = {
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
/*
 * Select appropriate bit-twiddling functions for bitmap word size.
 *
 * The bmw_* names let callers stay independent of the width of a bitmap
 * word: each one expands to the 32-bit or 64-bit pg_* routine that matches
 * BITS_PER_BITMAPWORD.
 */
#if BITS_PER_BITMAPWORD == 32
#define bmw_leftmost_one_pos(w) pg_leftmost_one_pos32(w)
#define bmw_rightmost_one_pos(w) pg_rightmost_one_pos32(w)
#define bmw_popcount(w) pg_popcount32(w)
#elif BITS_PER_BITMAPWORD == 64
#define bmw_leftmost_one_pos(w) pg_leftmost_one_pos64(w)
#define bmw_rightmost_one_pos(w) pg_rightmost_one_pos64(w)
#define bmw_popcount(w) pg_popcount64(w)
#else
/* Only 32-bit and 64-bit bitmap words are handled; fail the build otherwise */
#error "invalid BITS_PER_BITMAPWORD"
#endif
/*
......@@ -607,12 +547,7 @@ bms_singleton_member(const Bitmapset *a)
if (result >= 0 || HAS_MULTIPLE_ONES(w))
elog(ERROR, "bitmapset has multiple members");
result = wordnum * BITS_PER_BITMAPWORD;
while ((w & 255) == 0)
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
result += bmw_rightmost_one_pos(w);
}
}
if (result < 0)
......@@ -650,12 +585,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member)
if (result >= 0 || HAS_MULTIPLE_ONES(w))
return false;
result = wordnum * BITS_PER_BITMAPWORD;
while ((w & 255) == 0)
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
result += bmw_rightmost_one_pos(w);
}
}
if (result < 0)
......@@ -681,12 +611,9 @@ bms_num_members(const Bitmapset *a)
{
bitmapword w = a->words[wordnum];
/* we assume here that bitmapword is an unsigned type */
while (w != 0)
{
result += number_of_ones[w & 255];
w >>= 8;
}
/* No need to count the bits in a zero word */
if (w != 0)
result += bmw_popcount(w);
}
return result;
}
......@@ -1041,12 +968,7 @@ bms_first_member(Bitmapset *a)
a->words[wordnum] &= ~w;
result = wordnum * BITS_PER_BITMAPWORD;
while ((w & 255) == 0)
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
result += bmw_rightmost_one_pos(w);
return result;
}
}
......@@ -1096,12 +1018,7 @@ bms_next_member(const Bitmapset *a, int prevbit)
int result;
result = wordnum * BITS_PER_BITMAPWORD;
while ((w & 255) == 0)
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
result += bmw_rightmost_one_pos(w);
return result;
}
......@@ -1168,14 +1085,9 @@ bms_prev_member(const Bitmapset *a, int prevbit)
if (w != 0)
{
int result;
int shift = BITS_PER_BITMAPWORD - 8;
result = wordnum * BITS_PER_BITMAPWORD;
while ((w >> shift) == 0)
shift -= 8;
result += shift + leftmost_one_pos[(w >> shift) & 255];
result += bmw_leftmost_one_pos(w);
return result;
}
......
......@@ -16,6 +16,7 @@
#include "access/gist.h"
#include "access/tuptoaster.h"
#include "port/pg_bitutils.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/pg_crc.h"
......@@ -70,26 +71,6 @@ typedef struct
#define GETARR(x) ( (int32*)( (char*)(x)+GTHDRSIZE ) )
#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int32) )
/* Number of one-bits in an unsigned byte */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
static int32 sizebitvec(BITVECP sign);
Datum
......@@ -503,12 +484,7 @@ gtsvector_same(PG_FUNCTION_ARGS)
static int32
sizebitvec(BITVECP sign)
{
int32 size = 0,
i;
LOOPBYTE
size += number_of_ones[(unsigned char) sign[i]];
return size;
return pg_popcount(sign, SIGLEN);
}
static int
......@@ -521,7 +497,8 @@ hemdistsign(BITVECP a, BITVECP b)
LOOPBYTE
{
diff = (unsigned char) (a[i] ^ b[i]);
dist += number_of_ones[diff];
/* Using the popcount functions here isn't likely to win */
dist += pg_number_of_ones[diff];
}
return dist;
}
......
......@@ -736,6 +736,9 @@
/* Define to 1 if you have the `X509_get_signature_nid' function. */
#undef HAVE_X509_GET_SIGNATURE_NID
/* Define to 1 if the assembler supports X86_64's POPCNTQ instruction. */
#undef HAVE_X86_64_POPCNTQ
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
......@@ -748,12 +751,21 @@
/* Define to 1 if your compiler understands __builtin_bswap64. */
#undef HAVE__BUILTIN_BSWAP64
/* Define to 1 if your compiler understands __builtin_clz. */
#undef HAVE__BUILTIN_CLZ
/* Define to 1 if your compiler understands __builtin_constant_p. */
#undef HAVE__BUILTIN_CONSTANT_P
/* Define to 1 if your compiler understands __builtin_ctz. */
#undef HAVE__BUILTIN_CTZ
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
#undef HAVE__BUILTIN_OP_OVERFLOW
/* Define to 1 if your compiler understands __builtin_popcount. */
#undef HAVE__BUILTIN_POPCOUNT
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
#undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
......
......@@ -575,6 +575,9 @@
/* Define to 1 if you have the `X509_get_signature_nid' function. */
#define HAVE_X509_GET_SIGNATURE_NID 1
/* Define to 1 if the assembler supports X86_64's POPCNTQ instruction. */
/* #undef HAVE_X86_64_POPCNTQ */
/* Define to 1 if the system has the type `_Bool'. */
/* #undef HAVE__BOOL */
......@@ -587,12 +590,21 @@
/* Define to 1 if your compiler understands __builtin_bswap64. */
/* #undef HAVE__BUILTIN_BSWAP64 */
/* Define to 1 if your compiler understands __builtin_clz. */
/* #undef HAVE__BUILTIN_CLZ */
/* Define to 1 if your compiler understands __builtin_constant_p. */
/* #undef HAVE__BUILTIN_CONSTANT_P */
/* Define to 1 if your compiler understands __builtin_ctz. */
/* #undef HAVE__BUILTIN_CTZ */
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
/* #undef HAVE__BUILTIN_OP_OVERFLOW */
/* Define to 1 if your compiler understands __builtin_popcount. */
/* #undef HAVE__BUILTIN_POPCOUNT */
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
/* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */
......
/*-------------------------------------------------------------------------
*
* pg_bitutils.h
* Miscellaneous functions for bit-wise operations.
*
*
* Copyright (c) 2019, PostgreSQL Global Development Group
*
* src/include/port/pg_bitutils.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_BITUTILS_H
#define PG_BITUTILS_H
/*
 * Byte-indexed lookup tables, defined in src/port/pg_bitutils.c.  Each has
 * one entry per possible byte value (256 entries).  The inline functions
 * below fall back on the first two when the compiler builtins are not
 * available.
 */
extern PGDLLIMPORT const uint8 pg_leftmost_one_pos[256];
extern PGDLLIMPORT const uint8 pg_rightmost_one_pos[256];
extern PGDLLIMPORT const uint8 pg_number_of_ones[256];
/*
 * pg_leftmost_one_pos32
 *		Report the bit index of the highest-order 1 bit in "word".
 *
 * Bit indexes count upward from the least significant bit, which is bit 0.
 * The caller must pass a nonzero word.
 */
static inline int
pg_leftmost_one_pos32(uint32 word)
{
#ifdef HAVE__BUILTIN_CLZ
	Assert(word != 0);

	/* 31 minus the count of leading zero bits is the index we want */
	return 31 - __builtin_clz(word);
#else
	int			byteshift;

	Assert(word != 0);

	/* Walk down from the top byte until we reach one that is nonzero ... */
	for (byteshift = 32 - 8; (word >> byteshift) == 0; byteshift -= 8)
		;

	/* ... then let the lookup table resolve the position within that byte */
	return byteshift + pg_leftmost_one_pos[(word >> byteshift) & 255];
#endif							/* HAVE__BUILTIN_CLZ */
}
/*
 * pg_leftmost_one_pos64
 *		64-bit counterpart of pg_leftmost_one_pos32: report the bit index
 *		(counting from the least significant bit) of the highest-order
 *		1 bit in "word".  word must not be 0.
 */
static inline int
pg_leftmost_one_pos64(uint64 word)
{
#ifdef HAVE__BUILTIN_CLZ
	Assert(word != 0);

	/* Use whichever builtin variant takes a 64-bit argument here */
#if defined(HAVE_LONG_INT_64)
	return 63 - __builtin_clzl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	return 63 - __builtin_clzll(word);
#else
#error must have a working 64-bit integer datatype
#endif
#else							/* !HAVE__BUILTIN_CLZ */
	int			byteshift;

	Assert(word != 0);

	/* Walk down from the top byte until we reach one that is nonzero ... */
	for (byteshift = 64 - 8; (word >> byteshift) == 0; byteshift -= 8)
		;

	/* ... then let the lookup table resolve the position within that byte */
	return byteshift + pg_leftmost_one_pos[(word >> byteshift) & 255];
#endif							/* HAVE__BUILTIN_CLZ */
}
/*
 * pg_rightmost_one_pos32
 *		Report the bit index of the lowest-order 1 bit in "word".
 *
 * Bit indexes count upward from the least significant bit, which is bit 0.
 * The caller must pass a nonzero word.
 */
static inline int
pg_rightmost_one_pos32(uint32 word)
{
#ifdef HAVE__BUILTIN_CTZ
	Assert(word != 0);

	/* The count of trailing zero bits is exactly the index we want */
	return __builtin_ctz(word);
#else
	int			skipped;

	Assert(word != 0);

	/* Discard all-zero low-order bytes, tallying the bits thrown away */
	for (skipped = 0; (word & 255) == 0; skipped += 8)
		word >>= 8;

	/* The lookup table resolves the position within the surviving byte */
	return skipped + pg_rightmost_one_pos[word & 255];
#endif							/* HAVE__BUILTIN_CTZ */
}
/*
 * pg_rightmost_one_pos64
 *		64-bit counterpart of pg_rightmost_one_pos32: report the bit index
 *		(counting from the least significant bit) of the lowest-order
 *		1 bit in "word".  word must not be 0.
 */
static inline int
pg_rightmost_one_pos64(uint64 word)
{
#ifdef HAVE__BUILTIN_CTZ
	Assert(word != 0);

	/* Use whichever builtin variant takes a 64-bit argument here */
#if defined(HAVE_LONG_INT_64)
	return __builtin_ctzl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	return __builtin_ctzll(word);
#else
#error must have a working 64-bit integer datatype
#endif
#else							/* !HAVE__BUILTIN_CTZ */
	int			skipped;

	Assert(word != 0);

	/* Discard all-zero low-order bytes, tallying the bits thrown away */
	for (skipped = 0; (word & 255) == 0; skipped += 8)
		word >>= 8;

	/* The lookup table resolves the position within the surviving byte */
	return skipped + pg_rightmost_one_pos[word & 255];
#endif							/* HAVE__BUILTIN_CTZ */
}
/*
 * Count the number of one-bits in a uint32 or uint64.
 *
 * These are function pointers rather than plain functions: they are
 * defined in src/port/pg_bitutils.c, which decides which implementation
 * they point to.  Call them like ordinary functions.
 */
extern int (*pg_popcount32) (uint32 word);
extern int (*pg_popcount64) (uint64 word);
/* Count the number of one-bits in a byte array of length "bytes" */
extern uint64 pg_popcount(const char *buf, int bytes);
#endif /* PG_BITUTILS_H */
......@@ -36,7 +36,7 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
LIBS += $(PTHREAD_LIBS)
OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \
pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \
qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \
tar.o thread.o
......
/*-------------------------------------------------------------------------
*
* pg_bitutils.c
* Miscellaneous functions for bit-wise operations.
*
* Copyright (c) 2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/port/pg_bitutils.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#ifdef HAVE__GET_CPUID
#include <cpuid.h>
#endif
#ifdef HAVE__CPUID
#include <intrin.h>
#endif
#include "port/pg_bitutils.h"
/*
 * Array giving the position of the left-most set bit for each possible
 * byte value.  We count the right-most position as the 0th bit, and the
 * left-most the 7th bit.  The 0th entry of the array should not be used.
 *
 * Note: this is not used by the functions in pg_bitutils.h when
 * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
 * extensions possibly compiled with a different compiler can use it.
 */
const uint8 pg_leftmost_one_pos[256] = {
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
/*
 * Array giving the position of the right-most set bit for each possible
 * byte value.  We count the right-most position as the 0th bit, and the
 * left-most the 7th bit.  The 0th entry of the array should not be used.
 *
 * Note: this is not used by the functions in pg_bitutils.h when
 * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
 * extensions possibly compiled with a different compiler can use it.
 */
const uint8 pg_rightmost_one_pos[256] = {
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
/*
 * Array giving the number of 1-bits in each possible byte value; index it
 * with the byte value itself (0..255).
 *
 * Note: we export this for use by functions in which explicit use
 * of the popcount functions seems unlikely to be a win.  It also backs
 * the hand-rolled popcount loops later in this file.
 */
const uint8 pg_number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
/*
 * On x86_64, we can use the hardware popcount instruction, but only if
 * we can verify that the CPU supports it via the cpuid instruction.
 *
 * Otherwise, we fall back to __builtin_popcount if the compiler has that,
 * or a hand-rolled implementation if not.
 *
 * USE_POPCNT_ASM is defined only when the assembler accepts POPCNTQ and
 * one of the two CPUID access methods (__get_cpuid or __cpuid) exists.
 */
#ifdef HAVE_X86_64_POPCNTQ
#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
#define USE_POPCNT_ASM 1
#endif
#endif
/* The portable implementations are always compiled. */
static int pg_popcount32_slow(uint32 word);
static int pg_popcount64_slow(uint64 word);
#ifdef USE_POPCNT_ASM
static bool pg_popcount_available(void);
static int pg_popcount32_choose(uint32 word);
static int pg_popcount64_choose(uint64 word);
static int pg_popcount32_asm(uint32 word);
static int pg_popcount64_asm(uint64 word);
/*
 * The exported pointers start out aimed at the "choose" functions, which
 * re-aim them at the asm or slow implementation when first called.
 */
int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
#else
/* Without the asm option there is nothing to choose between. */
int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
#endif /* USE_POPCNT_ASM */
#ifdef USE_POPCNT_ASM
/*
* Return true if CPUID indicates that the POPCNT instruction is available.
*/
static bool
pg_popcount_available(void)
{
unsigned int exx[4] = {0, 0, 0, 0};
#if defined(HAVE__GET_CPUID)
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUID)
__cpuid(exx, 1);
#else
#error cpuid instruction not available
#endif
return (exx[2] & (1 << 23)) != 0; /* POPCNT */
}
/*
* These functions get called on the first call to pg_popcount32 etc.
* They detect whether we can use the asm implementations, and replace
* the function pointers so that subsequent calls are routed directly to
* the chosen implementation.
*/
static int
pg_popcount32_choose(uint32 word)
{
if (pg_popcount_available())
{
pg_popcount32 = pg_popcount32_asm;
pg_popcount64 = pg_popcount64_asm;
}
else
{
pg_popcount32 = pg_popcount32_slow;
pg_popcount64 = pg_popcount64_slow;
}
return pg_popcount32(word);
}
static int
pg_popcount64_choose(uint64 word)
{
if (pg_popcount_available())
{
pg_popcount32 = pg_popcount32_asm;
pg_popcount64 = pg_popcount64_asm;
}
else
{
pg_popcount32 = pg_popcount32_slow;
pg_popcount64 = pg_popcount64_slow;
}
return pg_popcount64(word);
}
/*
 * pg_popcount32_asm
 *		Return the number of 1 bits set in word
 *
 * Issues the "popcntl" instruction through inline assembly.  This is only
 * compiled when USE_POPCNT_ASM is defined, and the function pointers are
 * only aimed here after pg_popcount_available() has returned true.
 */
static int
pg_popcount32_asm(uint32 word)
{
uint32 res;
/* %0 is the output operand (res), %1 the input (word); "cc" is clobbered */
__asm__ __volatile__(" popcntl %1,%0\n" : "=q"(res) : "rm"(word) : "cc");
return (int) res;
}
/*
 * pg_popcount64_asm
 *		Return the number of 1 bits set in word
 *
 * Issues the "popcntq" instruction through inline assembly.  This is only
 * compiled when USE_POPCNT_ASM is defined, and the function pointers are
 * only aimed here after pg_popcount_available() has returned true.
 */
static int
pg_popcount64_asm(uint64 word)
{
uint64 res;
/* %0 is the output operand (res), %1 the input (word); "cc" is clobbered */
__asm__ __volatile__(" popcntq %1,%0\n" : "=q"(res) : "rm"(word) : "cc");
/* The result is narrowed to int for the common return type */
return (int) res;
}
#endif /* USE_POPCNT_ASM */
/*
 * pg_popcount32_slow
 *		Portable count of the 1 bits in a 32-bit word.
 *
 * Uses the compiler's popcount builtin when configure found one, and
 * otherwise sums per-byte counts taken from pg_number_of_ones[].
 */
static int
pg_popcount32_slow(uint32 word)
{
#ifdef HAVE__BUILTIN_POPCOUNT
	return __builtin_popcount(word);
#else							/* !HAVE__BUILTIN_POPCOUNT */
	int			nbits;

	/* Consume one byte per iteration; stop once no set bits remain */
	for (nbits = 0; word != 0; word >>= 8)
		nbits += pg_number_of_ones[word & 255];

	return nbits;
#endif							/* HAVE__BUILTIN_POPCOUNT */
}
/*
 * pg_popcount64_slow
 *		Portable count of the 1 bits in a 64-bit word.
 *
 * Uses the compiler's popcount builtin of the appropriate width when
 * configure found one, and otherwise sums per-byte counts taken from
 * pg_number_of_ones[].
 */
static int
pg_popcount64_slow(uint64 word)
{
#ifdef HAVE__BUILTIN_POPCOUNT
	/* Use whichever builtin variant takes a 64-bit argument here */
#if defined(HAVE_LONG_INT_64)
	return __builtin_popcountl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	return __builtin_popcountll(word);
#else
#error must have a working 64-bit integer datatype
#endif
#else							/* !HAVE__BUILTIN_POPCOUNT */
	int			nbits;

	/* Consume one byte per iteration; stop once no set bits remain */
	for (nbits = 0; word != 0; word >>= 8)
		nbits += pg_number_of_ones[word & 255];

	return nbits;
#endif							/* HAVE__BUILTIN_POPCOUNT */
}
/*
 * pg_popcount
 *		Returns the number of 1-bits in buf
 *
 * buf:   start of the byte array to examine
 * bytes: number of bytes to examine; a value <= 0 yields a result of 0
 *
 * If buf is suitably aligned, whole words are counted with pg_popcount64
 * (or pg_popcount32 where pointers are narrower than 8 bytes), and any
 * trailing bytes are counted via pg_number_of_ones[].  An unaligned
 * buffer is processed entirely one byte at a time.
 */
uint64
pg_popcount(const char *buf, int bytes)
{
	uint64		popcnt = 0;

#if SIZEOF_VOID_P >= 8
	/* Process in 64-bit chunks if the buffer is aligned. */
	if (buf == (const char *) TYPEALIGN(8, buf))
	{
		const uint64 *words = (const uint64 *) buf;

		while (bytes >= 8)
		{
			popcnt += pg_popcount64(*words++);
			bytes -= 8;
		}

		/* Resume byte-wise processing where the word loop stopped */
		buf = (const char *) words;
	}
#else
	/* Process in 32-bit chunks if the buffer is aligned. */
	if (buf == (const char *) TYPEALIGN(4, buf))
	{
		const uint32 *words = (const uint32 *) buf;

		while (bytes >= 4)
		{
			popcnt += pg_popcount32(*words++);
			bytes -= 4;
		}

		/* Resume byte-wise processing where the word loop stopped */
		buf = (const char *) words;
	}
#endif

	/*
	 * Process any remaining bytes.  Test for "> 0" rather than nonzero so
	 * that a negative length cannot send us running off the end of the
	 * buffer.
	 */
	while (bytes-- > 0)
		popcnt += pg_number_of_ones[(unsigned char) *buf++];

	return popcnt;
}
......@@ -97,7 +97,7 @@ sub mkvcbuild
srandom.c getaddrinfo.c gettimeofday.c inet_net_ntop.c kill.c open.c
erand48.c snprintf.c strlcat.c strlcpy.c dirmod.c noblock.c path.c
dirent.c dlopen.c getopt.c getopt_long.c
pread.c pwrite.c
pread.c pwrite.c pg_bitutils.c
pg_strong_random.c pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c
pqsignal.c mkdtemp.c qsort.c qsort_arg.c quotes.c system.c
sprompt.c strerror.c tar.c thread.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment