Commit 457aef0f authored by Alvaro Herrera

Revert attempts to use POPCNT etc instructions

This reverts commits fc6c7274, 109de05c, d0b4663c and
711bab1e.

Somebody will have to try harder before submitting this patch again.
I've spent entirely too much time on it already, and the #ifdef maze yet
to be written in order for it to build at all got on my nerves.  The
amount of work needed to get a platform-specific performance improvement
that's barely above the noise level is not worth it.
parent e89f14e2
...@@ -378,58 +378,6 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW ...@@ -378,58 +378,6 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW
# PGAC_C_BUILTIN_POPCOUNT
# -------------------------
# Check if the C compiler understands __builtin_popcount(),
# and define HAVE__BUILTIN_POPCOUNT if so.
#
# The probe is compile-only (AC_COMPILE_IFELSE) with a constant argument,
# and its outcome is cached in pgac_cv__builtin_popcount.
AC_DEFUN([PGAC_C_BUILTIN_POPCOUNT],
[AC_CACHE_CHECK([for __builtin_popcount], pgac_cv__builtin_popcount,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_popcount(255);]
)],
[pgac_cv__builtin_popcount=yes],
[pgac_cv__builtin_popcount=no])])
if test x"$pgac_cv__builtin_popcount" = x"yes"; then
AC_DEFINE(HAVE__BUILTIN_POPCOUNT, 1,
[Define to 1 if your compiler understands __builtin_popcount.])
fi])# PGAC_C_BUILTIN_POPCOUNT
# PGAC_C_BUILTIN_CTZ
# -------------------------
# Check if the C compiler understands __builtin_ctz(),
# and define HAVE__BUILTIN_CTZ if so.
#
# The probe is compile-only (AC_COMPILE_IFELSE) with a constant argument,
# and its outcome is cached in pgac_cv__builtin_ctz.
#
# NOTE: only the plain int-width builtin is probed.  Code guarded by
# HAVE__BUILTIN_CTZ that calls __builtin_ctzl() or __builtin_ctzll()
# is relying on those variants being available as well.
AC_DEFUN([PGAC_C_BUILTIN_CTZ],
[AC_CACHE_CHECK(for __builtin_ctz, pgac_cv__builtin_ctz,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_ctz(256);]
)],
[pgac_cv__builtin_ctz=yes],
[pgac_cv__builtin_ctz=no])])
if test x"$pgac_cv__builtin_ctz" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_CTZ, 1,
[Define to 1 if your compiler understands __builtin_ctz.])
fi])# PGAC_C_BUILTIN_CTZ
# PGAC_C_BUILTIN_CLZ
# -------------------------
# Check if the C compiler understands __builtin_clz(),
# and define HAVE__BUILTIN_CLZ if so.
#
# The probe is compile-only (AC_COMPILE_IFELSE) with a constant argument,
# and its outcome is cached in pgac_cv__builtin_clz.
#
# NOTE: only the plain int-width builtin is probed.  Code guarded by
# HAVE__BUILTIN_CLZ that calls __builtin_clzl() or __builtin_clzll()
# is relying on those variants being available as well.
AC_DEFUN([PGAC_C_BUILTIN_CLZ],
[AC_CACHE_CHECK(for __builtin_clz, pgac_cv__builtin_clz,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_clz(256);]
)],
[pgac_cv__builtin_clz=yes],
[pgac_cv__builtin_clz=no])])
if test x"$pgac_cv__builtin_clz" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_CLZ, 1,
[Define to 1 if your compiler understands __builtin_clz.])
fi])# PGAC_C_BUILTIN_CLZ
# PGAC_C_BUILTIN_UNREACHABLE # PGAC_C_BUILTIN_UNREACHABLE
# -------------------------- # --------------------------
# Check if the C compiler understands __builtin_unreachable(), # Check if the C compiler understands __builtin_unreachable(),
......
...@@ -651,7 +651,6 @@ CFLAGS_ARMV8_CRC32C ...@@ -651,7 +651,6 @@ CFLAGS_ARMV8_CRC32C
CFLAGS_SSE42 CFLAGS_SSE42
have_win32_dbghelp have_win32_dbghelp
LIBOBJS LIBOBJS
have__builtin_popcount
UUID_LIBS UUID_LIBS
LDAP_LIBS_BE LDAP_LIBS_BE
LDAP_LIBS_FE LDAP_LIBS_FE
...@@ -733,7 +732,6 @@ CPP ...@@ -733,7 +732,6 @@ CPP
BITCODE_CXXFLAGS BITCODE_CXXFLAGS
BITCODE_CFLAGS BITCODE_CFLAGS
CFLAGS_VECTOR CFLAGS_VECTOR
CFLAGS_POPCNT
PERMIT_DECLARATION_AFTER_STATEMENT PERMIT_DECLARATION_AFTER_STATEMENT
LLVM_BINPATH LLVM_BINPATH
LLVM_CXXFLAGS LLVM_CXXFLAGS
...@@ -6582,48 +6580,6 @@ fi ...@@ -6582,48 +6580,6 @@ fi
fi fi
# Optimization flags and options for bit-twiddling
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -mpopcnt, for CFLAGS_POPCNT" >&5
$as_echo_n "checking whether ${CC} supports -mpopcnt, for CFLAGS_POPCNT... " >&6; }
if ${pgac_cv_prog_CC_cflags__mpopcnt+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
pgac_save_CC=$CC
CC=${CC}
CFLAGS="${CFLAGS_POPCNT} -mpopcnt"
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv_prog_CC_cflags__mpopcnt=yes
else
pgac_cv_prog_CC_cflags__mpopcnt=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="$pgac_save_CFLAGS"
CC="$pgac_save_CC"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__mpopcnt" >&5
$as_echo "$pgac_cv_prog_CC_cflags__mpopcnt" >&6; }
if test x"$pgac_cv_prog_CC_cflags__mpopcnt" = x"yes"; then
CFLAGS_POPCNT="${CFLAGS_POPCNT} -mpopcnt"
fi
CFLAGS_VECTOR=$CFLAGS_VECTOR CFLAGS_VECTOR=$CFLAGS_VECTOR
...@@ -14076,30 +14032,6 @@ if test x"$pgac_cv__builtin_bswap64" = xyes ; then ...@@ -14076,30 +14032,6 @@ if test x"$pgac_cv__builtin_bswap64" = xyes ; then
$as_echo "#define HAVE__BUILTIN_BSWAP64 1" >>confdefs.h $as_echo "#define HAVE__BUILTIN_BSWAP64 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
$as_echo_n "checking for __builtin_clz... " >&6; }
if ${pgac_cv__builtin_clz+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_clz(256);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_clz=yes
else
pgac_cv__builtin_clz=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5
$as_echo "$pgac_cv__builtin_clz" >&6; }
if test x"$pgac_cv__builtin_clz" = xyes ; then
$as_echo "#define HAVE__BUILTIN_CLZ 1" >>confdefs.h
fi fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5
$as_echo_n "checking for __builtin_constant_p... " >&6; } $as_echo_n "checking for __builtin_constant_p... " >&6; }
...@@ -14127,54 +14059,6 @@ if test x"$pgac_cv__builtin_constant_p" = xyes ; then ...@@ -14127,54 +14059,6 @@ if test x"$pgac_cv__builtin_constant_p" = xyes ; then
$as_echo "#define HAVE__BUILTIN_CONSTANT_P 1" >>confdefs.h $as_echo "#define HAVE__BUILTIN_CONSTANT_P 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5
$as_echo_n "checking for __builtin_ctz... " >&6; }
if ${pgac_cv__builtin_ctz+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_ctz(256);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_ctz=yes
else
pgac_cv__builtin_ctz=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctz" >&5
$as_echo "$pgac_cv__builtin_ctz" >&6; }
if test x"$pgac_cv__builtin_ctz" = xyes ; then
$as_echo "#define HAVE__BUILTIN_CTZ 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
$as_echo_n "checking for __builtin_popcount... " >&6; }
if ${pgac_cv__builtin_popcount+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_popcount(255);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_popcount=yes
else
pgac_cv__builtin_popcount=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5
$as_echo "$pgac_cv__builtin_popcount" >&6; }
if test x"$pgac_cv__builtin_popcount" = x"yes"; then
$as_echo "#define HAVE__BUILTIN_POPCOUNT 1" >>confdefs.h
fi fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5
$as_echo_n "checking for __builtin_unreachable... " >&6; } $as_echo_n "checking for __builtin_unreachable... " >&6; }
...@@ -14693,9 +14577,6 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h ...@@ -14693,9 +14577,6 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h
fi fi
have__builtin_popcount=$pgac_cv__builtin_popcount
# MSVC doesn't cope well with defining restrict to __restrict, the # MSVC doesn't cope well with defining restrict to __restrict, the
# spelling it understands, because it conflicts with # spelling it understands, because it conflicts with
# __declspec(restrict). Therefore we define pg_restrict to the # __declspec(restrict). Therefore we define pg_restrict to the
......
...@@ -547,10 +547,6 @@ elif test "$PORTNAME" = "hpux"; then ...@@ -547,10 +547,6 @@ elif test "$PORTNAME" = "hpux"; then
PGAC_PROG_CXX_CFLAGS_OPT([+Olibmerrno]) PGAC_PROG_CXX_CFLAGS_OPT([+Olibmerrno])
fi fi
# Optimization flags and options for bit-twiddling
PGAC_PROG_CC_VAR_OPT(CFLAGS_POPCNT, [-mpopcnt])
AC_SUBST(CFLAGS_POPCNT)
AC_SUBST(CFLAGS_VECTOR, $CFLAGS_VECTOR) AC_SUBST(CFLAGS_VECTOR, $CFLAGS_VECTOR)
# Determine flags used to emit bitcode for JIT inlining. Need to test # Determine flags used to emit bitcode for JIT inlining. Need to test
...@@ -1492,10 +1488,7 @@ PGAC_C_TYPES_COMPATIBLE ...@@ -1492,10 +1488,7 @@ PGAC_C_TYPES_COMPATIBLE
PGAC_C_BUILTIN_BSWAP16 PGAC_C_BUILTIN_BSWAP16
PGAC_C_BUILTIN_BSWAP32 PGAC_C_BUILTIN_BSWAP32
PGAC_C_BUILTIN_BSWAP64 PGAC_C_BUILTIN_BSWAP64
PGAC_C_BUILTIN_CLZ
PGAC_C_BUILTIN_CONSTANT_P PGAC_C_BUILTIN_CONSTANT_P
PGAC_C_BUILTIN_CTZ
PGAC_C_BUILTIN_POPCOUNT
PGAC_C_BUILTIN_UNREACHABLE PGAC_C_BUILTIN_UNREACHABLE
PGAC_C_COMPUTED_GOTO PGAC_C_COMPUTED_GOTO
PGAC_STRUCT_TIMEZONE PGAC_STRUCT_TIMEZONE
...@@ -1510,8 +1503,6 @@ AC_TYPE_LONG_LONG_INT ...@@ -1510,8 +1503,6 @@ AC_TYPE_LONG_LONG_INT
PGAC_TYPE_LOCALE_T PGAC_TYPE_LOCALE_T
AC_SUBST(have__builtin_popcount, $pgac_cv__builtin_popcount)
# MSVC doesn't cope well with defining restrict to __restrict, the # MSVC doesn't cope well with defining restrict to __restrict, the
# spelling it understands, because it conflicts with # spelling it understands, because it conflicts with
# __declspec(restrict). Therefore we define pg_restrict to the # __declspec(restrict). Therefore we define pg_restrict to the
......
...@@ -260,7 +260,6 @@ CXX = @CXX@ ...@@ -260,7 +260,6 @@ CXX = @CXX@
CFLAGS = @CFLAGS@ CFLAGS = @CFLAGS@
CFLAGS_VECTOR = @CFLAGS_VECTOR@ CFLAGS_VECTOR = @CFLAGS_VECTOR@
CFLAGS_SSE42 = @CFLAGS_SSE42@ CFLAGS_SSE42 = @CFLAGS_SSE42@
CFLAGS_POPCNT = @CFLAGS_POPCNT@
CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@ CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@
PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@ PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
CXXFLAGS = @CXXFLAGS@ CXXFLAGS = @CXXFLAGS@
...@@ -517,9 +516,6 @@ WIN32_STACK_RLIMIT=4194304 ...@@ -517,9 +516,6 @@ WIN32_STACK_RLIMIT=4194304
# Set if we have a working win32 crashdump header # Set if we have a working win32 crashdump header
have_win32_dbghelp = @have_win32_dbghelp@ have_win32_dbghelp = @have_win32_dbghelp@
# Set if __builtin_popcount() is supported by $(CC)
have__builtin_popcount = @have__builtin_popcount@
# Pull in platform-specific magic # Pull in platform-specific magic
include $(top_builddir)/src/Makefile.port include $(top_builddir)/src/Makefile.port
......
...@@ -89,12 +89,12 @@ ...@@ -89,12 +89,12 @@
#include "access/visibilitymap.h" #include "access/visibilitymap.h"
#include "access/xlog.h" #include "access/xlog.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/inval.h" #include "utils/inval.h"
/*#define TRACE_VISIBILITYMAP */ /*#define TRACE_VISIBILITYMAP */
/* /*
...@@ -115,9 +115,43 @@ ...@@ -115,9 +115,43 @@
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK) #define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
/* Masks for bit counting bits in the visibility map. */ /* tables for fast counting of set bits for visible and frozen */
#define VISIBLE_MASK64 0x5555555555555555 /* The lower bit of each bit pair */ static const uint8 number_of_ones_for_visible[256] = {
#define FROZEN_MASK64 0xaaaaaaaaaaaaaaaa /* The upper bit of each bit pair */ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
};
static const uint8 number_of_ones_for_frozen[256] = {
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
};
/* prototypes for internal routines */ /* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend); static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
...@@ -374,16 +408,18 @@ void ...@@ -374,16 +408,18 @@ void
visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen) visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
{ {
BlockNumber mapBlock; BlockNumber mapBlock;
BlockNumber nvisible = 0;
BlockNumber nfrozen = 0;
/* all_visible must be specified */ /* all_visible must be specified */
Assert(all_visible); Assert(all_visible);
*all_visible = 0;
if (all_frozen)
*all_frozen = 0;
for (mapBlock = 0;; mapBlock++) for (mapBlock = 0;; mapBlock++)
{ {
Buffer mapBuffer; Buffer mapBuffer;
uint64 *map; unsigned char *map;
int i; int i;
/* /*
...@@ -400,30 +436,17 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro ...@@ -400,30 +436,17 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro
* immediately stale anyway if anyone is concurrently setting or * immediately stale anyway if anyone is concurrently setting or
* clearing bits, and we only really need an approximate value. * clearing bits, and we only really need an approximate value.
*/ */
map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer)); map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0, for (i = 0; i < MAPSIZE; i++)
"unsupported MAPSIZE");
if (all_frozen == NULL)
{
for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
}
else
{ {
for (i = 0; i < MAPSIZE / sizeof(uint64); i++) *all_visible += number_of_ones_for_visible[map[i]];
{ if (all_frozen)
nvisible += pg_popcount64(map[i] & VISIBLE_MASK64); *all_frozen += number_of_ones_for_frozen[map[i]];
nfrozen += pg_popcount64(map[i] & FROZEN_MASK64);
}
} }
ReleaseBuffer(mapBuffer); ReleaseBuffer(mapBuffer);
} }
*all_visible = nvisible;
if (all_frozen)
*all_frozen = nfrozen;
} }
/* /*
......
...@@ -37,7 +37,6 @@ ...@@ -37,7 +37,6 @@
#include "access/hash.h" #include "access/hash.h"
#include "lib/bloomfilter.h" #include "lib/bloomfilter.h"
#include "port/pg_bitutils.h"
#define MAX_HASH_FUNCS 10 #define MAX_HASH_FUNCS 10
...@@ -188,7 +187,19 @@ double ...@@ -188,7 +187,19 @@ double
bloom_prop_bits_set(bloom_filter *filter) bloom_prop_bits_set(bloom_filter *filter)
{ {
int bitset_bytes = filter->m / BITS_PER_BYTE; int bitset_bytes = filter->m / BITS_PER_BYTE;
uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes); uint64 bits_set = 0;
int i;
for (i = 0; i < bitset_bytes; i++)
{
unsigned char byte = filter->bitset[i];
while (byte)
{
bits_set++;
byte &= (byte - 1);
}
}
return bits_set / (double) filter->m; return bits_set / (double) filter->m;
} }
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
#include "access/hash.h" #include "access/hash.h"
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
#include "port/pg_bitutils.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
...@@ -52,23 +51,79 @@ ...@@ -52,23 +51,79 @@
#define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
/* Set the bitwise macro version we must use based on the bitmapword size */
#if BITS_PER_BITMAPWORD == 32
#define bmw_popcount(w) pg_popcount32(w) /*
#define bmw_rightmost_one(w) pg_rightmost_one32(w) * Lookup tables to avoid need for bit-by-bit groveling
#define bmw_leftmost_one(w) pg_leftmost_one32(w) *
* rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
#elif BITS_PER_BITMAPWORD == 64 * in a nonzero byte value x. The entry for x=0 is never used.
*
#define bmw_popcount(w) pg_popcount64(w) * leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a
#define bmw_rightmost_one(w) pg_rightmost_one64(w) * nonzero byte value x. The entry for x=0 is never used.
#define bmw_leftmost_one(w) pg_leftmost_one64(w) *
* number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
#else *
#error "invalid BITS_PER_BITMAPWORD" * We could make these tables larger and reduce the number of iterations
#endif * in the functions that use them, but bytewise shifts and masks are
* especially fast on many machines, so working a byte at a time seems best.
*/
static const uint8 rightmost_one_pos[256] = {
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
static const uint8 leftmost_one_pos[256] = {
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
/* /*
...@@ -552,7 +607,12 @@ bms_singleton_member(const Bitmapset *a) ...@@ -552,7 +607,12 @@ bms_singleton_member(const Bitmapset *a)
if (result >= 0 || HAS_MULTIPLE_ONES(w)) if (result >= 0 || HAS_MULTIPLE_ONES(w))
elog(ERROR, "bitmapset has multiple members"); elog(ERROR, "bitmapset has multiple members");
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
result += bmw_rightmost_one(w); while ((w & 255) == 0)
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
} }
} }
if (result < 0) if (result < 0)
...@@ -590,7 +650,12 @@ bms_get_singleton_member(const Bitmapset *a, int *member) ...@@ -590,7 +650,12 @@ bms_get_singleton_member(const Bitmapset *a, int *member)
if (result >= 0 || HAS_MULTIPLE_ONES(w)) if (result >= 0 || HAS_MULTIPLE_ONES(w))
return false; return false;
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
result += bmw_rightmost_one(w); while ((w & 255) == 0)
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
} }
} }
if (result < 0) if (result < 0)
...@@ -616,9 +681,12 @@ bms_num_members(const Bitmapset *a) ...@@ -616,9 +681,12 @@ bms_num_members(const Bitmapset *a)
{ {
bitmapword w = a->words[wordnum]; bitmapword w = a->words[wordnum];
/* No need to count the bits in a zero word */ /* we assume here that bitmapword is an unsigned type */
if (w != 0) while (w != 0)
result += bmw_popcount(w); {
result += number_of_ones[w & 255];
w >>= 8;
}
} }
return result; return result;
} }
...@@ -973,7 +1041,12 @@ bms_first_member(Bitmapset *a) ...@@ -973,7 +1041,12 @@ bms_first_member(Bitmapset *a)
a->words[wordnum] &= ~w; a->words[wordnum] &= ~w;
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
result += bmw_rightmost_one(w); while ((w & 255) == 0)
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
return result; return result;
} }
} }
...@@ -1023,7 +1096,12 @@ bms_next_member(const Bitmapset *a, int prevbit) ...@@ -1023,7 +1096,12 @@ bms_next_member(const Bitmapset *a, int prevbit)
int result; int result;
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
result += bmw_rightmost_one(w); while ((w & 255) == 0)
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
return result; return result;
} }
...@@ -1090,9 +1168,14 @@ bms_prev_member(const Bitmapset *a, int prevbit) ...@@ -1090,9 +1168,14 @@ bms_prev_member(const Bitmapset *a, int prevbit)
if (w != 0) if (w != 0)
{ {
int result; int result;
int shift = BITS_PER_BITMAPWORD - 8;
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
result += bmw_leftmost_one(w);
while ((w >> shift) == 0)
shift -= 8;
result += shift + leftmost_one_pos[(w >> shift) & 255];
return result; return result;
} }
......
...@@ -748,21 +748,12 @@ ...@@ -748,21 +748,12 @@
/* Define to 1 if your compiler understands __builtin_bswap64. */ /* Define to 1 if your compiler understands __builtin_bswap64. */
#undef HAVE__BUILTIN_BSWAP64 #undef HAVE__BUILTIN_BSWAP64
/* Define to 1 if your compiler understands __builtin_clz. */
#undef HAVE__BUILTIN_CLZ
/* Define to 1 if your compiler understands __builtin_constant_p. */ /* Define to 1 if your compiler understands __builtin_constant_p. */
#undef HAVE__BUILTIN_CONSTANT_P #undef HAVE__BUILTIN_CONSTANT_P
/* Define to 1 if your compiler understands __builtin_ctz. */
#undef HAVE__BUILTIN_CTZ
/* Define to 1 if your compiler understands __builtin_$op_overflow. */ /* Define to 1 if your compiler understands __builtin_$op_overflow. */
#undef HAVE__BUILTIN_OP_OVERFLOW #undef HAVE__BUILTIN_OP_OVERFLOW
/* Define to 1 if your compiler understands __builtin_popcount. */
#undef HAVE__BUILTIN_POPCOUNT
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */ /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
#undef HAVE__BUILTIN_TYPES_COMPATIBLE_P #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
......
...@@ -587,21 +587,12 @@ ...@@ -587,21 +587,12 @@
/* Define to 1 if your compiler understands __builtin_bswap64. */ /* Define to 1 if your compiler understands __builtin_bswap64. */
/* #undef HAVE__BUILTIN_BSWAP64 */ /* #undef HAVE__BUILTIN_BSWAP64 */
/* Define to 1 if your compiler understands __builtin_clz. */
/* #undef HAVE__BUILTIN_CLZ */
/* Define to 1 if your compiler understands __builtin_constant_p. */ /* Define to 1 if your compiler understands __builtin_constant_p. */
/* #undef HAVE__BUILTIN_CONSTANT_P */ /* #undef HAVE__BUILTIN_CONSTANT_P */
/* Define to 1 if your compiler understands __builtin_ctz. */
/* #undef HAVE__BUILTIN_CTZ */
/* Define to 1 if your compiler understands __builtin_$op_overflow. */ /* Define to 1 if your compiler understands __builtin_$op_overflow. */
/* #undef HAVE__BUILTIN_OP_OVERFLOW */ /* #undef HAVE__BUILTIN_OP_OVERFLOW */
/* Define to 1 if your compiler understands __builtin_popcount. */
/* #undef HAVE__BUILTIN_POPCOUNT */
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */ /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
/* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */ /* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */
......
/*-------------------------------------------------------------------------
 *
 * pg_bitutils.h
 *	  miscellaneous functions for bit-wise operations.
 *
 *
 * Portions Copyright (c) 2019, PostgreSQL Global Development Group
 *
 * src/include/port/pg_bitutils.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef PG_BITUTILS_H
#define PG_BITUTILS_H
/*
 * Population-count entry points.
 *
 * pg_popcount32 and pg_popcount64 are declared as function pointers, not
 * functions; callers invoke them with ordinary call syntax.  Their
 * definitions, and whatever they are made to point at, are not in this
 * header.
 */
extern int (*pg_popcount32) (uint32 word);
extern int (*pg_popcount64) (uint64 word);
/* Buffer variant taking a byte count; presumably returns the total number of set bits -- confirm in its definition. */
extern uint64 pg_popcount(const char *buf, int bytes);
/*
 * in pg_bitutils_hwpopcnt.c
 *
 * NOTE(review): src/port/Makefile only adds pg_bitutils_hwpopcnt.o when
 * CFLAGS_POPCNT is non-empty and have__builtin_popcount is "yes", so these
 * two symbols are not available in every build.
 */
extern int pg_popcount32_hw(uint32 word);
extern int pg_popcount64_hw(uint64 word);
#ifndef HAVE__BUILTIN_CTZ
/*
 * Array marking the position of the right-most set bit for each value of
 * 1-255.  We count the right-most position as the 0th bit, and the
 * left-most the 7th bit.  The 0th index of the array must not be used; it
 * holds a placeholder 0 only to keep the indexing direct.
 *
 * The table is compiled only when HAVE__BUILTIN_CTZ is not defined, since it
 * is needed solely by the byte-at-a-time fallback paths of
 * pg_rightmost_one32() and pg_rightmost_one64().
 */
static const uint8 rightmost_one_pos[256] = {
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
#endif /* !HAVE__BUILTIN_CTZ */
/*
 * pg_rightmost_one32
 *		Return the 0-based position of the least significant set bit of
 *		"word", i.e. the count of zero bits below it.  "word" must not be 0.
 */
static inline int
pg_rightmost_one32(uint32 word)
{
#ifdef HAVE__BUILTIN_CTZ
	Assert(word != 0);

	/* The compiler intrinsic does the whole job. */
	return __builtin_ctz(word);
#else
	int			bitpos;

	Assert(word != 0);

	/* Skip whole zero bytes, then look up the position inside the byte. */
	for (bitpos = 0; (word & 0xFF) == 0; bitpos += 8)
		word >>= 8;

	return bitpos + rightmost_one_pos[word & 0xFF];
#endif							/* HAVE__BUILTIN_CTZ */
}
/*
 * pg_rightmost_one64
 *		Returns the number of trailing 0-bits in word, starting at the least
 *		significant bit position.  word must not be 0.
 *
 * When the compiler provides the ctz builtins, the variant matching the
 * platform's 64-bit integer type is used; otherwise we fall back to a
 * byte-at-a-time scan using the rightmost_one_pos lookup table.
 */
static inline int
pg_rightmost_one64(uint64 word)
{
#ifdef HAVE__BUILTIN_CTZ
	Assert(word != 0);

	/* Pick the builtin whose operand width is 64 bits on this platform. */
#if defined(HAVE_LONG_INT_64)
	return __builtin_ctzl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	return __builtin_ctzll(word);
#else
#error must have a working 64-bit integer datatype
#endif
#else							/* !HAVE__BUILTIN_CTZ */
	int			result = 0;

	Assert(word != 0);

	/* Discard all-zero low bytes, counting 8 bit positions for each. */
	while ((word & 255) == 0)
	{
		word >>= 8;
		result += 8;
	}

	/* The lowest remaining byte is nonzero; finish with a table lookup. */
	result += rightmost_one_pos[word & 255];

	return result;
#endif							/* HAVE__BUILTIN_CTZ */
}
#ifndef HAVE__BUILTIN_CLZ
/*
 * Array marking the position of the left-most set bit for each value of
 * 1-255.  We count the right-most position as the 0th bit, and the
 * left-most the 7th bit.  The 0th index of the array must not be used; it
 * holds a placeholder 0 only to keep the indexing direct.
 *
 * The table is compiled only when HAVE__BUILTIN_CLZ is not defined, since it
 * is needed solely by the byte-at-a-time fallback paths of
 * pg_leftmost_one32() and pg_leftmost_one64().
 */
static const uint8 leftmost_one_pos[256] = {
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
#endif /* !HAVE__BUILTIN_CLZ */
/*
 * pg_leftmost_one32
 *		Report where the highest set bit of "word" sits, as a 0-based index
 *		counted up from the least significant bit.  "word" must not be 0.
 */
static inline int
pg_leftmost_one32(uint32 word)
{
#ifndef HAVE__BUILTIN_CLZ
	int			byteshift = 32;

	Assert(word != 0);

	/* Walk down from the top byte until we reach one that is nonzero. */
	do
	{
		byteshift -= 8;
	} while ((word >> byteshift) == 0);

	return byteshift + leftmost_one_pos[(word >> byteshift) & 0xFF];
#else
	Assert(word != 0);

	/* clz counts the zeros above the top set bit; turn that into an index. */
	return 31 - __builtin_clz(word);
#endif							/* HAVE__BUILTIN_CLZ */
}
/*
 * pg_leftmost_one64
 *		Report where the highest set bit of "word" sits, as a 0-based index
 *		counted up from the least significant bit.  "word" must not be 0.
 */
static inline int
pg_leftmost_one64(uint64 word)
{
#ifndef HAVE__BUILTIN_CLZ
	int			byteshift = 64;

	Assert(word != 0);

	/* Walk down from the top byte until we reach one that is nonzero. */
	do
	{
		byteshift -= 8;
	} while ((word >> byteshift) == 0);

	return byteshift + leftmost_one_pos[(word >> byteshift) & 0xFF];
#else							/* HAVE__BUILTIN_CLZ */
	Assert(word != 0);

	/* Use the clz builtin whose operand is 64 bits wide on this platform. */
#if defined(HAVE_LONG_INT_64)
	return 63 - __builtin_clzl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	return 63 - __builtin_clzll(word);
#else
#error must have a working 64-bit integer datatype
#endif
#endif							/* !HAVE__BUILTIN_CLZ */
}
#endif /* PG_BITUTILS_H */
...@@ -36,19 +36,11 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS) ...@@ -36,19 +36,11 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
LIBS += $(PTHREAD_LIBS) LIBS += $(PTHREAD_LIBS)
OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \ noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \ pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \
qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \ qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \
tar.o thread.o tar.o thread.o
# If the compiler supports a special flag for the POPCOUNT instruction and it
# has __builtin_popcount, add pg_bitutils_hwpopcnt.o.
ifneq ($(CFLAGS_POPCNT),)
ifeq ($(have__builtin_popcount),yes)
OBJS += pg_bitutils_hwpopcnt.o
endif
endif
# libpgport.a, libpgport_shlib.a, and libpgport_srv.a contain the same files # libpgport.a, libpgport_shlib.a, and libpgport_srv.a contain the same files
# foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c # foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c
OBJS_SHLIB = $(OBJS:%.o=%_shlib.o) OBJS_SHLIB = $(OBJS:%.o=%_shlib.o)
...@@ -86,11 +78,6 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) ...@@ -86,11 +78,6 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
# all versions of pg_bitutils_hwpopcnt.c need CFLAGS_POPCNT
pg_bitutils_hwpopcnt.o: CFLAGS+=$(CFLAGS_POPCNT)
pg_bitutils_hwpopcnt_shlib.o: CFLAGS+=$(CFLAGS_POPCNT)
pg_bitutils_hwpopcnt_srv.o: CFLAGS+=$(CFLAGS_POPCNT)
# #
# Shared library versions of object files # Shared library versions of object files
# #
......
/*-------------------------------------------------------------------------
*
* pg_bitutils.c
* miscellaneous functions for bit-wise operations.
*
* Portions Copyright (c) 2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/port/pg_bitutils.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef HAVE__GET_CPUID
#include <cpuid.h>
#endif
#ifdef HAVE__CPUID
#include <intrin.h>
#endif
#include "port/pg_bitutils.h"
/*
 * pg_popcount32 and pg_popcount64 are function pointers through which all
 * callers reach the popcount implementation.
 *
 * When the compiler provides __builtin_popcount, both pointers initially
 * target a "_choose" function.  Its first invocation tests for the POPCNT
 * instruction via pg_popcount_available() and re-points the pointer at either
 * the "_hw" or the "_builtin" variant.  Without the builtin, the pointers are
 * bound to the table-driven "_slow" variants.
 */
#ifdef HAVE__BUILTIN_POPCOUNT
static bool pg_popcount_available(void);
static int pg_popcount32_choose(uint32 word);
static int pg_popcount32_builtin(uint32 word);
static int pg_popcount64_choose(uint64 word);
static int pg_popcount64_builtin(uint64 word);
int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
#else
static int pg_popcount32_slow(uint32 word);
static int pg_popcount64_slow(uint64 word);
int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
#endif /* !HAVE__BUILTIN_POPCOUNT */
/*
 * Array marking the number of 1-bits for each value of 0-255:
 * number_of_ones[b] is the count of set bits in the byte value b.
 *
 * It backs the byte-at-a-time "_slow" popcount variants and the trailing-byte
 * loop in pg_popcount().
 */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
/*
 * pg_popcount_available
 *		Report whether the running CPU advertises the POPCNT instruction.
 *
 * Returns true only when a CPUID query facility was found at build time and
 * CPUID function 1 reports the POPCNT feature flag (bit 23 of the third
 * result register).  Builds without any CPUID facility always get false.
 */
static bool
pg_popcount_available(void)
{
#if !defined(HAVE__GET_CPUID) && !defined(HAVE__CPUID)
	/* No means of interrogating the CPU, so assume the worst. */
	return false;
#else
	const unsigned int popcnt_flag = 1U << 23;
	unsigned int regs[4] = {0, 0, 0, 0};

	/* Fetch the function-1 results using whichever interface we have. */
#ifdef HAVE__GET_CPUID
	__get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
#else
	__cpuid(regs, 1);
#endif

	return (regs[2] & popcnt_flag) != 0;	/* POPCNT */
#endif
}
#ifdef HAVE__BUILTIN_POPCOUNT
/*
 * pg_popcount32_choose
 *		Initial target of the pg_popcount32 function pointer.
 *
 * On its first invocation this selects the implementation to use from then
 * on: pg_popcount32_hw when pg_popcount_available() reports POPCNT support,
 * pg_popcount32_builtin otherwise.  The selection is stored in pg_popcount32,
 * so later calls go straight to it, and the current call is forwarded through
 * the updated pointer.
 */
static int
pg_popcount32_choose(uint32 word)
{
	pg_popcount32 = pg_popcount_available() ?
		pg_popcount32_hw : pg_popcount32_builtin;

	return pg_popcount32(word);
}
/*
 * pg_popcount32_builtin
 *		Count the 1 bits in word using the compiler's __builtin_popcount.
 */
static int
pg_popcount32_builtin(uint32 word)
{
	int			nbits = __builtin_popcount(word);

	return nbits;
}
#else /* HAVE__BUILTIN_POPCOUNT */
/*
 * pg_popcount32_slow
 *		Table-driven count of the 1 bits set in word.
 *
 * Consumes word one byte at a time from the low end, adding the
 * number_of_ones entry for each byte, until no set bits remain.
 */
static int
pg_popcount32_slow(uint32 word)
{
	int			total;

	for (total = 0; word != 0; word >>= 8)
		total += number_of_ones[word & 255];

	return total;
}
#endif
/*
 * pg_popcount
 *		Returns the number of 1-bits in buf
 *
 * buf points at the data to examine and bytes is its length in bytes.
 *
 * If buf starts on a suitably aligned address, whole words are counted first:
 * 8 bytes at a time through pg_popcount64 when pointers are at least 8 bytes
 * wide, otherwise 4 bytes at a time through pg_popcount32.  Whatever is left
 * over (or everything, for an unaligned buffer) is counted byte by byte with
 * the number_of_ones table.
 *
 * The buffer is only read, so the const qualification of buf is kept on every
 * pointer derived from it.  A zero or negative length yields 0.
 */
uint64
pg_popcount(const char *buf, int bytes)
{
	uint64		popcnt = 0;

#if SIZEOF_VOID_P >= 8
	/* Process in 64-bit chunks if the buffer is aligned. */
	if (buf == (const char *) TYPEALIGN(8, buf))
	{
		const uint64 *words = (const uint64 *) buf;

		while (bytes >= 8)
		{
			popcnt += pg_popcount64(*words++);
			bytes -= 8;
		}

		/* Resume byte-wise processing right after the last full word. */
		buf = (const char *) words;
	}
#else
	/* Process in 32-bit chunks if the buffer is aligned. */
	if (buf == (const char *) TYPEALIGN(4, buf))
	{
		const uint32 *words = (const uint32 *) buf;

		while (bytes >= 4)
		{
			popcnt += pg_popcount32(*words++);
			bytes -= 4;
		}

		/* Resume byte-wise processing right after the last full word. */
		buf = (const char *) words;
	}
#endif

	/*
	 * Process any remaining bytes.  Testing "> 0" rather than "!= 0" keeps a
	 * negative length from walking off the end of the buffer.
	 */
	while (bytes-- > 0)
		popcnt += number_of_ones[(unsigned char) *buf++];

	return popcnt;
}
#ifdef HAVE__BUILTIN_POPCOUNT
/*
 * pg_popcount64_choose
 *		Initial target of the pg_popcount64 function pointer.
 *
 * On its first invocation this selects the implementation to use from then
 * on: pg_popcount64_hw when pg_popcount_available() reports POPCNT support,
 * pg_popcount64_builtin otherwise.  The selection is stored in pg_popcount64,
 * so later calls go straight to it, and the current call is forwarded through
 * the updated pointer.
 */
static int
pg_popcount64_choose(uint64 word)
{
	pg_popcount64 = pg_popcount_available() ?
		pg_popcount64_hw : pg_popcount64_builtin;

	return pg_popcount64(word);
}
/*
 * pg_popcount64_builtin
 *		Count the 1 bits in word using the compiler's popcount builtin.
 *
 * The builtin variant is picked to match whichever C integer type is 64 bits
 * wide on this platform.
 */
static int
pg_popcount64_builtin(uint64 word)
{
	int			nbits;

#if defined(HAVE_LONG_INT_64)
	nbits = __builtin_popcountl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	nbits = __builtin_popcountll(word);
#else
#error must have a working 64-bit integer datatype
#endif

	return nbits;
}
#else /* HAVE__BUILTIN_POPCOUNT */
/*
 * pg_popcount64_slow
 *		Table-driven count of the 1 bits set in word.
 *
 * Consumes word one byte at a time from the low end, adding the
 * number_of_ones entry for each byte, until no set bits remain.
 */
static int
pg_popcount64_slow(uint64 word)
{
	int			total;

	for (total = 0; word != 0; word >>= 8)
		total += number_of_ones[word & 255];

	return total;
}
#endif
/*-------------------------------------------------------------------------
*
* pg_bitutils_hwpopcnt.c
* CPU-optimized implementation of pg_popcount variants
*
* This file must be compiled with a compiler-specific flag to enable the
* POPCNT instruction.
*
* Portions Copyright (c) 2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/port/pg_bitutils_hwpopcnt.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "port/pg_bitutils.h"
/*
 * pg_popcount32_hw
 *		Count the 1 bits in word using __builtin_popcount.
 *
 * This file is meant to be built with the compiler flag that enables the
 * POPCNT instruction (see the file header).
 */
int
pg_popcount32_hw(uint32 word)
{
	int			nbits = __builtin_popcount(word);

	return nbits;
}
/*
 * pg_popcount64_hw
 *		Count the 1 bits in word using the 64-bit popcount builtin.
 *
 * The builtin variant is picked to match whichever C integer type is 64 bits
 * wide on this platform.  This file is meant to be built with the compiler
 * flag that enables the POPCNT instruction (see the file header).
 */
int
pg_popcount64_hw(uint64 word)
{
	int			nbits;

#if defined(HAVE_LONG_INT_64)
	nbits = __builtin_popcountl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	nbits = __builtin_popcountll(word);
#else
#error must have a working 64-bit integer datatype
#endif

	return nbits;
}
...@@ -112,7 +112,6 @@ sub mkvcbuild ...@@ -112,7 +112,6 @@ sub mkvcbuild
push(@pgportfiles, 'pg_crc32c_sse42_choose.c'); push(@pgportfiles, 'pg_crc32c_sse42_choose.c');
push(@pgportfiles, 'pg_crc32c_sse42.c'); push(@pgportfiles, 'pg_crc32c_sse42.c');
push(@pgportfiles, 'pg_crc32c_sb8.c'); push(@pgportfiles, 'pg_crc32c_sb8.c');
push(@pgportfiles, 'pg_bitutils.c');
} }
else else
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment