Commit 711bab1e authored by Alvaro Herrera's avatar Alvaro Herrera

Add basic support for using the POPCNT and SSE4.2s LZCNT opcodes

These opcodes have been available on AMD processors since 2007, and on
Intel processors since 2008.  GCC and Clang expose them through
__builtin functions.  The opcodes may be unavailable at run time, in which
case we fall back on a C-based implementation.  To get the compiler to
emit the POPCNT instruction we must pass it the -mpopcnt option; we do
this only for the pg_bitutils.c file.

David Rowley (with fragments taken from a patch by Thomas Munro)

Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
parent 754ca993
...@@ -378,6 +378,122 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW ...@@ -378,6 +378,122 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW
# PGAC_C_BUILTIN_POPCOUNT
# -------------------------
# Check if the C compiler understands __builtin_popcount() when -mpopcnt
# is appended to CFLAGS.  If so, define HAVE__BUILTIN_POPCOUNT and set
# CFLAGS_POPCNT to "-mpopcnt".  CFLAGS itself is restored to its previous
# value once the probe has run; the result is cached in pgac_cv_popcount.
# NOTE(review): the probe is only attempted with -mpopcnt in effect, so a
# compiler that rejects that switch yields "no" even if it provides the
# builtin -- confirm this is intended.
AC_DEFUN([PGAC_C_BUILTIN_POPCOUNT],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcount])])dnl
AC_CACHE_CHECK([for __builtin_popcount], [Ac_cachevar],
[pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS -mpopcnt"
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_popcount(255);])],
[Ac_cachevar=yes],
[Ac_cachevar=no])
CFLAGS="$pgac_save_CFLAGS"])
if test x"$Ac_cachevar" = x"yes"; then
CFLAGS_POPCNT="-mpopcnt"
AC_DEFINE(HAVE__BUILTIN_POPCOUNT, 1,
[Define to 1 if your compiler understands __builtin_popcount.])
fi
undefine([Ac_cachevar])dnl
])# PGAC_C_BUILTIN_POPCOUNT
# PGAC_C_BUILTIN_POPCOUNTL
# -------------------------
# Check if the C compiler understands __builtin_popcountl() when -mpopcnt
# is appended to CFLAGS.  If so, define HAVE__BUILTIN_POPCOUNTL and set
# CFLAGS_POPCNT to "-mpopcnt".  CFLAGS itself is restored to its previous
# value once the probe has run; the result is cached in pgac_cv_popcountl.
# NOTE(review): as the probe always runs with -mpopcnt in effect, a compiler
# that rejects that switch yields "no" even if it provides the builtin --
# confirm this is intended.
AC_DEFUN([PGAC_C_BUILTIN_POPCOUNTL],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcountl])])dnl
AC_CACHE_CHECK([for __builtin_popcountl], [Ac_cachevar],
[pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS -mpopcnt"
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_popcountl(255);])],
[Ac_cachevar=yes],
[Ac_cachevar=no])
CFLAGS="$pgac_save_CFLAGS"])
if test x"$Ac_cachevar" = x"yes"; then
CFLAGS_POPCNT="-mpopcnt"
AC_DEFINE(HAVE__BUILTIN_POPCOUNTL, 1,
[Define to 1 if your compiler understands __builtin_popcountl.])
fi
undefine([Ac_cachevar])dnl
])# PGAC_C_BUILTIN_POPCOUNTL
# PGAC_C_BUILTIN_CTZ
# -------------------------
# Check if the C compiler understands __builtin_ctz(),
# and define HAVE__BUILTIN_CTZ if so.
# The probe only compiles a one-line test declaration, with the CFLAGS
# already in effect; its outcome is cached in pgac_cv__builtin_ctz.
AC_DEFUN([PGAC_C_BUILTIN_CTZ],
[AC_CACHE_CHECK(for __builtin_ctz, pgac_cv__builtin_ctz,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_ctz(256);]
)],
[pgac_cv__builtin_ctz=yes],
[pgac_cv__builtin_ctz=no])])
if test x"$pgac_cv__builtin_ctz" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_CTZ, 1,
[Define to 1 if your compiler understands __builtin_ctz.])
fi])# PGAC_C_BUILTIN_CTZ
# PGAC_C_BUILTIN_CTZL
# -------------------------
# Check if the C compiler understands __builtin_ctzl(),
# and define HAVE__BUILTIN_CTZL if so.
# The probe only compiles a one-line test declaration, with the CFLAGS
# already in effect; its outcome is cached in pgac_cv__builtin_ctzl.
AC_DEFUN([PGAC_C_BUILTIN_CTZL],
[AC_CACHE_CHECK(for __builtin_ctzl, pgac_cv__builtin_ctzl,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_ctzl(256);]
)],
[pgac_cv__builtin_ctzl=yes],
[pgac_cv__builtin_ctzl=no])])
if test x"$pgac_cv__builtin_ctzl" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_CTZL, 1,
[Define to 1 if your compiler understands __builtin_ctzl.])
fi])# PGAC_C_BUILTIN_CTZL
# PGAC_C_BUILTIN_CLZ
# -------------------------
# Check if the C compiler understands __builtin_clz(),
# and define HAVE__BUILTIN_CLZ if so.
# The probe only compiles a one-line test declaration, with the CFLAGS
# already in effect; its outcome is cached in pgac_cv__builtin_clz.
AC_DEFUN([PGAC_C_BUILTIN_CLZ],
[AC_CACHE_CHECK(for __builtin_clz, pgac_cv__builtin_clz,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_clz(256);]
)],
[pgac_cv__builtin_clz=yes],
[pgac_cv__builtin_clz=no])])
if test x"$pgac_cv__builtin_clz" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_CLZ, 1,
[Define to 1 if your compiler understands __builtin_clz.])
fi])# PGAC_C_BUILTIN_CLZ
# PGAC_C_BUILTIN_CLZL
# -------------------------
# Check if the C compiler understands __builtin_clzl(),
# and define HAVE__BUILTIN_CLZL if so.
# The probe only compiles a one-line test declaration, with the CFLAGS
# already in effect; its outcome is cached in pgac_cv__builtin_clzl.
AC_DEFUN([PGAC_C_BUILTIN_CLZL],
[AC_CACHE_CHECK(for __builtin_clzl, pgac_cv__builtin_clzl,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[static int x = __builtin_clzl(256);]
)],
[pgac_cv__builtin_clzl=yes],
[pgac_cv__builtin_clzl=no])])
if test x"$pgac_cv__builtin_clzl" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_CLZL, 1,
[Define to 1 if your compiler understands __builtin_clzl.])
fi])# PGAC_C_BUILTIN_CLZL
# PGAC_C_BUILTIN_UNREACHABLE # PGAC_C_BUILTIN_UNREACHABLE
# -------------------------- # --------------------------
# Check if the C compiler understands __builtin_unreachable(), # Check if the C compiler understands __builtin_unreachable(),
......
...@@ -651,6 +651,7 @@ CFLAGS_ARMV8_CRC32C ...@@ -651,6 +651,7 @@ CFLAGS_ARMV8_CRC32C
CFLAGS_SSE42 CFLAGS_SSE42
have_win32_dbghelp have_win32_dbghelp
LIBOBJS LIBOBJS
CFLAGS_POPCNT
UUID_LIBS UUID_LIBS
LDAP_LIBS_BE LDAP_LIBS_BE
LDAP_LIBS_FE LDAP_LIBS_FE
...@@ -14059,6 +14060,158 @@ if test x"$pgac_cv__builtin_constant_p" = xyes ; then ...@@ -14059,6 +14060,158 @@ if test x"$pgac_cv__builtin_constant_p" = xyes ; then
$as_echo "#define HAVE__BUILTIN_CONSTANT_P 1" >>confdefs.h $as_echo "#define HAVE__BUILTIN_CONSTANT_P 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
$as_echo_n "checking for __builtin_popcount... " >&6; }
if ${pgac_cv_popcount+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS -mpopcnt"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_popcount(255);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv_popcount=yes
else
pgac_cv_popcount=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
CFLAGS="$pgac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcount" >&5
$as_echo "$pgac_cv_popcount" >&6; }
if test x"$pgac_cv_popcount" = x"yes"; then
CFLAGS_POPCNT="-mpopcnt"
$as_echo "#define HAVE__BUILTIN_POPCOUNT 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcountl" >&5
$as_echo_n "checking for __builtin_popcountl... " >&6; }
if ${pgac_cv_popcountl+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS -mpopcnt"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_popcountl(255);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv_popcountl=yes
else
pgac_cv_popcountl=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
CFLAGS="$pgac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcountl" >&5
$as_echo "$pgac_cv_popcountl" >&6; }
if test x"$pgac_cv_popcountl" = x"yes"; then
CFLAGS_POPCNT="-mpopcnt"
$as_echo "#define HAVE__BUILTIN_POPCOUNTL 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5
$as_echo_n "checking for __builtin_ctz... " >&6; }
if ${pgac_cv__builtin_ctz+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_ctz(256);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_ctz=yes
else
pgac_cv__builtin_ctz=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctz" >&5
$as_echo "$pgac_cv__builtin_ctz" >&6; }
if test x"$pgac_cv__builtin_ctz" = xyes ; then
$as_echo "#define HAVE__BUILTIN_CTZ 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctzl" >&5
$as_echo_n "checking for __builtin_ctzl... " >&6; }
if ${pgac_cv__builtin_ctzl+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_ctzl(256);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_ctzl=yes
else
pgac_cv__builtin_ctzl=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctzl" >&5
$as_echo "$pgac_cv__builtin_ctzl" >&6; }
if test x"$pgac_cv__builtin_ctzl" = xyes ; then
$as_echo "#define HAVE__BUILTIN_CTZL 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
$as_echo_n "checking for __builtin_clz... " >&6; }
if ${pgac_cv__builtin_clz+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_clz(256);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_clz=yes
else
pgac_cv__builtin_clz=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5
$as_echo "$pgac_cv__builtin_clz" >&6; }
if test x"$pgac_cv__builtin_clz" = xyes ; then
$as_echo "#define HAVE__BUILTIN_CLZ 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzl" >&5
$as_echo_n "checking for __builtin_clzl... " >&6; }
if ${pgac_cv__builtin_clzl+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_clzl(256);
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_clzl=yes
else
pgac_cv__builtin_clzl=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clzl" >&5
$as_echo "$pgac_cv__builtin_clzl" >&6; }
if test x"$pgac_cv__builtin_clzl" = xyes ; then
$as_echo "#define HAVE__BUILTIN_CLZL 1" >>confdefs.h
fi fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5
$as_echo_n "checking for __builtin_unreachable... " >&6; } $as_echo_n "checking for __builtin_unreachable... " >&6; }
...@@ -14577,6 +14730,8 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h ...@@ -14577,6 +14730,8 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h
fi fi
# MSVC doesn't cope well with defining restrict to __restrict, the # MSVC doesn't cope well with defining restrict to __restrict, the
# spelling it understands, because it conflicts with # spelling it understands, because it conflicts with
# __declspec(restrict). Therefore we define pg_restrict to the # __declspec(restrict). Therefore we define pg_restrict to the
......
...@@ -1489,6 +1489,12 @@ PGAC_C_BUILTIN_BSWAP16 ...@@ -1489,6 +1489,12 @@ PGAC_C_BUILTIN_BSWAP16
PGAC_C_BUILTIN_BSWAP32 PGAC_C_BUILTIN_BSWAP32
PGAC_C_BUILTIN_BSWAP64 PGAC_C_BUILTIN_BSWAP64
PGAC_C_BUILTIN_CONSTANT_P PGAC_C_BUILTIN_CONSTANT_P
PGAC_C_BUILTIN_POPCOUNT
PGAC_C_BUILTIN_POPCOUNTL
PGAC_C_BUILTIN_CTZ
PGAC_C_BUILTIN_CTZL
PGAC_C_BUILTIN_CLZ
PGAC_C_BUILTIN_CLZL
PGAC_C_BUILTIN_UNREACHABLE PGAC_C_BUILTIN_UNREACHABLE
PGAC_C_COMPUTED_GOTO PGAC_C_COMPUTED_GOTO
PGAC_STRUCT_TIMEZONE PGAC_STRUCT_TIMEZONE
...@@ -1503,6 +1509,8 @@ AC_TYPE_LONG_LONG_INT ...@@ -1503,6 +1509,8 @@ AC_TYPE_LONG_LONG_INT
PGAC_TYPE_LOCALE_T PGAC_TYPE_LOCALE_T
AC_SUBST(CFLAGS_POPCNT)
# MSVC doesn't cope well with defining restrict to __restrict, the # MSVC doesn't cope well with defining restrict to __restrict, the
# spelling it understands, because it conflicts with # spelling it understands, because it conflicts with
# __declspec(restrict). Therefore we define pg_restrict to the # __declspec(restrict). Therefore we define pg_restrict to the
......
...@@ -260,6 +260,7 @@ CXX = @CXX@ ...@@ -260,6 +260,7 @@ CXX = @CXX@
CFLAGS = @CFLAGS@ CFLAGS = @CFLAGS@
CFLAGS_VECTOR = @CFLAGS_VECTOR@ CFLAGS_VECTOR = @CFLAGS_VECTOR@
CFLAGS_SSE42 = @CFLAGS_SSE42@ CFLAGS_SSE42 = @CFLAGS_SSE42@
CFLAGS_POPCNT = @CFLAGS_POPCNT@
CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@ CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@
PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@ PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
CXXFLAGS = @CXXFLAGS@ CXXFLAGS = @CXXFLAGS@
......
...@@ -89,12 +89,12 @@ ...@@ -89,12 +89,12 @@
#include "access/visibilitymap.h" #include "access/visibilitymap.h"
#include "access/xlog.h" #include "access/xlog.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/inval.h" #include "utils/inval.h"
/*#define TRACE_VISIBILITYMAP */ /*#define TRACE_VISIBILITYMAP */
/* /*
...@@ -115,43 +115,9 @@ ...@@ -115,43 +115,9 @@
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK) #define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
/* tables for fast counting of set bits for visible and frozen */ /* Masks for bit counting bits in the visibility map. */
static const uint8 number_of_ones_for_visible[256] = { #define VISIBLE_MASK64 0x5555555555555555 /* The lower bit of each bit pair */
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, #define FROZEN_MASK64 0xaaaaaaaaaaaaaaaa /* The upper bit of each bit pair */
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
};
static const uint8 number_of_ones_for_frozen[256] = {
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
};
/* prototypes for internal routines */ /* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend); static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
...@@ -408,18 +374,16 @@ void ...@@ -408,18 +374,16 @@ void
visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen) visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
{ {
BlockNumber mapBlock; BlockNumber mapBlock;
BlockNumber nvisible = 0;
BlockNumber nfrozen = 0;
/* all_visible must be specified */ /* all_visible must be specified */
Assert(all_visible); Assert(all_visible);
*all_visible = 0;
if (all_frozen)
*all_frozen = 0;
for (mapBlock = 0;; mapBlock++) for (mapBlock = 0;; mapBlock++)
{ {
Buffer mapBuffer; Buffer mapBuffer;
unsigned char *map; uint64 *map;
int i; int i;
/* /*
...@@ -436,17 +400,30 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro ...@@ -436,17 +400,30 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro
* immediately stale anyway if anyone is concurrently setting or * immediately stale anyway if anyone is concurrently setting or
* clearing bits, and we only really need an approximate value. * clearing bits, and we only really need an approximate value.
*/ */
map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer)); map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
for (i = 0; i < MAPSIZE; i++) StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0,
"unsupported MAPSIZE");
if (all_frozen == NULL)
{ {
*all_visible += number_of_ones_for_visible[map[i]]; for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
if (all_frozen) nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
*all_frozen += number_of_ones_for_frozen[map[i]]; }
else
{
for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
{
nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
nfrozen += pg_popcount64(map[i] & FROZEN_MASK64);
}
} }
ReleaseBuffer(mapBuffer); ReleaseBuffer(mapBuffer);
} }
*all_visible = nvisible;
if (all_frozen)
*all_frozen = nfrozen;
} }
/* /*
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "access/hash.h" #include "access/hash.h"
#include "lib/bloomfilter.h" #include "lib/bloomfilter.h"
#include "port/pg_bitutils.h"
#define MAX_HASH_FUNCS 10 #define MAX_HASH_FUNCS 10
...@@ -187,19 +188,7 @@ double ...@@ -187,19 +188,7 @@ double
bloom_prop_bits_set(bloom_filter *filter) bloom_prop_bits_set(bloom_filter *filter)
{ {
int bitset_bytes = filter->m / BITS_PER_BYTE; int bitset_bytes = filter->m / BITS_PER_BYTE;
uint64 bits_set = 0; uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
int i;
for (i = 0; i < bitset_bytes; i++)
{
unsigned char byte = filter->bitset[i];
while (byte)
{
bits_set++;
byte &= (byte - 1);
}
}
return bits_set / (double) filter->m; return bits_set / (double) filter->m;
} }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "access/hash.h" #include "access/hash.h"
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
#include "port/pg_bitutils.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
...@@ -51,79 +52,23 @@ ...@@ -51,79 +52,23 @@
#define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
/* Set the bitwise macro version we must use based on the bitmapword size */
#if BITS_PER_BITMAPWORD == 32
/* #define bmw_popcount(w) pg_popcount32(w)
* Lookup tables to avoid need for bit-by-bit groveling #define bmw_rightmost_one(w) pg_rightmost_one32(w)
* #define bmw_leftmost_one(w) pg_leftmost_one32(w)
* rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
* in a nonzero byte value x. The entry for x=0 is never used. #elif BITS_PER_BITMAPWORD == 64
*
* leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a #define bmw_popcount(w) pg_popcount64(w)
* nonzero byte value x. The entry for x=0 is never used. #define bmw_rightmost_one(w) pg_rightmost_one64(w)
* #define bmw_leftmost_one(w) pg_leftmost_one64(w)
* number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
* #else
* We could make these tables larger and reduce the number of iterations #error "invalid BITS_PER_BITMAPWORD"
* in the functions that use them, but bytewise shifts and masks are #endif
* especially fast on many machines, so working a byte at a time seems best.
*/
static const uint8 rightmost_one_pos[256] = {
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
static const uint8 leftmost_one_pos[256] = {
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
/* /*
...@@ -607,12 +552,7 @@ bms_singleton_member(const Bitmapset *a) ...@@ -607,12 +552,7 @@ bms_singleton_member(const Bitmapset *a)
if (result >= 0 || HAS_MULTIPLE_ONES(w)) if (result >= 0 || HAS_MULTIPLE_ONES(w))
elog(ERROR, "bitmapset has multiple members"); elog(ERROR, "bitmapset has multiple members");
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
while ((w & 255) == 0) result += bmw_rightmost_one(w);
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
} }
} }
if (result < 0) if (result < 0)
...@@ -650,12 +590,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member) ...@@ -650,12 +590,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member)
if (result >= 0 || HAS_MULTIPLE_ONES(w)) if (result >= 0 || HAS_MULTIPLE_ONES(w))
return false; return false;
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
while ((w & 255) == 0) result += bmw_rightmost_one(w);
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
} }
} }
if (result < 0) if (result < 0)
...@@ -681,12 +616,9 @@ bms_num_members(const Bitmapset *a) ...@@ -681,12 +616,9 @@ bms_num_members(const Bitmapset *a)
{ {
bitmapword w = a->words[wordnum]; bitmapword w = a->words[wordnum];
/* we assume here that bitmapword is an unsigned type */ /* No need to count the bits in a zero word */
while (w != 0) if (w != 0)
{ result += bmw_popcount(w);
result += number_of_ones[w & 255];
w >>= 8;
}
} }
return result; return result;
} }
...@@ -1041,12 +973,7 @@ bms_first_member(Bitmapset *a) ...@@ -1041,12 +973,7 @@ bms_first_member(Bitmapset *a)
a->words[wordnum] &= ~w; a->words[wordnum] &= ~w;
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
while ((w & 255) == 0) result += bmw_rightmost_one(w);
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
return result; return result;
} }
} }
...@@ -1096,12 +1023,7 @@ bms_next_member(const Bitmapset *a, int prevbit) ...@@ -1096,12 +1023,7 @@ bms_next_member(const Bitmapset *a, int prevbit)
int result; int result;
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
while ((w & 255) == 0) result += bmw_rightmost_one(w);
{
w >>= 8;
result += 8;
}
result += rightmost_one_pos[w & 255];
return result; return result;
} }
...@@ -1168,14 +1090,9 @@ bms_prev_member(const Bitmapset *a, int prevbit) ...@@ -1168,14 +1090,9 @@ bms_prev_member(const Bitmapset *a, int prevbit)
if (w != 0) if (w != 0)
{ {
int result; int result;
int shift = BITS_PER_BITMAPWORD - 8;
result = wordnum * BITS_PER_BITMAPWORD; result = wordnum * BITS_PER_BITMAPWORD;
result += bmw_leftmost_one(w);
while ((w >> shift) == 0)
shift -= 8;
result += shift + leftmost_one_pos[(w >> shift) & 255];
return result; return result;
} }
......
...@@ -754,6 +754,24 @@ ...@@ -754,6 +754,24 @@
/* Define to 1 if your compiler understands __builtin_$op_overflow. */ /* Define to 1 if your compiler understands __builtin_$op_overflow. */
#undef HAVE__BUILTIN_OP_OVERFLOW #undef HAVE__BUILTIN_OP_OVERFLOW
/* Define to 1 if your compiler understands __builtin_popcount. */
#undef HAVE__BUILTIN_POPCOUNT
/* Define to 1 if your compiler understands __builtin_popcountl. */
#undef HAVE__BUILTIN_POPCOUNTL
/* Define to 1 if your compiler understands __builtin_ctz. */
#undef HAVE__BUILTIN_CTZ
/* Define to 1 if your compiler understands __builtin_ctzl. */
#undef HAVE__BUILTIN_CTZL
/* Define to 1 if your compiler understands __builtin_clz. */
#undef HAVE__BUILTIN_CLZ
/* Define to 1 if your compiler understands __builtin_clzl. */
#undef HAVE__BUILTIN_CLZL
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */ /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
#undef HAVE__BUILTIN_TYPES_COMPATIBLE_P #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
......
...@@ -593,6 +593,24 @@ ...@@ -593,6 +593,24 @@
/* Define to 1 if your compiler understands __builtin_$op_overflow. */ /* Define to 1 if your compiler understands __builtin_$op_overflow. */
/* #undef HAVE__BUILTIN_OP_OVERFLOW */ /* #undef HAVE__BUILTIN_OP_OVERFLOW */
/* Define to 1 if your compiler understands __builtin_popcount. */
/* #undef HAVE__BUILTIN_POPCOUNT */
/* Define to 1 if your compiler understands __builtin_popcountl. */
/* #undef HAVE__BUILTIN_POPCOUNTL */
/* Define to 1 if your compiler understands __builtin_ctz. */
/* #undef HAVE__BUILTIN_CTZ */
/* Define to 1 if your compiler understands __builtin_ctzl. */
/* #undef HAVE__BUILTIN_CTZL */
/* Define to 1 if your compiler understands __builtin_clz. */
/* #undef HAVE__BUILTIN_CLZ */
/* Define to 1 if your compiler understands __builtin_clzl. */
/* #undef HAVE__BUILTIN_CLZL */
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */ /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
/* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */ /* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */
......
/*-------------------------------------------------------------------------
 *
 * pg_bitutils.h
 *	  miscellaneous functions for bit-wise operations.
 *
 *
 * Portions Copyright (c) 2019, PostgreSQL Global Development Group
 *
 * src/include/port/pg_bitutils.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef PG_BITUTILS_H
#define PG_BITUTILS_H

/*
 * The per-word operations below are declared as pointers to functions, not
 * as functions, so callers invoke them through the pointer.
 * NOTE(review): presumably this lets an implementation be selected at run
 * time; the definitions are not part of this header -- confirm in
 * pg_bitutils.c.
 */

/* Number of one-bits in "word". */
extern int (*pg_popcount32) (uint32 word);
extern int (*pg_popcount64) (uint64 word);

/*
 * Bit position of the rightmost one-bit in "word"; callers add the result
 * to a word's base bit number.
 * NOTE(review): presumably 0-based from the least significant bit, and
 * presumably "word" must be nonzero -- confirm against the implementation.
 */
extern int (*pg_rightmost_one32) (uint32 word);
extern int (*pg_rightmost_one64) (uint64 word);

/*
 * Bit position of the leftmost one-bit in "word"; same caveats as for the
 * rightmost-one variants above.
 */
extern int (*pg_leftmost_one32) (uint32 word);
extern int (*pg_leftmost_one64) (uint64 word);

/* Total number of one-bits in the "bytes" bytes starting at "buf". */
extern uint64 pg_popcount(const char *buf, int bytes);

#endif /* PG_BITUTILS_H */
...@@ -36,7 +36,7 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS) ...@@ -36,7 +36,7 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
LIBS += $(PTHREAD_LIBS) LIBS += $(PTHREAD_LIBS)
OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \ noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \
pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \ pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \
qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \ qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \
tar.o thread.o tar.o thread.o
...@@ -78,6 +78,9 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) ...@@ -78,6 +78,9 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
# pg_bitutils.c needs CFLAGS_POPCNT.  Set it for the _shlib and _srv
# variants of the object file as well as the plain one, mirroring the
# per-object CFLAGS rules for the CRC objects above; otherwise those
# variants would be compiled without the flag.
pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT)
pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_POPCNT)
pg_bitutils_srv.o: CFLAGS+=$(CFLAGS_POPCNT)
# #
# Shared library versions of object files # Shared library versions of object files
# #
......
This diff is collapsed.
...@@ -112,6 +112,7 @@ sub mkvcbuild ...@@ -112,6 +112,7 @@ sub mkvcbuild
push(@pgportfiles, 'pg_crc32c_sse42_choose.c'); push(@pgportfiles, 'pg_crc32c_sse42_choose.c');
push(@pgportfiles, 'pg_crc32c_sse42.c'); push(@pgportfiles, 'pg_crc32c_sse42.c');
push(@pgportfiles, 'pg_crc32c_sb8.c'); push(@pgportfiles, 'pg_crc32c_sb8.c');
push(@pgportfiles, 'pg_bitutils.c');
} }
else else
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment