Commit 3dc2d62d authored by Heikki Linnakangas's avatar Heikki Linnakangas

Use Intel SSE 4.2 CRC instructions where available.

Modern x86 and x86-64 processors with SSE 4.2 support have special
instructions, crc32b and crc32q, for calculating CRC-32C. They greatly
speed up CRC calculation.

Whether the instructions can be used or not depends on the compiler and the
target architecture. If generation of SSE 4.2 instructions is allowed for
the target (-msse4.2 flag on gcc and clang), use them. If they are not
allowed by default, but the compiler supports the -msse4.2 flag to enable
them, compile just the CRC-32C function with -msse4.2 flag, and check at
runtime whether the processor we're running on supports it. If it doesn't,
fall back to the slicing-by-8 algorithm. (With the common defaults on
current operating systems, the runtime-check variant is what you get in
practice.)

Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
parent 4f700bcd
......@@ -473,3 +473,30 @@ AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT64_CAS],
if test x"$pgac_cv_gcc_atomic_int64_cas" = x"yes"; then
AC_DEFINE(HAVE_GCC__ATOMIC_INT64_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int64 *, int *, int64).])
fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
# PGAC_SSE42_CRC32_INTRINSICS
# ---------------------------
# Check if the compiler supports _mm_crc32_u8 and _mm_crc32_u64 intrinsics.
#
# An optional compiler flag can be passed as argument (e.g. -msse4.2). The
# flag is appended to CFLAGS only for the duration of the link test; CFLAGS
# is restored afterwards. The test is run with ac_c_werror_flag=yes, and the
# previous value of that flag is restored as well.
#
# The result is cached in a variable whose name is derived from the flag, so
# checks made with different flags get separate cache entries.
#
# If the intrinsics are supported, sets pgac_sse42_crc32_intrinsics to yes,
# and CFLAGS_SSE42 to the flag that was passed (which may be empty). If they
# are not supported, neither variable is assigned.
AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics_$1])])dnl
AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=$1], [Ac_cachevar],
[pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS $1"
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
AC_TRY_LINK([#include <nmmintrin.h>],
[unsigned int crc = 0;
crc = _mm_crc32_u8(crc, 0);
crc = (unsigned int) _mm_crc32_u64(crc, 0);],
[Ac_cachevar=yes],
[Ac_cachevar=no])
ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="$pgac_save_CFLAGS"])
if test x"$Ac_cachevar" = x"yes"; then
CFLAGS_SSE42="$1"
pgac_sse42_crc32_intrinsics=yes
fi
undefine([Ac_cachevar])dnl
])# PGAC_SSE42_CRC32_INTRINSICS
......@@ -650,6 +650,8 @@ MSGMERGE
MSGFMT_FLAGS
MSGFMT
HAVE_POSIX_SIGNALS
PG_CRC32C_OBJS
CFLAGS_SSE42
LDAP_LIBS_BE
LDAP_LIBS_FE
PTHREAD_CFLAGS
......@@ -14095,6 +14097,216 @@ $as_echo "#define HAVE_GCC__ATOMIC_INT64_CAS 1" >>confdefs.h
fi
# Check for x86 cpuid instruction
#
# Two interfaces are probed: __get_cpuid from <cpuid.h>, and the __cpuid
# intrinsic from <intrin.h>. Either one is sufficient for the runtime check
# that the CRC-32C implementation selection further down relies on.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid" >&5
$as_echo_n "checking for __get_cpuid... " >&6; }
if ${pgac_cv__get_cpuid+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <cpuid.h>
int
main ()
{
unsigned int exx[4] = {0, 0, 0, 0};
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__get_cpuid="yes"
else
pgac_cv__get_cpuid="no"
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid" >&5
$as_echo "$pgac_cv__get_cpuid" >&6; }
if test x"$pgac_cv__get_cpuid" = x"yes"; then
$as_echo "#define HAVE__GET_CPUID 1" >>confdefs.h
fi
# The test program for this probe must call __cpuid itself (with the same
# argument form that pg_crc32c_choose.c uses), so that HAVE__CPUID really
# reflects whether that intrinsic can be linked.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuid" >&5
$as_echo_n "checking for __cpuid... " >&6; }
if ${pgac_cv__cpuid+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <intrin.h>
int
main ()
{
unsigned int exx[4] = {0, 0, 0, 0};
__cpuid(exx, 1);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__cpuid="yes"
else
pgac_cv__cpuid="no"
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuid" >&5
$as_echo "$pgac_cv__cpuid" >&6; }
if test x"$pgac_cv__cpuid" = x"yes"; then
$as_echo "#define HAVE__CPUID 1" >>confdefs.h
fi
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
# with the default compiler flags. If not, check if adding the -msse4.2
# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=" >&5
$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=... " >&6; }
if ${pgac_cv_sse42_crc32_intrinsics_+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS "
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <nmmintrin.h>
int
main ()
{
unsigned int crc = 0;
crc = _mm_crc32_u8(crc, 0);
crc = (unsigned int) _mm_crc32_u64(crc, 0);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv_sse42_crc32_intrinsics_=yes
else
pgac_cv_sse42_crc32_intrinsics_=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="$pgac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics_" >&5
$as_echo "$pgac_cv_sse42_crc32_intrinsics_" >&6; }
if test x"$pgac_cv_sse42_crc32_intrinsics_" = x"yes"; then
CFLAGS_SSE42=""
pgac_sse42_crc32_intrinsics=yes
fi
# The default flags were not enough; repeat the same test with -msse4.2.
if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=-msse4.2" >&5
$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=-msse4.2... " >&6; }
if ${pgac_cv_sse42_crc32_intrinsics__msse4_2+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS -msse4.2"
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <nmmintrin.h>
int
main ()
{
unsigned int crc = 0;
crc = _mm_crc32_u8(crc, 0);
crc = (unsigned int) _mm_crc32_u64(crc, 0);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv_sse42_crc32_intrinsics__msse4_2=yes
else
pgac_cv_sse42_crc32_intrinsics__msse4_2=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="$pgac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics__msse4_2" >&5
$as_echo "$pgac_cv_sse42_crc32_intrinsics__msse4_2" >&6; }
if test x"$pgac_cv_sse42_crc32_intrinsics__msse4_2" = x"yes"; then
CFLAGS_SSE42="-msse4.2"
pgac_sse42_crc32_intrinsics=yes
fi
fi
# Select CRC-32C implementation.
#
# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them
# always. If they require extra CFLAGS, compile both implementations and
# select which one to use at runtime, depending on whether SSE 4.2 is
# supported by the processor we're running on.
#
# You can override this logic by setting the appropriate USE_*_CRC32C flag to 1
# in the template or configure command line.
#
# The automatic choice below is made only when all three USE_* variables are
# empty; if any of them has been set, none of them is modified here.
if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then
USE_SSE42_CRC32C=1
else
# the CPUID instruction is needed for the runtime check.
# (either of the two probed cpuid interfaces is accepted)
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
else
USE_SLICING_BY_8_CRC32C=1
fi
fi
fi
# Set PG_CRC32C_OBJS appropriately depending on the selected implementation.
#
# Only the exact value "1" selects one of the SSE 4.2 variants; every other
# combination of settings ends up in the final else branch and gets the
# slicing-by-8 implementation.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking which CRC-32C implementation to use" >&5
$as_echo_n "checking which CRC-32C implementation to use... " >&6; }
if test x"$USE_SSE42_CRC32C" = x"1"; then
$as_echo "#define USE_SSE42_CRC32C 1" >>confdefs.h
PG_CRC32C_OBJS="pg_crc32c_sse42.o"
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5
$as_echo "SSE 4.2" >&6; }
else
if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
$as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
# The runtime-check build needs both implementations plus the chooser.
PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5
$as_echo "SSE 4.2 with runtime check" >&6; }
else
$as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h
PG_CRC32C_OBJS="pg_crc32c_sb8.o"
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5
$as_echo "slicing-by-8" >&6; }
fi
fi
# Check that POSIX signals are available if thread safety is enabled.
if test "$PORTNAME" != "win32"
then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for POSIX signal interface" >&5
......
......@@ -1790,6 +1790,84 @@ PGAC_HAVE_GCC__SYNC_INT64_CAS
PGAC_HAVE_GCC__ATOMIC_INT32_CAS
PGAC_HAVE_GCC__ATOMIC_INT64_CAS
# Check for x86 cpuid instruction
#
# Two interfaces are probed: __get_cpuid from <cpuid.h>, and the __cpuid
# intrinsic from <intrin.h>. Either one is sufficient for the runtime check
# that the CRC-32C implementation selection further down relies on.
AC_CACHE_CHECK([for __get_cpuid], [pgac_cv__get_cpuid],
[AC_TRY_LINK([#include <cpuid.h>],
[unsigned int exx[4] = {0, 0, 0, 0};
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
],
[pgac_cv__get_cpuid="yes"],
[pgac_cv__get_cpuid="no"])])
if test x"$pgac_cv__get_cpuid" = x"yes"; then
AC_DEFINE(HAVE__GET_CPUID, 1, [Define to 1 if you have __get_cpuid.])
fi
# The test program for this probe must call __cpuid itself, with the same
# argument form that pg_crc32c_choose.c uses, so that the result really
# reflects whether that intrinsic can be linked.
AC_CACHE_CHECK([for __cpuid], [pgac_cv__cpuid],
[AC_TRY_LINK([#include <intrin.h>],
[unsigned int exx[4] = {0, 0, 0, 0};
__cpuid(exx, 1);
],
[pgac_cv__cpuid="yes"],
[pgac_cv__cpuid="no"])])
if test x"$pgac_cv__cpuid" = x"yes"; then
AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.])
fi
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
# with the default compiler flags. If not, check if adding the -msse4.2
# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required.
#
# The second check is skipped when the first one already succeeded.
PGAC_SSE42_CRC32_INTRINSICS([])
if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then
PGAC_SSE42_CRC32_INTRINSICS([-msse4.2])
fi
AC_SUBST(CFLAGS_SSE42)
# Select CRC-32C implementation.
#
# Preference order when nothing has been chosen explicitly:
#   1. SSE 4.2 unconditionally, if the intrinsics work without extra CFLAGS;
#   2. SSE 4.2 behind a runtime check, if the intrinsics need extra CFLAGS
#      and a cpuid interface is available to perform the check (both
#      implementations get compiled in that case);
#   3. slicing-by-8 otherwise.
#
# You can override this logic by setting the appropriate USE_*_CRC32C flag to 1
# in the template or configure command line; if any of the three flags is
# non-empty, none of them is modified here.
if test x"$USE_SSE42_CRC32C$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK$USE_SLICING_BY_8_CRC32C" = x""; then
  if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x""; then
    USE_SSE42_CRC32C=1
  elif test x"$pgac_sse42_crc32_intrinsics" = x"yes" && { test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"; }; then
    # the CPUID instruction is needed for the runtime check.
    USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
  else
    USE_SLICING_BY_8_CRC32C=1
  fi
fi
# Set PG_CRC32C_OBJS appropriately depending on the selected implementation.
#
# Only the exact value "1" selects one of the SSE 4.2 variants; every other
# combination of settings falls through to slicing-by-8.
AC_MSG_CHECKING([which CRC-32C implementation to use])
if test x"$USE_SSE42_CRC32C" = x"1"; then
  AC_DEFINE(USE_SSE42_CRC32C, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions.])
  PG_CRC32C_OBJS="pg_crc32c_sse42.o"
  AC_MSG_RESULT(SSE 4.2)
elif test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
  AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.])
  # The runtime-check build needs both implementations plus the chooser.
  PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
  AC_MSG_RESULT(SSE 4.2 with runtime check)
else
  AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).])
  PG_CRC32C_OBJS="pg_crc32c_sb8.o"
  AC_MSG_RESULT(slicing-by-8)
fi
AC_SUBST(PG_CRC32C_OBJS)
# Check that POSIX signals are available if thread safety is enabled.
if test "$PORTNAME" != "win32"
then
PGAC_FUNC_POSIX_SIGNALS
......
......@@ -225,6 +225,7 @@ GCC = @GCC@
SUN_STUDIO_CC = @SUN_STUDIO_CC@
CFLAGS = @CFLAGS@
CFLAGS_VECTOR = @CFLAGS_VECTOR@
CFLAGS_SSE42 = @CFLAGS_SSE42@
# Kind-of compilers
......@@ -548,6 +549,9 @@ endif
LIBOBJS = @LIBOBJS@
# files needed for the chosen CRC-32C implementation
PG_CRC32C_OBJS = @PG_CRC32C_OBJS@
LIBS := -lpgcommon -lpgport $(LIBS)
# to make ws2_32.lib the last library
......
......@@ -675,6 +675,12 @@
/* Define to 1 if your compiler understands __builtin_unreachable. */
#undef HAVE__BUILTIN_UNREACHABLE
/* Define to 1 if you have __cpuid. */
#undef HAVE__CPUID
/* Define to 1 if you have __get_cpuid. */
#undef HAVE__GET_CPUID
/* Define to 1 if your compiler understands _Static_assert. */
#undef HAVE__STATIC_ASSERT
......@@ -818,6 +824,15 @@
/* Use replacement snprintf() functions. */
#undef USE_REPL_SNPRINTF
/* Define to 1 to use software CRC-32C implementation (slicing-by-8). */
#undef USE_SLICING_BY_8_CRC32C
/* Define to 1 to use Intel SSE 4.2 CRC instructions. */
#undef USE_SSE42_CRC32C
/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */
#undef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
/* Define to select SysV-style semaphores. */
#undef USE_SYSV_SEMAPHORES
......
......@@ -6,8 +6,8 @@
*
* HAVE_CBRT, HAVE_FUNCNAME_FUNC, HAVE_GETOPT, HAVE_GETOPT_H, HAVE_INTTYPES_H,
* HAVE_GETOPT_LONG, HAVE_LOCALE_T, HAVE_RINT, HAVE_STRINGS_H, HAVE_STRTOLL,
* HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY,
* PG_USE_INLINE, inline
* HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY, PG_USE_INLINE,
* inline, USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
*/
/* Define to the type of arg 1 of 'accept' */
......@@ -529,6 +529,12 @@
/* Define to 1 if your compiler understands __builtin_unreachable. */
/* #undef HAVE__BUILTIN_UNREACHABLE */
/* Define to 1 if you have __cpuid. */
#define HAVE__CPUID 1
/* Define to 1 if you have __get_cpuid. */
/* #undef HAVE__GET_CPUID */
/* Define to 1 if your compiler understands _Static_assert. */
/* #undef HAVE__STATIC_ASSERT */
......@@ -639,6 +645,19 @@
/* Use replacement snprintf() functions. */
#define USE_REPL_SNPRINTF 1
/*
 * Define to 1 to use software CRC-32C implementation (slicing-by-8).
 *
 * The 1500 threshold pairs with the vsVersion >= 9.00 test in Mkvcbuild.pm,
 * which decides whether the SSE 4.2 source files are compiled at all.
 */
#if (_MSC_VER < 1500)
#define USE_SLICING_BY_8_CRC32C 1
#endif
/* Define to 1 to use Intel SSE 4.2 CRC instructions. */
/* #undef USE_SSE42_CRC32C */
/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */
#if (_MSC_VER >= 1500)
#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1
#endif
/* Define to select SysV-style semaphores. */
/* #undef USE_SYSV_SEMAPHORES */
......
......@@ -3,6 +3,25 @@
* pg_crc32c.h
* Routines for computing CRC-32C checksums.
*
* The speed of CRC-32C calculation has a big impact on performance, so we
* jump through some hoops to get the best implementation for each
* platform. Some CPU architectures have special instructions for speeding
* up CRC calculations (e.g. Intel SSE 4.2), on other platforms we use the
* Slicing-by-8 algorithm which uses lookup tables.
*
* The public interface consists of four macros:
*
* INIT_CRC32C(crc)
* Initialize a CRC accumulator
*
* COMP_CRC32C(crc, data, len)
* Accumulate some (more) bytes into a CRC
*
* FIN_CRC32C(crc)
* Finish a CRC calculation
*
* EQ_CRC32C(c1, c2)
* Check for equality of two CRCs.
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
......@@ -16,9 +35,32 @@
/* Accumulator type for a CRC-32C computation. */
typedef uint32 pg_crc32c;
/*
 * The INIT and EQ macros are the same for all implementations.
 *
 * INIT_CRC32C assigns to its argument, so "crc" must be an lvalue.
 */
#define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF)
#define EQ_CRC32C(c1, c2) ((c1) == (c2))
#if defined(USE_SSE42_CRC32C)
/*
 * Use SSE4.2 instructions.
 *
 * COMP_CRC32C and FIN_CRC32C update "crc" in place, so it must be an lvalue;
 * COMP_CRC32C evaluates it twice.
 */
#define COMP_CRC32C(crc, data, len) \
((crc) = pg_comp_crc32c_sse42((crc), (data), (len)))
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
/*
 * Use SSE4.2 instructions, but perform a runtime check first to check that
 * they are available.
 *
 * COMP_CRC32C calls through the pg_comp_crc32c function pointer rather than
 * naming an implementation directly. As above, "crc" must be an lvalue and
 * is evaluated twice by COMP_CRC32C.
 */
#define COMP_CRC32C(crc, data, len) \
((crc) = pg_comp_crc32c((crc), (data), (len)))
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
/* Function pointer through which COMP_CRC32C dispatches. */
extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len);
#else
/*
* Use slicing-by-8 algorithm.
*
......@@ -46,4 +88,6 @@ typedef uint32 pg_crc32c;
extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
#endif
#endif /* PG_CRC32C_H */
......@@ -30,10 +30,10 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
LIBS += $(PTHREAD_LIBS)
OBJS = $(LIBOBJS) chklocale.o erand48.o inet_net_ntop.o \
OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
pgstrcasecmp.o pqsignal.o \
qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o pg_crc32c_sb8.o
qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o
# foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND
OBJS_SRV = $(OBJS:%.o=%_srv.o)
......@@ -57,6 +57,10 @@ libpgport.a: $(OBJS)
# thread.o needs PTHREAD_CFLAGS (but thread_srv.o does not)
thread.o: CFLAGS+=$(PTHREAD_CFLAGS)
# pg_crc32c_sse42.o and its _srv.o version need CFLAGS_SSE42
pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42)
pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42)
#
# Server versions of object files
#
......
/*-------------------------------------------------------------------------
*
* pg_crc32c_choose.c
* Choose which CRC-32C implementation to use, at runtime.
*
* Try to use the special CRC instructions introduced in Intel SSE 4.2,
* if available on the platform we're running on, but fall back to the
* slicing-by-8 implementation otherwise.
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/port/pg_crc32c_choose.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#ifdef HAVE__GET_CPUID
#include <cpuid.h>
#endif
#ifdef HAVE__CPUID
#include <intrin.h>
#endif
#include "port/pg_crc32c.h"
/*
 * Ask the processor, via the cpuid instruction, whether it supports SSE 4.2.
 *
 * Uses whichever cpuid interface configure found (__get_cpuid or __cpuid);
 * compilation fails if neither is available.
 */
static bool
pg_crc32c_sse42_available(void)
{
	/* output registers of cpuid, in the order the interfaces fill them */
	unsigned int regs[4] = {0, 0, 0, 0};
	const unsigned int sse42_mask = 1U << 20;

#if defined(HAVE__GET_CPUID)
	__get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
#elif defined(HAVE__CPUID)
	__cpuid(regs, 1);
#else
#error cpuid instruction not available
#endif

	/* bit 20 of the third output word is the SSE 4.2 feature flag */
	if (regs[2] & sse42_mask)
		return true;
	return false;
}
/*
 * Initial target of the pg_comp_crc32c function pointer.
 *
 * The first CRC computation lands here. We pick the implementation suited
 * to the processor we are running on, store it in the function pointer so
 * that later calls go straight to it, and then perform the requested
 * computation with the implementation just chosen.
 */
static pg_crc32c
pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
{
	pg_comp_crc32c = pg_crc32c_sse42_available() ?
		pg_comp_crc32c_sse42 : pg_comp_crc32c_sb8;

	return pg_comp_crc32c(crc, data, len);
}

/* Starts out pointing at the chooser; replaced on first use. */
pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
/*-------------------------------------------------------------------------
*
* pg_crc32c_sse42.c
* Compute CRC-32C checksum using Intel SSE 4.2 instructions.
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/port/pg_crc32c_sse42.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include "port/pg_crc32c.h"
#include <nmmintrin.h>
/*
 * pg_comp_crc32c_sse42
 *		Accumulate "len" bytes starting at "data" into the CRC-32C value
 *		"crc", using the SSE 4.2 CRC instructions, and return the updated
 *		value.
 *
 * The caller is responsible for making sure the processor supports SSE 4.2
 * before calling this.
 */
pg_crc32c
pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
{
	const unsigned char *p = data;

	/*
	 * Process eight bytes of data at a time.
	 *
	 * The eight-byte form of the instruction (_mm_crc32_u64) exists only
	 * when compiling for x86-64, so it must not be referenced in 32-bit
	 * builds; those use the four-byte loop below instead.
	 *
	 * NB: We do unaligned accesses here. The Intel architecture allows
	 * that, and performance testing didn't show any performance gain from
	 * aligning the beginning address.
	 */
#if defined(__x86_64__) || defined(_M_X64)
	while (len >= 8)
	{
		crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
		p += 8;
		len -= 8;
	}
#endif

	/*
	 * Process four bytes at a time. On x86-64 this runs at most once, for
	 * the tail left over by the loop above; on 32-bit x86 it is the main
	 * loop. Feeding the data in smaller pieces does not change the result.
	 */
	while (len >= 4)
	{
		crc = _mm_crc32_u32(crc, *((const uint32 *) p));
		p += 4;
		len -= 4;
	}

	/*
	 * Handle any remaining bytes one at a time.
	 */
	while (len > 0)
	{
		crc = _mm_crc32_u8(crc, *p++);
		len--;
	}

	return crc;
}
......@@ -92,10 +92,21 @@ sub mkvcbuild
pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c pqsignal.c
mkdtemp.c qsort.c qsort_arg.c quotes.c system.c
sprompt.c tar.c thread.c getopt.c getopt_long.c dirent.c
win32env.c win32error.c win32setlocale.c pg_crc32c_sb8.c);
win32env.c win32error.c win32setlocale.c);
push(@pgportfiles, 'rint.c') if ($vsVersion < '12.00');
# The SSE 4.2 implementation and the runtime chooser are only built with
# Visual Studio 9.00 or newer; the slicing-by-8 implementation is always
# built, either as the only implementation or as the runtime fallback.
push(@pgportfiles, 'pg_crc32c_choose.c', 'pg_crc32c_sse42.c')
  if ($vsVersion >= '9.00');
push(@pgportfiles, 'pg_crc32c_sb8.c');
our @pgcommonallfiles = qw(
exec.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
string.c username.c wait_error.c);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment