All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yazen Ghannam <yazen.ghannam@linaro.org>
To: ceph-devel@vger.kernel.org
Cc: Yazen Ghannam <yazen.ghannam@linaro.org>
Subject: [PATCH] crc32c: add aarch64 optimized crc32c implementation
Date: Mon, 26 Jan 2015 09:20:35 -0500	[thread overview]
Message-ID: <1422282035-30121-1-git-send-email-yazen.ghannam@linaro.org> (raw)

ARMv8 defines a set of optional CRC32/CRC32C instructions.
This patch defines an optimized function that uses these
instructions when available rather than table-based lookup.
Optimized function based on a Hadoop patch by Ed Nevill.

Autotools updated to check for compiler support.
Optimized function is selected at runtime based on HWCAP_CRC32.
Added crc32c "performance" unit test and arch unit test.

Tested on AMD Seattle.
Passes all crc32c unit tests.
Unit test shows ~4x performance increase versus sctp.

Signed-off-by: Yazen Ghannam <yazen.ghannam@linaro.org>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
---
 configure.ac                   |  1 +
 m4/ax_arm.m4                   | 18 ++++++++++++++--
 src/arch/arm.c                 |  2 ++
 src/arch/arm.h                 |  1 +
 src/common/Makefile.am         | 10 ++++++++-
 src/common/crc32c.cc           |  6 ++++++
 src/common/crc32c_aarch64.c    | 47 ++++++++++++++++++++++++++++++++++++++++++
 src/common/crc32c_aarch64.h    | 27 ++++++++++++++++++++++++
 src/test/common/test_crc32c.cc | 10 +++++++++
 src/test/test_arch.cc          | 14 +++++++++++++
 10 files changed, 133 insertions(+), 3 deletions(-)
 create mode 100644 src/common/crc32c_aarch64.c
 create mode 100644 src/common/crc32c_aarch64.h

diff --git a/configure.ac b/configure.ac
index d836b02..60e4feb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -575,6 +575,7 @@ AC_LANG_POP([C++])
 # Find supported SIMD / NEON / SSE extensions supported by the compiler
 AX_ARM_FEATURES()
 AM_CONDITIONAL(HAVE_NEON, [ test "x$ax_cv_support_neon_ext" = "xyes"])
+AM_CONDITIONAL(HAVE_ARMV8_CRC, [ test "x$ax_cv_support_crc_ext" = "xyes"])
 AX_INTEL_FEATURES()
 AM_CONDITIONAL(HAVE_SSSE3, [ test "x$ax_cv_support_ssse3_ext" = "xyes"])
 AM_CONDITIONAL(HAVE_SSE4_PCLMUL, [ test "x$ax_cv_support_pclmuldq_ext" = "xyes"])
diff --git a/m4/ax_arm.m4 b/m4/ax_arm.m4
index 2ccc9a9..37ea0aa 100644
--- a/m4/ax_arm.m4
+++ b/m4/ax_arm.m4
@@ -13,13 +13,27 @@ AC_DEFUN([AX_ARM_FEATURES],
       fi
     ;;
     aarch64*)
+      AX_CHECK_COMPILE_FLAG(-march=armv8-a, ax_cv_support_armv8=yes, [])
+      if test x"$ax_cv_support_armv8" = x"yes"; then
+        ARM_ARCH_FLAGS="-march=armv8-a"
+        ARM_DEFINE_FLAGS="-DARCH_AARCH64"
+      fi
       AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, ax_cv_support_neon_ext=yes, [])
       if test x"$ax_cv_support_neon_ext" = x"yes"; then
+        ARM_ARCH_FLAGS="$ARM_ARCH_FLAGS+simd"
+        ARM_DEFINE_FLAGS="$ARM_DEFINE_FLAGS -DARM_NEON"
         ARM_NEON_FLAGS="-march=armv8-a+simd -DARCH_AARCH64 -DARM_NEON"
-        AC_SUBST(ARM_NEON_FLAGS)
-        ARM_FLAGS="$ARM_FLAGS $ARM_NEON_FLAGS"
         AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
+        AC_SUBST(ARM_NEON_FLAGS)
+      fi
+      AX_CHECK_COMPILE_FLAG(-march=armv8-a+crc, ax_cv_support_crc_ext=yes, [])
+      if test x"$ax_cv_support_crc_ext" = x"yes"; then
+        ARM_ARCH_FLAGS="$ARM_ARCH_FLAGS+crc"
+        ARM_CRC_FLAGS="-march=armv8-a+crc -DARCH_AARCH64"
+        AC_DEFINE(HAVE_ARMV8_CRC,,[Support ARMv8 CRC instructions])
+        AC_SUBST(ARM_CRC_FLAGS)
       fi
+        ARM_FLAGS="$ARM_ARCH_FLAGS $ARM_DEFINE_FLAGS"
     ;;
   esac
 
diff --git a/src/arch/arm.c b/src/arch/arm.c
index 93d079a..5a47e33 100644
--- a/src/arch/arm.c
+++ b/src/arch/arm.c
@@ -2,6 +2,7 @@
 
 /* flags we export */
 int ceph_arch_neon = 0;
+int ceph_arch_aarch64_crc32 = 0;
 
 #include <stdio.h>
 
@@ -47,6 +48,7 @@ int ceph_arch_arm_probe(void)
 	ceph_arch_neon = (get_hwcap() & HWCAP_NEON) == HWCAP_NEON;
 #elif __aarch64__ && __linux__
 	ceph_arch_neon = (get_hwcap() & HWCAP_ASIMD) == HWCAP_ASIMD;
+	ceph_arch_aarch64_crc32 = (get_hwcap() & HWCAP_CRC32) == HWCAP_CRC32;
 #else
 	if (0)
 		get_hwcap();  // make compiler shut up
diff --git a/src/arch/arm.h b/src/arch/arm.h
index f613438..1659b2e 100644
--- a/src/arch/arm.h
+++ b/src/arch/arm.h
@@ -6,6 +6,7 @@ extern "C" {
 #endif
 
 extern int ceph_arch_neon;  /* true if we have ARM NEON or ASIMD abilities */
+extern int ceph_arch_aarch64_crc32;  /* true if we have AArch64 CRC32/CRC32C abilities */
 
 extern int ceph_arch_arm_probe(void);
 
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 2888194..37d1404 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -112,11 +112,19 @@ endif
 LIBCOMMON_DEPS += libcommon_crc.la
 noinst_LTLIBRARIES += libcommon_crc.la
 
+if HAVE_ARMV8_CRC
+libcommon_crc_aarch64_la_SOURCES = common/crc32c_aarch64.c
+libcommon_crc_aarch64_la_CFLAGS = $(AM_CFLAGS) $(ARM_CRC_FLAGS)
+LIBCOMMON_DEPS += libcommon_crc_aarch64.la
+noinst_LTLIBRARIES += libcommon_crc_aarch64.la
+endif
+
 noinst_HEADERS += \
 	common/bloom_filter.hpp \
 	common/sctp_crc32.h \
 	common/crc32c_intel_baseline.h \
-	common/crc32c_intel_fast.h
+	common/crc32c_intel_fast.h \
+	common/crc32c_aarch64.h
 
 
 # important; libmsg before libauth!
diff --git a/src/common/crc32c.cc b/src/common/crc32c.cc
index e2e81a4..45432f5 100644
--- a/src/common/crc32c.cc
+++ b/src/common/crc32c.cc
@@ -5,9 +5,11 @@
 
 #include "arch/probe.h"
 #include "arch/intel.h"
+#include "arch/arm.h"
 #include "common/sctp_crc32.h"
 #include "common/crc32c_intel_baseline.h"
 #include "common/crc32c_intel_fast.h"
+#include "common/crc32c_aarch64.h"
 
 /*
  * choose best implementation based on the CPU architecture.
@@ -24,6 +26,10 @@ ceph_crc32c_func_t ceph_choose_crc32(void)
     return ceph_crc32c_intel_fast;
   }
 
+  if (ceph_arch_aarch64_crc32){
+    return ceph_crc32c_aarch64;
+  }
+
   // default
   return ceph_crc32c_sctp;
 }
diff --git a/src/common/crc32c_aarch64.c b/src/common/crc32c_aarch64.c
new file mode 100644
index 0000000..d33827d
--- /dev/null
+++ b/src/common/crc32c_aarch64.c
@@ -0,0 +1,47 @@
+#include "acconfig.h"
+#include "include/int_types.h"
+#include "common/crc32c_aarch64.h"
+
+#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+
+uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+	int64_t length = len;
+
+	if (!buffer) {
+
+		while ((length -= sizeof(uint64_t)) >= 0)
+			CRC32CX(crc, 0);
+
+		/* The following is more efficient than the straight loop */
+		if (length & sizeof(uint32_t))
+			CRC32CW(crc, 0);
+
+		if (length & sizeof(uint16_t))
+			CRC32CH(crc, 0);
+
+		if (length & sizeof(uint8_t))
+			CRC32CB(crc, 0);
+	} else {
+		while ((length -= sizeof(uint64_t)) >= 0) {
+			CRC32CX(crc, *(uint64_t *)buffer);
+			buffer += sizeof(uint64_t);
+		}
+
+		/* The following is more efficient than the straight loop */
+		if (length & sizeof(uint32_t)) {
+			CRC32CW(crc, *(uint32_t *)buffer);
+			buffer += sizeof(uint32_t);
+		}
+		if (length & sizeof(uint16_t)) {
+			CRC32CH(crc, *(uint16_t *)buffer);
+			buffer += sizeof(uint16_t);
+		}
+		if (length & sizeof(uint8_t))
+			CRC32CB(crc, *buffer);
+	}
+	return crc;
+}
diff --git a/src/common/crc32c_aarch64.h b/src/common/crc32c_aarch64.h
new file mode 100644
index 0000000..3727f54
--- /dev/null
+++ b/src/common/crc32c_aarch64.h
@@ -0,0 +1,27 @@
+#ifndef CEPH_COMMON_CRC32C_AARCH64_H
+#define CEPH_COMMON_CRC32C_AARCH64_H
+
+#include "arch/arm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_ARMV8_CRC
+
+extern uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len);
+
+#else
+
+static inline uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+	return 0;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/test/common/test_crc32c.cc b/src/test/common/test_crc32c.cc
index b4297c6..527f115 100644
--- a/src/test/common/test_crc32c.cc
+++ b/src/test/common/test_crc32c.cc
@@ -13,6 +13,7 @@
 
 #include "common/sctp_crc32.h"
 #include "common/crc32c_intel_baseline.h"
+#include "common/crc32c_aarch64.h"
 
 TEST(Crc32c, Small) {
   const char *a = "foo bar baz";
@@ -80,6 +81,15 @@ TEST(Crc32c, Performance) {
     std::cout << "intel baseline = " << rate << " MB/sec" << std::endl;
     ASSERT_EQ(261108528u, val);
   }
+  if (ceph_arch_aarch64_crc32) // Skip if CRC32C instructions are not defined.
+  {
+    utime_t start = ceph_clock_now(NULL);
+    unsigned val = ceph_crc32c_aarch64(0, (unsigned char *)a, len);
+    utime_t end = ceph_clock_now(NULL);
+    float rate = (float)len / (float)(1024*1024) / (float)(end - start);
+    std::cout << "aarch64 baseline = " << rate << " MB/sec" << std::endl;
+    ASSERT_EQ(261108528u, val);
+  }
 
 }
 
diff --git a/src/test/test_arch.cc b/src/test/test_arch.cc
index b129262..e2c225b 100644
--- a/src/test/test_arch.cc
+++ b/src/test/test_arch.cc
@@ -47,9 +47,20 @@ TEST(Arch, all)
 
   int expected;
 
+#if (__arm__ || __aarch64__)
+
   expected = (strstr(flags, " neon ") || strstr(flags, " asimd ")) ? 1 : 0;
   EXPECT_EQ(expected, ceph_arch_neon);
 
+#endif
+#if (__aarch64__)
+
+  expected = strstr(flags, " crc32 ") ? 1 : 0;
+  EXPECT_EQ(expected, ceph_arch_aarch64_crc32);
+
+#endif
+#if (__x86_64__)
+
   expected = strstr(flags, " pclmulqdq ") ? 1 : 0;
   EXPECT_EQ(expected, ceph_arch_intel_pclmul);
 
@@ -67,6 +78,9 @@ TEST(Arch, all)
 
   expected = strstr(flags, " sse2 ") ? 1 : 0;
   EXPECT_EQ(expected, ceph_arch_intel_sse2);
+
+#endif
+
 #endif
 }
 
-- 
2.1.0


             reply	other threads:[~2015-01-26 14:21 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-26 14:20 Yazen Ghannam [this message]
2015-02-03 19:31 ` [PATCH] crc32c: add aarch64 optimized crc32c implementation Yazen Ghannam
2015-02-03 20:05   ` Gregory Farnum
2015-02-03 20:37     ` Yazen Ghannam

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1422282035-30121-1-git-send-email-yazen.ghannam@linaro.org \
    --to=yazen.ghannam@linaro.org \
    --cc=ceph-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.