All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Björn Töpel" <bjorn.topel@gmail.com>
To: ast@kernel.org, daniel@iogearbox.net, netdev@vger.kernel.org,
	bpf@vger.kernel.org
Cc: "Björn Töpel" <bjorn.topel@intel.com>,
	magnus.karlsson@intel.com, jonathan.lemon@gmail.com,
	maximmi@nvidia.com, andrii@kernel.org, toke@redhat.com,
	will@kernel.org, paulmck@kernel.org, stern@rowland.harvard.edu
Subject: [PATCH bpf-next v2 2/2] libbpf, xsk: add libbpf_smp_store_release libbpf_smp_load_acquire
Date: Fri,  5 Mar 2021 10:41:13 +0100	[thread overview]
Message-ID: <20210305094113.413544-3-bjorn.topel@gmail.com> (raw)
In-Reply-To: <20210305094113.413544-1-bjorn.topel@gmail.com>

From: Björn Töpel <bjorn.topel@intel.com>

Now that the AF_XDP rings have load-acquire/store-release semantics,
move libbpf to that as well.

The library-internal libbpf_smp_{load_acquire,store_release} are only
valid for 32-bit words on ARM64.

Also, remove the barriers that are no longer in use.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 tools/lib/bpf/libbpf_util.h | 72 +++++++++++++++++++++++++------------
 tools/lib/bpf/xsk.h         | 17 +++------
 2 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index 59c779c5790c..94a0d7bb6f3c 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -5,6 +5,7 @@
 #define __LIBBPF_LIBBPF_UTIL_H
 
 #include <stdbool.h>
+#include <linux/compiler.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -15,29 +16,56 @@ extern "C" {
  * application that uses libbpf.
  */
 #if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_rmb() asm volatile("" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("" : : : "memory")
-# define libbpf_smp_mb() \
-	asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
-/* Hinders stores to be observed before older loads. */
-# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
+# define libbpf_smp_store_release(p, v)					\
+	do {								\
+		asm volatile("" : : : "memory");			\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		asm volatile("" : : : "memory");			\
+		___p1;							\
+	})
 #elif defined(__aarch64__)
-# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#elif defined(__arm__)
-/* These are only valid for armv7 and above */
-# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#else
-/* Architecture missing native barrier functions. */
-# define libbpf_smp_rmb() __sync_synchronize()
-# define libbpf_smp_wmb() __sync_synchronize()
-# define libbpf_smp_mb() __sync_synchronize()
-# define libbpf_smp_rwmb() __sync_synchronize()
+# define libbpf_smp_store_release(p, v)					\
+		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1;					\
+		asm volatile ("ldar %w0, %1"				\
+			      : "=r" (___p1) : "Q" (*p) : "memory");	\
+		__p1;							\
+	})
+#elif defined(__riscv)
+# define libbpf_smp_store_release(p, v)					\
+	do {								\
+		asm volatile ("fence rw,w" : : : "memory");		\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		asm volatile ("fence r,rw" : : : "memory");		\
+		___p1;							\
+	})
+#endif
+
+#ifndef libbpf_smp_store_release
+#define libbpf_smp_store_release(p, v)					\
+	do {								\
+		__sync_synchronize();					\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+#endif
+
+#ifndef libbpf_smp_load_acquire
+#define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		__sync_synchronize();					\
+		___p1;							\
+	})
 #endif
 
 #ifdef __cplusplus
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index e9f121f5d129..a9fdea87b5cd 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -96,7 +96,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
 	 * this function. Without this optimization it whould have been
 	 * free_entries = r->cached_prod - r->cached_cons + r->size.
 	 */
-	r->cached_cons = *r->consumer + r->size;
+	r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+	r->cached_cons += r->size;
 
 	return r->cached_cons - r->cached_prod;
 }
@@ -106,7 +107,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
 	__u32 entries = r->cached_prod - r->cached_cons;
 
 	if (entries == 0) {
-		r->cached_prod = *r->producer;
+		r->cached_prod = libbpf_smp_load_acquire(r->producer);
 		entries = r->cached_prod - r->cached_cons;
 	}
 
@@ -129,9 +130,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
 	/* Make sure everything has been written to the ring before indicating
 	 * this to the kernel by writing the producer pointer.
 	 */
-	libbpf_smp_wmb();
-
-	*prod->producer += nb;
+	libbpf_smp_store_release(prod->producer, *prod->producer + nb);
 }
 
 static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@@ -139,11 +138,6 @@ static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __
 	__u32 entries = xsk_cons_nb_avail(cons, nb);
 
 	if (entries > 0) {
-		/* Make sure we do not speculatively read the data before
-		 * we have received the packet buffers from the ring.
-		 */
-		libbpf_smp_rmb();
-
 		*idx = cons->cached_cons;
 		cons->cached_cons += entries;
 	}
@@ -161,9 +155,8 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
 	 */
-	libbpf_smp_rwmb();
+	libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
 
-	*cons->consumer += nb;
 }
 
 static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
-- 
2.27.0


  parent reply	other threads:[~2021-03-05  9:42 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-05  9:41 [PATCH bpf-next v2 0/2] load-acquire/store-release barriers for AF_XDP rings Björn Töpel
2021-03-05  9:41 ` [PATCH bpf-next v2 1/2] xsk: update rings for load-acquire/store-release barriers Björn Töpel
2021-03-05  9:41 ` Björn Töpel [this message]
2021-03-05 13:50 ` [PATCH bpf-next v2 0/2] load-acquire/store-release barriers for AF_XDP rings Toke Høiland-Jørgensen
2021-03-07  3:40 ` patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210305094113.413544-3-bjorn.topel@gmail.com \
    --to=bjorn.topel@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bjorn.topel@intel.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=jonathan.lemon@gmail.com \
    --cc=magnus.karlsson@intel.com \
    --cc=maximmi@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=paulmck@kernel.org \
    --cc=stern@rowland.harvard.edu \
    --cc=toke@redhat.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.