bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup
@ 2019-10-08 10:23 Magnus Karlsson
  2019-10-08 19:28 ` John Fastabend
  0 siblings, 1 reply; 7+ messages in thread
From: Magnus Karlsson @ 2019-10-08 10:23 UTC (permalink / raw)
  To: magnus.karlsson, bjorn.topel, ast, daniel, netdev; +Cc: jonathan.lemon, bpf

When the need_wakeup flag was added to AF_XDP, the format of the
XDP_MMAP_OFFSETS getsockopt was extended. Code was added to the kernel
to take care of compatibility issues arrising from running
applications using any of the two formats. However, libbpf was not
extended to take care of the case when the application/libbpf uses the
new format but the kernel only supports the old format. This patch
adds support in libbpf for parsing the old format, before the
need_wakeup flag was added, and emulating a set of static need_wakeup
flags that will always work for the application.

Fixes: a4500432c2587cb2a ("libbpf: add support for need_wakeup flag in AF_XDP part")
Reported-by: Eloy Degen <degeneloy@gmail.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
---
 tools/lib/bpf/xsk.c | 109 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 78 insertions(+), 31 deletions(-)

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index a902838..46f9687 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -44,6 +44,25 @@
  #define PF_XDP AF_XDP
 #endif
 
+#define is_mmap_offsets_v1(optlen) \
+	((optlen) == sizeof(struct xdp_mmap_offsets_v1))
+
+#define get_prod_off(ring) \
+	(is_mmap_offsets_v1(optlen) ? \
+	 ((struct xdp_mmap_offsets_v1 *)&off)->ring.producer : \
+	 off.ring.producer)
+#define get_cons_off(ring) \
+	(is_mmap_offsets_v1(optlen) ? \
+	 ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer : \
+	 off.ring.consumer)
+#define get_desc_off(ring) \
+	(is_mmap_offsets_v1(optlen) ? \
+	 ((struct xdp_mmap_offsets_v1 *)&off)->ring.desc : off.ring.desc)
+#define get_flags_off(ring) \
+	(is_mmap_offsets_v1(optlen) ? \
+	 ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer + sizeof(u32) : \
+	 off.ring.flags)
+
 struct xsk_umem {
 	struct xsk_ring_prod *fill;
 	struct xsk_ring_cons *comp;
@@ -73,6 +92,20 @@ struct xsk_nl_info {
 	int fd;
 };
 
+struct xdp_ring_offset_v1 {
+	__u64 producer;
+	__u64 consumer;
+	__u64 desc;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_mmap_offsets_v1 {
+	struct xdp_ring_offset_v1 rx;
+	struct xdp_ring_offset_v1 tx;
+	struct xdp_ring_offset_v1 fr;
+	struct xdp_ring_offset_v1 cr;
+};
+
 int xsk_umem__fd(const struct xsk_umem *umem)
 {
 	return umem ? umem->fd : -EINVAL;
@@ -196,7 +229,8 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 		goto out_socket;
 	}
 
-	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+	map = mmap(NULL, get_desc_off(fr) +
+		   umem->config.fill_size * sizeof(__u64),
 		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
 		   XDP_UMEM_PGOFF_FILL_RING);
 	if (map == MAP_FAILED) {
@@ -207,13 +241,18 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 	umem->fill = fill;
 	fill->mask = umem->config.fill_size - 1;
 	fill->size = umem->config.fill_size;
-	fill->producer = map + off.fr.producer;
-	fill->consumer = map + off.fr.consumer;
-	fill->flags = map + off.fr.flags;
-	fill->ring = map + off.fr.desc;
+	fill->producer = map + get_prod_off(fr);
+	fill->consumer = map + get_cons_off(fr);
+	fill->flags = map + get_flags_off(fr);
+	fill->ring = map + get_desc_off(fr);
 	fill->cached_cons = umem->config.fill_size;
 
-	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+	if (is_mmap_offsets_v1(optlen))
+		/* Initialized the flag to never signal wakeup */
+		*fill->flags = 0;
+
+	map = mmap(NULL, get_desc_off(cr) +
+		   umem->config.comp_size * sizeof(__u64),
 		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
 		   XDP_UMEM_PGOFF_COMPLETION_RING);
 	if (map == MAP_FAILED) {
@@ -224,16 +263,16 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 	umem->comp = comp;
 	comp->mask = umem->config.comp_size - 1;
 	comp->size = umem->config.comp_size;
-	comp->producer = map + off.cr.producer;
-	comp->consumer = map + off.cr.consumer;
-	comp->flags = map + off.cr.flags;
-	comp->ring = map + off.cr.desc;
+	comp->producer = map + get_prod_off(cr);
+	comp->consumer = map + get_cons_off(cr);
+	comp->flags = map + get_flags_off(cr);
+	comp->ring = map + get_desc_off(cr);
 
 	*umem_ptr = umem;
 	return 0;
 
 out_mmap:
-	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+	munmap(map, get_desc_off(fr) + umem->config.fill_size * sizeof(__u64));
 out_socket:
 	close(umem->fd);
 out_umem_alloc:
@@ -558,7 +597,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 	}
 
 	if (rx) {
-		rx_map = mmap(NULL, off.rx.desc +
+		rx_map = mmap(NULL, get_desc_off(rx) +
 			      xsk->config.rx_size * sizeof(struct xdp_desc),
 			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
 			      xsk->fd, XDP_PGOFF_RX_RING);
@@ -569,15 +608,15 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 
 		rx->mask = xsk->config.rx_size - 1;
 		rx->size = xsk->config.rx_size;
-		rx->producer = rx_map + off.rx.producer;
-		rx->consumer = rx_map + off.rx.consumer;
-		rx->flags = rx_map + off.rx.flags;
-		rx->ring = rx_map + off.rx.desc;
+		rx->producer = rx_map + get_prod_off(rx);
+		rx->consumer = rx_map + get_cons_off(rx);
+		rx->flags = rx_map + get_flags_off(rx);
+		rx->ring = rx_map + get_desc_off(rx);
 	}
 	xsk->rx = rx;
 
 	if (tx) {
-		tx_map = mmap(NULL, off.tx.desc +
+		tx_map = mmap(NULL, get_desc_off(tx) +
 			      xsk->config.tx_size * sizeof(struct xdp_desc),
 			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
 			      xsk->fd, XDP_PGOFF_TX_RING);
@@ -588,11 +627,15 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 
 		tx->mask = xsk->config.tx_size - 1;
 		tx->size = xsk->config.tx_size;
-		tx->producer = tx_map + off.tx.producer;
-		tx->consumer = tx_map + off.tx.consumer;
-		tx->flags = tx_map + off.tx.flags;
-		tx->ring = tx_map + off.tx.desc;
+		tx->producer = tx_map + get_prod_off(tx);
+		tx->consumer = tx_map + get_cons_off(tx);
+		tx->flags = tx_map + get_flags_off(tx);
+		tx->ring = tx_map + get_desc_off(tx);
 		tx->cached_cons = xsk->config.tx_size;
+
+		if (is_mmap_offsets_v1(optlen))
+			/* Initialized the flag to always signal wakeup */
+			*tx->flags = XDP_RING_NEED_WAKEUP;
 	}
 	xsk->tx = tx;
 
@@ -620,11 +663,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 
 out_mmap_tx:
 	if (tx)
-		munmap(tx_map, off.tx.desc +
+		munmap(tx_map, get_desc_off(tx) +
 		       xsk->config.tx_size * sizeof(struct xdp_desc));
 out_mmap_rx:
 	if (rx)
-		munmap(rx_map, off.rx.desc +
+		munmap(rx_map, get_desc_off(rx) +
 		       xsk->config.rx_size * sizeof(struct xdp_desc));
 out_socket:
 	if (--umem->refcount)
@@ -649,10 +692,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
 	optlen = sizeof(off);
 	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 	if (!err) {
-		munmap(umem->fill->ring - off.fr.desc,
-		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
-		munmap(umem->comp->ring - off.cr.desc,
-		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
+		munmap(umem->fill->ring - get_desc_off(fr),
+		       get_desc_off(fr) +
+		       umem->config.fill_size * sizeof(__u64));
+		munmap(umem->comp->ring - get_desc_off(cr),
+		       get_desc_off(cr) +
+		       umem->config.comp_size * sizeof(__u64));
 	}
 
 	close(umem->fd);
@@ -680,12 +725,14 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 	if (!err) {
 		if (xsk->rx) {
-			munmap(xsk->rx->ring - off.rx.desc,
-			       off.rx.desc + xsk->config.rx_size * desc_sz);
+			munmap(xsk->rx->ring - get_desc_off(rx),
+			       get_desc_off(rx) +
+			       xsk->config.rx_size * desc_sz);
 		}
 		if (xsk->tx) {
-			munmap(xsk->tx->ring - off.tx.desc,
-			       off.tx.desc + xsk->config.tx_size * desc_sz);
+			munmap(xsk->tx->ring - get_desc_off(tx),
+			       get_desc_off(tx) +
+			       xsk->config.tx_size * desc_sz);
 		}
 
 	}
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* RE: [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup
  2019-10-08 10:23 [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup Magnus Karlsson
@ 2019-10-08 19:28 ` John Fastabend
  2019-10-09  7:31   ` Magnus Karlsson
  0 siblings, 1 reply; 7+ messages in thread
From: John Fastabend @ 2019-10-08 19:28 UTC (permalink / raw)
  To: Magnus Karlsson, magnus.karlsson, bjorn.topel, ast, daniel, netdev
  Cc: jonathan.lemon, bpf

Magnus Karlsson wrote:
> When the need_wakeup flag was added to AF_XDP, the format of the
> XDP_MMAP_OFFSETS getsockopt was extended. Code was added to the kernel
> to take care of compatibility issues arrising from running
> applications using any of the two formats. However, libbpf was not
> extended to take care of the case when the application/libbpf uses the
> new format but the kernel only supports the old format. This patch
> adds support in libbpf for parsing the old format, before the
> need_wakeup flag was added, and emulating a set of static need_wakeup
> flags that will always work for the application.
> 
> Fixes: a4500432c2587cb2a ("libbpf: add support for need_wakeup flag in AF_XDP part")
> Reported-by: Eloy Degen <degeneloy@gmail.com>
> Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
> ---
>  tools/lib/bpf/xsk.c | 109 +++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 78 insertions(+), 31 deletions(-)
> 
> diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> index a902838..46f9687 100644
> --- a/tools/lib/bpf/xsk.c
> +++ b/tools/lib/bpf/xsk.c
> @@ -44,6 +44,25 @@
>   #define PF_XDP AF_XDP
>  #endif
>  
> +#define is_mmap_offsets_v1(optlen) \
> +	((optlen) == sizeof(struct xdp_mmap_offsets_v1))
> +
> +#define get_prod_off(ring) \
> +	(is_mmap_offsets_v1(optlen) ? \
> +	 ((struct xdp_mmap_offsets_v1 *)&off)->ring.producer : \
> +	 off.ring.producer)
> +#define get_cons_off(ring) \
> +	(is_mmap_offsets_v1(optlen) ? \
> +	 ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer : \
> +	 off.ring.consumer)
> +#define get_desc_off(ring) \
> +	(is_mmap_offsets_v1(optlen) ? \
> +	 ((struct xdp_mmap_offsets_v1 *)&off)->ring.desc : off.ring.desc)
> +#define get_flags_off(ring) \
> +	(is_mmap_offsets_v1(optlen) ? \
> +	 ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer + sizeof(u32) : \
> +	 off.ring.flags)
> +
 
It seems the only thing added was flags right? If so seems we
only need the last one there, get_flags_off(). I think it would
be a bit cleaner to just use the macros where its actually
needed IMO.

Thanks,
John

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup
  2019-10-08 19:28 ` John Fastabend
@ 2019-10-09  7:31   ` Magnus Karlsson
  2019-10-11 20:58     ` John Fastabend
  0 siblings, 1 reply; 7+ messages in thread
From: Magnus Karlsson @ 2019-10-09  7:31 UTC (permalink / raw)
  To: John Fastabend
  Cc: Magnus Karlsson, Björn Töpel, Alexei Starovoitov,
	Daniel Borkmann, Network Development, Jonathan Lemon, bpf

On Tue, Oct 8, 2019 at 9:29 PM John Fastabend <john.fastabend@gmail.com> wrote:
>
> Magnus Karlsson wrote:
> > When the need_wakeup flag was added to AF_XDP, the format of the
> > XDP_MMAP_OFFSETS getsockopt was extended. Code was added to the kernel
> > to take care of compatibility issues arrising from running
> > applications using any of the two formats. However, libbpf was not
> > extended to take care of the case when the application/libbpf uses the
> > new format but the kernel only supports the old format. This patch
> > adds support in libbpf for parsing the old format, before the
> > need_wakeup flag was added, and emulating a set of static need_wakeup
> > flags that will always work for the application.
> >
> > Fixes: a4500432c2587cb2a ("libbpf: add support for need_wakeup flag in AF_XDP part")
> > Reported-by: Eloy Degen <degeneloy@gmail.com>
> > Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
> > ---
> >  tools/lib/bpf/xsk.c | 109 +++++++++++++++++++++++++++++++++++++---------------
> >  1 file changed, 78 insertions(+), 31 deletions(-)
> >
> > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > index a902838..46f9687 100644
> > --- a/tools/lib/bpf/xsk.c
> > +++ b/tools/lib/bpf/xsk.c
> > @@ -44,6 +44,25 @@
> >   #define PF_XDP AF_XDP
> >  #endif
> >
> > +#define is_mmap_offsets_v1(optlen) \
> > +     ((optlen) == sizeof(struct xdp_mmap_offsets_v1))
> > +
> > +#define get_prod_off(ring) \
> > +     (is_mmap_offsets_v1(optlen) ? \
> > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.producer : \
> > +      off.ring.producer)
> > +#define get_cons_off(ring) \
> > +     (is_mmap_offsets_v1(optlen) ? \
> > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer : \
> > +      off.ring.consumer)
> > +#define get_desc_off(ring) \
> > +     (is_mmap_offsets_v1(optlen) ? \
> > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.desc : off.ring.desc)
> > +#define get_flags_off(ring) \
> > +     (is_mmap_offsets_v1(optlen) ? \
> > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer + sizeof(u32) : \
> > +      off.ring.flags)
> > +
>
> It seems the only thing added was flags right? If so seems we
> only need the last one there, get_flags_off(). I think it would
> be a bit cleaner to just use the macros where its actually
> needed IMO.

The flag is indeed added to the end of struct xdp_ring_offsets, but
this struct is replicated four times in the struct xdp_mmap_offsets,
so the added flags are present four time there at different offsets.
This means that 3 out of the 4 prod, cons and desc variables are
located at different offsets from the original. Do not know how I can
get rid of these macros in this case. But it might just be me not
seeing it, of course :-).

Thanks: Magnus

> Thanks,
> John

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup
  2019-10-09  7:31   ` Magnus Karlsson
@ 2019-10-11 20:58     ` John Fastabend
  2019-10-12 16:54       ` Alexei Starovoitov
  0 siblings, 1 reply; 7+ messages in thread
From: John Fastabend @ 2019-10-11 20:58 UTC (permalink / raw)
  To: Magnus Karlsson, John Fastabend
  Cc: Magnus Karlsson, Björn Töpel, Alexei Starovoitov,
	Daniel Borkmann, Network Development, Jonathan Lemon, bpf

Magnus Karlsson wrote:
> On Tue, Oct 8, 2019 at 9:29 PM John Fastabend <john.fastabend@gmail.com> wrote:
> >
> > Magnus Karlsson wrote:
> > > When the need_wakeup flag was added to AF_XDP, the format of the
> > > XDP_MMAP_OFFSETS getsockopt was extended. Code was added to the kernel
> > > to take care of compatibility issues arrising from running
> > > applications using any of the two formats. However, libbpf was not
> > > extended to take care of the case when the application/libbpf uses the
> > > new format but the kernel only supports the old format. This patch
> > > adds support in libbpf for parsing the old format, before the
> > > need_wakeup flag was added, and emulating a set of static need_wakeup
> > > flags that will always work for the application.
> > >
> > > Fixes: a4500432c2587cb2a ("libbpf: add support for need_wakeup flag in AF_XDP part")
> > > Reported-by: Eloy Degen <degeneloy@gmail.com>
> > > Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
> > > ---
> > >  tools/lib/bpf/xsk.c | 109 +++++++++++++++++++++++++++++++++++++---------------
> > >  1 file changed, 78 insertions(+), 31 deletions(-)
> > >
> > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > > index a902838..46f9687 100644
> > > --- a/tools/lib/bpf/xsk.c
> > > +++ b/tools/lib/bpf/xsk.c
> > > @@ -44,6 +44,25 @@
> > >   #define PF_XDP AF_XDP
> > >  #endif
> > >
> > > +#define is_mmap_offsets_v1(optlen) \
> > > +     ((optlen) == sizeof(struct xdp_mmap_offsets_v1))
> > > +
> > > +#define get_prod_off(ring) \
> > > +     (is_mmap_offsets_v1(optlen) ? \
> > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.producer : \
> > > +      off.ring.producer)
> > > +#define get_cons_off(ring) \
> > > +     (is_mmap_offsets_v1(optlen) ? \
> > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer : \
> > > +      off.ring.consumer)
> > > +#define get_desc_off(ring) \
> > > +     (is_mmap_offsets_v1(optlen) ? \
> > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.desc : off.ring.desc)
> > > +#define get_flags_off(ring) \
> > > +     (is_mmap_offsets_v1(optlen) ? \
> > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer + sizeof(u32) : \
> > > +      off.ring.flags)
> > > +
> >
> > It seems the only thing added was flags right? If so seems we
> > only need the last one there, get_flags_off(). I think it would
> > be a bit cleaner to just use the macros where its actually
> > needed IMO.
> 
> The flag is indeed added to the end of struct xdp_ring_offsets, but
> this struct is replicated four times in the struct xdp_mmap_offsets,
> so the added flags are present four time there at different offsets.
> This means that 3 out of the 4 prod, cons and desc variables are
> located at different offsets from the original. Do not know how I can
> get rid of these macros in this case. But it might just be me not
> seeing it, of course :-).

Not sure I like it but not seeing a cleaner solution that doesn't cause
larger changes so...

Acked-by: John Fastabend <john.fastabend.gmail.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup
  2019-10-11 20:58     ` John Fastabend
@ 2019-10-12 16:54       ` Alexei Starovoitov
  2019-10-14  8:26         ` Magnus Karlsson
  0 siblings, 1 reply; 7+ messages in thread
From: Alexei Starovoitov @ 2019-10-12 16:54 UTC (permalink / raw)
  To: John Fastabend
  Cc: Magnus Karlsson, Magnus Karlsson, Björn Töpel,
	Alexei Starovoitov, Daniel Borkmann, Network Development,
	Jonathan Lemon, bpf

On Fri, Oct 11, 2019 at 1:58 PM John Fastabend <john.fastabend@gmail.com> wrote:
>
> Magnus Karlsson wrote:
> > On Tue, Oct 8, 2019 at 9:29 PM John Fastabend <john.fastabend@gmail.com> wrote:
> > >
> > > Magnus Karlsson wrote:
> > > > When the need_wakeup flag was added to AF_XDP, the format of the
> > > > XDP_MMAP_OFFSETS getsockopt was extended. Code was added to the kernel
> > > > to take care of compatibility issues arrising from running
> > > > applications using any of the two formats. However, libbpf was not
> > > > extended to take care of the case when the application/libbpf uses the
> > > > new format but the kernel only supports the old format. This patch
> > > > adds support in libbpf for parsing the old format, before the
> > > > need_wakeup flag was added, and emulating a set of static need_wakeup
> > > > flags that will always work for the application.
> > > >
> > > > Fixes: a4500432c2587cb2a ("libbpf: add support for need_wakeup flag in AF_XDP part")
> > > > Reported-by: Eloy Degen <degeneloy@gmail.com>
> > > > Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
> > > > ---
> > > >  tools/lib/bpf/xsk.c | 109 +++++++++++++++++++++++++++++++++++++---------------
> > > >  1 file changed, 78 insertions(+), 31 deletions(-)
> > > >
> > > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > > > index a902838..46f9687 100644
> > > > --- a/tools/lib/bpf/xsk.c
> > > > +++ b/tools/lib/bpf/xsk.c
> > > > @@ -44,6 +44,25 @@
> > > >   #define PF_XDP AF_XDP
> > > >  #endif
> > > >
> > > > +#define is_mmap_offsets_v1(optlen) \
> > > > +     ((optlen) == sizeof(struct xdp_mmap_offsets_v1))
> > > > +
> > > > +#define get_prod_off(ring) \
> > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.producer : \
> > > > +      off.ring.producer)
> > > > +#define get_cons_off(ring) \
> > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer : \
> > > > +      off.ring.consumer)
> > > > +#define get_desc_off(ring) \
> > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.desc : off.ring.desc)
> > > > +#define get_flags_off(ring) \
> > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer + sizeof(u32) : \
> > > > +      off.ring.flags)
> > > > +
> > >
> > > It seems the only thing added was flags right? If so seems we
> > > only need the last one there, get_flags_off(). I think it would
> > > be a bit cleaner to just use the macros where its actually
> > > needed IMO.
> >
> > The flag is indeed added to the end of struct xdp_ring_offsets, but
> > this struct is replicated four times in the struct xdp_mmap_offsets,
> > so the added flags are present four time there at different offsets.
> > This means that 3 out of the 4 prod, cons and desc variables are
> > located at different offsets from the original. Do not know how I can
> > get rid of these macros in this case. But it might just be me not
> > seeing it, of course :-).
>
> Not sure I like it but not seeing a cleaner solution that doesn't cause
> larger changes so...
>
> Acked-by: John Fastabend <john.fastabend.gmail.com>

Frankly above hack looks awful.
What is _v1 ?! Is it going to be _v2?
What was _v0?
I also don't see how this is a fix. imo bpf-next is more appropriate
and if "large changes" are necessary then go ahead and do them.
We're not doing fixes-branches in libbpf.
The library always moves forward and compatible with all older kernels.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup
  2019-10-12 16:54       ` Alexei Starovoitov
@ 2019-10-14  8:26         ` Magnus Karlsson
  2019-10-16  3:42           ` Alexei Starovoitov
  0 siblings, 1 reply; 7+ messages in thread
From: Magnus Karlsson @ 2019-10-14  8:26 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: John Fastabend, Magnus Karlsson, Björn Töpel,
	Alexei Starovoitov, Daniel Borkmann, Network Development,
	Jonathan Lemon, bpf

On Sat, Oct 12, 2019 at 6:55 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Oct 11, 2019 at 1:58 PM John Fastabend <john.fastabend@gmail.com> wrote:
> >
> > Magnus Karlsson wrote:
> > > On Tue, Oct 8, 2019 at 9:29 PM John Fastabend <john.fastabend@gmail.com> wrote:
> > > >
> > > > Magnus Karlsson wrote:
> > > > > When the need_wakeup flag was added to AF_XDP, the format of the
> > > > > XDP_MMAP_OFFSETS getsockopt was extended. Code was added to the kernel
> > > > > to take care of compatibility issues arrising from running
> > > > > applications using any of the two formats. However, libbpf was not
> > > > > extended to take care of the case when the application/libbpf uses the
> > > > > new format but the kernel only supports the old format. This patch
> > > > > adds support in libbpf for parsing the old format, before the
> > > > > need_wakeup flag was added, and emulating a set of static need_wakeup
> > > > > flags that will always work for the application.
> > > > >
> > > > > Fixes: a4500432c2587cb2a ("libbpf: add support for need_wakeup flag in AF_XDP part")
> > > > > Reported-by: Eloy Degen <degeneloy@gmail.com>
> > > > > Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
> > > > > ---
> > > > >  tools/lib/bpf/xsk.c | 109 +++++++++++++++++++++++++++++++++++++---------------
> > > > >  1 file changed, 78 insertions(+), 31 deletions(-)
> > > > >
> > > > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > > > > index a902838..46f9687 100644
> > > > > --- a/tools/lib/bpf/xsk.c
> > > > > +++ b/tools/lib/bpf/xsk.c
> > > > > @@ -44,6 +44,25 @@
> > > > >   #define PF_XDP AF_XDP
> > > > >  #endif
> > > > >
> > > > > +#define is_mmap_offsets_v1(optlen) \
> > > > > +     ((optlen) == sizeof(struct xdp_mmap_offsets_v1))
> > > > > +
> > > > > +#define get_prod_off(ring) \
> > > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.producer : \
> > > > > +      off.ring.producer)
> > > > > +#define get_cons_off(ring) \
> > > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer : \
> > > > > +      off.ring.consumer)
> > > > > +#define get_desc_off(ring) \
> > > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.desc : off.ring.desc)
> > > > > +#define get_flags_off(ring) \
> > > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer + sizeof(u32) : \
> > > > > +      off.ring.flags)
> > > > > +
> > > >
> > > > It seems the only thing added was flags right? If so seems we
> > > > only need the last one there, get_flags_off(). I think it would
> > > > be a bit cleaner to just use the macros where its actually
> > > > needed IMO.
> > >
> > > The flag is indeed added to the end of struct xdp_ring_offsets, but
> > > this struct is replicated four times in the struct xdp_mmap_offsets,
> > > so the added flags are present four time there at different offsets.
> > > This means that 3 out of the 4 prod, cons and desc variables are
> > > located at different offsets from the original. Do not know how I can
> > > get rid of these macros in this case. But it might just be me not
> > > seeing it, of course :-).
> >
> > Not sure I like it but not seeing a cleaner solution that doesn't cause
> > larger changes so...
> >
> > Acked-by: John Fastabend <john.fastabend.gmail.com>
>
> Frankly above hack looks awful.
> What is _v1 ?! Is it going to be _v2?
> What was _v0?
> I also don't see how this is a fix. imo bpf-next is more appropriate
> and if "large changes" are necessary then go ahead and do them.
> We're not doing fixes-branches in libbpf.
> The library always moves forward and compatible with all older kernels.

The fix in this patch is about making libbpf compatible with older
kernels (<=5.3). It is not at the moment in bpf. The current code in
bpf and bpf-next only works with the 5.3-rc kernels, which I think is
bad and a bug. But please let me know if this is bpf or bpf-next and I
will adjust accordingly.

As for the hack, I do not like it and neither did John, but no one
managed to come up with something better. But if this is a fix for bpf
(will not work at all for bpf-next for compatibility reasons), we
could potentially do something like this, as this is only present in
the 5.4-rc series. Currently the extension of the XDP_MMAP_OFFSETS in
5.4-rc is from:

struct xdp_ring_offset {
       __u64 producer;
       __u64 consumer;
       __u64 desc;
};

to:

struct xdp_ring_offset {
       __u64 producer;
       __u64 consumer;
       __u64 desc;
       __u64 flags;
};

while the overall struct provided to the getsockopt stayed the same:

struct xdp_mmap_offsets {
       struct xdp_ring_offset rx;
       struct xdp_ring_offset tx;
       struct xdp_ring_offset fr;
       struct xdp_ring_offset cr;
};

If we instead keep the original struct xdp_ring_offset and append the
new flag offsets at the end of struct xdp_mmap_offsets to something
like this:

struct xdp_mmap_offsets {
       struct xdp_ring_offset rx;
       struct xdp_ring_offset tx;
       struct xdp_ring_offset fr;
       struct xdp_ring_offset cr;
       __u64 rx_flag;
       __u64 tx_flag;
       __u64 fr_flag;
       __u64 cr_flag;
};

the implementation in both the kernel and libbpf becomes much cleaner.
The only change needed in libbpf and the kernel is to introduce a new
function for reading the flag field. The other offset reading and
setting code would stay the same, in contrast to the current scheme.
None of the current patch's macro crap would be needed. This would
also simplify the kernel implementation, improving maintainability.
But this would only be possible to change in bpf. So let me know what
you think. Do not know what the policy is here, so need some advice
please.

Cheers: Magnus

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup
  2019-10-14  8:26         ` Magnus Karlsson
@ 2019-10-16  3:42           ` Alexei Starovoitov
  0 siblings, 0 replies; 7+ messages in thread
From: Alexei Starovoitov @ 2019-10-16  3:42 UTC (permalink / raw)
  To: Magnus Karlsson
  Cc: John Fastabend, Magnus Karlsson, Björn Töpel,
	Alexei Starovoitov, Daniel Borkmann, Network Development,
	Jonathan Lemon, bpf

On Mon, Oct 14, 2019 at 10:26:10AM +0200, Magnus Karlsson wrote:
> On Sat, Oct 12, 2019 at 6:55 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Fri, Oct 11, 2019 at 1:58 PM John Fastabend <john.fastabend@gmail.com> wrote:
> > >
> > > Magnus Karlsson wrote:
> > > > On Tue, Oct 8, 2019 at 9:29 PM John Fastabend <john.fastabend@gmail.com> wrote:
> > > > >
> > > > > Magnus Karlsson wrote:
> > > > > > When the need_wakeup flag was added to AF_XDP, the format of the
> > > > > > XDP_MMAP_OFFSETS getsockopt was extended. Code was added to the kernel
> > > > > > to take care of compatibility issues arrising from running
> > > > > > applications using any of the two formats. However, libbpf was not
> > > > > > extended to take care of the case when the application/libbpf uses the
> > > > > > new format but the kernel only supports the old format. This patch
> > > > > > adds support in libbpf for parsing the old format, before the
> > > > > > need_wakeup flag was added, and emulating a set of static need_wakeup
> > > > > > flags that will always work for the application.
> > > > > >
> > > > > > Fixes: a4500432c2587cb2a ("libbpf: add support for need_wakeup flag in AF_XDP part")
> > > > > > Reported-by: Eloy Degen <degeneloy@gmail.com>
> > > > > > Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
> > > > > > ---
> > > > > >  tools/lib/bpf/xsk.c | 109 +++++++++++++++++++++++++++++++++++++---------------
> > > > > >  1 file changed, 78 insertions(+), 31 deletions(-)
> > > > > >
> > > > > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > > > > > index a902838..46f9687 100644
> > > > > > --- a/tools/lib/bpf/xsk.c
> > > > > > +++ b/tools/lib/bpf/xsk.c
> > > > > > @@ -44,6 +44,25 @@
> > > > > >   #define PF_XDP AF_XDP
> > > > > >  #endif
> > > > > >
> > > > > > +#define is_mmap_offsets_v1(optlen) \
> > > > > > +     ((optlen) == sizeof(struct xdp_mmap_offsets_v1))
> > > > > > +
> > > > > > +#define get_prod_off(ring) \
> > > > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.producer : \
> > > > > > +      off.ring.producer)
> > > > > > +#define get_cons_off(ring) \
> > > > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer : \
> > > > > > +      off.ring.consumer)
> > > > > > +#define get_desc_off(ring) \
> > > > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.desc : off.ring.desc)
> > > > > > +#define get_flags_off(ring) \
> > > > > > +     (is_mmap_offsets_v1(optlen) ? \
> > > > > > +      ((struct xdp_mmap_offsets_v1 *)&off)->ring.consumer + sizeof(u32) : \
> > > > > > +      off.ring.flags)
> > > > > > +
> > > > >
> > > > > It seems the only thing added was flags right? If so seems we
> > > > > only need the last one there, get_flags_off(). I think it would
> > > > > be a bit cleaner to just use the macros where its actually
> > > > > needed IMO.
> > > >
> > > > The flag is indeed added to the end of struct xdp_ring_offsets, but
> > > > this struct is replicated four times in the struct xdp_mmap_offsets,
> > > > so the added flags are present four time there at different offsets.
> > > > This means that 3 out of the 4 prod, cons and desc variables are
> > > > located at different offsets from the original. Do not know how I can
> > > > get rid of these macros in this case. But it might just be me not
> > > > seeing it, of course :-).
> > >
> > > Not sure I like it but not seeing a cleaner solution that doesn't cause
> > > larger changes so...
> > >
> > > Acked-by: John Fastabend <john.fastabend.gmail.com>
> >
> > Frankly above hack looks awful.
> > What is _v1 ?! Is it going to be _v2?
> > What was _v0?
> > I also don't see how this is a fix. imo bpf-next is more appropriate
> > and if "large changes" are necessary then go ahead and do them.
> > We're not doing fixes-branches in libbpf.
> > The library always moves forward and compatible with all older kernels.
> 
> The fix in this patch is about making libbpf compatible with older
> kernels (<=5.3). It is not at the moment in bpf. The current code in
> bpf and bpf-next only works with the 5.3-rc kernels, which I think is
> bad and a bug. But please let me know if this is bpf or bpf-next and I
> will adjust accordingly.
> 
> As for the hack, I do not like it and neither did John, but no one
> managed to come up with something better. But if this is a fix for bpf
> (will not work at all for bpf-next for compatibility reasons), we
> could potentially do something like this, as this is only present in
> the 5.4-rc series.

Practically there is no bpf tree for libbpf.
bpf-next is the only place where most of the fixes to libbpf should go.
libbpf must be compatible with _all_ older kernels.
We have no plans of branching previously released libbpf.
If there is a bug in libbpf 0.0.5 (current latest and released)
then it will be fixed in libbpf 0.0.6.
So please target your fixes to bpf-next tree and upcoming libbpf release.
Please make sure that your fixes work with kernel 5.3 and 5.4-rc.

There are two exceptions where libbpf fixes should actually be in bpf tree:
- fixes to libbpf that are necessary to fix perf builds in bpf tree.
- fixes to libbpf that are necessary to support selftest/bpf/ in bpf tree.
Because these two are actually kernel tree specific.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-10-16  3:42 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-08 10:23 [PATCH bpf] libbpf: fix compatibility for kernels without need_wakeup Magnus Karlsson
2019-10-08 19:28 ` John Fastabend
2019-10-09  7:31   ` Magnus Karlsson
2019-10-11 20:58     ` John Fastabend
2019-10-12 16:54       ` Alexei Starovoitov
2019-10-14  8:26         ` Magnus Karlsson
2019-10-16  3:42           ` Alexei Starovoitov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).