All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown
@ 2019-04-30 12:45 Björn Töpel
  2019-04-30 12:45 ` [PATCH bpf 1/2] libbpf: fix invalid munmap call Björn Töpel
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Björn Töpel @ 2019-04-30 12:45 UTC (permalink / raw)
  To: ast, daniel, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson, bpf, u9012063

William found two bugs, when doing socket teardown within the same
process.

The first issue was an invalid munmap call, and the second one was an
invalid XSKMAP cleanup. Both resulted in that the process kept
references to the socket, which was not correctly cleaned up. When a
new socket was created, the bind() call would fail, since the old
socket was still lingering, refusing to give up the queue on the
netdev.

More details can be found in the individual commits.

Thanks,
Björn


Björn Töpel (2):
  libbpf: fix invalid munmap call
  libbpf: proper XSKMAP cleanup

 tools/lib/bpf/xsk.c | 192 +++++++++++++++++++++++---------------------
 1 file changed, 100 insertions(+), 92 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH bpf 1/2] libbpf: fix invalid munmap call
  2019-04-30 12:45 [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown Björn Töpel
@ 2019-04-30 12:45 ` Björn Töpel
  2019-05-01  3:36   ` William Tu
  2019-05-06  8:26   ` Daniel Borkmann
  2019-04-30 12:45 ` [PATCH bpf 2/2] libbpf: proper XSKMAP cleanup Björn Töpel
  2019-04-30 15:38 ` [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown Jonathan Lemon
  2 siblings, 2 replies; 10+ messages in thread
From: Björn Töpel @ 2019-04-30 12:45 UTC (permalink / raw)
  To: ast, daniel, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson, bpf, u9012063

From: Björn Töpel <bjorn.topel@intel.com>

When unmapping the AF_XDP memory regions used for the rings, an
invalid address was passed to the munmap() calls. Instead of passing
the beginning of the memory region, the descriptor region was passed
to munmap.

When the userspace application tried to tear down an AF_XDP socket,
the operation failed and the application would still have a reference
to socket it wished to get rid of.

Reported-by: William Tu <u9012063@gmail.com>
Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 tools/lib/bpf/xsk.c | 77 +++++++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 37 deletions(-)

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 8d0078b65486..af5f310ecca1 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -248,8 +248,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
 	return 0;
 
 out_mmap:
-	munmap(umem->fill,
-	       off.fr.desc + umem->config.fill_size * sizeof(__u64));
+	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
 out_socket:
 	close(umem->fd);
 out_umem_alloc:
@@ -523,11 +522,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
 		       const struct xsk_socket_config *usr_config)
 {
+	void *rx_map = NULL, *tx_map = NULL;
 	struct sockaddr_xdp sxdp = {};
 	struct xdp_mmap_offsets off;
 	struct xsk_socket *xsk;
 	socklen_t optlen;
-	void *map;
 	int err;
 
 	if (!umem || !xsk_ptr || !rx || !tx)
@@ -593,40 +592,40 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 	}
 
 	if (rx) {
-		map = xsk_mmap(NULL, off.rx.desc +
-			       xsk->config.rx_size * sizeof(struct xdp_desc),
-			       PROT_READ | PROT_WRITE,
-			       MAP_SHARED | MAP_POPULATE,
-			       xsk->fd, XDP_PGOFF_RX_RING);
-		if (map == MAP_FAILED) {
+		rx_map = xsk_mmap(NULL, off.rx.desc +
+				  xsk->config.rx_size * sizeof(struct xdp_desc),
+				  PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_POPULATE,
+				  xsk->fd, XDP_PGOFF_RX_RING);
+		if (rx_map == MAP_FAILED) {
 			err = -errno;
 			goto out_socket;
 		}
 
 		rx->mask = xsk->config.rx_size - 1;
 		rx->size = xsk->config.rx_size;
-		rx->producer = map + off.rx.producer;
-		rx->consumer = map + off.rx.consumer;
-		rx->ring = map + off.rx.desc;
+		rx->producer = rx_map + off.rx.producer;
+		rx->consumer = rx_map + off.rx.consumer;
+		rx->ring = rx_map + off.rx.desc;
 	}
 	xsk->rx = rx;
 
 	if (tx) {
-		map = xsk_mmap(NULL, off.tx.desc +
-			       xsk->config.tx_size * sizeof(struct xdp_desc),
-			       PROT_READ | PROT_WRITE,
-			       MAP_SHARED | MAP_POPULATE,
-			       xsk->fd, XDP_PGOFF_TX_RING);
-		if (map == MAP_FAILED) {
+		tx_map = xsk_mmap(NULL, off.tx.desc +
+				  xsk->config.tx_size * sizeof(struct xdp_desc),
+				  PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_POPULATE,
+				  xsk->fd, XDP_PGOFF_TX_RING);
+		if (tx_map == MAP_FAILED) {
 			err = -errno;
 			goto out_mmap_rx;
 		}
 
 		tx->mask = xsk->config.tx_size - 1;
 		tx->size = xsk->config.tx_size;
-		tx->producer = map + off.tx.producer;
-		tx->consumer = map + off.tx.consumer;
-		tx->ring = map + off.tx.desc;
+		tx->producer = tx_map + off.tx.producer;
+		tx->consumer = tx_map + off.tx.consumer;
+		tx->ring = tx_map + off.tx.desc;
 		tx->cached_cons = xsk->config.tx_size;
 	}
 	xsk->tx = tx;
@@ -653,13 +652,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 
 out_mmap_tx:
 	if (tx)
-		munmap(xsk->tx,
-		       off.tx.desc +
+		munmap(tx_map, off.tx.desc +
 		       xsk->config.tx_size * sizeof(struct xdp_desc));
 out_mmap_rx:
 	if (rx)
-		munmap(xsk->rx,
-		       off.rx.desc +
+		munmap(rx_map, off.rx.desc +
 		       xsk->config.rx_size * sizeof(struct xdp_desc));
 out_socket:
 	if (--umem->refcount)
@@ -684,10 +681,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
 	optlen = sizeof(off);
 	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 	if (!err) {
-		munmap(umem->fill->ring,
-		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
-		munmap(umem->comp->ring,
-		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
+		(void)munmap(umem->fill->ring - off.fr.desc,
+			     off.fr.desc +
+			     umem->config.fill_size * sizeof(__u64));
+		(void)munmap(umem->comp->ring - off.cr.desc,
+			     off.cr.desc +
+			     umem->config.comp_size * sizeof(__u64));
 	}
 
 	close(umem->fd);
@@ -698,6 +697,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
 
 void xsk_socket__delete(struct xsk_socket *xsk)
 {
+	size_t desc_sz = sizeof(struct xdp_desc);
 	struct xdp_mmap_offsets off;
 	socklen_t optlen;
 	int err;
@@ -710,14 +710,17 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	optlen = sizeof(off);
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 	if (!err) {
-		if (xsk->rx)
-			munmap(xsk->rx->ring,
-			       off.rx.desc +
-			       xsk->config.rx_size * sizeof(struct xdp_desc));
-		if (xsk->tx)
-			munmap(xsk->tx->ring,
-			       off.tx.desc +
-			       xsk->config.tx_size * sizeof(struct xdp_desc));
+		if (xsk->rx) {
+			(void)munmap(xsk->rx->ring - off.rx.desc,
+				     off.rx.desc +
+				     xsk->config.rx_size * desc_sz);
+		}
+		if (xsk->tx) {
+			(void)munmap(xsk->tx->ring - off.tx.desc,
+				     off.tx.desc +
+				     xsk->config.tx_size * desc_sz);
+		}
+
 	}
 
 	xsk->umem->refcount--;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH bpf 2/2] libbpf: proper XSKMAP cleanup
  2019-04-30 12:45 [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown Björn Töpel
  2019-04-30 12:45 ` [PATCH bpf 1/2] libbpf: fix invalid munmap call Björn Töpel
@ 2019-04-30 12:45 ` Björn Töpel
  2019-05-01  3:34   ` William Tu
  2019-04-30 15:38 ` [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown Jonathan Lemon
  2 siblings, 1 reply; 10+ messages in thread
From: Björn Töpel @ 2019-04-30 12:45 UTC (permalink / raw)
  To: ast, daniel, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson, bpf, u9012063

From: Björn Töpel <bjorn.topel@intel.com>

The bpf_map_update_elem() function, when used on an XSKMAP, will fail
if not a valid AF_XDP socket is passed as value. Therefore, this is
function cannot be used to clear the XSKMAP. Instead, the
bpf_map_delete_elem() function should be used for that.

This patch also simplifies the code by breaking up
xsk_update_bpf_maps() into three smaller functions.

Reported-by: William Tu <u9012063@gmail.com>
Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 tools/lib/bpf/xsk.c | 115 +++++++++++++++++++++++---------------------
 1 file changed, 60 insertions(+), 55 deletions(-)

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index af5f310ecca1..c2e6da464504 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -386,21 +386,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
 {
 	close(xsk->qidconf_map_fd);
 	close(xsk->xsks_map_fd);
+	xsk->qidconf_map_fd = -1;
+	xsk->xsks_map_fd = -1;
 }
 
-static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
-			       int xsks_value)
+static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 {
-	bool qidconf_map_updated = false, xsks_map_updated = false;
+	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+	__u32 map_len = sizeof(struct bpf_map_info);
 	struct bpf_prog_info prog_info = {};
-	__u32 prog_len = sizeof(prog_info);
 	struct bpf_map_info map_info;
-	__u32 map_len = sizeof(map_info);
-	__u32 *map_ids;
-	int reset_value = 0;
-	__u32 num_maps;
-	unsigned int i;
-	int err;
+	int fd, err;
 
 	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
 	if (err)
@@ -421,66 +417,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
 		goto out_map_ids;
 
 	for (i = 0; i < prog_info.nr_map_ids; i++) {
-		int fd;
+		if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
+			break;
 
 		fd = bpf_map_get_fd_by_id(map_ids[i]);
-		if (fd < 0) {
-			err = -errno;
-			goto out_maps;
-		}
+		if (fd < 0)
+			continue;
 
 		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
-		if (err)
-			goto out_maps;
+		if (err) {
+			close(fd);
+			continue;
+		}
 
 		if (!strcmp(map_info.name, "qidconf_map")) {
-			err = bpf_map_update_elem(fd, &xsk->queue_id,
-						  &qidconf_value, 0);
-			if (err)
-				goto out_maps;
-			qidconf_map_updated = true;
 			xsk->qidconf_map_fd = fd;
-		} else if (!strcmp(map_info.name, "xsks_map")) {
-			err = bpf_map_update_elem(fd, &xsk->queue_id,
-						  &xsks_value, 0);
-			if (err)
-				goto out_maps;
-			xsks_map_updated = true;
+			continue;
+		}
+
+		if (!strcmp(map_info.name, "xsks_map")) {
 			xsk->xsks_map_fd = fd;
+			continue;
 		}
 
-		if (qidconf_map_updated && xsks_map_updated)
-			break;
+		close(fd);
 	}
 
-	if (!(qidconf_map_updated && xsks_map_updated)) {
+	err = 0;
+	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
 		err = -ENOENT;
-		goto out_maps;
+		xsk_delete_bpf_maps(xsk);
 	}
 
-	err = 0;
-	goto out_success;
-
-out_maps:
-	if (qidconf_map_updated)
-		(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id,
-					  &reset_value, 0);
-	if (xsks_map_updated)
-		(void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
-					  &reset_value, 0);
-out_success:
-	if (qidconf_map_updated)
-		close(xsk->qidconf_map_fd);
-	if (xsks_map_updated)
-		close(xsk->xsks_map_fd);
 out_map_ids:
 	free(map_ids);
 	return err;
 }
 
+static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
+{
+	int qid = false;
+
+	(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
+	(void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
+}
+
+static int xsk_set_bpf_maps(struct xsk_socket *xsk)
+{
+	int qid = true, fd = xsk->fd, err;
+
+	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
+	if (err)
+		goto out;
+
+	err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
+	if (err)
+		goto out;
+
+	return 0;
+out:
+	xsk_clear_bpf_maps(xsk);
+	return err;
+}
+
 static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 {
-	bool prog_attached = false;
 	__u32 prog_id = 0;
 	int err;
 
@@ -490,7 +491,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 		return err;
 
 	if (!prog_id) {
-		prog_attached = true;
 		err = xsk_create_bpf_maps(xsk);
 		if (err)
 			return err;
@@ -500,20 +500,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 			goto out_maps;
 	} else {
 		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+		err = xsk_lookup_bpf_maps(xsk);
+		if (err)
+			goto out_load;
 	}
 
-	err = xsk_update_bpf_maps(xsk, true, xsk->fd);
+	err = xsk_set_bpf_maps(xsk);
 	if (err)
 		goto out_load;
 
 	return 0;
 
 out_load:
-	if (prog_attached)
-		close(xsk->prog_fd);
+	close(xsk->prog_fd);
 out_maps:
-	if (prog_attached)
-		xsk_delete_bpf_maps(xsk);
+	xsk_delete_bpf_maps(xsk);
 	return err;
 }
 
@@ -641,6 +642,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		goto out_mmap_tx;
 	}
 
+	xsk->qidconf_map_fd = -1;
+	xsk->xsks_map_fd = -1;
+
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
 		err = xsk_setup_xdp_prog(xsk);
 		if (err)
@@ -705,7 +709,8 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	if (!xsk)
 		return;
 
-	(void)xsk_update_bpf_maps(xsk, 0, 0);
+	xsk_clear_bpf_maps(xsk);
+	xsk_delete_bpf_maps(xsk);
 
 	optlen = sizeof(off);
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown
  2019-04-30 12:45 [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown Björn Töpel
  2019-04-30 12:45 ` [PATCH bpf 1/2] libbpf: fix invalid munmap call Björn Töpel
  2019-04-30 12:45 ` [PATCH bpf 2/2] libbpf: proper XSKMAP cleanup Björn Töpel
@ 2019-04-30 15:38 ` Jonathan Lemon
  2019-05-05  6:30   ` Alexei Starovoitov
  2 siblings, 1 reply; 10+ messages in thread
From: Jonathan Lemon @ 2019-04-30 15:38 UTC (permalink / raw)
  To: Björn Töpel
  Cc: ast, daniel, netdev, magnus.karlsson, magnus.karlsson, bpf, u9012063



On 30 Apr 2019, at 5:45, Björn Töpel wrote:

> William found two bugs, when doing socket teardown within the same
> process.
>
> The first issue was an invalid munmap call, and the second one was an
> invalid XSKMAP cleanup. Both resulted in that the process kept
> references to the socket, which was not correctly cleaned up. When a
> new socket was created, the bind() call would fail, since the old
> socket was still lingering, refusing to give up the queue on the
> netdev.
>
> More details can be found in the individual commits.

Reviewed-by: Jonathan Lemon <jonathan.lemon@gmail.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf 2/2] libbpf: proper XSKMAP cleanup
  2019-04-30 12:45 ` [PATCH bpf 2/2] libbpf: proper XSKMAP cleanup Björn Töpel
@ 2019-05-01  3:34   ` William Tu
  0 siblings, 0 replies; 10+ messages in thread
From: William Tu @ 2019-05-01  3:34 UTC (permalink / raw)
  To: bpf, Linux Kernel Network Developers

On Tue, Apr 30, 2019 at 5:46 AM Björn Töpel <bjorn.topel@gmail.com> wrote:
>
> From: Björn Töpel <bjorn.topel@intel.com>
>
> The bpf_map_update_elem() function, when used on an XSKMAP, will fail
> if not a valid AF_XDP socket is passed as value. Therefore, this is
> function cannot be used to clear the XSKMAP. Instead, the
> bpf_map_delete_elem() function should be used for that.
>
> This patch also simplifies the code by breaking up
> xsk_update_bpf_maps() into three smaller functions.
>
> Reported-by: William Tu <u9012063@gmail.com>
> Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---

Thank you.
Tested-by: William Tu <u9012063@gmail.com>

>  tools/lib/bpf/xsk.c | 115 +++++++++++++++++++++++---------------------
>  1 file changed, 60 insertions(+), 55 deletions(-)
>
> diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> index af5f310ecca1..c2e6da464504 100644
> --- a/tools/lib/bpf/xsk.c
> +++ b/tools/lib/bpf/xsk.c
> @@ -386,21 +386,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
>  {
>         close(xsk->qidconf_map_fd);
>         close(xsk->xsks_map_fd);
> +       xsk->qidconf_map_fd = -1;
> +       xsk->xsks_map_fd = -1;
>  }
>
> -static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
> -                              int xsks_value)
> +static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
>  {
> -       bool qidconf_map_updated = false, xsks_map_updated = false;
> +       __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
> +       __u32 map_len = sizeof(struct bpf_map_info);
>         struct bpf_prog_info prog_info = {};
> -       __u32 prog_len = sizeof(prog_info);
>         struct bpf_map_info map_info;
> -       __u32 map_len = sizeof(map_info);
> -       __u32 *map_ids;
> -       int reset_value = 0;
> -       __u32 num_maps;
> -       unsigned int i;
> -       int err;
> +       int fd, err;
>
>         err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
>         if (err)
> @@ -421,66 +417,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
>                 goto out_map_ids;
>
>         for (i = 0; i < prog_info.nr_map_ids; i++) {
> -               int fd;
> +               if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
> +                       break;
>
>                 fd = bpf_map_get_fd_by_id(map_ids[i]);
> -               if (fd < 0) {
> -                       err = -errno;
> -                       goto out_maps;
> -               }
> +               if (fd < 0)
> +                       continue;
>
>                 err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
> -               if (err)
> -                       goto out_maps;
> +               if (err) {
> +                       close(fd);
> +                       continue;
> +               }
>
>                 if (!strcmp(map_info.name, "qidconf_map")) {
> -                       err = bpf_map_update_elem(fd, &xsk->queue_id,
> -                                                 &qidconf_value, 0);
> -                       if (err)
> -                               goto out_maps;
> -                       qidconf_map_updated = true;
>                         xsk->qidconf_map_fd = fd;
> -               } else if (!strcmp(map_info.name, "xsks_map")) {
> -                       err = bpf_map_update_elem(fd, &xsk->queue_id,
> -                                                 &xsks_value, 0);
> -                       if (err)
> -                               goto out_maps;
> -                       xsks_map_updated = true;
> +                       continue;
> +               }
> +
> +               if (!strcmp(map_info.name, "xsks_map")) {
>                         xsk->xsks_map_fd = fd;
> +                       continue;
>                 }
>
> -               if (qidconf_map_updated && xsks_map_updated)
> -                       break;
> +               close(fd);
>         }
>
> -       if (!(qidconf_map_updated && xsks_map_updated)) {
> +       err = 0;
> +       if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
>                 err = -ENOENT;
> -               goto out_maps;
> +               xsk_delete_bpf_maps(xsk);
>         }
>
> -       err = 0;
> -       goto out_success;
> -
> -out_maps:
> -       if (qidconf_map_updated)
> -               (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id,
> -                                         &reset_value, 0);
> -       if (xsks_map_updated)
> -               (void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
> -                                         &reset_value, 0);
> -out_success:
> -       if (qidconf_map_updated)
> -               close(xsk->qidconf_map_fd);
> -       if (xsks_map_updated)
> -               close(xsk->xsks_map_fd);
>  out_map_ids:
>         free(map_ids);
>         return err;
>  }
>
> +static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
> +{
> +       int qid = false;
> +
> +       (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
> +       (void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
> +}
> +
> +static int xsk_set_bpf_maps(struct xsk_socket *xsk)
> +{
> +       int qid = true, fd = xsk->fd, err;
> +
> +       err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
> +       if (err)
> +               goto out;
> +
> +       err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
> +       if (err)
> +               goto out;
> +
> +       return 0;
> +out:
> +       xsk_clear_bpf_maps(xsk);
> +       return err;
> +}
> +
>  static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
>  {
> -       bool prog_attached = false;
>         __u32 prog_id = 0;
>         int err;
>
> @@ -490,7 +491,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
>                 return err;
>
>         if (!prog_id) {
> -               prog_attached = true;
>                 err = xsk_create_bpf_maps(xsk);
>                 if (err)
>                         return err;
> @@ -500,20 +500,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
>                         goto out_maps;
>         } else {
>                 xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
> +               err = xsk_lookup_bpf_maps(xsk);
> +               if (err)
> +                       goto out_load;
>         }
>
> -       err = xsk_update_bpf_maps(xsk, true, xsk->fd);
> +       err = xsk_set_bpf_maps(xsk);
>         if (err)
>                 goto out_load;
>
>         return 0;
>
>  out_load:
> -       if (prog_attached)
> -               close(xsk->prog_fd);
> +       close(xsk->prog_fd);
>  out_maps:
> -       if (prog_attached)
> -               xsk_delete_bpf_maps(xsk);
> +       xsk_delete_bpf_maps(xsk);
>         return err;
>  }
>
> @@ -641,6 +642,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
>                 goto out_mmap_tx;
>         }
>
> +       xsk->qidconf_map_fd = -1;
> +       xsk->xsks_map_fd = -1;
> +
>         if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
>                 err = xsk_setup_xdp_prog(xsk);
>                 if (err)
> @@ -705,7 +709,8 @@ void xsk_socket__delete(struct xsk_socket *xsk)
>         if (!xsk)
>                 return;
>
> -       (void)xsk_update_bpf_maps(xsk, 0, 0);
> +       xsk_clear_bpf_maps(xsk);
> +       xsk_delete_bpf_maps(xsk);
>
>         optlen = sizeof(off);
>         err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> --
> 2.20.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf 1/2] libbpf: fix invalid munmap call
  2019-04-30 12:45 ` [PATCH bpf 1/2] libbpf: fix invalid munmap call Björn Töpel
@ 2019-05-01  3:36   ` William Tu
  2019-05-06  8:26   ` Daniel Borkmann
  1 sibling, 0 replies; 10+ messages in thread
From: William Tu @ 2019-05-01  3:36 UTC (permalink / raw)
  To: bpf, Linux Kernel Network Developers

On Tue, Apr 30, 2019 at 5:46 AM Björn Töpel <bjorn.topel@gmail.com> wrote:
>
> From: Björn Töpel <bjorn.topel@intel.com>
>
> When unmapping the AF_XDP memory regions used for the rings, an
> invalid address was passed to the munmap() calls. Instead of passing
> the beginning of the memory region, the descriptor region was passed
> to munmap.
>
> When the userspace application tried to tear down an AF_XDP socket,
> the operation failed and the application would still have a reference
> to socket it wished to get rid of.
>
> Reported-by: William Tu <u9012063@gmail.com>
> Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
Thank you.
Tested-by: William Tu <u9012063@gmail.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown
  2019-04-30 15:38 ` [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown Jonathan Lemon
@ 2019-05-05  6:30   ` Alexei Starovoitov
  0 siblings, 0 replies; 10+ messages in thread
From: Alexei Starovoitov @ 2019-05-05  6:30 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: Björn Töpel, Alexei Starovoitov, Daniel Borkmann,
	Network Development, Karlsson, Magnus, Magnus Karlsson, bpf,
	William Tu

On Tue, Apr 30, 2019 at 8:38 AM Jonathan Lemon <jlemon@flugsvamp.com> wrote:
>
>
>
> On 30 Apr 2019, at 5:45, Björn Töpel wrote:
>
> > William found two bugs, when doing socket teardown within the same
> > process.
> >
> > The first issue was an invalid munmap call, and the second one was an
> > invalid XSKMAP cleanup. Both resulted in that the process kept
> > references to the socket, which was not correctly cleaned up. When a
> > new socket was created, the bind() call would fail, since the old
> > socket was still lingering, refusing to give up the queue on the
> > netdev.
> >
> > More details can be found in the individual commits.
>
> Reviewed-by: Jonathan Lemon <jonathan.lemon@gmail.com>

Applied. Thanks!

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf 1/2] libbpf: fix invalid munmap call
  2019-04-30 12:45 ` [PATCH bpf 1/2] libbpf: fix invalid munmap call Björn Töpel
  2019-05-01  3:36   ` William Tu
@ 2019-05-06  8:26   ` Daniel Borkmann
  2019-05-06  8:40     ` Björn Töpel
  1 sibling, 1 reply; 10+ messages in thread
From: Daniel Borkmann @ 2019-05-06  8:26 UTC (permalink / raw)
  To: Björn Töpel, ast, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson, bpf, u9012063

On 04/30/2019 02:45 PM, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> When unmapping the AF_XDP memory regions used for the rings, an
> invalid address was passed to the munmap() calls. Instead of passing
> the beginning of the memory region, the descriptor region was passed
> to munmap.
> 
> When the userspace application tried to tear down an AF_XDP socket,
> the operation failed and the application would still have a reference
> to socket it wished to get rid of.
> 
> Reported-by: William Tu <u9012063@gmail.com>
> Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
[...]
>  out_mmap_tx:
>  	if (tx)
> -		munmap(xsk->tx,
> -		       off.tx.desc +
> +		munmap(tx_map, off.tx.desc +
>  		       xsk->config.tx_size * sizeof(struct xdp_desc));
>  out_mmap_rx:
>  	if (rx)
> -		munmap(xsk->rx,
> -		       off.rx.desc +
> +		munmap(rx_map, off.rx.desc +
>  		       xsk->config.rx_size * sizeof(struct xdp_desc));
>  out_socket:
>  	if (--umem->refcount)
> @@ -684,10 +681,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
>  	optlen = sizeof(off);
>  	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>  	if (!err) {
> -		munmap(umem->fill->ring,
> -		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
> -		munmap(umem->comp->ring,
> -		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
> +		(void)munmap(umem->fill->ring - off.fr.desc,
> +			     off.fr.desc +
> +			     umem->config.fill_size * sizeof(__u64));
> +		(void)munmap(umem->comp->ring - off.cr.desc,
> +			     off.cr.desc +
> +			     umem->config.comp_size * sizeof(__u64));

What's the rationale to cast to void here and other places (e.g. below)?
If there's no proper reason, then lets remove it. Given the patch has already
been applied, please send a follow up. Thanks.

>  	}
>  
>  	close(umem->fd);
> @@ -698,6 +697,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
>  
>  void xsk_socket__delete(struct xsk_socket *xsk)
>  {
> +	size_t desc_sz = sizeof(struct xdp_desc);
>  	struct xdp_mmap_offsets off;
>  	socklen_t optlen;
>  	int err;
> @@ -710,14 +710,17 @@ void xsk_socket__delete(struct xsk_socket *xsk)
>  	optlen = sizeof(off);
>  	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>  	if (!err) {
> -		if (xsk->rx)
> -			munmap(xsk->rx->ring,
> -			       off.rx.desc +
> -			       xsk->config.rx_size * sizeof(struct xdp_desc));
> -		if (xsk->tx)
> -			munmap(xsk->tx->ring,
> -			       off.tx.desc +
> -			       xsk->config.tx_size * sizeof(struct xdp_desc));
> +		if (xsk->rx) {
> +			(void)munmap(xsk->rx->ring - off.rx.desc,
> +				     off.rx.desc +
> +				     xsk->config.rx_size * desc_sz);
> +		}
> +		if (xsk->tx) {
> +			(void)munmap(xsk->tx->ring - off.tx.desc,
> +				     off.tx.desc +
> +				     xsk->config.tx_size * desc_sz);
> +		}
> +
>  	}
>  
>  	xsk->umem->refcount--;
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf 1/2] libbpf: fix invalid munmap call
  2019-05-06  8:26   ` Daniel Borkmann
@ 2019-05-06  8:40     ` Björn Töpel
  2019-05-06  9:01       ` Daniel Borkmann
  0 siblings, 1 reply; 10+ messages in thread
From: Björn Töpel @ 2019-05-06  8:40 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Alexei Starovoitov, Netdev, Björn Töpel, Karlsson,
	Magnus, Magnus Karlsson, bpf, William Tu

On Mon, 6 May 2019 at 10:26, Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 04/30/2019 02:45 PM, Björn Töpel wrote:
> > From: Björn Töpel <bjorn.topel@intel.com>
> >
> > When unmapping the AF_XDP memory regions used for the rings, an
> > invalid address was passed to the munmap() calls. Instead of passing
> > the beginning of the memory region, the descriptor region was passed
> > to munmap.
> >
> > When the userspace application tried to tear down an AF_XDP socket,
> > the operation failed and the application would still have a reference
> > to socket it wished to get rid of.
> >
> > Reported-by: William Tu <u9012063@gmail.com>
> > Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
> > Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> [...]
> >  out_mmap_tx:
> >       if (tx)
> > -             munmap(xsk->tx,
> > -                    off.tx.desc +
> > +             munmap(tx_map, off.tx.desc +
> >                      xsk->config.tx_size * sizeof(struct xdp_desc));
> >  out_mmap_rx:
> >       if (rx)
> > -             munmap(xsk->rx,
> > -                    off.rx.desc +
> > +             munmap(rx_map, off.rx.desc +
> >                      xsk->config.rx_size * sizeof(struct xdp_desc));
> >  out_socket:
> >       if (--umem->refcount)
> > @@ -684,10 +681,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
> >       optlen = sizeof(off);
> >       err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> >       if (!err) {
> > -             munmap(umem->fill->ring,
> > -                    off.fr.desc + umem->config.fill_size * sizeof(__u64));
> > -             munmap(umem->comp->ring,
> > -                    off.cr.desc + umem->config.comp_size * sizeof(__u64));
> > +             (void)munmap(umem->fill->ring - off.fr.desc,
> > +                          off.fr.desc +
> > +                          umem->config.fill_size * sizeof(__u64));
> > +             (void)munmap(umem->comp->ring - off.cr.desc,
> > +                          off.cr.desc +
> > +                          umem->config.comp_size * sizeof(__u64));
>
> What's the rationale to cast to void here and other places (e.g. below)?
> If there's no proper reason, then lets remove it. Given the patch has already
> been applied, please send a follow up. Thanks.
>

The rationale was to make it explicit that the return value is *not*
cared about. If this is not common practice, I'll remove it in a
follow up!

Thank,
Björn

> >       }
> >
> >       close(umem->fd);
> > @@ -698,6 +697,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
> >
> >  void xsk_socket__delete(struct xsk_socket *xsk)
> >  {
> > +     size_t desc_sz = sizeof(struct xdp_desc);
> >       struct xdp_mmap_offsets off;
> >       socklen_t optlen;
> >       int err;
> > @@ -710,14 +710,17 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> >       optlen = sizeof(off);
> >       err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> >       if (!err) {
> > -             if (xsk->rx)
> > -                     munmap(xsk->rx->ring,
> > -                            off.rx.desc +
> > -                            xsk->config.rx_size * sizeof(struct xdp_desc));
> > -             if (xsk->tx)
> > -                     munmap(xsk->tx->ring,
> > -                            off.tx.desc +
> > -                            xsk->config.tx_size * sizeof(struct xdp_desc));
> > +             if (xsk->rx) {
> > +                     (void)munmap(xsk->rx->ring - off.rx.desc,
> > +                                  off.rx.desc +
> > +                                  xsk->config.rx_size * desc_sz);
> > +             }
> > +             if (xsk->tx) {
> > +                     (void)munmap(xsk->tx->ring - off.tx.desc,
> > +                                  off.tx.desc +
> > +                                  xsk->config.tx_size * desc_sz);
> > +             }
> > +
> >       }
> >
> >       xsk->umem->refcount--;
> >
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf 1/2] libbpf: fix invalid munmap call
  2019-05-06  8:40     ` Björn Töpel
@ 2019-05-06  9:01       ` Daniel Borkmann
  0 siblings, 0 replies; 10+ messages in thread
From: Daniel Borkmann @ 2019-05-06  9:01 UTC (permalink / raw)
  To: Björn Töpel
  Cc: Alexei Starovoitov, Netdev, Björn Töpel, Karlsson,
	Magnus, Magnus Karlsson, bpf, William Tu

On 05/06/2019 10:40 AM, Björn Töpel wrote:
> On Mon, 6 May 2019 at 10:26, Daniel Borkmann <daniel@iogearbox.net> wrote:
>> On 04/30/2019 02:45 PM, Björn Töpel wrote:
>>> From: Björn Töpel <bjorn.topel@intel.com>
>>>
>>> When unmapping the AF_XDP memory regions used for the rings, an
>>> invalid address was passed to the munmap() calls. Instead of passing
>>> the beginning of the memory region, the descriptor region was passed
>>> to munmap.
>>>
>>> When the userspace application tried to tear down an AF_XDP socket,
>>> the operation failed and the application would still have a reference
>>> to socket it wished to get rid of.
>>>
>>> Reported-by: William Tu <u9012063@gmail.com>
>>> Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
>>> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
>> [...]
>>>  out_mmap_tx:
>>>       if (tx)
>>> -             munmap(xsk->tx,
>>> -                    off.tx.desc +
>>> +             munmap(tx_map, off.tx.desc +
>>>                      xsk->config.tx_size * sizeof(struct xdp_desc));
>>>  out_mmap_rx:
>>>       if (rx)
>>> -             munmap(xsk->rx,
>>> -                    off.rx.desc +
>>> +             munmap(rx_map, off.rx.desc +
>>>                      xsk->config.rx_size * sizeof(struct xdp_desc));
>>>  out_socket:
>>>       if (--umem->refcount)
>>> @@ -684,10 +681,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
>>>       optlen = sizeof(off);
>>>       err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>>>       if (!err) {
>>> -             munmap(umem->fill->ring,
>>> -                    off.fr.desc + umem->config.fill_size * sizeof(__u64));
>>> -             munmap(umem->comp->ring,
>>> -                    off.cr.desc + umem->config.comp_size * sizeof(__u64));
>>> +             (void)munmap(umem->fill->ring - off.fr.desc,
>>> +                          off.fr.desc +
>>> +                          umem->config.fill_size * sizeof(__u64));
>>> +             (void)munmap(umem->comp->ring - off.cr.desc,
>>> +                          off.cr.desc +
>>> +                          umem->config.comp_size * sizeof(__u64));
>>
>> What's the rationale to cast to void here and other places (e.g. below)?
>> If there's no proper reason, then lets remove it. Given the patch has already
>> been applied, please send a follow up. Thanks.
> 
> The rationale was to make it explicit that the return value is *not*
> cared about. If this is not common practice, I'll remove it in a
> follow up!

Yeah, it's not common practice, so lets rather remove it.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-05-06  9:02 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-30 12:45 [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown Björn Töpel
2019-04-30 12:45 ` [PATCH bpf 1/2] libbpf: fix invalid munmap call Björn Töpel
2019-05-01  3:36   ` William Tu
2019-05-06  8:26   ` Daniel Borkmann
2019-05-06  8:40     ` Björn Töpel
2019-05-06  9:01       ` Daniel Borkmann
2019-04-30 12:45 ` [PATCH bpf 2/2] libbpf: proper XSKMAP cleanup Björn Töpel
2019-05-01  3:34   ` William Tu
2019-04-30 15:38 ` [PATCH bpf 0/2] libbpf: fixes for AF_XDP teardown Jonathan Lemon
2019-05-05  6:30   ` Alexei Starovoitov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.