From: Maciej Fijalkowski <maciejromanfijalkowski@gmail.com>
To: "Björn Töpel" <bjorn.topel@intel.com>
Cc: magnus.karlsson@intel.com, netdev@vger.kernel.org,
	ast@kernel.org, daniel@iogearbox.net,
	jakub.kicinski@netronome.com, jonathan.lemon@gmail.com,
	songliubraving@fb.com, bpf <bpf@vger.kernel.org>
Subject: Re: [RFC PATCH bpf-next 4/4] libbpf: don't remove eBPF resources when other xsks are present
Date: Tue, 4 Jun 2019 17:07:22 +0200	[thread overview]
Message-ID: <20190604170722.000021b5@gmail.com> (raw)
In-Reply-To: <470fba94-a47f-83bd-d2c4-83d424dafb38@intel.com>

On Tue, 4 Jun 2019 10:08:03 +0200
Björn Töpel <bjorn.topel@intel.com> wrote:

> On 2019-06-03 15:19, Maciej Fijalkowski wrote:
> > In the case where multiple xsk sockets are attached to a single
> > interface and one of them gets detached, the eBPF maps and program are
> > removed. This should not happen, as the rest of the xsk sockets are
> > still using these resources.
> > 
> > In order to fix that, let's have an additional eBPF map with a single
> > entry that will be used as an xsks count. During xsk_socket__delete,
> > remove the resources only when this count is equal to 0. This map is
> > not accessed from the eBPF program, so the verifier does not associate
> > it with the prog, which in turn means bpf_obj_get_info_by_fd does not
> > report this map in the nr_map_ids field of struct bpf_prog_info. This
> > behaviour means the map has to be pinned, so that when a socket is
> > created and libbpf detects the presence of existing bpf resources, it
> > is still able to access that map.
> >
> 
> This commit is only needed after #3 is applied, right? So, this is a way 
> of refcounting XDP socks?

Yes, but as you pointed out it needs synchronization.
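
Two processes bumping the count back to back can both read the same
value and then write back the same result, e.g. with the count at 1
and both adding a socket:

	A: bpf_map_lookup_elem()        -> cnt = 1
	B: bpf_map_lookup_elem()        -> cnt = 1
	A: bpf_map_update_elem(cnt + 1) -> 2
	B: bpf_map_update_elem(cnt + 1) -> 2

One increment is lost, so the count hits zero one xsk_socket__delete()
too early and the prog/maps get torn down while a socket is still bound.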

> 
> 
> > Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > ---
> >   tools/lib/bpf/xsk.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--------
> >   1 file changed, 51 insertions(+), 8 deletions(-)
> > 
> > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > index e28bedb0b078..88d2c931ad14 100644
> > --- a/tools/lib/bpf/xsk.c
> > +++ b/tools/lib/bpf/xsk.c
> > @@ -44,6 +44,8 @@
> >    #define PF_XDP AF_XDP
> >   #endif
> >   
> > +#define XSKS_CNT_MAP_PATH "/sys/fs/bpf/xsks_cnt_map"
> > +
> >   struct xsk_umem {
> >   	struct xsk_ring_prod *fill;
> >   	struct xsk_ring_cons *comp;
> > @@ -65,6 +67,7 @@ struct xsk_socket {
> >   	int prog_fd;
> >   	int qidconf_map_fd;
> >   	int xsks_map_fd;
> > +	int xsks_cnt_map_fd;
> >   	__u32 queue_id;
> >   	char ifname[IFNAMSIZ];
> >   };
> > @@ -372,7 +375,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
> >   static int xsk_create_bpf_maps(struct xsk_socket *xsk)
> >   {
> >   	int max_queues;
> > -	int fd;
> > +	int fd, ret;
> >   
> >   	max_queues = xsk_get_max_queues(xsk);
> >   	if (max_queues < 0)
> > @@ -392,6 +395,24 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
> >   	}
> >   	xsk->xsks_map_fd = fd;
> >   
> > +	fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "xsks_cnt_map",
> > +				 sizeof(int), sizeof(int), 1, 0);
> > +	if (fd < 0) {
> > +		close(xsk->qidconf_map_fd);
> > +		close(xsk->xsks_map_fd);
> > +		return fd;
> > +	}
> > +
> > +	ret = bpf_obj_pin(fd, XSKS_CNT_MAP_PATH);
> > +	if (ret < 0) {
> > +		pr_warning("pinning map failed; is bpffs mounted?\n");
> > +		close(xsk->qidconf_map_fd);
> > +		close(xsk->xsks_map_fd);
> > +		close(fd);
> > +		return ret;
> > +	}
> > +	xsk->xsks_cnt_map_fd = fd;
> > +
> >   	return 0;
> >   }
> >   
> > @@ -456,8 +477,10 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
> >   		close(fd);
> >   	}
> >   
> > +	xsk->xsks_cnt_map_fd = bpf_obj_get(XSKS_CNT_MAP_PATH);
> >   	err = 0;
> > -	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
> > +	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0 ||
> > +	    xsk->xsks_cnt_map_fd < 0) {
> >   		err = -ENOENT;
> >   		xsk_delete_bpf_maps(xsk);
> >   	}
> > @@ -467,17 +490,25 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
> >   	return err;
> >   }
> >   
> > -static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
> > +static void xsk_clear_bpf_maps(struct xsk_socket *xsk, long *xsks_cnt_ptr)
> >   {
> > +	long xsks_cnt, key = 0;
> >   	int qid = false;
> >   
> >   	bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
> >   	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
> > +	bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
> > +	if (xsks_cnt)
> > +		xsks_cnt--;
> > +	bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
> > +	if (xsks_cnt_ptr)
> > +		*xsks_cnt_ptr = xsks_cnt;
> 
> This refcount scheme will not work; there's no synchronization between 
> the updates (cross-process)!

Ugh, shame. Let me fix this in v2 :(
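
One direction I'm considering for v2 -- just a rough, untested sketch,
where XSKS_CNT_LOCK_PATH and xsks_cnt_bump() are made up here for
illustration and are not part of this patch -- would be to serialize
the lookup/update pair with a plain file lock next to the pinned map:

	/* Needs <sys/file.h>, <fcntl.h>, <errno.h>, <unistd.h> and bpf/bpf.h. */
	#define XSKS_CNT_LOCK_PATH "/tmp/xsks_cnt_map.lock"	/* made-up path */

	static int xsks_cnt_bump(int cnt_map_fd, long delta)
	{
		long cnt = 0, key = 0;
		int lock_fd, err;

		lock_fd = open(XSKS_CNT_LOCK_PATH, O_RDONLY | O_CREAT, 0600);
		if (lock_fd < 0)
			return -errno;

		/* serialize the read-modify-write across processes */
		if (flock(lock_fd, LOCK_EX)) {
			err = -errno;
			close(lock_fd);
			return err;
		}

		err = bpf_map_lookup_elem(cnt_map_fd, &key, &cnt);
		if (!err) {
			cnt += delta;
			if (cnt < 0)
				cnt = 0;
			err = bpf_map_update_elem(cnt_map_fd, &key, &cnt, 0);
		}

		flock(lock_fd, LOCK_UN);
		close(lock_fd);
		return err;
	}

xsk_set_bpf_maps() would then call it with +1 and xsk_clear_bpf_maps()
with -1 instead of open-coding the lookup/update, and the delete path
could take the same lock around the final check before removing the
prog and maps.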

> 
> >   }
> >   
> >   static int xsk_set_bpf_maps(struct xsk_socket *xsk)
> >   {
> >   	int qid = true, fd = xsk->fd, err;
> > +	long xsks_cnt, key = 0;
> >   
> >   	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
> >   	if (err)
> > @@ -487,9 +518,18 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
> >   	if (err)
> >   		goto out;
> >   
> > +	err = bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
> > +	if (err)
> > +		goto out;
> > +
> > +	xsks_cnt++;
> > +	err = bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
> > +	if (err)
> > +		goto out;
> > +
> 
> Ditto.
> 
> >   	return 0;
> >   out:
> > -	xsk_clear_bpf_maps(xsk);
> > +	xsk_clear_bpf_maps(xsk, NULL);
> >   	return err;
> >   }
> >   
> > @@ -752,13 +792,18 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> >   	size_t desc_sz = sizeof(struct xdp_desc);
> >   	struct xdp_mmap_offsets off;
> >   	socklen_t optlen;
> > +	long xsks_cnt;
> >   	int err;
> >   
> >   	if (!xsk)
> >   		return;
> >   
> > -	xsk_clear_bpf_maps(xsk);
> > -	xsk_delete_bpf_maps(xsk);
> > +	xsk_clear_bpf_maps(xsk, &xsks_cnt);
> > +	unlink(XSKS_CNT_MAP_PATH);
> > +	if (!xsks_cnt) {
> > +		xsk_delete_bpf_maps(xsk);
> > +		xsk_remove_xdp_prog(xsk);
> > +	}
> >   
> >   	optlen = sizeof(off);
> >   	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> > @@ -774,8 +819,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> >   
> >   	}
> >   
> > -	xsk_remove_xdp_prog(xsk);
> > -
> >   	xsk->umem->refcount--;
> >   	/* Do not close an fd that also has an associated umem connected
> >   	 * to it.
> > 


Thread overview: 18+ messages
2019-06-03 13:19 [RFC PATCH bpf-next 0/4] libbpf: xsk improvements Maciej Fijalkowski
2019-06-03 13:19 ` [RFC PATCH bpf-next 1/4] libbpf: fill the AF_XDP fill queue before bind() call Maciej Fijalkowski
2019-06-04  8:06   ` Björn Töpel
2019-06-04 15:04     ` Maciej Fijalkowski
2019-06-04 15:54       ` Jonathan Lemon
2019-06-05  9:00       ` Björn Töpel
2019-06-03 13:19 ` [RFC PATCH bpf-next 2/4] libbpf: check for channels.max_{t,r}x in xsk_get_max_queues Maciej Fijalkowski
2019-06-04  8:06   ` Björn Töpel
2019-06-04 15:05     ` Maciej Fijalkowski
2019-06-03 13:19 ` [RFC PATCH bpf-next 3/4] libbpf: move xdp program removal to libbpf Maciej Fijalkowski
2019-06-04  8:07   ` Björn Töpel
2019-06-04 15:06     ` Maciej Fijalkowski
2019-06-05  9:03       ` Björn Töpel
2019-06-03 13:19 ` [RFC PATCH bpf-next 4/4] libbpf: don't remove eBPF resources when other xsks are present Maciej Fijalkowski
2019-06-03 18:26   ` Jonathan Lemon
2019-06-04  8:08   ` Björn Töpel
2019-06-04 15:07     ` Maciej Fijalkowski [this message]
2019-06-05  9:26       ` Björn Töpel
