netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Toke Høiland-Jørgensen" <toke@redhat.com>
To: Stephen Hemminger <stephen@networkplumber.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Alexei Starovoitov <ast@kernel.org>
Cc: "Martin KaFai Lau" <kafai@fb.com>,
	"Song Liu" <songliubraving@fb.com>, "Yonghong Song" <yhs@fb.com>,
	"David Miller" <davem@davemloft.net>,
	"Jesper Dangaard Brouer" <brouer@redhat.com>,
	netdev@vger.kernel.org, bpf@vger.kernel.org,
	"Toke Høiland-Jørgensen" <toke@redhat.com>
Subject: [RFC bpf-next 2/5] libbpf: Add support for auto-pinning of maps with reuse on program load
Date: Tue, 20 Aug 2019 13:47:03 +0200	[thread overview]
Message-ID: <20190820114706.18546-3-toke@redhat.com> (raw)
In-Reply-To: <20190820114706.18546-1-toke@redhat.com>

This adds support to libbpf for automatically pinning maps on program load.
This is needed for porting iproute2 bpf support to libbpf, but is also
useful in other contexts.

The semantics are modelled on those of the same functionality in iproute2,
namely:

- A path can be supplied in bpf_prog_load_attr specifying the directory
  that maps should be pinned into.

- Only maps that specify a non-zero value in their 'pinning' definition
  attribute will be pinned in the automatic mode.

- If an existing pinning is found at the pinning destination, its
  attributes will be compared with the map definition in the object file
  and, if they match, the existing map will be reused instead of creating
  a new map.

A subsequent commit will expand the functionality to enable programs to
support different pinning paths for different values of the map pinning
attribute, similar to what iproute2 does today.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 tools/lib/bpf/libbpf.c | 161 ++++++++++++++++++++++++++++++++++++++++-
 tools/lib/bpf/libbpf.h |   8 ++
 2 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2233f919dd88..6d372a965c9d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -220,6 +220,7 @@ struct bpf_map {
 	size_t sec_offset;
 	int map_ifindex;
 	int inner_map_fd;
+	int pin_reused;
 	struct bpf_map_def def;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
@@ -3994,8 +3995,10 @@ int bpf_map__unpin(struct bpf_map *map, const char *path)
 	return 0;
 }
 
-int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
+int bpf_object__pin_maps2(struct bpf_object *obj, const char *path,
+			  enum bpf_pin_mode mode)
 {
+	int explicit = (mode == BPF_PIN_MODE_EXPLICIT);
 	struct bpf_map *map;
 	int err;
 
@@ -4015,6 +4018,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 		char buf[PATH_MAX];
 		int len;
 
+		if ((explicit && !map->def.pinning) || map->pin_reused)
+			continue;
+
 		len = snprintf(buf, PATH_MAX, "%s/%s", path,
 			       bpf_map__name(map));
 		if (len < 0) {
@@ -4037,6 +4043,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 		char buf[PATH_MAX];
 		int len;
 
+		if ((explicit && !map->def.pinning) || map->pin_reused)
+			continue;
+
 		len = snprintf(buf, PATH_MAX, "%s/%s", path,
 			       bpf_map__name(map));
 		if (len < 0)
@@ -4050,6 +4059,11 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 	return err;
 }
 
+int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
+{
+	return bpf_object__pin_maps2(obj, path, BPF_PIN_MODE_ALL);
+}
+
 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 {
 	struct bpf_map *map;
@@ -4802,6 +4816,141 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 	return bpf_prog_load_xattr(&attr, pobj, prog_fd);
 }
 
+static int bpf_read_map_info(int fd, struct bpf_map_def *map,
+			     enum bpf_prog_type *type)
+{
+	unsigned int val, owner_type = 0;
+	char file[PATH_MAX], buff[4096];
+	FILE *fp;
+
+	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+	memset(map, 0, sizeof(*map));
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		pr_warning("No procfs support?!\n");
+		return -EIO;
+	}
+
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (sscanf(buff, "map_type:\t%u", &val) == 1)
+			map->type = val;
+		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+			map->key_size = val;
+		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+			map->value_size = val;
+		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+			map->max_entries = val;
+		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
+			map->map_flags = val;
+		else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1)
+			owner_type = val;
+	}
+
+	fclose(fp);
+	if (type)
+		*type  = owner_type;
+
+	return 0;
+}
+
+static void bpf_map_pin_report(const struct bpf_map_def *pin,
+			       const struct bpf_map_def *obj)
+{
+	pr_warning("Map specification differs from pinned file!\n");
+
+	if (obj->type != pin->type)
+		pr_warning(" - Type:         %u (obj) != %u (pin)\n",
+			   obj->type, pin->type);
+	if (obj->key_size != pin->key_size)
+		pr_warning(" - Key size:     %u (obj) != %u (pin)\n",
+			   obj->key_size, pin->key_size);
+	if (obj->value_size != pin->value_size)
+		pr_warning(" - Value size:   %u (obj) != %u (pin)\n",
+			   obj->value_size, pin->value_size);
+	if (obj->max_entries != pin->max_entries)
+		pr_warning(" - Max entries:    %u (obj) != %u (pin)\n",
+			   obj->max_entries, pin->max_entries);
+	if (obj->map_flags != pin->map_flags)
+		pr_warning(" - Flags:        %#x (obj) != %#x (pin)\n",
+			   obj->map_flags, pin->map_flags);
+
+	pr_warning("\n");
+}
+
+
+
+static int bpf_map_selfcheck_pinned(int fd, const struct bpf_map_def *map,
+				    int length, enum bpf_prog_type type)
+{
+	enum bpf_prog_type owner_type = 0;
+	struct bpf_map_def tmp, zero = {};
+	int ret;
+
+	ret = bpf_read_map_info(fd, &tmp, &owner_type);
+	if (ret < 0)
+		return ret;
+
+	/* The decision to reject this is on kernel side eventually, but
+	 * at least give the user a chance to know what's wrong.
+	 */
+	if (owner_type && owner_type != type)
+		pr_warning("Program array map owner types differ: %u (obj) != %u (pin)\n",
+			   type, owner_type);
+
+	if (!memcmp(&tmp, map, length)) {
+		return 0;
+	} else {
+		/* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
+		 * so just accept it. We know we do have an eBPF fd and in this
+		 * case, everything is 0. It is guaranteed that no such map exists
+		 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
+		 */
+		if (!memcmp(&tmp, &zero, length))
+			return 0;
+
+		bpf_map_pin_report(&tmp, map);
+		return -EINVAL;
+	}
+}
+
+
+int bpf_probe_pinned(const struct bpf_map *map,
+		     const struct bpf_prog_load_attr *attr)
+{
+	const char *name = bpf_map__name(map);
+	char buf[PATH_MAX];
+	int fd, len, ret;
+
+	if (!attr->auto_pin_path)
+		return -ENOENT;
+
+	len = snprintf(buf, PATH_MAX, "%s/%s", attr->auto_pin_path,
+		       name);
+	if (len < 0)
+		return -EINVAL;
+	else if (len >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	fd = bpf_obj_get(buf);
+	if (fd <= 0)
+		return fd;
+
+	ret = bpf_map_selfcheck_pinned(fd, &map->def,
+				       offsetof(struct bpf_map_def,
+						map_id),
+				       attr->prog_type);
+	if (ret < 0) {
+		close(fd);
+		pr_warning("Map \'%s\' self-check failed!\n", name);
+		return ret;
+	}
+	if (attr->log_level)
+		pr_debug("Map \'%s\' loaded as pinned!\n", name);
+
+	return fd;
+}
+
 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 			struct bpf_object **pobj, int *prog_fd)
 {
@@ -4853,8 +5002,14 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	}
 
 	bpf_object__for_each_map(map, obj) {
+		int fd;
+
 		if (!bpf_map__is_offload_neutral(map))
 			map->map_ifindex = attr->ifindex;
+
+		fd = bpf_probe_pinned(map, attr);
+		if (fd > 0 && !bpf_map__reuse_fd(map, fd))
+			map->pin_reused = 1;
 	}
 
 	if (!first_prog) {
@@ -4869,6 +5024,10 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 		return -EINVAL;
 	}
 
+	if (attr->auto_pin_path)
+		bpf_object__pin_maps2(obj, attr->auto_pin_path,
+				      BPF_PIN_MODE_EXPLICIT);
+
 	*pobj = obj;
 	*prog_fd = bpf_program__fd(first_prog);
 	return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5facba6ea1e1..3c5c3256e22d 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -67,6 +67,11 @@ struct bpf_object_open_attr {
 	enum bpf_prog_type prog_type;
 };
 
+enum bpf_pin_mode {
+	BPF_PIN_MODE_ALL = 0,
+	BPF_PIN_MODE_EXPLICIT,
+};
+
 LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
 LIBBPF_API struct bpf_object *
 bpf_object__open_xattr(struct bpf_object_open_attr *attr);
@@ -79,6 +84,8 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
 			     __u32 *size);
 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
 				__u32 *off);
+LIBBPF_API int bpf_object__pin_maps2(struct bpf_object *obj, const char *path,
+				     enum bpf_pin_mode mode);
 LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
 LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
 				      const char *path);
@@ -353,6 +360,7 @@ struct bpf_prog_load_attr {
 	int ifindex;
 	int log_level;
 	int prog_flags;
+	const char *auto_pin_path;
 };
 
 LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-- 
2.22.1


  parent reply	other threads:[~2019-08-20 11:47 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-20 11:47 [RFC bpf-next 0/5] Convert iproute2 to use libbpf (WIP) Toke Høiland-Jørgensen
2019-08-20 11:47 ` [RFC bpf-next 1/5] libbpf: Add map definition struct fields from iproute2 Toke Høiland-Jørgensen
2019-08-20 11:47 ` Toke Høiland-Jørgensen [this message]
2019-08-20 11:47 ` [RFC bpf-next 3/5] libbpf: Add support for specifying map pinning path via callback Toke Høiland-Jørgensen
2019-08-20 11:47 ` [RFC bpf-next 4/5] iproute2: Allow compiling against libbpf Toke Høiland-Jørgensen
2019-08-22  8:58   ` Daniel Borkmann
2019-08-22 10:43     ` Toke Høiland-Jørgensen
2019-08-22 11:45       ` Daniel Borkmann
2019-08-22 12:04         ` Toke Høiland-Jørgensen
2019-08-22 12:33           ` Daniel Borkmann
2019-08-22 13:38             ` Toke Høiland-Jørgensen
2019-08-22 13:45               ` Daniel Borkmann
2019-08-22 15:28                 ` Toke Høiland-Jørgensen
2019-08-20 11:47 ` [RFC bpf-next 5/5] iproute2: Support loading XDP programs with libbpf Toke Høiland-Jørgensen
2019-08-21 19:26 ` [RFC bpf-next 0/5] Convert iproute2 to use libbpf (WIP) Alexei Starovoitov
2019-08-21 21:00   ` Toke Høiland-Jørgensen
2019-08-22  7:52     ` Andrii Nakryiko
2019-08-22 10:38       ` Toke Høiland-Jørgensen
2019-08-21 20:30 ` Andrii Nakryiko
2019-08-21 21:07   ` Toke Høiland-Jørgensen
2019-08-22  7:49     ` Andrii Nakryiko
2019-08-22  8:33       ` Daniel Borkmann
2019-08-22 11:48         ` Toke Høiland-Jørgensen
2019-08-22 11:49           ` Toke Høiland-Jørgensen
2019-08-23  6:31         ` Andrii Nakryiko
2019-08-23 11:29           ` Toke Høiland-Jørgensen
2019-08-28 20:40             ` Andrii Nakryiko
2020-02-03  7:29               ` Andrii Nakryiko
2020-02-03 19:34                 ` Toke Høiland-Jørgensen
2020-02-04  0:56                   ` Andrii Nakryiko
2020-02-04  1:46                     ` David Ahern
2020-02-04  3:41                       ` Andrii Nakryiko
2020-02-04  4:52                         ` David Ahern
2020-02-04  5:00                           ` Andrii Nakryiko
2020-02-04  8:25                             ` Toke Høiland-Jørgensen
2020-02-04 18:47                               ` Andrii Nakryiko
2020-02-04 19:19                                 ` Toke Høiland-Jørgensen
2020-02-04 19:29                                   ` Andrii Nakryiko
2020-02-04 21:56                                     ` Toke Høiland-Jørgensen
2020-02-04 22:12                                       ` David Ahern
2020-02-04 22:35                                         ` Toke Høiland-Jørgensen
2020-02-04 23:13                                           ` David Ahern
2020-02-05 10:37                                             ` Toke Høiland-Jørgensen
2020-02-04  8:27                     ` Toke Høiland-Jørgensen
2019-08-23 10:27   ` Jesper Dangaard Brouer
2019-08-28 20:23     ` Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190820114706.18546-3-toke@redhat.com \
    --to=toke@redhat.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brouer@redhat.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=kafai@fb.com \
    --cc=netdev@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=stephen@networkplumber.org \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).