bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hou Tao <houtao1@huawei.com>
To: <linux-block@vger.kernel.org>, <bpf@vger.kernel.org>,
	<netdev@vger.kernel.org>, <axboe@kernel.dk>, <ast@kernel.org>
Cc: <hare@suse.com>, <osandov@fb.com>, <ming.lei@redhat.com>,
	<damien.lemoal@wdc.com>, <bvanassche@acm.org>,
	<daniel@iogearbox.net>, <kafai@fb.com>, <songliubraving@fb.com>,
	<yhs@fb.com>
Subject: [RFC PATCH 2/2] selftests/bpf: add test program for redirecting IO completion CPU
Date: Mon, 14 Oct 2019 20:28:33 +0800	[thread overview]
Message-ID: <20191014122833.64908-3-houtao1@huawei.com> (raw)
In-Reply-To: <20191014122833.64908-1-houtao1@huawei.com>

A simple round-robin strategy is implemented to redirect the IO
completion handling to all online CPUs or to a specific CPU set cyclically.

Use the following command to distribute the IO completion of vda
to all online CPUs:

	./test_blkdev_ccpu -d /dev/vda

And use the following command to distribute the IO completion of
nvme0n1 to a specific CPU set:
	./test_blkdev_ccpu -d /dev/nvme0n1 -s 4,8,10-13

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 tools/include/uapi/linux/bpf.h                |   2 +
 tools/lib/bpf/libbpf.c                        |   1 +
 tools/lib/bpf/libbpf_probes.c                 |   1 +
 tools/testing/selftests/bpf/Makefile          |   1 +
 .../selftests/bpf/progs/blkdev_ccpu_rr.c      |  66 +++++
 .../testing/selftests/bpf/test_blkdev_ccpu.c  | 246 ++++++++++++++++++
 6 files changed, 317 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/blkdev_ccpu_rr.c
 create mode 100644 tools/testing/selftests/bpf/test_blkdev_ccpu.c

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 77c6be96d676..36aa35e29be2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -173,6 +173,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+	BPF_PROG_TYPE_BLKDEV,
 };
 
 enum bpf_attach_type {
@@ -199,6 +200,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP6_RECVMSG,
 	BPF_CGROUP_GETSOCKOPT,
 	BPF_CGROUP_SETSOCKOPT,
+	BPF_BLKDEV_IOC_CPU,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e0276520171b..5a849d6d30be 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3579,6 +3579,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+	case BPF_PROG_TYPE_BLKDEV:
 		return false;
 	case BPF_PROG_TYPE_KPROBE:
 	default:
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 4b0b0364f5fc..311e13e778a3 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -102,6 +102,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+	case BPF_PROG_TYPE_BLKDEV:
 	default:
 		break;
 	}
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6889c19a628c..6a36234adfea 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -30,6 +30,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
 	test_btf_dump test_cgroup_attach xdping
+TEST_GEN_PROGS += test_blkdev_ccpu
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
diff --git a/tools/testing/selftests/bpf/progs/blkdev_ccpu_rr.c b/tools/testing/selftests/bpf/progs/blkdev_ccpu_rr.c
new file mode 100644
index 000000000000..6f66d51fe6af
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/blkdev_ccpu_rr.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Hou Tao <houtao1@huawei.com>
+ */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
/* Index to CPU set: single-entry array (key 0) holding the monotonically
 * increasing round-robin cursor read and post-incremented by the program.
 */
struct bpf_map_def SEC("maps") idx_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u32),
	.max_entries = 1,
};
BPF_ANNOTATE_KV_PAIR(idx_map, __u32, __u32);

/* Size of CPU set: single-entry array (key 0) holding the number of valid
 * entries in cpu_map; populated by user space (load_cpu_set).
 */
struct bpf_map_def SEC("maps") cnt_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u32),
	.max_entries = 1,
};
BPF_ANNOTATE_KV_PAIR(cnt_map, __u32, __u32);

/* CPU set: cpu_map[i] is the i-th candidate CPU id.  Only the first
 * cnt_map[0] entries are meaningful; 256 is the demo's capacity limit.
 */
struct bpf_map_def SEC("maps") cpu_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u32),
	.max_entries = 256,
};
BPF_ANNOTATE_KV_PAIR(cpu_map, __u32, __u32);
+
SEC("ccpu_demo")
/*
 * Pick the completion CPU for an IO in round-robin fashion over the CPU
 * set that user space loaded into cpu_map/cnt_map.  Returns the chosen
 * CPU id, or -1 on any map-lookup failure or an empty CPU set.
 * NOTE(review): a negative return presumably means "keep the default
 * completion CPU" -- confirm against the kernel-side BLKDEV hook.
 */
int customized_round_robin_ccpu(void *ctx)
{
	__u32 key = 0;	/* all single-entry maps are indexed with key 0 */
	__u32 *idx_ptr;
	__u32 *cnt_ptr;
	__u32 *cpu_ptr;
	__u32 idx;
	__u32 cnt;

	/* Fetch-and-post-increment the round-robin cursor.
	 * NOTE(review): the increment is not atomic, so concurrent
	 * completions on different CPUs may observe the same index or skip
	 * one; acceptable for a best-effort demo, but worth flagging.
	 */
	idx_ptr = bpf_map_lookup_elem(&idx_map, &key);
	if (!idx_ptr)
		return -1;
	idx = (*idx_ptr)++;

	/* Number of valid entries user space wrote into cpu_map */
	cnt_ptr = bpf_map_lookup_elem(&cnt_map, &key);
	if (!cnt_ptr)
		return -1;
	cnt = *cnt_ptr;
	if (!cnt)
		return -1;	/* empty CPU set: nothing to redirect to */

	/* Wrap the cursor into the set and return the selected CPU id */
	idx %= cnt;
	cpu_ptr = bpf_map_lookup_elem(&cpu_map, &idx);
	if (!cpu_ptr)
		return -1;

	return *cpu_ptr;
}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/test_blkdev_ccpu.c b/tools/testing/selftests/bpf/test_blkdev_ccpu.c
new file mode 100644
index 000000000000..ec5981e7e2ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_blkdev_ccpu.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Hou Tao <houtao1@huawei.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+static int
+print_all_levels(enum libbpf_print_level level,
+		 const char *format, va_list args)
+{
+	return vfprintf(stderr, format, args);
+}
+
/*
 * Deliberate no-op handler.  main() installs it for SIGINT/SIGQUIT so the
 * default "terminate process" action is overridden: the signal then merely
 * interrupts the blocking pause(), letting main() detach the BPF program
 * cleanly before exiting.
 */
static void sig_handler(int num)
{
}
+
/*
 * Parse a CPU set specification into a newly-allocated array of CPU ids.
 *
 * @str:    comma-separated list of CPU ids and inclusive ranges, e.g.
 *          "4,8,10-13"; NULL means "all possible CPUs".
 * @cpus:   out parameter; receives a calloc()ed array owned by the caller.
 * @cpu_nr: out parameter; number of valid entries in @cpus.
 *
 * Returns 0 on success, -1 on allocation failure or a malformed spec.
 *
 * Fixes over the original:
 *  - set[idx++] is now bounded, so an over-long spec (e.g. "0-1000" on a
 *    4-CPU box) can no longer overflow the heap buffer;
 *  - negative ids and ids >= the possible-CPU count are rejected instead
 *    of being stored into the unsigned array;
 *  - empty elements ("4,,5") are rejected instead of silently adding CPU 0.
 */
static int parse_cpu_set(const char *str, const unsigned int **cpus,
	int *cpu_nr)
{
	int total;
	unsigned int *set;
	int idx;
	const char *from;

	total = libbpf_num_possible_cpus();
	if (total <= 0)
		return -1;

	set = calloc(total, sizeof(*set));
	if (!set) {
		printf("Failed to alloc cpuset (cpu nr: %d)\n", total);
		return -1;
	}

	/* No spec given: use every possible CPU */
	if (!str) {
		for (idx = 0; idx < total; idx++)
			set[idx] = idx;
		*cpus = set;
		*cpu_nr = total;

		return 0;
	}

	idx = 0;
	from = str;
	while (1) {
		char *endptr;
		long start;
		long end;

		/* First (or only) number of the element; reject empty
		 * elements and negative CPU ids.
		 */
		start = strtol(from, &endptr, 10);
		if (endptr == from || start < 0)
			goto invalid;

		end = start;
		if (*endptr == '-') {
			/* "start-end" range, inclusive */
			from = endptr + 1;
			end = strtol(from, &endptr, 10);
			if (endptr == from || end < start)
				goto invalid;
		}

		/* Each element must end at a separator or at end of spec */
		if (*endptr != ',' && *endptr != '\0')
			goto invalid;

		/* An id beyond the possible-CPU count can never be valid */
		if (end >= total)
			goto invalid;

		for (; start <= end; start++) {
			if (idx >= total) {
				printf("too many cpus in spec '%s' (max %d)\n",
						str, total);
				goto free_set;
			}
			printf("add cpu %d\n", (int)start);
			set[idx++] = start;
		}

		if (*endptr == '\0')
			break;
		from = endptr + 1;
	}

	*cpus = set;
	*cpu_nr = idx;

	return 0;

invalid:
	printf("invalid cpu set spec '%s'\n", str);
free_set:
	free(set);
	return -1;
}
+
/*
 * Resolve the map called @name inside @obj and return its fd.
 * Returns a negative value (with a diagnostic on stdout) on failure.
 */
static int find_map_fd(struct bpf_object *obj, const char *name)
{
	struct bpf_map *map;
	int fd;

	map = bpf_object__find_map_by_name(obj, name);
	if (!map) {
		printf("no map %s\n", name);
		return -1;
	}

	fd = bpf_map__fd(map);
	if (fd < 0)
		printf("invalid fd for map %s\n", name);

	return fd;
}

/*
 * Publish the parsed CPU set to the BPF program: cpu_map[0..cnt-1] holds
 * the CPU ids and cnt_map[0] holds their count.
 *
 * @obj:  loaded BPF object containing the "cpu_map" and "cnt_map" maps.
 * @cpus: array of @cnt CPU ids (owned by the caller).
 * @cnt:  number of entries in @cpus.
 *
 * Returns 0 on success, -1 on lookup/update failure.  The duplicated
 * lookup logic of the original was factored into find_map_fd(), and the
 * printf specifiers now match the signed types of idx/cnt.
 */
static int load_cpu_set(struct bpf_object *obj, const unsigned int *cpus,
	int cnt)
{
	int fd;
	int idx;

	fd = find_map_fd(obj, "cpu_map");
	if (fd < 0)
		return -1;

	for (idx = 0; idx < cnt; idx++) {
		if (bpf_map_update_elem(fd, &idx, &cpus[idx], 0)) {
			printf("cpu_map[%d] = %u error %s\n",
					idx, cpus[idx], strerror(errno));
			return -1;
		}
		printf("cpu_map[%d] = %u\n", idx, cpus[idx]);
	}

	fd = find_map_fd(obj, "cnt_map");
	if (fd < 0)
		return -1;

	/* cnt_map has a single entry at key 0 */
	idx = 0;
	if (bpf_map_update_elem(fd, &idx, &cnt, 0)) {
		printf("cnt_map[%d] = %d error %s\n",
				idx, cnt, strerror(errno));
		return -1;
	}
	printf("cnt_map[%d] = %d\n", idx, cnt);

	return 0;
}
+
/* Print command-line usage on stdout and terminate with failure status. */
static void usage(const char *cmd)
{
	printf("Usage: %s -d blk_device [-s cpu_set]\n", cmd);
	printf("  round-robin all CPUs: %s -d /dev/sda\n", cmd);
	printf("  round-robin specific CPUs: %s -d /dev/sda -s 4-7,12-15\n",
			cmd);
	exit(1);
}
+
+int main(int argc, char **argv)
+{
+	int opt;
+	const char *prog = "./blkdev_ccpu_rr.o";
+	const char *bdev;
+	const char *cpu_set_str = NULL;
+	const unsigned int *cpus;
+	int cpu_nr;
+	struct bpf_object *obj;
+	int prog_fd;
+	int bdev_fd;
+
+	while ((opt = getopt(argc, argv, "d:s:h")) != -1) {
+		switch (opt) {
+		case 'd':
+			bdev = optarg;
+			break;
+		case 's':
+			cpu_set_str = optarg;
+			break;
+		case 'h':
+			usage(argv[0]);
+			break;
+		}
+	}
+
+	if (!bdev)
+		usage(argv[0]);
+
+	printf("blk device %s, cpu set %s\n", bdev, cpu_set_str);
+
+	signal(SIGINT, sig_handler);
+	signal(SIGQUIT, sig_handler);
+
+	libbpf_set_print(print_all_levels);
+
+	if (parse_cpu_set(cpu_set_str, &cpus, &cpu_nr))
+		goto out;
+
+	if (bpf_prog_load(prog, BPF_PROG_TYPE_BLKDEV, &obj, &prog_fd)) {
+		printf("Failed to load %s\n", prog);
+		goto out;
+	}
+
+	if (load_cpu_set(obj, cpus, cpu_nr))
+		goto out;
+
+	bdev_fd = open(bdev, O_RDWR);
+	if (bdev_fd < 0) {
+		printf("Failed to open %s %s\n", bdev, strerror(errno));
+		goto out;
+	}
+
+	/* Attach bpf program */
+	if (bpf_prog_attach(prog_fd, bdev_fd, BPF_BLKDEV_IOC_CPU, 0)) {
+		printf("Failed to attach %s %s\n", prog, strerror(errno));
+		goto out;
+	}
+
+	printf("Attached, use Ctrl-C to detach\n\n");
+
+	pause();
+
+	if (bpf_prog_detach(bdev_fd, BPF_BLKDEV_IOC_CPU)) {
+		printf("Failed to detach %s %s\n", prog, strerror(errno));
+		goto out;
+	}
+
+	return 0;
+out:
+	return 1;
+}
-- 
2.22.0


  parent reply	other threads:[~2019-10-14 12:21 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-14 12:28 [RFC PATCH 0/2] block: use eBPF to redirect IO completion Hou Tao
2019-10-14 12:28 ` [RFC PATCH 1/2] block: add support for redirecting IO completion through eBPF Hou Tao
2019-10-15 21:04   ` Alexei Starovoitov
2019-10-16  7:05     ` Hannes Reinecke
2019-10-21 13:42     ` Hou Tao
2019-10-21 13:48       ` Bart Van Assche
2019-10-21 14:45         ` Jens Axboe
2019-10-14 12:28 ` Hou Tao [this message]
2019-10-15  1:20 ` [RFC PATCH 0/2] block: use eBPF to redirect IO completion Bob Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191014122833.64908-3-houtao1@huawei.com \
    --to=houtao1@huawei.com \
    --cc=ast@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=bpf@vger.kernel.org \
    --cc=bvanassche@acm.org \
    --cc=damien.lemoal@wdc.com \
    --cc=daniel@iogearbox.net \
    --cc=hare@suse.com \
    --cc=kafai@fb.com \
    --cc=linux-block@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=osandov@fb.com \
    --cc=songliubraving@fb.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).