* [PATCH net-next 0/6] bpf: LRU performance and test-program improvements
@ 2017-04-14 17:30 Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 1/6] bpf: lru: Add test_lru_sanity6 for BPF_F_NO_COMMON_LRU Martin KaFai Lau
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

The first 4 patches make a few improvements to the LRU tests.

Patch 5/6 improves the performance of the BPF_F_NO_COMMON_LRU map.

Patch 6/6 adds an example of using an LRU map with map-in-map.

Martin KaFai Lau (6):
  bpf: lru: Add test_lru_sanity6 for BPF_F_NO_COMMON_LRU
  bpf: lru: Cleanup test_lru_map.c
  bpf: lru: Refactor LRU map tests in map_perf_test
  bpf: Allow bpf sample programs (*_user.c) to change bpf_map_def
  bpf: lru: Lower the PERCPU_NR_SCANS from 16 to 4
  bpf: lru: Add map-in-map LRU example

 kernel/bpf/bpf_lru_list.c                  |   2 +-
 samples/bpf/bpf_load.c                     | 114 ++++++++++---
 samples/bpf/bpf_load.h                     |  13 ++
 samples/bpf/map_perf_test_kern.c           |  75 +++++++--
 samples/bpf/map_perf_test_user.c           | 247 +++++++++++++++++++++--------
 tools/testing/selftests/bpf/test_lru_map.c | 104 ++++++++----
 6 files changed, 428 insertions(+), 127 deletions(-)

-- 
2.9.3

* [PATCH net-next 1/6] bpf: lru: Add test_lru_sanity6 for BPF_F_NO_COMMON_LRU
  2017-04-14 17:30 [PATCH net-next 0/6] bpf: LRU performance and test-program improvements Martin KaFai Lau
@ 2017-04-14 17:30 ` Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 2/6] bpf: lru: Cleanup test_lru_map.c Martin KaFai Lau
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

test_lru_sanity3 is not applicable to BPF_F_NO_COMMON_LRU.
It just happens to work when PERCPU_FREE_TARGET == 16.

This patch:
1) Disable test_lru_sanity3 for BPF_F_NO_COMMON_LRU
2) Add test_lru_sanity6 to test list rotation for
   the BPF_F_NO_COMMON_LRU map.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_lru_map.c | 70 +++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 00b0aff56e2e..04b2242937cc 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -387,6 +387,10 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	unsigned int map_size;
 	int next_cpu = 0;
 
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		/* This test is only applicable to common LRU list */
+		return;
+
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
 	       map_flags);
 
@@ -396,11 +400,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	assert(batch_size * 2 == tgt_free);
 
 	map_size = tgt_free * 2;
-	if (map_flags & BPF_F_NO_COMMON_LRU)
-		lru_map_fd = create_map(map_type, map_flags,
-					map_size * nr_cpus);
-	else
-		lru_map_fd = create_map(map_type, map_flags, map_size);
+	lru_map_fd = create_map(map_type, map_flags, map_size);
 	assert(lru_map_fd != -1);
 
 	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
@@ -566,6 +566,65 @@ static void test_lru_sanity5(int map_type, int map_flags)
 	printf("Pass\n");
 }
 
+/* Test list rotation for BPF_F_NO_COMMON_LRU map */
+static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
+{
+	int lru_map_fd, expected_map_fd;
+	unsigned long long key, value[nr_cpus];
+	unsigned int map_size = tgt_free * 2;
+	int next_cpu = 0;
+
+	if (!(map_flags & BPF_F_NO_COMMON_LRU))
+		return;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+	assert(expected_map_fd != -1);
+
+	lru_map_fd = create_map(map_type, map_flags, map_size * nr_cpus);
+	assert(lru_map_fd != -1);
+
+	value[0] = 1234;
+
+	for (key = 1; key <= tgt_free; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= tgt_free * 2; key++) {
+		unsigned long long stable_key;
+
+		/* Make ref bit sticky for key: [1, tgt_free] */
+		for (stable_key = 1; stable_key <= tgt_free; stable_key++) {
+			/* Mark the ref bit */
+			assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key,
+						    value));
+		}
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= tgt_free * 3; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -593,6 +652,7 @@ int main(int argc, char **argv)
 			test_lru_sanity3(map_types[t], map_flags[f], tgt_free);
 			test_lru_sanity4(map_types[t], map_flags[f], tgt_free);
 			test_lru_sanity5(map_types[t], map_flags[f]);
+			test_lru_sanity6(map_types[t], map_flags[f], tgt_free);
 
 			printf("\n");
 		}
-- 
2.9.3

* [PATCH net-next 2/6] bpf: lru: Cleanup test_lru_map.c
  2017-04-14 17:30 [PATCH net-next 0/6] bpf: LRU performance and test-program improvements Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 1/6] bpf: lru: Add test_lru_sanity6 for BPF_F_NO_COMMON_LRU Martin KaFai Lau
@ 2017-04-14 17:30 ` Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 3/6] bpf: lru: Refactor LRU map tests in map_perf_test Martin KaFai Lau
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

This patch does the following cleanup on test_lru_map.c:
1) Fix indentation (replace spaces with tabs)
2) Remove a redundant BPF_F_NO_COMMON_LRU test
3) Simplify some comments

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_lru_map.c | 32 +++++++++---------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 04b2242937cc..87c05e5bdfdd 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -191,12 +191,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	int next_cpu = 0;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
-		/* Ther percpu lru list (i.e each cpu has its own LRU
-		 * list) does not have a local free list.  Hence,
-		 * it will only free old nodes till there is no free
-		 * from the LRU list.  Hence, this test does not apply
-		 * to BPF_F_NO_COMMON_LRU
-		 */
+		/* This test is only applicable to common LRU list */
 		return;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
@@ -227,7 +222,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	for (key = 1; key < end_key; key++) {
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	/* Insert 1+tgt_free to 2*tgt_free
@@ -273,12 +268,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	int next_cpu = 0;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
-		/* Ther percpu lru list (i.e each cpu has its own LRU
-		 * list) does not have a local free list.  Hence,
-		 * it will only free old nodes till there is no free
-		 * from the LRU list.  Hence, this test does not apply
-		 * to BPF_F_NO_COMMON_LRU
-		 */
+		/* This test is only applicable to common LRU list */
 		return;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
@@ -290,11 +280,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	assert(batch_size * 2 == tgt_free);
 
 	map_size = tgt_free + batch_size;
-	if (map_flags & BPF_F_NO_COMMON_LRU)
-		lru_map_fd = create_map(map_type, map_flags,
-					map_size * nr_cpus);
-	else
-		lru_map_fd = create_map(map_type, map_flags, map_size);
+	lru_map_fd = create_map(map_type, map_flags, map_size);
 	assert(lru_map_fd != -1);
 
 	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
@@ -341,7 +327,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(value[0] == 4321);
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	value[0] = 1234;
@@ -361,7 +347,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -419,7 +405,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	for (key = 1; key < end_key; key++) {
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	/* Add 1+2*tgt_free to tgt_free*5/2
@@ -431,7 +417,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -491,7 +477,7 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
-- 
2.9.3

* [PATCH net-next 3/6] bpf: lru: Refactor LRU map tests in map_perf_test
  2017-04-14 17:30 [PATCH net-next 0/6] bpf: LRU performance and test-program improvements Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 1/6] bpf: lru: Add test_lru_sanity6 for BPF_F_NO_COMMON_LRU Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 2/6] bpf: lru: Cleanup test_lru_map.c Martin KaFai Lau
@ 2017-04-14 17:30 ` Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 4/6] bpf: Allow bpf sample programs (*_user.c) to change bpf_map_def Martin KaFai Lau
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

One more LRU test will be added later in this patch series.
In preparation, this patch moves all existing LRU map tests
under a single syscall (connect) so that the future LRU test
can be added without hunting for another syscall.

One of the map names is also changed from percpu_lru_hash_map
to nocommon_lru_hash_map to avoid confusion with percpu_hash_map.
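
The trigger convention can be sketched as follows (distilled from
the diff below, not extra code in the patch): the user program
connect()s on an invalid fd to an IPv6 address dead:beef::<test_case>,
so the syscall fails with EBADF, but the kprobe on sys_connect still
fires and selects the LRU map from the last 16 bits of the address:

	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };

	in6.sin6_addr.s6_addr16[0] = 0xdead;
	in6.sin6_addr.s6_addr16[1] = 0xbeef;
	/* 0: lru_hash_map, 1: nocommon_lru_hash_map */
	in6.sin6_addr.s6_addr16[7] = 0;
	assert(connect(-1, (const struct sockaddr *)&in6,
		       sizeof(in6)) == -1 && errno == EBADF);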

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/map_perf_test_kern.c | 43 +++++++++++++++++++++++---------
 samples/bpf/map_perf_test_user.c | 53 +++++++++++++++++++++++++++-------------
 2 files changed, 67 insertions(+), 29 deletions(-)

diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 9da2a3441b0a..404ed53b8a53 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -26,7 +26,7 @@ struct bpf_map_def SEC("maps") lru_hash_map = {
 	.max_entries = 10000,
 };
 
-struct bpf_map_def SEC("maps") percpu_lru_hash_map = {
+struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
 	.type = BPF_MAP_TYPE_LRU_HASH,
 	.key_size = sizeof(u32),
 	.value_size = sizeof(long),
@@ -100,6 +100,7 @@ int stress_percpu_hmap(struct pt_regs *ctx)
 		bpf_map_delete_elem(&percpu_hash_map, &key);
 	return 0;
 }
+
 SEC("kprobe/sys_getgid")
 int stress_hmap_alloc(struct pt_regs *ctx)
 {
@@ -128,24 +129,42 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx)
 	return 0;
 }
 
-SEC("kprobe/sys_getpid")
+SEC("kprobe/sys_connect")
 int stress_lru_hmap_alloc(struct pt_regs *ctx)
 {
-	u32 key = bpf_get_prandom_u32();
+	struct sockaddr_in6 *in6;
+	u16 test_case, dst6[8];
+	int addrlen, ret;
+	char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n";
 	long val = 1;
+	u32 key = bpf_get_prandom_u32();
 
-	bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
+	in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
+	addrlen = (int)PT_REGS_PARM3(ctx);
 
-	return 0;
-}
+	if (addrlen != sizeof(*in6))
+		return 0;
 
-SEC("kprobe/sys_getppid")
-int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx)
-{
-	u32 key = bpf_get_prandom_u32();
-	long val = 1;
+	ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+	if (ret)
+		goto done;
+
+	if (dst6[0] != 0xdead || dst6[1] != 0xbeef)
+		return 0;
+
+	test_case = dst6[7];
+
+	if (test_case == 0)
+		ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
+	else if (test_case == 1)
+		ret = bpf_map_update_elem(&nocommon_lru_hash_map, &key, &val,
+					  BPF_ANY);
+	else
+		ret = -EINVAL;
 
-	bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY);
+done:
+	if (ret)
+		bpf_trace_printk(fmt, sizeof(fmt), ret);
 
 	return 0;
 }
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index e29ff318a793..51cb8f238aa2 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -18,6 +18,9 @@
 #include <string.h>
 #include <time.h>
 #include <sys/resource.h>
+#include <arpa/inet.h>
+#include <errno.h>
+
 #include "libbpf.h"
 #include "bpf_load.h"
 
@@ -36,7 +39,7 @@ static __u64 time_get_ns(void)
 #define HASH_KMALLOC		(1 << 2)
 #define PERCPU_HASH_KMALLOC	(1 << 3)
 #define LRU_HASH_PREALLOC	(1 << 4)
-#define PERCPU_LRU_HASH_PREALLOC	(1 << 5)
+#define NOCOMMON_LRU_HASH_PREALLOC	(1 << 5)
 #define LPM_KMALLOC		(1 << 6)
 #define HASH_LOOKUP		(1 << 7)
 #define ARRAY_LOOKUP		(1 << 8)
@@ -55,28 +58,44 @@ static void test_hash_prealloc(int cpu)
 	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 }
 
-static void test_lru_hash_prealloc(int cpu)
+static void do_test_lru(int lru_test_flag, int cpu)
 {
+	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
+	const char *test_name;
 	__u64 start_time;
-	int i;
+	int i, ret;
+
+	in6.sin6_addr.s6_addr16[0] = 0xdead;
+	in6.sin6_addr.s6_addr16[1] = 0xbeef;
+
+	if (lru_test_flag & LRU_HASH_PREALLOC) {
+		test_name = "lru_hash_map_perf";
+		in6.sin6_addr.s6_addr16[7] = 0;
+	} else if (lru_test_flag & NOCOMMON_LRU_HASH_PREALLOC) {
+		test_name = "nocommon_lru_hash_map_perf";
+		in6.sin6_addr.s6_addr16[7] = 1;
+	} else {
+		assert(0);
+	}
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
-		syscall(__NR_getpid);
-	printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	for (i = 0; i < MAX_CNT; i++) {
+		ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
+		assert(ret == -1 && errno == EBADF);
+	}
+	printf("%d:%s pre-alloc %lld events per sec\n",
+	       cpu, test_name,
+	       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 }
 
-static void test_percpu_lru_hash_prealloc(int cpu)
+static void test_lru_hash_prealloc(int cpu)
 {
-	__u64 start_time;
-	int i;
+	do_test_lru(LRU_HASH_PREALLOC, cpu);
+}
 
-	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
-		syscall(__NR_getppid);
-	printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+static void test_nocommon_lru_hash_prealloc(int cpu)
+{
+	do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu);
 }
 
 static void test_percpu_hash_prealloc(int cpu)
@@ -174,8 +193,8 @@ static void loop(int cpu)
 	if (test_flags & LRU_HASH_PREALLOC)
 		test_lru_hash_prealloc(cpu);
 
-	if (test_flags & PERCPU_LRU_HASH_PREALLOC)
-		test_percpu_lru_hash_prealloc(cpu);
+	if (test_flags & NOCOMMON_LRU_HASH_PREALLOC)
+		test_nocommon_lru_hash_prealloc(cpu);
 
 	if (test_flags & LPM_KMALLOC)
 		test_lpm_kmalloc(cpu);
-- 
2.9.3

* [PATCH net-next 4/6] bpf: Allow bpf sample programs (*_user.c) to change bpf_map_def
  2017-04-14 17:30 [PATCH net-next 0/6] bpf: LRU performance and test-program improvements Martin KaFai Lau
                   ` (2 preceding siblings ...)
  2017-04-14 17:30 ` [PATCH net-next 3/6] bpf: lru: Refactor LRU map tests in map_perf_test Martin KaFai Lau
@ 2017-04-14 17:30 ` Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 5/6] bpf: lru: Lower the PERCPU_NR_SCANS from 16 to 4 Martin KaFai Lau
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

The current bpf_map_def is statically defined at compile
time.  This patch allows the *_user.c program to change it at
runtime.  It is done by adding load_bpf_file_fixup_map() which
takes a callback.  The callback will be called before creating
each map so that it has a chance to modify the bpf_map_def.

The current use case is to change max_entries in map_perf_test.
It is interesting to test with a much bigger map size in
some cases (e.g. the following patch on bpf_lru_list.c).
However, it is hard to find one size that fits all testing
environments.  Hence, it is handy to take the max_entries
as a cmdline arg and then configure the bpf_map_def at runtime.

This patch adds two cmdline args.  One configures the map's
max_entries.  The other configures max_cnt, which controls
how many times a syscall is called.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/bpf_load.c           | 114 +++++++++++++++++++++++++-----
 samples/bpf/bpf_load.h           |  13 ++++
 samples/bpf/map_perf_test_user.c | 148 ++++++++++++++++++++++++---------------
 3 files changed, 201 insertions(+), 74 deletions(-)

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index dcdce1270d38..0d449d8032d1 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -21,6 +21,7 @@
 #include <sys/mman.h>
 #include <poll.h>
 #include <ctype.h>
+#include <assert.h>
 #include "libbpf.h"
 #include "bpf_load.h"
 #include "perf-sys.h"
@@ -37,15 +38,6 @@ int event_fd[MAX_PROGS];
 int prog_cnt;
 int prog_array_fd = -1;
 
-struct bpf_map_def {
-	unsigned int type;
-	unsigned int key_size;
-	unsigned int value_size;
-	unsigned int max_entries;
-	unsigned int map_flags;
-	unsigned int inner_map_idx;
-};
-
 static int populate_prog_array(const char *event, int prog_fd)
 {
 	int ind = atoi(event), err;
@@ -193,11 +185,14 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	return 0;
 }
 
-static int load_maps(struct bpf_map_def *maps, int len)
+static int load_maps(struct bpf_map_def *maps, int len,
+		     const char **map_names, fixup_map_cb fixup_map)
 {
 	int i;
 
 	for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
+		if (fixup_map)
+			fixup_map(&maps[i], map_names[i], i);
 
 		if (maps[i].type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 		    maps[i].type == BPF_MAP_TYPE_HASH_OF_MAPS) {
@@ -280,14 +275,64 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
 	return 0;
 }
 
-int load_bpf_file(char *path)
+static int cmp_symbols(const void *l, const void *r)
+{
+	const GElf_Sym *lsym = (const GElf_Sym *)l;
+	const GElf_Sym *rsym = (const GElf_Sym *)r;
+
+	if (lsym->st_value < rsym->st_value)
+		return -1;
+	else if (lsym->st_value > rsym->st_value)
+		return 1;
+	else
+		return 0;
+}
+
+static int get_sorted_map_names(Elf *elf, Elf_Data *symbols, int maps_shndx,
+				int strtabidx, char **map_names)
+{
+	GElf_Sym map_symbols[MAX_MAPS];
+	int i, nr_maps = 0;
+
+	for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+		assert(nr_maps < MAX_MAPS);
+		if (!gelf_getsym(symbols, i, &map_symbols[nr_maps]))
+			continue;
+		if (map_symbols[nr_maps].st_shndx != maps_shndx)
+			continue;
+		nr_maps++;
+	}
+
+	qsort(map_symbols, nr_maps, sizeof(GElf_Sym), cmp_symbols);
+
+	for (i = 0; i < nr_maps; i++) {
+		char *map_name;
+
+		map_name = elf_strptr(elf, strtabidx, map_symbols[i].st_name);
+		if (!map_name) {
+			printf("cannot get map symbol\n");
+			return 1;
+		}
+
+		map_names[i] = strdup(map_name);
+		if (!map_names[i]) {
+			printf("strdup(%s): %s(%d)\n", map_name,
+			       strerror(errno), errno);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 {
-	int fd, i;
+	int fd, i, ret, maps_shndx = -1, strtabidx = -1;
 	Elf *elf;
 	GElf_Ehdr ehdr;
 	GElf_Shdr shdr, shdr_prog;
-	Elf_Data *data, *data_prog, *symbols = NULL;
-	char *shname, *shname_prog;
+	Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
+	char *shname, *shname_prog, *map_names[MAX_MAPS] = { NULL };
 
 	/* reset global variables */
 	kern_version = 0;
@@ -335,14 +380,33 @@ int load_bpf_file(char *path)
 			}
 			memcpy(&kern_version, data->d_buf, sizeof(int));
 		} else if (strcmp(shname, "maps") == 0) {
-			processed_sec[i] = true;
-			if (load_maps(data->d_buf, data->d_size))
-				return 1;
+			maps_shndx = i;
+			data_maps = data;
 		} else if (shdr.sh_type == SHT_SYMTAB) {
+			strtabidx = shdr.sh_link;
 			symbols = data;
 		}
 	}
 
+	ret = 1;
+
+	if (!symbols) {
+		printf("missing SHT_SYMTAB section\n");
+		goto done;
+	}
+
+	if (data_maps) {
+		if (get_sorted_map_names(elf, symbols, maps_shndx, strtabidx,
+					 map_names))
+			goto done;
+
+		if (load_maps(data_maps->d_buf, data_maps->d_size,
+			      (const char **)map_names, fixup_map))
+			goto done;
+
+		processed_sec[maps_shndx] = true;
+	}
+
 	/* load programs that need map fixup (relocations) */
 	for (i = 1; i < ehdr.e_shnum; i++) {
 		if (processed_sec[i])
@@ -399,8 +463,22 @@ int load_bpf_file(char *path)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
 
+	ret = 0;
+done:
+	for (i = 0; i < MAX_MAPS; i++)
+		free(map_names[i]);
 	close(fd);
-	return 0;
+	return ret;
+}
+
+int load_bpf_file(char *path)
+{
+	return do_load_bpf_file(path, NULL);
+}
+
+int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
+{
+	return do_load_bpf_file(path, fixup_map);
 }
 
 void read_trace_pipe(void)
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index c827827299b3..68f6b2d22507 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -6,6 +6,18 @@
 #define MAX_MAPS 32
 #define MAX_PROGS 32
 
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+	unsigned int inner_map_idx;
+};
+
+typedef void (*fixup_map_cb)(struct bpf_map_def *map, const char *map_name,
+			     int idx);
+
 extern int map_fd[MAX_MAPS];
 extern int prog_fd[MAX_PROGS];
 extern int event_fd[MAX_PROGS];
@@ -25,6 +37,7 @@ extern int prog_cnt;
  * returns zero on success
  */
 int load_bpf_file(char *path);
+int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
 
 void read_trace_pipe(void);
 struct ksym {
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 51cb8f238aa2..2a12f48b5c6d 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -24,7 +24,7 @@
 #include "libbpf.h"
 #include "bpf_load.h"
 
-#define MAX_CNT 1000000
+#define TEST_BIT(t) (1U << (t))
 
 static __u64 time_get_ns(void)
 {
@@ -34,17 +34,39 @@ static __u64 time_get_ns(void)
 	return ts.tv_sec * 1000000000ull + ts.tv_nsec;
 }
 
-#define HASH_PREALLOC		(1 << 0)
-#define PERCPU_HASH_PREALLOC	(1 << 1)
-#define HASH_KMALLOC		(1 << 2)
-#define PERCPU_HASH_KMALLOC	(1 << 3)
-#define LRU_HASH_PREALLOC	(1 << 4)
-#define NOCOMMON_LRU_HASH_PREALLOC	(1 << 5)
-#define LPM_KMALLOC		(1 << 6)
-#define HASH_LOOKUP		(1 << 7)
-#define ARRAY_LOOKUP		(1 << 8)
+enum test_type {
+	HASH_PREALLOC,
+	PERCPU_HASH_PREALLOC,
+	HASH_KMALLOC,
+	PERCPU_HASH_KMALLOC,
+	LRU_HASH_PREALLOC,
+	NOCOMMON_LRU_HASH_PREALLOC,
+	LPM_KMALLOC,
+	HASH_LOOKUP,
+	ARRAY_LOOKUP,
+	NR_TESTS,
+};
+
+const char *test_map_names[NR_TESTS] = {
+	[HASH_PREALLOC] = "hash_map",
+	[PERCPU_HASH_PREALLOC] = "percpu_hash_map",
+	[HASH_KMALLOC] = "hash_map_alloc",
+	[PERCPU_HASH_KMALLOC] = "percpu_hash_map_alloc",
+	[LRU_HASH_PREALLOC] = "lru_hash_map",
+	[NOCOMMON_LRU_HASH_PREALLOC] = "nocommon_lru_hash_map",
+	[LPM_KMALLOC] = "lpm_trie_map_alloc",
+	[HASH_LOOKUP] = "hash_map",
+	[ARRAY_LOOKUP] = "array_map",
+};
 
 static int test_flags = ~0;
+static uint32_t num_map_entries;
+static uint32_t max_cnt = 1000000;
+
+static int check_test_flags(enum test_type t)
+{
+	return test_flags & TEST_BIT(t);
+}
 
 static void test_hash_prealloc(int cpu)
 {
@@ -52,13 +74,13 @@ static void test_hash_prealloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getuid);
 	printf("%d:hash_map_perf pre-alloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
-static void do_test_lru(int lru_test_flag, int cpu)
+static void do_test_lru(enum test_type test, int cpu)
 {
 	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
 	const char *test_name;
@@ -68,10 +90,10 @@ static void do_test_lru(int lru_test_flag, int cpu)
 	in6.sin6_addr.s6_addr16[0] = 0xdead;
 	in6.sin6_addr.s6_addr16[1] = 0xbeef;
 
-	if (lru_test_flag & LRU_HASH_PREALLOC) {
+	if (test == LRU_HASH_PREALLOC) {
 		test_name = "lru_hash_map_perf";
 		in6.sin6_addr.s6_addr16[7] = 0;
-	} else if (lru_test_flag & NOCOMMON_LRU_HASH_PREALLOC) {
+	} else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
 		test_name = "nocommon_lru_hash_map_perf";
 		in6.sin6_addr.s6_addr16[7] = 1;
 	} else {
@@ -79,13 +101,13 @@ static void do_test_lru(int lru_test_flag, int cpu)
 	}
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++) {
+	for (i = 0; i < max_cnt; i++) {
 		ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
 		assert(ret == -1 && errno == EBADF);
 	}
 	printf("%d:%s pre-alloc %lld events per sec\n",
 	       cpu, test_name,
-	       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_lru_hash_prealloc(int cpu)
@@ -104,10 +126,10 @@ static void test_percpu_hash_prealloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_geteuid);
 	printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_hash_kmalloc(int cpu)
@@ -116,10 +138,10 @@ static void test_hash_kmalloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getgid);
 	printf("%d:hash_map_perf kmalloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_percpu_hash_kmalloc(int cpu)
@@ -128,10 +150,10 @@ static void test_percpu_hash_kmalloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getegid);
 	printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_lpm_kmalloc(int cpu)
@@ -140,10 +162,10 @@ static void test_lpm_kmalloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_gettid);
 	printf("%d:lpm_perf kmalloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_hash_lookup(int cpu)
@@ -152,10 +174,10 @@ static void test_hash_lookup(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getpgid, 0);
 	printf("%d:hash_lookup %lld lookups per sec\n",
-	       cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
 }
 
 static void test_array_lookup(int cpu)
@@ -164,46 +186,38 @@ static void test_array_lookup(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getpgrp, 0);
 	printf("%d:array_lookup %lld lookups per sec\n",
-	       cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
 }
 
+typedef void (*test_func)(int cpu);
+const test_func test_funcs[] = {
+	[HASH_PREALLOC] = test_hash_prealloc,
+	[PERCPU_HASH_PREALLOC] = test_percpu_hash_prealloc,
+	[HASH_KMALLOC] = test_hash_kmalloc,
+	[PERCPU_HASH_KMALLOC] = test_percpu_hash_kmalloc,
+	[LRU_HASH_PREALLOC] = test_lru_hash_prealloc,
+	[NOCOMMON_LRU_HASH_PREALLOC] = test_nocommon_lru_hash_prealloc,
+	[LPM_KMALLOC] = test_lpm_kmalloc,
+	[HASH_LOOKUP] = test_hash_lookup,
+	[ARRAY_LOOKUP] = test_array_lookup,
+};
+
 static void loop(int cpu)
 {
 	cpu_set_t cpuset;
+	int i;
 
 	CPU_ZERO(&cpuset);
 	CPU_SET(cpu, &cpuset);
 	sched_setaffinity(0, sizeof(cpuset), &cpuset);
 
-	if (test_flags & HASH_PREALLOC)
-		test_hash_prealloc(cpu);
-
-	if (test_flags & PERCPU_HASH_PREALLOC)
-		test_percpu_hash_prealloc(cpu);
-
-	if (test_flags & HASH_KMALLOC)
-		test_hash_kmalloc(cpu);
-
-	if (test_flags & PERCPU_HASH_KMALLOC)
-		test_percpu_hash_kmalloc(cpu);
-
-	if (test_flags & LRU_HASH_PREALLOC)
-		test_lru_hash_prealloc(cpu);
-
-	if (test_flags & NOCOMMON_LRU_HASH_PREALLOC)
-		test_nocommon_lru_hash_prealloc(cpu);
-
-	if (test_flags & LPM_KMALLOC)
-		test_lpm_kmalloc(cpu);
-
-	if (test_flags & HASH_LOOKUP)
-		test_hash_lookup(cpu);
-
-	if (test_flags & ARRAY_LOOKUP)
-		test_array_lookup(cpu);
+	for (i = 0; i < NR_TESTS; i++) {
+		if (check_test_flags(i))
+			test_funcs[i](cpu);
+	}
 }
 
 static void run_perf_test(int tasks)
@@ -260,6 +274,22 @@ static void fill_lpm_trie(void)
 	assert(!r);
 }
 
+static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
+{
+	int i;
+
+	if (num_map_entries <= 0)
+		return;
+
+	/* Only change the max_entries for the enabled test(s) */
+	for (i = 0; i < NR_TESTS; i++) {
+		if (!strcmp(test_map_names[i], name) &&
+		    (check_test_flags(i))) {
+			map->max_entries = num_map_entries;
+		}
+	}
+}
+
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -275,7 +305,13 @@ int main(int argc, char **argv)
 	if (argc > 2)
 		num_cpu = atoi(argv[2]) ? : num_cpu;
 
-	if (load_bpf_file(filename)) {
+	if (argc > 3)
+		num_map_entries = atoi(argv[3]);
+
+	if (argc > 4)
+		max_cnt = atoi(argv[4]);
+
+	if (load_bpf_file_fixup_map(filename, fixup_map)) {
 		printf("%s", bpf_log_buf);
 		return 1;
 	}
-- 
2.9.3

* [PATCH net-next 5/6] bpf: lru: Lower the PERCPU_NR_SCANS from 16 to 4
  2017-04-14 17:30 [PATCH net-next 0/6] bpf: LRU performance and test-program improvements Martin KaFai Lau
                   ` (3 preceding siblings ...)
  2017-04-14 17:30 ` [PATCH net-next 4/6] bpf: Allow bpf sample programs (*_user.c) to change bpf_map_def Martin KaFai Lau
@ 2017-04-14 17:30 ` Martin KaFai Lau
  2017-04-14 17:30 ` [PATCH net-next 6/6] bpf: lru: Add map-in-map LRU example Martin KaFai Lau
  2017-04-17 17:56 ` [PATCH net-next 0/6] bpf: LRU performance and test-program improvements David Miller
  6 siblings, 0 replies; 8+ messages in thread
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

After running map_perf_test with a much bigger
BPF_F_NO_COMMON_LRU map, the perf report shows a
lot of time spent in rotating the inactive list (i.e.
__bpf_lru_list_rotate_inactive):
> map_perf_test 32 8 10000 1000000 | awk '{sum += $3}END{print sum}'
19644783 (19M/s)
> map_perf_test 32 8 10000000 10000000 |  awk '{sum += $3}END{print sum}'
6283930 (6.28M/s)

An element on the inactive list usually means it is not in cache.
Hence, there is a need to tune the PERCPU_NR_SCANS value.

This patch finds a better number of elements to
scan during each list rotation.  The PERCPU_NR_SCANS (which
is defined the same as PERCPU_FREE_TARGET) decreases
from 16 elements to 4 elements.  This change only
affects the BPF_F_NO_COMMON_LRU map.

The test_lru_dist does not show a meaningful difference
between 16 and 4.  Our production L4 load balancer, which uses
the LRU map for connection tracking, also shows little change in
cache hit rate.  Since both the benchmark and production data show
no cache-hit difference, PERCPU_NR_SCANS is lowered from 16 to 4.
We can consider making it configurable if we later find a use case
where another value works better, and/or switch to a different
rotation strategy.

After this change:
> map_perf_test 32 8 10000000 10000000 |  awk '{sum += $3}END{print sum}'
9240324 (9.2M/s)

i.e. 6.28M/s -> 9.2M/s

The test_lru_dist has not shown a meaningful difference:
> test_lru_dist zipf.100k.a1_01.out 4000 1
nr_misses: 31575 (Before) vs 31566 (After)

> test_lru_dist zipf.100k.a0_01.out 40000 1
nr_misses: 67036 (Before) vs 67031 (After)

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/bpf_lru_list.c                  | 2 +-
 tools/testing/selftests/bpf/test_lru_map.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index f62d1d56f41d..e6ef4401a138 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -13,7 +13,7 @@
 #define LOCAL_FREE_TARGET		(128)
 #define LOCAL_NR_SCANS			LOCAL_FREE_TARGET
 
-#define PERCPU_FREE_TARGET		(16)
+#define PERCPU_FREE_TARGET		(4)
 #define PERCPU_NR_SCANS			PERCPU_FREE_TARGET
 
 /* Helpers to get the local list index */
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 87c05e5bdfdd..8c10c9180c1a 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -22,7 +22,7 @@
 #include "bpf_util.h"
 
 #define LOCAL_FREE_TARGET	(128)
-#define PERCPU_FREE_TARGET	(16)
+#define PERCPU_FREE_TARGET	(4)
 
 static int nr_cpus;
 
-- 
2.9.3

* [PATCH net-next 6/6] bpf: lru: Add map-in-map LRU example
  2017-04-14 17:30 [PATCH net-next 0/6] bpf: LRU performance and test-program improvements Martin KaFai Lau
                   ` (4 preceding siblings ...)
  2017-04-14 17:30 ` [PATCH net-next 5/6] bpf: lru: Lower the PERCPU_NR_SCANS from 16 to 4 Martin KaFai Lau
@ 2017-04-14 17:30 ` Martin KaFai Lau
  2017-04-17 17:56 ` [PATCH net-next 0/6] bpf: LRU performance and test-program improvements David Miller
  6 siblings, 0 replies; 8+ messages in thread
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

This patch adds a map-in-map LRU example.
If we know only a subset of cores will use the
LRU, we can allocate a common LRU list per target core
and store it into an array-of-maps (array_of_lru_hashs).

It allows using the common LRU map with map-update performance
comparable to the BPF_F_NO_COMMON_LRU map, but without wasting
memory on the unused cores that we know will never access the
LRU map.

BPF_F_NO_COMMON_LRU:
> map_perf_test 32 8 10000000 10000000 | awk '{sum += $3}END{print sum}'
9234314 (9.23M/s)

map-in-map LRU:
> map_perf_test 512 8 1260000 80000000 | awk '{sum += $3}END{print sum}'
9962743 (9.96M/s)

Note that the max_entries for the map-in-map LRU test is 1260000,
which is the max_entries of each inner LRU map.  8 processes are
started, so 8 * 1260000 = 10080000 (~10M), which is close to what is
used in the BPF_F_NO_COMMON_LRU test.
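
The per-cpu setup can be condensed to the following sketch (what
do_test_lru() in the diff below does for test case 2): create one
inner LRU hash per participating cpu and publish its fd in the outer
BPF_MAP_TYPE_ARRAY_OF_MAPS keyed by cpu id, which the kprobe then
looks up via bpf_get_smp_processor_id():

	int inner_fd = bpf_create_map(BPF_MAP_TYPE_LRU_HASH,
				      sizeof(uint32_t), sizeof(long),
				      inner_lru_hash_size, 0);

	assert(inner_fd != -1);
	assert(!bpf_map_update_elem(outer_fd, &cpu, &inner_fd, BPF_ANY));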

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 samples/bpf/map_perf_test_kern.c | 34 ++++++++++++++++++++--
 samples/bpf/map_perf_test_user.c | 62 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 404ed53b8a53..245165817fbe 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -11,6 +11,7 @@
 #include "bpf_helpers.h"
 
 #define MAX_ENTRIES 1000
+#define MAX_NR_CPUS 1024
 
 struct bpf_map_def SEC("maps") hash_map = {
 	.type = BPF_MAP_TYPE_HASH,
@@ -34,6 +35,19 @@ struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
 	.map_flags = BPF_F_NO_COMMON_LRU,
 };
 
+struct bpf_map_def SEC("maps") inner_lru_hash_map = {
+	.type = BPF_MAP_TYPE_LRU_HASH,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = MAX_ENTRIES,
+};
+
+struct bpf_map_def SEC("maps") array_of_lru_hashs = {
+	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.key_size = sizeof(u32),
+	.max_entries = MAX_NR_CPUS,
+};
+
 struct bpf_map_def SEC("maps") percpu_hash_map = {
 	.type = BPF_MAP_TYPE_PERCPU_HASH,
 	.key_size = sizeof(u32),
@@ -154,13 +168,27 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
 
 	test_case = dst6[7];
 
-	if (test_case == 0)
+	if (test_case == 0) {
 		ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
-	else if (test_case == 1)
+	} else if (test_case == 1) {
 		ret = bpf_map_update_elem(&nocommon_lru_hash_map, &key, &val,
 					  BPF_ANY);
-	else
+	} else if (test_case == 2) {
+		void *nolocal_lru_map;
+		int cpu = bpf_get_smp_processor_id();
+
+		nolocal_lru_map = bpf_map_lookup_elem(&array_of_lru_hashs,
+						      &cpu);
+		if (!nolocal_lru_map) {
+			ret = -ENOENT;
+			goto done;
+		}
+
+		ret = bpf_map_update_elem(nolocal_lru_map, &key, &val,
+					  BPF_ANY);
+	} else {
 		ret = -EINVAL;
+	}
 
 done:
 	if (ret)
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 2a12f48b5c6d..6ac778153315 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -25,6 +25,7 @@
 #include "bpf_load.h"
 
 #define TEST_BIT(t) (1U << (t))
+#define MAX_NR_CPUS 1024
 
 static __u64 time_get_ns(void)
 {
@@ -44,6 +45,7 @@ enum test_type {
 	LPM_KMALLOC,
 	HASH_LOOKUP,
 	ARRAY_LOOKUP,
+	INNER_LRU_HASH_PREALLOC,
 	NR_TESTS,
 };
 
@@ -57,10 +59,14 @@ const char *test_map_names[NR_TESTS] = {
 	[LPM_KMALLOC] = "lpm_trie_map_alloc",
 	[HASH_LOOKUP] = "hash_map",
 	[ARRAY_LOOKUP] = "array_map",
+	[INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map",
 };
 
 static int test_flags = ~0;
 static uint32_t num_map_entries;
+static uint32_t inner_lru_hash_size;
+static int inner_lru_hash_idx = -1;
+static int array_of_lru_hashs_idx = -1;
 static uint32_t max_cnt = 1000000;
 
 static int check_test_flags(enum test_type t)
@@ -82,11 +88,42 @@ static void test_hash_prealloc(int cpu)
 
 static void do_test_lru(enum test_type test, int cpu)
 {
+	static int inner_lru_map_fds[MAX_NR_CPUS];
+
 	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
 	const char *test_name;
 	__u64 start_time;
 	int i, ret;
 
+	if (test == INNER_LRU_HASH_PREALLOC) {
+		int outer_fd = map_fd[array_of_lru_hashs_idx];
+
+		assert(cpu < MAX_NR_CPUS);
+
+		if (cpu) {
+			inner_lru_map_fds[cpu] =
+				bpf_create_map(BPF_MAP_TYPE_LRU_HASH,
+					       sizeof(uint32_t), sizeof(long),
+					       inner_lru_hash_size, 0);
+			if (inner_lru_map_fds[cpu] == -1) {
+				printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
+				       strerror(errno), errno);
+				exit(1);
+			}
+		} else {
+			inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx];
+		}
+
+		ret = bpf_map_update_elem(outer_fd, &cpu,
+					  &inner_lru_map_fds[cpu],
+					  BPF_ANY);
+		if (ret) {
+			printf("cannot update ARRAY_OF_LRU_HASHS with key:%u. %s(%d)\n",
+			       cpu, strerror(errno), errno);
+			exit(1);
+		}
+	}
+
 	in6.sin6_addr.s6_addr16[0] = 0xdead;
 	in6.sin6_addr.s6_addr16[1] = 0xbeef;
 
@@ -96,6 +133,9 @@ static void do_test_lru(enum test_type test, int cpu)
 	} else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
 		test_name = "nocommon_lru_hash_map_perf";
 		in6.sin6_addr.s6_addr16[7] = 1;
+	} else if (test == INNER_LRU_HASH_PREALLOC) {
+		test_name = "inner_lru_hash_map_perf";
+		in6.sin6_addr.s6_addr16[7] = 2;
 	} else {
 		assert(0);
 	}
@@ -120,6 +160,11 @@ static void test_nocommon_lru_hash_prealloc(int cpu)
 	do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu);
 }
 
+static void test_inner_lru_hash_prealloc(int cpu)
+{
+	do_test_lru(INNER_LRU_HASH_PREALLOC, cpu);
+}
+
 static void test_percpu_hash_prealloc(int cpu)
 {
 	__u64 start_time;
@@ -203,6 +248,7 @@ const test_func test_funcs[] = {
 	[LPM_KMALLOC] = test_lpm_kmalloc,
 	[HASH_LOOKUP] = test_hash_lookup,
 	[ARRAY_LOOKUP] = test_array_lookup,
+	[INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc,
 };
 
 static void loop(int cpu)
@@ -278,9 +324,25 @@ static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
 {
 	int i;
 
+	if (!strcmp("inner_lru_hash_map", name)) {
+		inner_lru_hash_idx = idx;
+		inner_lru_hash_size = map->max_entries;
+	}
+
+	if (!strcmp("array_of_lru_hashs", name)) {
+		if (inner_lru_hash_idx == -1) {
+			printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
+			exit(1);
+		}
+		map->inner_map_idx = inner_lru_hash_idx;
+		array_of_lru_hashs_idx = idx;
+	}
+
 	if (num_map_entries <= 0)
 		return;
 
+	inner_lru_hash_size = num_map_entries;
+
 	/* Only change the max_entries for the enabled test(s) */
 	for (i = 0; i < NR_TESTS; i++) {
 		if (!strcmp(test_map_names[i], name) &&
-- 
2.9.3

* Re: [PATCH net-next 0/6] bpf: LRU performance and test-program improvements
  2017-04-14 17:30 [PATCH net-next 0/6] bpf: LRU performance and test-program improvements Martin KaFai Lau
                   ` (5 preceding siblings ...)
  2017-04-14 17:30 ` [PATCH net-next 6/6] bpf: lru: Add map-in-map LRU example Martin KaFai Lau
@ 2017-04-17 17:56 ` David Miller
  6 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2017-04-17 17:56 UTC (permalink / raw)
  To: kafai; +Cc: netdev, ast, daniel

From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 14 Apr 2017 10:30:24 -0700

> The first 4 patches make a few improvements to the LRU tests.
> 
> Patch 5/6 improves the performance of the BPF_F_NO_COMMON_LRU map.
> 
> Patch 6/6 adds an example of using an LRU map with map-in-map.

Series applied, thank you.
