bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hou Tao <houtao@huaweicloud.com>
To: bpf@vger.kernel.org, Martin KaFai Lau <martin.lau@linux.dev>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Andrii Nakryiko <andrii@kernel.org>, Song Liu <song@kernel.org>,
	Hao Luo <haoluo@google.com>, Yonghong Song <yhs@fb.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	KP Singh <kpsingh@kernel.org>,
	Stanislav Fomichev <sdf@google.com>, Jiri Olsa <jolsa@kernel.org>,
	John Fastabend <john.fastabend@gmail.com>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	rcu@vger.kernel.org, houtao1@huawei.com
Subject: [RFC PATCH bpf-next v5 2/2] bpf: Call rcu_momentary_dyntick_idle() in task work periodically
Date: Mon, 19 Jun 2023 22:32:31 +0800	[thread overview]
Message-ID: <20230619143231.222536-3-houtao@huaweicloud.com> (raw)
In-Reply-To: <20230619143231.222536-1-houtao@huaweicloud.com>

From: Hou Tao <houtao1@huawei.com>

After doing reuse-after-RCU-GP in the bpf memory allocator, if there are
intensive memory allocations and frees in the bpf memory allocator and
the RCU GP is slow, the allocator's peak memory usage will be high.

To reduce memory usage for bpf memory allocator, call
rcu_momentary_dyntick_idle() in task work periodically to accelerate the
expiration of RCU grace period.

The following benchmark results show that the memory usage is reduced
considerably after applying the patch:

Before:
overwrite           per-prod-op 49.11 ± 1.30k/s, avg mem 313.09 ± 80.36MiB, peak mem 509.09MiB
batch_add_batch_del per-prod-op 76.06 ± 2.38k/s, avg mem 287.97 ± 63.59MiB, peak mem 496.81MiB
add_del_on_diff_cpu per-prod-op 18.75 ± 0.09k/s, avg mem  27.71 ±  4.92MiB, peak mem  44.54MiB

After:
overwrite           per-prod-op 51.17 ± 0.30k/s, avg mem 105.09 ±  7.74MiB, peak mem 143.60MiB
batch_add_batch_del per-prod-op 86.43 ± 0.90k/s, avg mem  85.82 ± 11.81MiB, peak mem 118.93MiB
add_del_on_diff_cpu per-prod-op 18.71 ± 0.08k/s, avg mem  26.92 ±  5.50MiB, peak mem  43.18MiB

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 kernel/bpf/memalloc.c | 46 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 9b31c53fd285..c4b4cae04400 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -6,6 +6,7 @@
 #include <linux/irq_work.h>
 #include <linux/bpf_mem_alloc.h>
 #include <linux/memcontrol.h>
+#include <linux/task_work.h>
 #include <asm/local.h>
 
 /* Any context (including NMI) BPF specific memory allocator.
@@ -123,6 +124,16 @@ struct bpf_reuse_batch {
 	struct rcu_head rcu;
 };
 
+/* Bit position in bpf_rcu_gp_acc_ctx::flags: set while the acceleration
+ * task work is queued or running on this CPU (see bpf_mem_rcu_gp_acc()).
+ */
+#define BPF_GP_ACC_RUNNING 0
+
+/* Per-CPU state used to rate-limit RCU grace-period acceleration.
+ * @flags:    holds the BPF_GP_ACC_RUNNING bit
+ * @next_run: earliest jiffies value at which the work may run again
+ * @work:     task work that calls rcu_momentary_dyntick_idle()
+ */
+struct bpf_rcu_gp_acc_ctx {
+	unsigned long flags;
+	unsigned long next_run;
+	struct callback_head work;
+};
+
+static DEFINE_PER_CPU(struct bpf_rcu_gp_acc_ctx, bpf_acc_ctx);
+
 static struct llist_node notrace *__llist_del_first(struct llist_head *head)
 {
 	struct llist_node *entry, *next;
@@ -347,12 +358,47 @@ static void dyn_reuse_rcu(struct rcu_head *rcu)
 	kfree(batch);
 }
 
+/* Task work callback: briefly report an RCU quiescent state for this CPU
+ * to help pending grace periods expire sooner, then re-arm the rate limit
+ * and allow the work to be queued again.
+ */
+static void bpf_rcu_gp_acc_work(struct callback_head *head)
+{
+	struct bpf_rcu_gp_acc_ctx *ctx = container_of(head, struct bpf_rcu_gp_acc_ctx, work);
+
+	/* NOTE(review): presumably rcu_momentary_dyntick_idle() requires
+	 * interrupts to be disabled across the call — confirm against RCU docs.
+	 */
+	local_irq_disable();
+	rcu_momentary_dyntick_idle();
+	local_irq_enable();
+
+	/* The interval between rcu_momentary_dyntick_idle() calls is
+	 * at least 10ms.
+	 */
+	WRITE_ONCE(ctx->next_run, jiffies + msecs_to_jiffies(10));
+	/* Let bpf_mem_rcu_gp_acc() queue the work again */
+	clear_bit(BPF_GP_ACC_RUNNING, &ctx->flags);
+}
+
+/* Queue task work on the current task to accelerate RCU grace-period
+ * expiration for @c. Rate-limited to once per 10ms per CPU and only
+ * triggered when enough freed elements are still waiting for an RCU GP.
+ */
+static void bpf_mem_rcu_gp_acc(struct bpf_mem_cache *c)
+{
+	struct bpf_rcu_gp_acc_ctx *ctx = this_cpu_ptr(&bpf_acc_ctx);
+
+	/* Bail out when few elements await an RCU GP (128 looks like a
+	 * heuristic threshold — TODO confirm how it was chosen) or when
+	 * the work ran within the last 10ms (see bpf_rcu_gp_acc_work()).
+	 */
+	if (atomic_read(&c->dyn_reuse_rcu_cnt) < 128 ||
+	    time_before(jiffies, READ_ONCE(ctx->next_run)))
+		return;
+
+	/* Skip kernel threads (presumably because task work added with
+	 * TWA_RESUME may never run for them — confirm), and skip if the
+	 * work is already pending on this CPU.
+	 */
+	if ((current->flags & PF_KTHREAD) ||
+	    test_and_set_bit(BPF_GP_ACC_RUNNING, &ctx->flags))
+		return;
+
+	init_task_work(&ctx->work, bpf_rcu_gp_acc_work);
+	/* Task is exiting ? */
+	if (task_work_add(current, &ctx->work, TWA_RESUME))
+		clear_bit(BPF_GP_ACC_RUNNING, &ctx->flags);
+}
+
 static void reuse_bulk(struct bpf_mem_cache *c)
 {
 	struct llist_node *head, *tail;
 	struct bpf_reuse_batch *batch;
 	unsigned long flags;
 
+	bpf_mem_rcu_gp_acc(c);
+
 	head = llist_del_all(&c->free_llist_extra);
 	tail = head;
 	while (tail && tail->next)
-- 
2.29.2


  parent reply	other threads:[~2023-06-19 14:00 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-19 14:32 [RFC PATCH bpf-next v5 0/2] Handle immediate reuse in bpf memory allocator Hou Tao
2023-06-19 14:32 ` [RFC PATCH bpf-next v5 1/2] bpf: Only reuse after one RCU GP " Hou Tao
2023-06-19 14:32 ` Hou Tao [this message]
2023-06-20 16:15   ` [RFC PATCH bpf-next v5 2/2] bpf: Call rcu_momentary_dyntick_idle() in task work periodically Alexei Starovoitov
2023-06-20 17:21     ` Paul E. McKenney
2023-06-21  1:07     ` Hou Tao
2023-06-21  1:20       ` Alexei Starovoitov
2023-06-21  1:38         ` Hou Tao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230619143231.222536-3-houtao@huaweicloud.com \
    --to=houtao@huaweicloud.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=haoluo@google.com \
    --cc=houtao1@huawei.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kpsingh@kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=paulmck@kernel.org \
    --cc=rcu@vger.kernel.org \
    --cc=sdf@google.com \
    --cc=song@kernel.org \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).