netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Amery Hung <ameryhung@gmail.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, yangpeihao@sjtu.edu.cn, toke@redhat.com,
	jhs@mojatatu.com, jiri@resnulli.us, sdf@google.com,
	xiyou.wangcong@gmail.com, yepeilin.cs@gmail.com
Subject: [RFC PATCH v7 4/8] net_sched: Add reset program
Date: Wed, 17 Jan 2024 21:56:20 +0000	[thread overview]
Message-ID: <a45e9b29b616fdfb71cb6920aaecc6d22b1540b4.1705432850.git.amery.hung@bytedance.com> (raw)
In-Reply-To: <cover.1705432850.git.amery.hung@bytedance.com>

Allow developers to implement customized reset logic through an optional
reset program. The program also takes bpf_qdisc_ctx as its context, but
currently cannot access any of its fields.

To release skbs, the program can release all references to the bpf lists
or rbtrees serving as skb queues. The destructor kfunc bpf_skb_destroy()
will be called by bpf_map_free_deferred(). This prevents the qdisc from
holding the sch_tree_lock for too long when there are many packets in
the qdisc.

Signed-off-by: Amery Hung <amery.hung@bytedance.com>
---
 include/uapi/linux/bpf.h       |  1 +
 include/uapi/linux/pkt_sched.h |  4 ++++
 kernel/bpf/syscall.c           |  1 +
 net/core/filter.c              |  3 +++
 net/sched/sch_bpf.c            | 30 ++++++++++++++++++++++++++----
 tools/include/uapi/linux/bpf.h |  1 +
 6 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index df280bbb7c0d..84669886a493 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1059,6 +1059,7 @@ enum bpf_attach_type {
 	BPF_NETKIT_PEER,
 	BPF_QDISC_ENQUEUE,
 	BPF_QDISC_DEQUEUE,
+	BPF_QDISC_RESET,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index d05462309f5a..e9e1a83c22f7 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1328,6 +1328,10 @@ enum {
 	TCA_SCH_BPF_DEQUEUE_PROG_FD,	/* u32 */
 	TCA_SCH_BPF_DEQUEUE_PROG_ID,	/* u32 */
 	TCA_SCH_BPF_DEQUEUE_PROG_TAG,	/* data */
+	TCA_SCH_BPF_RESET_PROG_NAME,	/* string */
+	TCA_SCH_BPF_RESET_PROG_FD,	/* u32 */
+	TCA_SCH_BPF_RESET_PROG_ID,	/* u32 */
+	TCA_SCH_BPF_RESET_PROG_TAG,	/* data */
 	__TCA_SCH_BPF_MAX,
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1838bddd8526..9af6fa542f2e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2506,6 +2506,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		switch (expected_attach_type) {
 		case BPF_QDISC_ENQUEUE:
 		case BPF_QDISC_DEQUEUE:
+		case BPF_QDISC_RESET:
 			return 0;
 		default:
 			return -EINVAL;
diff --git a/net/core/filter.c b/net/core/filter.c
index f25a0b6b5d56..f8e17465377f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8905,6 +8905,9 @@ static bool tc_qdisc_is_valid_access(int off, int size,
 {
 	struct btf *btf;
 
+	if (prog->expected_attach_type == BPF_QDISC_RESET)
+		return false;
+
 	if (off < 0 || off >= sizeof(struct bpf_qdisc_ctx))
 		return false;
 
diff --git a/net/sched/sch_bpf.c b/net/sched/sch_bpf.c
index 1910a58a3352..3f0f809dced6 100644
--- a/net/sched/sch_bpf.c
+++ b/net/sched/sch_bpf.c
@@ -42,6 +42,7 @@ struct bpf_sched_data {
 	struct Qdisc_class_hash clhash;
 	struct sch_bpf_prog __rcu enqueue_prog;
 	struct sch_bpf_prog __rcu dequeue_prog;
+	struct sch_bpf_prog __rcu reset_prog;
 
 	struct qdisc_watchdog watchdog;
 };
@@ -51,6 +52,9 @@ static int sch_bpf_dump_prog(const struct sch_bpf_prog *prog, struct sk_buff *sk
 {
 	struct nlattr *nla;
 
+	if (!prog->prog)
+		return 0;
+
 	if (prog->name &&
 	    nla_put_string(skb, name, prog->name))
 		return -EMSGSIZE;
@@ -81,6 +85,9 @@ static int sch_bpf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (sch_bpf_dump_prog(&q->dequeue_prog, skb, TCA_SCH_BPF_DEQUEUE_PROG_NAME,
 			      TCA_SCH_BPF_DEQUEUE_PROG_ID, TCA_SCH_BPF_DEQUEUE_PROG_TAG))
 		goto nla_put_failure;
+	if (sch_bpf_dump_prog(&q->reset_prog, skb, TCA_SCH_BPF_RESET_PROG_NAME,
+			      TCA_SCH_BPF_RESET_PROG_ID, TCA_SCH_BPF_RESET_PROG_TAG))
+		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
 
@@ -259,16 +266,21 @@ static const struct nla_policy sch_bpf_policy[TCA_SCH_BPF_MAX + 1] = {
 	[TCA_SCH_BPF_DEQUEUE_PROG_FD]	= { .type = NLA_U32 },
 	[TCA_SCH_BPF_DEQUEUE_PROG_NAME]	= { .type = NLA_NUL_STRING,
 					    .len = ACT_BPF_NAME_LEN },
+	[TCA_SCH_BPF_RESET_PROG_FD]	= { .type = NLA_U32 },
+	[TCA_SCH_BPF_RESET_PROG_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = ACT_BPF_NAME_LEN },
 };
 
-static int bpf_init_prog(struct nlattr *fd, struct nlattr *name, struct sch_bpf_prog *prog)
+static int bpf_init_prog(struct nlattr *fd, struct nlattr *name,
+			 struct sch_bpf_prog *prog, bool optional)
 {
 	struct bpf_prog *fp, *old_fp;
 	char *prog_name = NULL;
 	u32 bpf_fd;
 
 	if (!fd)
-		return -EINVAL;
+		return optional ? 0 : -EINVAL;
+
 	bpf_fd = nla_get_u32(fd);
 
 	fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_QDISC);
@@ -327,11 +339,15 @@ static int sch_bpf_change(struct Qdisc *sch, struct nlattr *opt,
 	sch_tree_lock(sch);
 
 	err = bpf_init_prog(tb[TCA_SCH_BPF_ENQUEUE_PROG_FD],
-			    tb[TCA_SCH_BPF_ENQUEUE_PROG_NAME], &q->enqueue_prog);
+			    tb[TCA_SCH_BPF_ENQUEUE_PROG_NAME], &q->enqueue_prog, false);
 	if (err)
 		goto failure;
 	err = bpf_init_prog(tb[TCA_SCH_BPF_DEQUEUE_PROG_FD],
-			    tb[TCA_SCH_BPF_DEQUEUE_PROG_NAME], &q->dequeue_prog);
+			    tb[TCA_SCH_BPF_DEQUEUE_PROG_NAME], &q->dequeue_prog, false);
+	if (err)
+		goto failure;
+	err = bpf_init_prog(tb[TCA_SCH_BPF_RESET_PROG_FD],
+			    tb[TCA_SCH_BPF_RESET_PROG_NAME], &q->reset_prog, true);
 failure:
 	sch_tree_unlock(sch);
 	return err;
@@ -360,7 +376,9 @@ static int sch_bpf_init(struct Qdisc *sch, struct nlattr *opt,
 static void sch_bpf_reset(struct Qdisc *sch)
 {
 	struct bpf_sched_data *q = qdisc_priv(sch);
+	struct bpf_qdisc_ctx ctx = {};
 	struct sch_bpf_class *cl;
+	struct bpf_prog *reset;
 	unsigned int i;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
@@ -371,6 +389,9 @@ static void sch_bpf_reset(struct Qdisc *sch)
 	}
 
 	qdisc_watchdog_cancel(&q->watchdog);
+	reset = rcu_dereference(q->reset_prog.prog);
+	if (reset)
+		bpf_prog_run(reset, &ctx);
 }
 
 static void sch_bpf_destroy_class(struct Qdisc *sch, struct sch_bpf_class *cl)
@@ -398,6 +419,7 @@ static void sch_bpf_destroy(struct Qdisc *sch)
 	sch_tree_lock(sch);
 	bpf_cleanup_prog(&q->enqueue_prog);
 	bpf_cleanup_prog(&q->dequeue_prog);
+	bpf_cleanup_prog(&q->reset_prog);
 	sch_tree_unlock(sch);
 }
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index df280bbb7c0d..84669886a493 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1059,6 +1059,7 @@ enum bpf_attach_type {
 	BPF_NETKIT_PEER,
 	BPF_QDISC_ENQUEUE,
 	BPF_QDISC_DEQUEUE,
+	BPF_QDISC_RESET,
 	__MAX_BPF_ATTACH_TYPE
 };
 
-- 
2.20.1


  parent reply	other threads:[~2024-01-17 21:56 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-17 21:56 [RFC PATCH v7 0/8] net_sched: Introduce eBPF based Qdisc Amery Hung
2024-01-17 21:56 ` [RFC PATCH v7 1/8] " Amery Hung
2024-01-23 23:51   ` Martin KaFai Lau
2024-01-24  5:22     ` Amery Hung
2024-01-26  2:22       ` Martin KaFai Lau
2024-01-27  1:17         ` Amery Hung
2024-01-30  6:39           ` Martin KaFai Lau
2024-01-30 17:49             ` Kui-Feng Lee
2024-01-31  1:01               ` Martin KaFai Lau
2024-01-31 16:49                 ` Kui-Feng Lee
2024-01-31 16:59                   ` Amery Hung
2024-01-31 16:23             ` Amery Hung
2024-02-02  1:47               ` Martin KaFai Lau
2024-02-09 20:14                 ` Amery Hung
2024-01-17 21:56 ` [RFC PATCH v7 2/8] net_sched: Add kfuncs for working with skb Amery Hung
2024-01-17 21:56 ` [RFC PATCH v7 3/8] net_sched: Introduce kfunc bpf_skb_tc_classify() Amery Hung
2024-01-17 21:56 ` Amery Hung [this message]
2024-01-17 21:56 ` [RFC PATCH v7 5/8] net_sched: Add init program Amery Hung
2024-01-17 21:56 ` [RFC PATCH v7 6/8] tools/libbpf: Add support for BPF_PROG_TYPE_QDISC Amery Hung
2024-01-23  0:17   ` Andrii Nakryiko
2024-01-23 19:40     ` Amery Hung
2024-01-17 21:56 ` [RFC PATCH v7 7/8] samples/bpf: Add an example of bpf fq qdisc Amery Hung
2024-01-24 10:29   ` Daniel Borkmann
2024-01-26 19:49     ` Amery Hung
2024-01-17 21:56 ` [RFC PATCH v7 8/8] samples/bpf: Add an example of bpf netem qdisc Amery Hung
2024-01-23 21:13 ` [RFC PATCH v7 0/8] net_sched: Introduce eBPF based Qdisc Stanislav Fomichev
2024-01-24 10:10   ` Daniel Borkmann
2024-01-24 12:09   ` Jamal Hadi Salim
2024-01-24 13:07     ` Daniel Borkmann
2024-01-24 14:11       ` Jamal Hadi Salim
2024-01-24 15:26         ` Daniel Borkmann
2024-01-24 21:26           ` Amery Hung
2024-01-25 11:57             ` Daniel Borkmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a45e9b29b616fdfb71cb6920aaecc6d22b1540b4.1705432850.git.amery.hung@bytedance.com \
    --to=ameryhung@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=netdev@vger.kernel.org \
    --cc=sdf@google.com \
    --cc=toke@redhat.com \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yangpeihao@sjtu.edu.cn \
    --cc=yepeilin.cs@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).