All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Toke Høiland-Jørgensen" <toke@redhat.com>
To: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	Martin KaFai Lau <martin.lau@linux.dev>,
	Song Liu <song@kernel.org>, Yonghong Song <yhs@fb.com>,
	John Fastabend <john.fastabend@gmail.com>,
	KP Singh <kpsingh@kernel.org>,
	Stanislav Fomichev <sdf@google.com>, Hao Luo <haoluo@google.com>,
	Jiri Olsa <jolsa@kernel.org>,
	"David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	Jesper Dangaard Brouer <hawk@kernel.org>
Cc: "Kumar Kartikeya Dwivedi" <memxor@gmail.com>,
	netdev@vger.kernel.org, bpf@vger.kernel.org,
	"Freysteinn Alfredsson" <freysteinn.alfredsson@kau.se>,
	"Cong Wang" <xiyou.wangcong@gmail.com>,
	"Toke Høiland-Jørgensen" <toke@redhat.com>,
	"Eric Dumazet" <edumazet@google.com>,
	"Paolo Abeni" <pabeni@redhat.com>
Subject: [RFC PATCH 12/17] bpf: Add helper to schedule an interface for TX dequeue
Date: Wed, 13 Jul 2022 13:14:20 +0200	[thread overview]
Message-ID: <20220713111430.134810-13-toke@redhat.com> (raw)
In-Reply-To: <20220713111430.134810-1-toke@redhat.com>

This adds a helper that a BPF program can call to schedule an interface for
transmission. The helper can be used from both a regular XDP program (to
schedule transmission after queueing a packet), and from a dequeue program
to (re-)schedule transmission after a dequeue operation. In particular, the
latter use can be combined with BPF timers to schedule delayed
transmission, for instance to implement traffic shaping.

The helper always schedules transmission on the interface on the current
CPU. For cross-CPU operation, it is up to the BPF program to arrange for
the helper to be called on the appropriate CPU, either by configuring
hardware RSS appropriately, or by using a cpumap. Likewise, it is up to the
BPF program to decide whether to use separate queues per CPU (by queueing
packets in multiple maps) or to accept the lock contention of using a
single map across CPUs.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 include/uapi/linux/bpf.h       | 11 +++++++
 net/core/filter.c              | 52 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 11 +++++++
 3 files changed, 74 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d44382644391..b352ecc280f4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5358,6 +5358,16 @@ union bpf_attr {
  *		*bpf_packet_dequeue()* (and checked to not be NULL).
  *	Return
  *		This always succeeds and returns zero.
+ *
+ * long bpf_schedule_iface_dequeue(void *ctx, int ifindex, int flags)
+ *	Description
+ *		Schedule the interface with index *ifindex* for transmission from
+ *		its dequeue program as soon as possible. The *flags* argument
+ *		must be zero.
+ *
+ *	Return
+ *		0 on success, -EINVAL if *flags* is non-zero, -ENODEV if no
+ *		such interface exists, or -ENOENT if it has no dequeue program.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5570,6 +5580,7 @@ union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(packet_dequeue),		\
 	FN(packet_drop),		\
+	FN(schedule_iface_dequeue),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 7c89eaa01c29..bb556d873b52 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4431,6 +4431,54 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
+static int bpf_schedule_iface_dequeue(struct net *net, int ifindex, int flags)
+{
+	struct net_device *dev;
+	struct bpf_prog *prog;
+
+	if (flags)
+		return -EINVAL;
+
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev)
+		return -ENODEV;
+
+	prog = rcu_dereference(dev->xdp_dequeue_prog);
+	if (!prog)
+		return -ENOENT;
+
+	dev_schedule_xdp_dequeue(dev);
+	return 0;
+}
+
+BPF_CALL_3(bpf_xdp_schedule_iface_dequeue, struct xdp_buff *, ctx, int, ifindex, int, flags)
+{
+	return bpf_schedule_iface_dequeue(dev_net(ctx->rxq->dev), ifindex, flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_schedule_iface_dequeue_proto = {
+	.func           = bpf_xdp_schedule_iface_dequeue,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_dequeue_schedule_iface_dequeue, struct dequeue_data *, ctx, int, ifindex, int, flags)
+{
+	return bpf_schedule_iface_dequeue(dev_net(ctx->txq->dev), ifindex, flags);
+}
+
+static const struct bpf_func_proto bpf_dequeue_schedule_iface_dequeue_proto = {
+	.func           = bpf_dequeue_schedule_iface_dequeue,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+};
+
 BTF_ID_LIST_SINGLE(xdp_md_btf_ids, struct, xdp_md)
 
 BPF_CALL_4(bpf_packet_dequeue, struct dequeue_data *, ctx, struct bpf_map *, map,
@@ -8068,6 +8116,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_fib_lookup_proto;
 	case BPF_FUNC_check_mtu:
 		return &bpf_xdp_check_mtu_proto;
+	case BPF_FUNC_schedule_iface_dequeue:
+		return &bpf_xdp_schedule_iface_dequeue_proto;
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_udp:
 		return &bpf_xdp_sk_lookup_udp_proto;
@@ -8105,6 +8155,8 @@ dequeue_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_packet_dequeue_proto;
 	case BPF_FUNC_packet_drop:
 		return &bpf_packet_drop_proto;
+	case BPF_FUNC_schedule_iface_dequeue:
+		return &bpf_dequeue_schedule_iface_dequeue_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1dab68a89e18..9eb9a5b52c76 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5358,6 +5358,16 @@ union bpf_attr {
  *		*bpf_packet_dequeue()* (and checked to not be NULL).
  *	Return
  *		This always succeeds and returns zero.
+ *
+ * long bpf_schedule_iface_dequeue(void *ctx, int ifindex, int flags)
+ *	Description
+ *		Schedule the interface with index *ifindex* for transmission from
+ *		its dequeue program as soon as possible. The *flags* argument
+ *		must be zero.
+ *
+ *	Return
+ *		0 on success, -EINVAL if *flags* is non-zero, -ENODEV if no
+ *		such interface exists, or -ENOENT if it has no dequeue program.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5570,6 +5580,7 @@ union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(packet_dequeue),		\
 	FN(packet_drop),		\
+	FN(schedule_iface_dequeue),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.37.0


  parent reply	other threads:[~2022-07-13 11:18 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-13 11:14 [RFC PATCH 00/17] xdp: Add packet queueing and scheduling capabilities Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 01/17] dev: Move received_rps counter next to RPS members in softnet data Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 02/17] bpf: Expand map key argument of bpf_redirect_map to u64 Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 03/17] bpf: Use 64-bit return value for bpf_prog_run Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 04/17] bpf: Add a PIFO priority queue map type Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 05/17] pifomap: Add queue rotation for continuously increasing rank mode Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 06/17] xdp: Add dequeue program type for getting packets from a PIFO Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 07/17] bpf: Teach the verifier about referenced packets returned from dequeue programs Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 08/17] bpf: Add helpers to dequeue from a PIFO map Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 09/17] bpf: Introduce pkt_uid member for PTR_TO_PACKET Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 10/17] bpf: Implement direct packet access in dequeue progs Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 11/17] dev: Add XDP dequeue hook Toke Høiland-Jørgensen
2022-07-13 11:14 ` Toke Høiland-Jørgensen [this message]
2022-07-13 11:14 ` [RFC PATCH 13/17] libbpf: Add support for dequeue program type and PIFO map type Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 14/17] libbpf: Add support for querying dequeue programs Toke Høiland-Jørgensen
2022-07-14  5:36   ` Andrii Nakryiko
2022-07-14 10:13     ` Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 15/17] selftests/bpf: Add verifier tests for dequeue prog Toke Høiland-Jørgensen
2022-07-14  5:38   ` Andrii Nakryiko
2022-07-14  6:45     ` Kumar Kartikeya Dwivedi
2022-07-14 18:54       ` Andrii Nakryiko
2022-07-15 11:11         ` Kumar Kartikeya Dwivedi
2022-07-13 11:14 ` [RFC PATCH 16/17] selftests/bpf: Add test for XDP queueing through PIFO maps Toke Høiland-Jørgensen
2022-07-14  5:41   ` Andrii Nakryiko
2022-07-14 10:18     ` Toke Høiland-Jørgensen
2022-07-13 11:14 ` [RFC PATCH 17/17] samples/bpf: Add queueing support to xdp_fwd sample Toke Høiland-Jørgensen
2022-07-13 18:36 ` [RFC PATCH 00/17] xdp: Add packet queueing and scheduling capabilities Stanislav Fomichev
2022-07-13 21:52   ` Toke Høiland-Jørgensen
2022-07-13 22:56     ` Stanislav Fomichev
2022-07-14 10:46       ` Toke Høiland-Jørgensen
2022-07-14 17:24         ` Stanislav Fomichev
2022-07-15  1:12         ` Alexei Starovoitov
2022-07-15 12:55           ` Toke Høiland-Jørgensen
2022-07-17 19:12         ` Cong Wang
2022-07-18 12:25           ` Toke Høiland-Jørgensen
2022-07-14  6:34     ` Kumar Kartikeya Dwivedi
2022-07-17 18:17     ` Cong Wang
2022-07-17 18:41       ` Kumar Kartikeya Dwivedi
2022-07-17 19:23         ` Cong Wang
2022-07-18 12:12       ` Toke Høiland-Jørgensen
2022-07-14 14:05 ` Jamal Hadi Salim
2022-07-14 14:56   ` Dave Taht
2022-07-14 15:33     ` Jamal Hadi Salim
2022-07-14 16:21   ` Toke Høiland-Jørgensen
2022-07-17 17:46 ` Cong Wang
2022-07-18 12:45   ` Toke Høiland-Jørgensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220713111430.134810-13-toke@redhat.com \
    --to=toke@redhat.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=freysteinn.alfredsson@kau.se \
    --cc=haoluo@google.com \
    --cc=hawk@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=memxor@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@google.com \
    --cc=song@kernel.org \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.