From mboxrd@z Thu Jan 1 00:00:00 1970 From: Daniel Borkmann Subject: Re: [PATCH net-next 3/4] net/sched: refactor TC_ACT_REDIRECT handling Date: Fri, 13 Jul 2018 18:32:44 +0200 Message-ID: <1f38712e-92df-6894-9af0-ca0ffc972d4b@iogearbox.net> References: <20ab3afa3843dc325570cdc502e296f1d4c0da91.1531473946.git.pabeni@redhat.com> <5937186c-0fab-953f-a305-e6379a52fda8@iogearbox.net> <15b8d8cd41ea46e157a947fb46d9f35ae8a3b3ac.camel@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit Cc: Jamal Hadi Salim , Cong Wang , Jiri Pirko , Alexei Starovoitov , Marcelo Ricardo Leitner To: Paolo Abeni , netdev@vger.kernel.org Return-path: Received: from www62.your-server.de ([213.133.104.62]:44345 "EHLO www62.your-server.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729592AbeGMQsP (ORCPT ); Fri, 13 Jul 2018 12:48:15 -0400 In-Reply-To: <15b8d8cd41ea46e157a947fb46d9f35ae8a3b3ac.camel@redhat.com> Content-Language: en-US Sender: netdev-owner@vger.kernel.org List-ID: On 07/13/2018 04:37 PM, Paolo Abeni wrote: > On Fri, 2018-07-13 at 16:13 +0200, Daniel Borkmann wrote: >> On 07/13/2018 11:55 AM, Paolo Abeni wrote: >>> This patch changes the TC_ACT_REDIRECT code path to allow >>> providing the redirect parameters via the tcf_result argument. >>> >>> Such union is expanded to host the redirect device, the redirect >>> direction (ingress/egress) and the stats to be updated on error >>> conditions. >>> >>> Actions/classifiers using TC_ACT_REDIRECT can either: >>> * fill the tcf_result redirect related fields >>> * clear such fields and use the bpf per cpu redirect info >>> >>> skb_do_redirect now tries to fetch the relevant data from tcf_result >>> and fall back to access redirect info. It also updates the stats >>> accordingly to the redirect result, if provided by the caller. >>> >>> This will allow using the TC_ACT_REDIRECT action in more places in >>> the next patch. >>> >>> Signed-off-by: Paolo Abeni >>> --- >>> include/net/sch_generic.h | 15 ++++++++++++++- >>> net/core/dev.c | 4 ++-- >>> net/core/filter.c | 29 +++++++++++++++++++++++------ >>> net/core/lwt_bpf.c | 5 ++++- >>> net/sched/act_bpf.c | 4 +++- >>> net/sched/cls_bpf.c | 8 +++++--- >>> 6 files changed, 51 insertions(+), 14 deletions(-) >>> >>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h >>> index 056dc1083aa3..dd9e00d017b3 100644 >>> --- a/include/net/sch_generic.h >>> +++ b/include/net/sch_generic.h >>> @@ -235,9 +235,22 @@ struct tcf_result { >>> u32 classid; >>> }; >>> const struct tcf_proto *goto_tp; >>> + >>> + /* used by the TC_ACT_REDIRECT action */ >>> + struct { >>> + /* device and direction, or 0 bpf redirect */ >>> + long dev_ingress; >>> + struct gnet_stats_queue *qstats; >>> + }; >>> }; >>> }; >>> >>> +#define TCF_RESULT_REDIR_DEV(res) \ >>> + ((struct net_device *)((res)->dev_ingress & ~1)) >>> +#define TCF_RESULT_REDIR_INGRESS(res) ((res)->dev_ingress & 1) >>> +#define TCF_RESULT_SET_REDIRECT(res, dev, ingress) \ >>> + ((res)->dev_ingress = (long)(dev) | (!!(ingress))) >>> + >>> struct tcf_proto_ops { >>> struct list_head head; >>> char kind[IFNAMSIZ]; >>> @@ -543,7 +556,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, >>> struct netlink_ext_ack *extack); >>> void __qdisc_calculate_pkt_len(struct sk_buff *skb, >>> const struct qdisc_size_table *stab); >>> -int skb_do_redirect(struct sk_buff *); >>> +int skb_do_redirect(struct sk_buff *skb, struct tcf_result *res); >>> >>> static inline void skb_reset_tc(struct sk_buff *skb) >>> { >>> diff --git a/net/core/dev.c b/net/core/dev.c >>> index 14a748ee8cc9..a283dbfde30c 100644 >>> --- a/net/core/dev.c >>> +++ b/net/core/dev.c >>> @@ -3538,7 +3538,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) >>> return NULL; >>> case TC_ACT_REDIRECT: >>> /* No need to push/pop skb's mac_header here on egress! */ >>> - skb_do_redirect(skb); >>> + skb_do_redirect(skb, &cl_res); >>> *ret = NET_XMIT_SUCCESS; >>> return NULL; >>> default: >>> @@ -4600,7 +4600,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, >>> * redirecting to another netdev >>> */ >>> __skb_push(skb, skb->mac_len); >>> - skb_do_redirect(skb); >>> + skb_do_redirect(skb, &cl_res); >>> return NULL; >>> default: >>> break; >>> diff --git a/net/core/filter.c b/net/core/filter.c >>> index b9ec916f4e3a..4f64cf5189e6 100644 >>> --- a/net/core/filter.c >>> +++ b/net/core/filter.c >>> @@ -2062,19 +2062,36 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) >>> return TC_ACT_REDIRECT; >>> } >>> >>> -int skb_do_redirect(struct sk_buff *skb) >>> +int skb_do_redirect(struct sk_buff *skb, struct tcf_result *res) >>> { >>> - struct redirect_info *ri = this_cpu_ptr(&redirect_info); >>> + struct gnet_stats_queue *stats; >>> struct net_device *dev; >>> + int ret, flags; >>> >>> - dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); >>> - ri->ifindex = 0; >>> + if (!res->dev_ingress) { >>> + struct redirect_info *ri = this_cpu_ptr(&redirect_info); >>> + >>> + dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); >>> + flags = ri->flags; >>> + ri->ifindex = 0; >>> + stats = NULL; >>> + } else { >>> + dev = TCF_RESULT_REDIR_DEV(res); >>> + flags = TCF_RESULT_REDIR_INGRESS(res) ? BPF_F_INGRESS : 0; >>> + stats = res->qstats; >>> + } >>> if (unlikely(!dev)) { >>> kfree_skb(skb); >>> - return -EINVAL; >>> + ret = -EINVAL; >>> + goto out; >>> } >>> >>> - return __bpf_redirect(skb, dev, ri->flags); >>> + ret = __bpf_redirect(skb, dev, flags); >>> + >>> +out: >>> + if (ret && stats) >>> + qstats_overlimit_inc(res->qstats); >>> + return ret; >>> } >>> >>> static const struct bpf_func_proto bpf_redirect_proto = { >>> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c >>> index e7e626fb87bb..8dde1093994a 100644 >>> --- a/net/core/lwt_bpf.c >>> +++ b/net/core/lwt_bpf.c >>> @@ -65,7 +65,10 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, >>> lwt->name ? : ""); >>> ret = BPF_OK; >>> } else { >>> - ret = skb_do_redirect(skb); >>> + struct tcf_result res; >>> + >>> + res.dev_ingress = 0; >>> + ret = skb_do_redirect(skb, &res); >>> if (ret == 0) >>> ret = BPF_REDIRECT; >>> } >>> diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c >>> index ac20266460c0..6fd46b691181 100644 >>> --- a/net/sched/act_bpf.c >>> +++ b/net/sched/act_bpf.c >>> @@ -67,10 +67,12 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, >>> * returned. >>> */ >>> switch (filter_res) { >>> + case TC_ACT_REDIRECT: >>> + res->dev_ingress = 0; >>> + /* fall-through */ >>> case TC_ACT_PIPE: >>> case TC_ACT_RECLASSIFY: >>> case TC_ACT_OK: >>> - case TC_ACT_REDIRECT: >>> action = filter_res; >>> break; >>> case TC_ACT_SHOT: >>> diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c >>> index 66e0ac9811f9..f0fb7ded8fe2 100644 >>> --- a/net/sched/cls_bpf.c >>> +++ b/net/sched/cls_bpf.c >>> @@ -65,14 +65,16 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { >>> .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, >>> }; >>> >>> -static int cls_bpf_exec_opcode(int code) >>> +static int cls_bpf_exec_opcode(int code, struct tcf_result *res) >>> { >>> switch (code) { >>> + case TC_ACT_REDIRECT: >>> + res->dev_ingress = 0; >>> + /* fall-through */ >>> case TC_ACT_OK: >>> case TC_ACT_SHOT: >>> case TC_ACT_STOLEN: >>> case TC_ACT_TRAP: >>> - case TC_ACT_REDIRECT: >>> case TC_ACT_UNSPEC: >>> return code; >>> default: >>> @@ -113,7 +115,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, >>> res->classid = TC_H_MAJ(prog->res.classid) | >>> qdisc_skb_cb(skb)->tc_classid; >>> >>> - ret = cls_bpf_exec_opcode(filter_res); >>> + ret = cls_bpf_exec_opcode(filter_res, res); >>> if (ret == TC_ACT_UNSPEC) >>> continue; >>> break; >>> >> >> Can't we just export the struct redirect_info and let others like >> act_mirred use it, then we wouldn't need all these extra changes in >> fast path? > > Thank you for the feedback. > > The use of tcf_result allows passing to the redirect helper the stats > to be updated, and avoids an additional dev lookup for the mirred > datapath. > > Some changes are necessary to make the skb_do_redirect() function more > generic, and code duplication could be reduced with some additional > helper. Do you have so strong opinion against that? Couldn't the needed pointers be added into struct redirect_info (and/or the dev lookup be deferred for this specific scenario) thus that we would avoid all the extra hassle just to transfer this state from yet a second entity? I'd like to have the fast-path minimal and short, and I think this should be doable this way for more broader reuse. Thanks, Daniel