* [PATCH net-next 1/3] net/sched: act_ct: Fill offloading tuple iifidx
2022-01-03 11:44 [PATCH net-next 0/3] net/sched: Pass originating device to drivers offloading ct connection Paul Blakey
@ 2022-01-03 11:44 ` Paul Blakey
2022-01-03 11:44 ` [PATCH net-next 2/3] net: openvswitch: Fill act ct extension Paul Blakey
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Paul Blakey @ 2022-01-03 11:44 UTC (permalink / raw)
To: Paul Blakey, dev, netdev, Saeed Mahameed, Cong Wang,
Jamal Hadi Salim, Pravin B Shelar, davem, Jiri Pirko,
Jakub Kicinski, Marcelo Ricardo Leitner
Cc: Oz Shlomo, Vlad Buslov, Roi Dayan
Driver offloading ct tuples can use the information of which devices
received the packets that created the offloaded connections, to
more efficiently offload them only to the relevant device.
Add new act_ct nf conntrack extension, which is used to store the skb
devices before offloading the connection, and then fill in the tuple
iifindex so drivers can get the device via metadata dissector match.
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Paul Blakey <paulb@nvidia.com>
---
include/net/netfilter/nf_conntrack_act_ct.h | 50 +++++++++++++++++++++
include/net/netfilter/nf_conntrack_extend.h | 4 ++
net/netfilter/nf_conntrack_core.c | 6 ++-
net/sched/act_ct.c | 27 +++++++++++
4 files changed, 86 insertions(+), 1 deletion(-)
create mode 100644 include/net/netfilter/nf_conntrack_act_ct.h
diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h
new file mode 100644
index 000000000000..078d3c52c03f
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_act_ct.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_ACT_CT_H
+#define _NF_CONNTRACK_ACT_CT_H
+
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_act_ct_ext {
+ int ifindex[IP_CT_DIR_MAX];
+};
+
+static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT);
+#else
+ return NULL;
+#endif
+}
+
+static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT);
+
+ if (act_ct)
+ return act_ct;
+
+ act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC);
+ return act_ct;
+#else
+ return NULL;
+#endif
+}
+
+static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ struct nf_conn_act_ct_ext *act_ct_ext;
+
+ act_ct_ext = nf_conn_act_ct_ext_find(ct);
+ if (dev_net(skb->dev) == &init_net && act_ct_ext)
+ act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex;
+#endif
+}
+
+#endif /* _NF_CONNTRACK_ACT_CT_H */
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index e1e588387103..c7515d82ab06 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -27,6 +27,9 @@ enum nf_ct_ext_id {
#endif
#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
NF_CT_EXT_SYNPROXY,
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ NF_CT_EXT_ACT_CT,
#endif
NF_CT_EXT_NUM,
};
@@ -40,6 +43,7 @@ enum nf_ct_ext_id {
#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
+#define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d7e313548066..01d6589fba6e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -47,6 +47,7 @@
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_act_ct.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
@@ -2626,7 +2627,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
static __always_inline unsigned int total_extension_size(void)
{
/* remember to add new extensions below */
- BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
+ BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
return sizeof(struct nf_ct_ext) +
sizeof(struct nf_conn_help)
@@ -2649,6 +2650,9 @@ static __always_inline unsigned int total_extension_size(void)
#endif
#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ sizeof(struct nf_conn_synproxy)
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ + sizeof(struct nf_conn_act_ct_ext)
#endif
;
};
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f9afb5abff21..ebdf7caf7084 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -32,6 +32,7 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_conntrack_act_ct.h>
#include <uapi/linux/netfilter/nf_nat.h>
static struct workqueue_struct *act_ct_wq;
@@ -56,6 +57,12 @@ static const struct rhashtable_params zones_params = {
.automatic_shrinking = true,
};
+static struct nf_ct_ext_type act_ct_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_act_ct_ext),
+ .align = __alignof__(struct nf_conn_act_ct_ext),
+ .id = NF_CT_EXT_ACT_CT,
+};
+
static struct flow_action_entry *
tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
{
@@ -358,6 +365,7 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
bool tcp)
{
+ struct nf_conn_act_ct_ext *act_ct_ext;
struct flow_offload *entry;
int err;
@@ -375,6 +383,14 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ act_ct_ext = nf_conn_act_ct_ext_find(ct);
+ if (act_ct_ext) {
+ entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
+ act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL];
+ entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
+ act_ct_ext->ifindex[IP_CT_DIR_REPLY];
+ }
+
err = flow_offload_add(&ct_ft->nf_ft, entry);
if (err)
goto err_add;
@@ -1027,6 +1043,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
if (!ct)
goto out_push;
nf_ct_deliver_cached_events(ct);
+ nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
if (err != NF_ACCEPT)
@@ -1036,6 +1053,9 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);
+ if (!nf_ct_is_confirmed(ct))
+ nf_conn_act_ct_ext_add(ct);
+
/* This will take care of sending queued events
* even if the connection is already confirmed.
*/
@@ -1583,10 +1603,16 @@ static int __init ct_init_module(void)
if (err)
goto err_register;
+ err = nf_ct_extend_register(&act_ct_extend);
+ if (err)
+ goto err_register_extend;
+
static_branch_inc(&tcf_frag_xmit_count);
return 0;
+err_register_extend:
+ tcf_unregister_action(&act_ct_ops, &ct_net_ops);
err_register:
tcf_ct_flow_tables_uninit();
err_tbl_init:
@@ -1597,6 +1623,7 @@ static int __init ct_init_module(void)
static void __exit ct_cleanup_module(void)
{
static_branch_dec(&tcf_frag_xmit_count);
+ nf_ct_extend_unregister(&act_ct_extend);
tcf_unregister_action(&act_ct_ops, &ct_net_ops);
tcf_ct_flow_tables_uninit();
destroy_workqueue(act_ct_wq);
--
2.30.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 2/3] net: openvswitch: Fill act ct extension
2022-01-03 11:44 [PATCH net-next 0/3] net/sched: Pass originating device to drivers offloading ct connection Paul Blakey
2022-01-03 11:44 ` [PATCH net-next 1/3] net/sched: act_ct: Fill offloading tuple iifidx Paul Blakey
@ 2022-01-03 11:44 ` Paul Blakey
2022-01-03 11:44 ` [PATCH net-next 3/3] net/mlx5: CT: Set flow source hint from provided tuple device Paul Blakey
2022-01-04 12:20 ` [PATCH net-next 0/3] net/sched: Pass originating device to drivers offloading ct connection patchwork-bot+netdevbpf
3 siblings, 0 replies; 5+ messages in thread
From: Paul Blakey @ 2022-01-03 11:44 UTC (permalink / raw)
To: Paul Blakey, dev, netdev, Saeed Mahameed, Cong Wang,
Jamal Hadi Salim, Pravin B Shelar, davem, Jiri Pirko,
Jakub Kicinski, Marcelo Ricardo Leitner
Cc: Oz Shlomo, Vlad Buslov, Roi Dayan
To give drivers the originating device information for optimized
connection tracking offload, fill in act ct extension with
ifindex from skb.
Signed-off-by: Paul Blakey <paulb@nvidia.com>
---
net/openvswitch/conntrack.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 1b5eae57bc90..13294a55073a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -25,6 +25,8 @@
#include <net/netfilter/nf_nat.h>
#endif
+#include <net/netfilter/nf_conntrack_act_ct.h>
+
#include "datapath.h"
#include "conntrack.h"
#include "flow.h"
@@ -1045,6 +1047,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
*/
nf_ct_set_tcp_be_liberal(ct);
}
+
+ nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
}
return 0;
@@ -1245,6 +1249,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
&info->labels.mask);
if (err)
return err;
+
+ nf_conn_act_ct_ext_add(ct);
} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
labels_nonzero(&info->labels.mask)) {
err = ovs_ct_set_labels(ct, key, &info->labels.value,
--
2.30.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 3/3] net/mlx5: CT: Set flow source hint from provided tuple device
2022-01-03 11:44 [PATCH net-next 0/3] net/sched: Pass originating device to drivers offloading ct connection Paul Blakey
2022-01-03 11:44 ` [PATCH net-next 1/3] net/sched: act_ct: Fill offloading tuple iifidx Paul Blakey
2022-01-03 11:44 ` [PATCH net-next 2/3] net: openvswitch: Fill act ct extension Paul Blakey
@ 2022-01-03 11:44 ` Paul Blakey
2022-01-04 12:20 ` [PATCH net-next 0/3] net/sched: Pass originating device to drivers offloading ct connection patchwork-bot+netdevbpf
3 siblings, 0 replies; 5+ messages in thread
From: Paul Blakey @ 2022-01-03 11:44 UTC (permalink / raw)
To: Paul Blakey, dev, netdev, Saeed Mahameed, Cong Wang,
Jamal Hadi Salim, Pravin B Shelar, davem, Jiri Pirko,
Jakub Kicinski, Marcelo Ricardo Leitner
Cc: Oz Shlomo, Vlad Buslov, Roi Dayan
Get originating device from tuple offload metadata match ingress_ifindex,
and set flow_source hint to either LOCAL for vf/sf reps, UPLINK for
uplink/wire/tunnel devices/bond, or ANY (as before this patch)
for all others.
This allows lower layer (software steering or firmware) to insert the tuple
rule only in one table (either rx or tx) instead of two (rx and tx).
Signed-off-by: Paul Blakey <paulb@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/dev.c | 2 +-
.../ethernet/mellanox/mlx5/core/en/tc_ct.c | 51 +++++++++++++++++--
.../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 +
3 files changed, 49 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index a8b84d53dfb0..ba6dad97e308 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -538,7 +538,7 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
return add_drivers(dev);
}
-static bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev)
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev)
{
u64 fsystem_guid, psystem_guid;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 9f33729e7fc4..4a0d38d219ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -14,6 +14,7 @@
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>
+#include <linux/if_macvlan.h>
#include "lib/fs_chains.h"
#include "en/tc_ct.h"
@@ -326,7 +327,33 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
}
static int
-mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
+mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct net_device *ndev)
+{
+ struct mlx5e_priv *other_priv = netdev_priv(ndev);
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ bool vf_rep, uplink_rep;
+
+ vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+ uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+
+ if (vf_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ if (uplink_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ if (is_vlan_dev(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
+ if (netif_is_macvlan(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
+ if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+}
+
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *spec,
struct flow_rule *rule)
{
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -341,8 +368,7 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
flow_rule_match_basic(rule, &match);
- mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
- headers_v);
+ mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
match.mask->ip_proto);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -438,6 +464,23 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
ntohs(match.key->flags));
}
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+
+ if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
+ if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
+ spec->flow_context.flow_source =
+ mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
+
+ dev_put(dev);
+ }
+ }
+
return 0;
}
@@ -770,7 +813,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
attr->esw_attr->in_mdev = priv->mdev;
- mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
+ mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index bb677329ea08..6f8baa0f2a73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -305,5 +305,6 @@ static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
bool mlx5_eth_supported(struct mlx5_core_dev *dev);
bool mlx5_rdma_supported(struct mlx5_core_dev *dev);
bool mlx5_vnet_supported(struct mlx5_core_dev *dev);
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev);
#endif /* __MLX5_CORE_H__ */
--
2.30.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net-next 0/3] net/sched: Pass originating device to drivers offloading ct connection
2022-01-03 11:44 [PATCH net-next 0/3] net/sched: Pass originating device to drivers offloading ct connection Paul Blakey
` (2 preceding siblings ...)
2022-01-03 11:44 ` [PATCH net-next 3/3] net/mlx5: CT: Set flow source hint from provided tuple device Paul Blakey
@ 2022-01-04 12:20 ` patchwork-bot+netdevbpf
3 siblings, 0 replies; 5+ messages in thread
From: patchwork-bot+netdevbpf @ 2022-01-04 12:20 UTC (permalink / raw)
To: Paul Blakey
Cc: dev, netdev, saeedm, xiyou.wangcong, jhs, pshelar, davem, jiri,
kuba, marcelo.leitner, ozsh, vladbu, roid
Hello:
This series was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:
On Mon, 3 Jan 2022 13:44:49 +0200 you wrote:
> Hi,
>
> Currently, drivers register to a ct zone that can be shared by multiple
> devices. This can be inefficient for the driver to offload, as it
> needs to handle all the cases where the tuple can come from,
> instead of where it's most likely will arive from.
>
> [...]
Here is the summary with links:
- [net-next,1/3] net/sched: act_ct: Fill offloading tuple iifidx
https://git.kernel.org/netdev/net-next/c/9795ded7f924
- [net-next,2/3] net: openvswitch: Fill act ct extension
https://git.kernel.org/netdev/net-next/c/b702436a51df
- [net-next,3/3] net/mlx5: CT: Set flow source hint from provided tuple device
https://git.kernel.org/netdev/net-next/c/c9c079b4deaa
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 5+ messages in thread