* [PATCH net-next v7 1/6] cls_api: modify the tc_indr_block_ing_cmd parameters.
2019-08-07 1:13 [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload wenxu
@ 2019-08-07 1:13 ` wenxu
2019-08-07 1:13 ` [PATCH net-next v7 2/6] cls_api: remove the tcf_block cache wenxu
` (5 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: wenxu @ 2019-08-07 1:13 UTC (permalink / raw)
To: jakub.kicinski, pablo; +Cc: netfilter-devel, netdev
From: wenxu <wenxu@ucloud.cn>
This patch make tc_indr_block_ing_cmd can't access struct
tc_indr_block_dev and tc_indr_block_cb.
Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v7: no change
net/sched/cls_api.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9d85d32..1dd210d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -677,26 +677,28 @@ static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
static int tcf_block_setup(struct tcf_block *block,
struct flow_block_offload *bo);
-static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
- struct tc_indr_block_cb *indr_block_cb,
+static void tc_indr_block_ing_cmd(struct net_device *dev,
+ struct tcf_block *block,
+ tc_indr_block_bind_cb_t *cb,
+ void *cb_priv,
enum flow_block_command command)
{
struct flow_block_offload bo = {
.command = command,
.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
- .net = dev_net(indr_dev->dev),
- .block_shared = tcf_block_non_null_shared(indr_dev->block),
+ .net = dev_net(dev),
+ .block_shared = tcf_block_non_null_shared(block),
};
INIT_LIST_HEAD(&bo.cb_list);
- if (!indr_dev->block)
+ if (!block)
return;
- bo.block = &indr_dev->block->flow_block;
+ bo.block = &block->flow_block;
- indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
- &bo);
- tcf_block_setup(indr_dev->block, &bo);
+ cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+ tcf_block_setup(block, &bo);
}
int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
@@ -715,7 +717,8 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
if (err)
goto err_dev_put;
- tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND);
+ tc_indr_block_ing_cmd(dev, indr_dev->block, cb, cb_priv,
+ FLOW_BLOCK_BIND);
return 0;
err_dev_put:
@@ -752,7 +755,8 @@ void __tc_indr_block_cb_unregister(struct net_device *dev,
return;
/* Send unbind message if required to free any block cbs. */
- tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND);
+ tc_indr_block_ing_cmd(dev, indr_dev->block, cb, indr_block_cb->cb_priv,
+ FLOW_BLOCK_UNBIND);
tc_indr_block_cb_del(indr_block_cb);
tc_indr_block_dev_put(indr_dev);
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next v7 2/6] cls_api: remove the tcf_block cache
2019-08-07 1:13 [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload wenxu
2019-08-07 1:13 ` [PATCH net-next v7 1/6] cls_api: modify the tc_indr_block_ing_cmd parameters wenxu
@ 2019-08-07 1:13 ` wenxu
2019-08-07 1:13 ` [PATCH net-next v7 3/6] cls_api: add flow_indr_block_call function wenxu
` (4 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: wenxu @ 2019-08-07 1:13 UTC (permalink / raw)
To: jakub.kicinski, pablo; +Cc: netfilter-devel, netdev
From: wenxu <wenxu@ucloud.cn>
Remove the tcf_block in the tc_indr_block_dev for muti-subsystem
support.
Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v7: no change
net/sched/cls_api.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1dd210d..12eaa6c9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -574,7 +574,6 @@ struct tc_indr_block_dev {
struct net_device *dev;
unsigned int refcnt;
struct list_head cb_list;
- struct tcf_block *block;
};
struct tc_indr_block_cb {
@@ -611,7 +610,6 @@ static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
INIT_LIST_HEAD(&indr_dev->cb_list);
indr_dev->dev = dev;
- indr_dev->block = tc_dev_ingress_block(dev);
if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
tc_indr_setup_block_ht_params)) {
kfree(indr_dev);
@@ -706,6 +704,7 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
{
struct tc_indr_block_cb *indr_block_cb;
struct tc_indr_block_dev *indr_dev;
+ struct tcf_block *block;
int err;
indr_dev = tc_indr_block_dev_get(dev);
@@ -717,8 +716,9 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
if (err)
goto err_dev_put;
- tc_indr_block_ing_cmd(dev, indr_dev->block, cb, cb_priv,
- FLOW_BLOCK_BIND);
+ block = tc_dev_ingress_block(dev);
+ tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
+ indr_block_cb->cb_priv, FLOW_BLOCK_BIND);
return 0;
err_dev_put:
@@ -745,6 +745,7 @@ void __tc_indr_block_cb_unregister(struct net_device *dev,
{
struct tc_indr_block_cb *indr_block_cb;
struct tc_indr_block_dev *indr_dev;
+ struct tcf_block *block;
indr_dev = tc_indr_block_dev_lookup(dev);
if (!indr_dev)
@@ -755,8 +756,9 @@ void __tc_indr_block_cb_unregister(struct net_device *dev,
return;
/* Send unbind message if required to free any block cbs. */
- tc_indr_block_ing_cmd(dev, indr_dev->block, cb, indr_block_cb->cb_priv,
- FLOW_BLOCK_UNBIND);
+ block = tc_dev_ingress_block(dev);
+ tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
+ indr_block_cb->cb_priv, FLOW_BLOCK_UNBIND);
tc_indr_block_cb_del(indr_block_cb);
tc_indr_block_dev_put(indr_dev);
}
@@ -792,8 +794,6 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
if (!indr_dev)
return;
- indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL;
-
list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
&bo);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next v7 3/6] cls_api: add flow_indr_block_call function
2019-08-07 1:13 [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload wenxu
2019-08-07 1:13 ` [PATCH net-next v7 1/6] cls_api: modify the tc_indr_block_ing_cmd parameters wenxu
2019-08-07 1:13 ` [PATCH net-next v7 2/6] cls_api: remove the tcf_block cache wenxu
@ 2019-08-07 1:13 ` wenxu
2019-08-07 1:13 ` [PATCH net-next v7 4/6] flow_offload: move tc indirect block to flow offload wenxu
` (3 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: wenxu @ 2019-08-07 1:13 UTC (permalink / raw)
To: jakub.kicinski, pablo; +Cc: netfilter-devel, netdev
From: wenxu <wenxu@ucloud.cn>
This patch make indr_block_call don't access struct tc_indr_block_cb
and tc_indr_block_dev directly
Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v7: no change
net/sched/cls_api.c | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 12eaa6c9..7c34fc6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -773,13 +773,27 @@ void tc_indr_block_cb_unregister(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
+static void flow_indr_block_call(struct net_device *dev,
+ struct flow_block_offload *bo,
+ enum flow_block_command command)
+{
+ struct tc_indr_block_cb *indr_block_cb;
+ struct tc_indr_block_dev *indr_dev;
+
+ indr_dev = tc_indr_block_dev_lookup(dev);
+ if (!indr_dev)
+ return;
+
+ list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+ indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+ bo);
+}
+
static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
struct tcf_block_ext_info *ei,
enum flow_block_command command,
struct netlink_ext_ack *extack)
{
- struct tc_indr_block_cb *indr_block_cb;
- struct tc_indr_block_dev *indr_dev;
struct flow_block_offload bo = {
.command = command,
.binder_type = ei->binder_type,
@@ -790,14 +804,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
};
INIT_LIST_HEAD(&bo.cb_list);
- indr_dev = tc_indr_block_dev_lookup(dev);
- if (!indr_dev)
- return;
-
- list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
- indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
- &bo);
-
+ flow_indr_block_call(dev, &bo, command);
tcf_block_setup(block, &bo);
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next v7 4/6] flow_offload: move tc indirect block to flow offload
2019-08-07 1:13 [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload wenxu
` (2 preceding siblings ...)
2019-08-07 1:13 ` [PATCH net-next v7 3/6] cls_api: add flow_indr_block_call function wenxu
@ 2019-08-07 1:13 ` wenxu
2019-08-07 1:13 ` [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block wenxu
` (2 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: wenxu @ 2019-08-07 1:13 UTC (permalink / raw)
To: jakub.kicinski, pablo; +Cc: netfilter-devel, netdev
From: wenxu <wenxu@ucloud.cn>
move tc indirect block to flow_offload and rename
it to flow indirect block.The nf_tables can use the
indr block architecture.
Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v7: no change
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 10 +-
.../net/ethernet/netronome/nfp/flower/offload.c | 11 +-
include/net/flow_offload.h | 29 +++
include/net/pkt_cls.h | 35 ---
include/net/sch_generic.h | 3 -
net/core/flow_offload.c | 215 ++++++++++++++++++
net/sched/cls_api.c | 240 +++------------------
7 files changed, 280 insertions(+), 263 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index bf6f483..1ebbd63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -781,9 +781,9 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
{
int err;
- err = __tc_indr_block_cb_register(netdev, rpriv,
- mlx5e_rep_indr_setup_tc_cb,
- rpriv);
+ err = __flow_indr_block_cb_register(netdev, rpriv,
+ mlx5e_rep_indr_setup_tc_cb,
+ rpriv);
if (err) {
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
@@ -796,8 +796,8 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
struct net_device *netdev)
{
- __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
- rpriv);
+ __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
+ rpriv);
}
static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index ff8a9f1..3a4f4f0 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1649,16 +1649,17 @@ int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
return NOTIFY_OK;
if (event == NETDEV_REGISTER) {
- err = __tc_indr_block_cb_register(netdev, app,
- nfp_flower_indr_setup_tc_cb,
- app);
+ err = __flow_indr_block_cb_register(netdev, app,
+ nfp_flower_indr_setup_tc_cb,
+ app);
if (err)
nfp_flower_cmsg_warn(app,
"Indirect block reg failed - %s\n",
netdev->name);
} else if (event == NETDEV_UNREGISTER) {
- __tc_indr_block_cb_unregister(netdev,
- nfp_flower_indr_setup_tc_cb, app);
+ __flow_indr_block_cb_unregister(netdev,
+ nfp_flower_indr_setup_tc_cb,
+ app);
}
return NOTIFY_OK;
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index d3b12bc..46b8777 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -4,6 +4,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <net/flow_dissector.h>
+#include <linux/rhashtable.h>
struct flow_match {
struct flow_dissector *dissector;
@@ -370,4 +371,32 @@ static inline void flow_block_init(struct flow_block *flow_block)
INIT_LIST_HEAD(&flow_block->cb_list);
}
+typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
+ enum tc_setup_type type, void *type_data);
+
+typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command);
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+ flow_indr_block_bind_cb_t *cb, void *cb_ident);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident);
+
+void flow_indr_block_call(struct net_device *dev,
+ flow_indr_block_ing_cmd_t *cb,
+ struct flow_block_offload *bo,
+ enum flow_block_command command);
+
#endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e429809..0790a4e 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -70,15 +70,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
return block->q;
}
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident);
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void __tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident);
-
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode);
@@ -137,32 +128,6 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
{
}
-static inline
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- return 0;
-}
-
-static inline
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- return 0;
-}
-
-static inline
-void __tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
-static inline
-void tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6b6b012..d9f359a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,9 +23,6 @@
struct module;
struct bpf_flow_keys;
-typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
- enum tc_setup_type type, void *type_data);
-
struct qdisc_rate_table {
struct tc_ratespec rate;
u32 data[256];
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index d63b970..4cc18e4 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -2,6 +2,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/flow_offload.h>
+#include <linux/rtnetlink.h>
struct flow_rule *flow_rule_alloc(unsigned int num_actions)
{
@@ -280,3 +281,217 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
}
}
EXPORT_SYMBOL(flow_block_cb_setup_simple);
+
+static struct rhashtable indr_setup_block_ht;
+
+struct flow_indr_block_cb {
+ struct list_head list;
+ void *cb_priv;
+ flow_indr_block_bind_cb_t *cb;
+ void *cb_ident;
+};
+
+struct flow_indr_block_dev {
+ struct rhash_head ht_node;
+ struct net_device *dev;
+ unsigned int refcnt;
+ flow_indr_block_ing_cmd_t *block_ing_cmd_cb;
+ struct list_head cb_list;
+};
+
+static const struct rhashtable_params flow_indr_setup_block_ht_params = {
+ .key_offset = offsetof(struct flow_indr_block_dev, dev),
+ .head_offset = offsetof(struct flow_indr_block_dev, ht_node),
+ .key_len = sizeof(struct net_device *),
+};
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_lookup(struct net_device *dev)
+{
+ return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
+ flow_indr_setup_block_ht_params);
+}
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_get(struct net_device *dev)
+{
+ struct flow_indr_block_dev *indr_dev;
+
+ indr_dev = flow_indr_block_dev_lookup(dev);
+ if (indr_dev)
+ goto inc_ref;
+
+ indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
+ if (!indr_dev)
+ return NULL;
+
+ INIT_LIST_HEAD(&indr_dev->cb_list);
+ indr_dev->dev = dev;
+ if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+ flow_indr_setup_block_ht_params)) {
+ kfree(indr_dev);
+ return NULL;
+ }
+
+inc_ref:
+ indr_dev->refcnt++;
+ return indr_dev;
+}
+
+static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev)
+{
+ if (--indr_dev->refcnt)
+ return;
+
+ rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+ flow_indr_setup_block_ht_params);
+ kfree(indr_dev);
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev,
+ flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+
+ list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+ if (indr_block_cb->cb == cb &&
+ indr_block_cb->cb_ident == cb_ident)
+ return indr_block_cb;
+ return NULL;
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv,
+ flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+
+ indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+ if (indr_block_cb)
+ return ERR_PTR(-EEXIST);
+
+ indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
+ if (!indr_block_cb)
+ return ERR_PTR(-ENOMEM);
+
+ indr_block_cb->cb_priv = cb_priv;
+ indr_block_cb->cb = cb;
+ indr_block_cb->cb_ident = cb_ident;
+ list_add(&indr_block_cb->list, &indr_dev->cb_list);
+
+ return indr_block_cb;
+}
+
+static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
+{
+ list_del(&indr_block_cb->list);
+ kfree(indr_block_cb);
+}
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+ struct flow_indr_block_dev *indr_dev;
+ int err;
+
+ indr_dev = flow_indr_block_dev_get(dev);
+ if (!indr_dev)
+ return -ENOMEM;
+
+ indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
+ err = PTR_ERR_OR_ZERO(indr_block_cb);
+ if (err)
+ goto err_dev_put;
+
+ if (indr_dev->block_ing_cmd_cb)
+ indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
+ indr_block_cb->cb_priv,
+ FLOW_BLOCK_BIND);
+
+ return 0;
+
+err_dev_put:
+ flow_indr_block_dev_put(indr_dev);
+ return err;
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident)
+{
+ int err;
+
+ rtnl_lock();
+ err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
+ rtnl_unlock();
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_register);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+ struct flow_indr_block_dev *indr_dev;
+
+ indr_dev = flow_indr_block_dev_lookup(dev);
+ if (!indr_dev)
+ return;
+
+ indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+ if (!indr_block_cb)
+ return;
+
+ if (indr_dev->block_ing_cmd_cb)
+ indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
+ indr_block_cb->cb_priv,
+ FLOW_BLOCK_UNBIND);
+
+ flow_indr_block_cb_del(indr_block_cb);
+ flow_indr_block_dev_put(indr_dev);
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident)
+{
+ rtnl_lock();
+ __flow_indr_block_cb_unregister(dev, cb, cb_ident);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
+
+void flow_indr_block_call(struct net_device *dev,
+ flow_indr_block_ing_cmd_t cb,
+ struct flow_block_offload *bo,
+ enum flow_block_command command)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+ struct flow_indr_block_dev *indr_dev;
+
+ indr_dev = flow_indr_block_dev_lookup(dev);
+ if (!indr_dev)
+ return;
+
+ indr_dev->block_ing_cmd_cb = command == FLOW_BLOCK_BIND
+ ? cb : NULL;
+
+ list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+ indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+ bo);
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_call);
+
+static int __init init_flow_indr_rhashtable(void)
+{
+ return rhashtable_init(&indr_setup_block_ht,
+ &flow_indr_setup_block_ht_params);
+}
+subsys_initcall(init_flow_indr_rhashtable);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 7c34fc6..0b0dde2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -37,6 +37,7 @@
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
+#include <net/flow_offload.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -545,139 +546,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
}
}
-static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
-{
- const struct Qdisc_class_ops *cops;
- struct Qdisc *qdisc;
-
- if (!dev_ingress_queue(dev))
- return NULL;
-
- qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
- if (!qdisc)
- return NULL;
-
- cops = qdisc->ops->cl_ops;
- if (!cops)
- return NULL;
-
- if (!cops->tcf_block)
- return NULL;
-
- return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
-}
-
-static struct rhashtable indr_setup_block_ht;
-
-struct tc_indr_block_dev {
- struct rhash_head ht_node;
- struct net_device *dev;
- unsigned int refcnt;
- struct list_head cb_list;
-};
-
-struct tc_indr_block_cb {
- struct list_head list;
- void *cb_priv;
- tc_indr_block_bind_cb_t *cb;
- void *cb_ident;
-};
-
-static const struct rhashtable_params tc_indr_setup_block_ht_params = {
- .key_offset = offsetof(struct tc_indr_block_dev, dev),
- .head_offset = offsetof(struct tc_indr_block_dev, ht_node),
- .key_len = sizeof(struct net_device *),
-};
-
-static struct tc_indr_block_dev *
-tc_indr_block_dev_lookup(struct net_device *dev)
-{
- return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
- tc_indr_setup_block_ht_params);
-}
-
-static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
-{
- struct tc_indr_block_dev *indr_dev;
-
- indr_dev = tc_indr_block_dev_lookup(dev);
- if (indr_dev)
- goto inc_ref;
-
- indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
- if (!indr_dev)
- return NULL;
-
- INIT_LIST_HEAD(&indr_dev->cb_list);
- indr_dev->dev = dev;
- if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
- tc_indr_setup_block_ht_params)) {
- kfree(indr_dev);
- return NULL;
- }
-
-inc_ref:
- indr_dev->refcnt++;
- return indr_dev;
-}
-
-static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
-{
- if (--indr_dev->refcnt)
- return;
-
- rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
- tc_indr_setup_block_ht_params);
- kfree(indr_dev);
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- struct tc_indr_block_cb *indr_block_cb;
-
- list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
- if (indr_block_cb->cb == cb &&
- indr_block_cb->cb_ident == cb_ident)
- return indr_block_cb;
- return NULL;
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- struct tc_indr_block_cb *indr_block_cb;
-
- indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
- if (indr_block_cb)
- return ERR_PTR(-EEXIST);
-
- indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
- if (!indr_block_cb)
- return ERR_PTR(-ENOMEM);
-
- indr_block_cb->cb_priv = cb_priv;
- indr_block_cb->cb = cb;
- indr_block_cb->cb_ident = cb_ident;
- list_add(&indr_block_cb->list, &indr_dev->cb_list);
-
- return indr_block_cb;
-}
-
-static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
-{
- list_del(&indr_block_cb->list);
- kfree(indr_block_cb);
-}
-
static int tcf_block_setup(struct tcf_block *block,
struct flow_block_offload *bo);
static void tc_indr_block_ing_cmd(struct net_device *dev,
struct tcf_block *block,
- tc_indr_block_bind_cb_t *cb,
+ flow_indr_block_bind_cb_t *cb,
void *cb_priv,
enum flow_block_command command)
{
@@ -699,97 +573,40 @@ static void tc_indr_block_ing_cmd(struct net_device *dev,
tcf_block_setup(block, &bo);
}
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- struct tc_indr_block_cb *indr_block_cb;
- struct tc_indr_block_dev *indr_dev;
- struct tcf_block *block;
- int err;
-
- indr_dev = tc_indr_block_dev_get(dev);
- if (!indr_dev)
- return -ENOMEM;
-
- indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
- err = PTR_ERR_OR_ZERO(indr_block_cb);
- if (err)
- goto err_dev_put;
-
- block = tc_dev_ingress_block(dev);
- tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
- indr_block_cb->cb_priv, FLOW_BLOCK_BIND);
- return 0;
-
-err_dev_put:
- tc_indr_block_dev_put(indr_dev);
- return err;
-}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
-
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
{
- int err;
-
- rtnl_lock();
- err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
- rtnl_unlock();
-
- return err;
-}
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
+ const struct Qdisc_class_ops *cops;
+ struct Qdisc *qdisc;
-void __tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- struct tc_indr_block_cb *indr_block_cb;
- struct tc_indr_block_dev *indr_dev;
- struct tcf_block *block;
+ if (!dev_ingress_queue(dev))
+ return NULL;
- indr_dev = tc_indr_block_dev_lookup(dev);
- if (!indr_dev)
- return;
+ qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+ if (!qdisc)
+ return NULL;
- indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
- if (!indr_block_cb)
- return;
+ cops = qdisc->ops->cl_ops;
+ if (!cops)
+ return NULL;
- /* Send unbind message if required to free any block cbs. */
- block = tc_dev_ingress_block(dev);
- tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
- indr_block_cb->cb_priv, FLOW_BLOCK_UNBIND);
- tc_indr_block_cb_del(indr_block_cb);
- tc_indr_block_dev_put(indr_dev);
-}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
+ if (!cops->tcf_block)
+ return NULL;
-void tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- rtnl_lock();
- __tc_indr_block_cb_unregister(dev, cb, cb_ident);
- rtnl_unlock();
+ return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
}
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
-static void flow_indr_block_call(struct net_device *dev,
- struct flow_block_offload *bo,
- enum flow_block_command command)
+static void tc_indr_block_get_and_ing_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command)
{
- struct tc_indr_block_cb *indr_block_cb;
- struct tc_indr_block_dev *indr_dev;
-
- indr_dev = tc_indr_block_dev_lookup(dev);
- if (!indr_dev)
- return;
+ struct tcf_block *block = tc_dev_ingress_block(dev);
- list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
- indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
- bo);
+ tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command);
}
-static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
+static void tc_indr_block_call(struct tcf_block *block,
+ struct net_device *dev,
struct tcf_block_ext_info *ei,
enum flow_block_command command,
struct netlink_ext_ack *extack)
@@ -804,7 +621,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
};
INIT_LIST_HEAD(&bo.cb_list);
- flow_indr_block_call(dev, &bo, command);
+ flow_indr_block_call(dev, tc_indr_block_get_and_ing_cmd, &bo, command);
tcf_block_setup(block, &bo);
}
@@ -3378,11 +3195,6 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
- err = rhashtable_init(&indr_setup_block_ht,
- &tc_indr_setup_block_ht_params);
- if (err)
- goto err_rhash_setup_block_ht;
-
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
@@ -3396,8 +3208,6 @@ static int __init tc_filter_init(void)
return 0;
-err_rhash_setup_block_ht:
- unregister_pernet_subsys(&tcf_net_ops);
err_register_pernet_subsys:
destroy_workqueue(tc_filter_wq);
return err;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
2019-08-07 1:13 [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload wenxu
` (3 preceding siblings ...)
2019-08-07 1:13 ` [PATCH net-next v7 4/6] flow_offload: move tc indirect block to flow offload wenxu
@ 2019-08-07 1:13 ` wenxu
2019-08-12 14:11 ` Vlad Buslov
2019-08-07 1:13 ` [PATCH net-next v7 6/6] netfilter: nf_tables_offload: support indr block call wenxu
2019-08-09 1:44 ` [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload David Miller
6 siblings, 1 reply; 15+ messages in thread
From: wenxu @ 2019-08-07 1:13 UTC (permalink / raw)
To: jakub.kicinski, pablo; +Cc: netfilter-devel, netdev
From: wenxu <wenxu@ucloud.cn>
It provide a callback list to find the blocks of tc
and nft subsystems
Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v7: add a mutex lock for add/del flow_indr_block_ing_cb
include/net/flow_offload.h | 10 ++++++++-
net/core/flow_offload.c | 51 ++++++++++++++++++++++++++++++++++------------
net/sched/cls_api.c | 9 +++++++-
3 files changed, 55 insertions(+), 15 deletions(-)
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 46b8777..e8069b6 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -379,6 +379,15 @@ typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
void *cb_priv,
enum flow_block_command command);
+struct flow_indr_block_ing_entry {
+ flow_indr_block_ing_cmd_t *cb;
+ struct list_head list;
+};
+
+void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry);
+
+void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry);
+
int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
flow_indr_block_bind_cb_t *cb,
void *cb_ident);
@@ -395,7 +404,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev,
void *cb_ident);
void flow_indr_block_call(struct net_device *dev,
- flow_indr_block_ing_cmd_t *cb,
struct flow_block_offload *bo,
enum flow_block_command command);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 4cc18e4..64c3d4d 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <net/flow_offload.h>
#include <linux/rtnetlink.h>
+#include <linux/mutex.h>
struct flow_rule *flow_rule_alloc(unsigned int num_actions)
{
@@ -282,6 +283,8 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
}
EXPORT_SYMBOL(flow_block_cb_setup_simple);
+static LIST_HEAD(block_ing_cb_list);
+
static struct rhashtable indr_setup_block_ht;
struct flow_indr_block_cb {
@@ -295,7 +298,6 @@ struct flow_indr_block_dev {
struct rhash_head ht_node;
struct net_device *dev;
unsigned int refcnt;
- flow_indr_block_ing_cmd_t *block_ing_cmd_cb;
struct list_head cb_list;
};
@@ -389,6 +391,20 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
kfree(indr_block_cb);
}
+static void flow_block_ing_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command)
+{
+ struct flow_indr_block_ing_entry *entry;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
+ entry->cb(dev, cb, cb_priv, command);
+ }
+ rcu_read_unlock();
+}
+
int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
flow_indr_block_bind_cb_t *cb,
void *cb_ident)
@@ -406,10 +422,8 @@ int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
if (err)
goto err_dev_put;
- if (indr_dev->block_ing_cmd_cb)
- indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
- indr_block_cb->cb_priv,
- FLOW_BLOCK_BIND);
+ flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+ FLOW_BLOCK_BIND);
return 0;
@@ -448,10 +462,8 @@ void __flow_indr_block_cb_unregister(struct net_device *dev,
if (!indr_block_cb)
return;
- if (indr_dev->block_ing_cmd_cb)
- indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
- indr_block_cb->cb_priv,
- FLOW_BLOCK_UNBIND);
+ flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+ FLOW_BLOCK_UNBIND);
flow_indr_block_cb_del(indr_block_cb);
flow_indr_block_dev_put(indr_dev);
@@ -469,7 +481,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev,
EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
void flow_indr_block_call(struct net_device *dev,
- flow_indr_block_ing_cmd_t cb,
struct flow_block_offload *bo,
enum flow_block_command command)
{
@@ -480,15 +491,29 @@ void flow_indr_block_call(struct net_device *dev,
if (!indr_dev)
return;
- indr_dev->block_ing_cmd_cb = command == FLOW_BLOCK_BIND
- ? cb : NULL;
-
list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
bo);
}
EXPORT_SYMBOL_GPL(flow_indr_block_call);
+static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
+void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+ mutex_lock(&flow_indr_block_ing_cb_lock);
+ list_add_tail_rcu(&entry->list, &block_ing_cb_list);
+ mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
+
+void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+ mutex_lock(&flow_indr_block_ing_cb_lock);
+ list_del_rcu(&entry->list);
+ mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);
+
static int __init init_flow_indr_rhashtable(void)
{
return rhashtable_init(&indr_setup_block_ht,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b0dde2..e0d8b45 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -621,7 +621,7 @@ static void tc_indr_block_call(struct tcf_block *block,
};
INIT_LIST_HEAD(&bo.cb_list);
- flow_indr_block_call(dev, tc_indr_block_get_and_ing_cmd, &bo, command);
+ flow_indr_block_call(dev, &bo, command);
tcf_block_setup(block, &bo);
}
@@ -3183,6 +3183,11 @@ static void __net_exit tcf_net_exit(struct net *net)
.size = sizeof(struct tcf_net),
};
+static struct flow_indr_block_ing_entry block_ing_entry = {
+ .cb = tc_indr_block_get_and_ing_cmd,
+ .list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
static int __init tc_filter_init(void)
{
int err;
@@ -3195,6 +3200,8 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
+ flow_indr_add_block_ing_cb(&block_ing_entry);
+
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
2019-08-07 1:13 ` [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block wenxu
@ 2019-08-12 14:11 ` Vlad Buslov
2019-08-14 2:50 ` wenxu
0 siblings, 1 reply; 15+ messages in thread
From: Vlad Buslov @ 2019-08-12 14:11 UTC (permalink / raw)
To: wenxu, Jakub Kicinski
Cc: David Miller, Jiri Pirko, pablo, netfilter-devel, netdev
On Wed 07 Aug 2019 at 04:13, wenxu@ucloud.cn wrote:
> From: wenxu <wenxu@ucloud.cn>
>
> It provide a callback list to find the blocks of tc
> and nft subsystems
>
> Signed-off-by: wenxu <wenxu@ucloud.cn>
> Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> ---
> v7: add a mutex lock for add/del flow_indr_block_ing_cb
>
> include/net/flow_offload.h | 10 ++++++++-
> net/core/flow_offload.c | 51 ++++++++++++++++++++++++++++++++++------------
> net/sched/cls_api.c | 9 +++++++-
> 3 files changed, 55 insertions(+), 15 deletions(-)
>
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index 46b8777..e8069b6 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -379,6 +379,15 @@ typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
> void *cb_priv,
> enum flow_block_command command);
>
> +struct flow_indr_block_ing_entry {
> + flow_indr_block_ing_cmd_t *cb;
> + struct list_head list;
> +};
> +
> +void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry);
> +
> +void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry);
> +
> int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
> flow_indr_block_bind_cb_t *cb,
> void *cb_ident);
> @@ -395,7 +404,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev,
> void *cb_ident);
>
> void flow_indr_block_call(struct net_device *dev,
> - flow_indr_block_ing_cmd_t *cb,
> struct flow_block_offload *bo,
> enum flow_block_command command);
>
> diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
> index 4cc18e4..64c3d4d 100644
> --- a/net/core/flow_offload.c
> +++ b/net/core/flow_offload.c
> @@ -3,6 +3,7 @@
> #include <linux/slab.h>
> #include <net/flow_offload.h>
> #include <linux/rtnetlink.h>
> +#include <linux/mutex.h>
>
> struct flow_rule *flow_rule_alloc(unsigned int num_actions)
> {
> @@ -282,6 +283,8 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
> }
> EXPORT_SYMBOL(flow_block_cb_setup_simple);
>
> +static LIST_HEAD(block_ing_cb_list);
> +
> static struct rhashtable indr_setup_block_ht;
>
> struct flow_indr_block_cb {
> @@ -295,7 +298,6 @@ struct flow_indr_block_dev {
> struct rhash_head ht_node;
> struct net_device *dev;
> unsigned int refcnt;
> - flow_indr_block_ing_cmd_t *block_ing_cmd_cb;
> struct list_head cb_list;
> };
>
> @@ -389,6 +391,20 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
> kfree(indr_block_cb);
> }
>
> +static void flow_block_ing_cmd(struct net_device *dev,
> + flow_indr_block_bind_cb_t *cb,
> + void *cb_priv,
> + enum flow_block_command command)
> +{
> + struct flow_indr_block_ing_entry *entry;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
> + entry->cb(dev, cb, cb_priv, command);
> + }
> + rcu_read_unlock();
> +}
Hi,
I'm getting following incorrect rcu usage warnings with this patch
caused by rcu_read_lock in flow_block_ing_cmd:
[ 401.510948] =============================
[ 401.510952] WARNING: suspicious RCU usage
[ 401.510993] 5.3.0-rc3+ #589 Not tainted
[ 401.510996] -----------------------------
[ 401.511001] include/linux/rcupdate.h:265 Illegal context switch in RCU read-side critical section!
[ 401.511004]
other info that might help us debug this:
[ 401.511008]
rcu_scheduler_active = 2, debug_locks = 1
[ 401.511012] 7 locks held by test-ecmp-add-v/7576:
[ 401.511015] #0: 00000000081d71a5 (sb_writers#4){.+.+}, at: vfs_write+0x166/0x1d0
[ 401.511037] #1: 000000002bd338c3 (&of->mutex){+.+.}, at: kernfs_fop_write+0xef/0x1b0
[ 401.511051] #2: 00000000c921c634 (kn->count#317){.+.+}, at: kernfs_fop_write+0xf7/0x1b0
[ 401.511062] #3: 00000000a19cdd56 (&dev->mutex){....}, at: sriov_numvfs_store+0x6b/0x130
[ 401.511079] #4: 000000005425fa52 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x30/0x140
[ 401.511092] #5: 00000000c5822793 (rtnl_mutex){+.+.}, at: unregister_netdevice_notifier+0x35/0x140
[ 401.511101] #6: 00000000c2f3507e (rcu_read_lock){....}, at: flow_block_ing_cmd+0x5/0x130
[ 401.511115]
stack backtrace:
[ 401.511121] CPU: 21 PID: 7576 Comm: test-ecmp-add-v Not tainted 5.3.0-rc3+ #589
[ 401.511124] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[ 401.511127] Call Trace:
[ 401.511138] dump_stack+0x85/0xc0
[ 401.511146] ___might_sleep+0x100/0x180
[ 401.511154] __mutex_lock+0x5b/0x960
[ 401.511162] ? find_held_lock+0x2b/0x80
[ 401.511173] ? __tcf_get_next_chain+0x1d/0xb0
[ 401.511179] ? mark_held_locks+0x49/0x70
[ 401.511194] ? __tcf_get_next_chain+0x1d/0xb0
[ 401.511198] __tcf_get_next_chain+0x1d/0xb0
[ 401.511251] ? uplink_rep_async_event+0x70/0x70 [mlx5_core]
[ 401.511261] tcf_block_playback_offloads+0x39/0x160
[ 401.511276] tcf_block_setup+0x1b0/0x240
[ 401.511312] ? mlx5e_rep_indr_setup_tc_cb+0xca/0x290 [mlx5_core]
[ 401.511347] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[ 401.511359] tc_indr_block_get_and_ing_cmd+0x11b/0x1e0
[ 401.511404] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[ 401.511414] flow_block_ing_cmd+0x7e/0x130
[ 401.511453] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[ 401.511462] __flow_indr_block_cb_unregister+0x7f/0xf0
[ 401.511502] mlx5e_nic_rep_netdevice_event+0x75/0xb0 [mlx5_core]
[ 401.511513] unregister_netdevice_notifier+0xe9/0x140
[ 401.511554] mlx5e_cleanup_rep_tx+0x6f/0xe0 [mlx5_core]
[ 401.511597] mlx5e_detach_netdev+0x4b/0x60 [mlx5_core]
[ 401.511637] mlx5e_vport_rep_unload+0x71/0xc0 [mlx5_core]
[ 401.511679] esw_offloads_disable+0x5b/0x90 [mlx5_core]
[ 401.511724] mlx5_eswitch_disable.cold+0xdf/0x176 [mlx5_core]
[ 401.511759] mlx5_device_disable_sriov+0xab/0xb0 [mlx5_core]
[ 401.511794] mlx5_core_sriov_configure+0xaf/0xd0 [mlx5_core]
[ 401.511805] sriov_numvfs_store+0xf8/0x130
[ 401.511817] kernfs_fop_write+0x122/0x1b0
[ 401.511826] vfs_write+0xdb/0x1d0
[ 401.511835] ksys_write+0x65/0xe0
[ 401.511847] do_syscall_64+0x5c/0xb0
[ 401.511857] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 401.511862] RIP: 0033:0x7fad892d30f8
[ 401.511868] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 96 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 60 c3 0f 1f 80 00 00 00 00 48 83
ec 28 48 89
[ 401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
[ 401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
[ 401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
[ 401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
[ 401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
I don't think it is correct approach to try to call these callbacks with
rcu protection because:
- Cls API uses sleeping locks that cannot be used in rcu read section
(hence the included trace).
- It assumes that all implementation of classifier ops reoffload() don't
sleep.
- And that all driver offload callbacks (both block and classifier
setup) don't sleep, which is not the case.
I don't see any straightforward way to fix this, besides using some
other locking mechanism to protect block_ing_cb_list.
Regards,
Vlad
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
2019-08-12 14:11 ` Vlad Buslov
@ 2019-08-14 2:50 ` wenxu
2019-08-16 15:04 ` Vlad Buslov
0 siblings, 1 reply; 15+ messages in thread
From: wenxu @ 2019-08-14 2:50 UTC (permalink / raw)
To: Vlad Buslov, Jakub Kicinski
Cc: David Miller, Jiri Pirko, pablo, netfilter-devel, netdev
On 8/12/2019 10:11 PM, Vlad Buslov wrote:
>
>> +static void flow_block_ing_cmd(struct net_device *dev,
>> + flow_indr_block_bind_cb_t *cb,
>> + void *cb_priv,
>> + enum flow_block_command command)
>> +{
>> + struct flow_indr_block_ing_entry *entry;
>> +
>> + rcu_read_lock();
>> + list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
>> + entry->cb(dev, cb, cb_priv, command);
>> + }
>> + rcu_read_unlock();
>> +}
> Hi,
>
> I'm getting following incorrect rcu usage warnings with this patch
> caused by rcu_read_lock in flow_block_ing_cmd:
>
> [ 401.510948] =============================
> [ 401.510952] WARNING: suspicious RCU usage
> [ 401.510993] 5.3.0-rc3+ #589 Not tainted
> [ 401.510996] -----------------------------
> [ 401.511001] include/linux/rcupdate.h:265 Illegal context switch in RCU read-side critical section!
> [ 401.511004]
> other info that might help us debug this:
>
> [ 401.511008]
> rcu_scheduler_active = 2, debug_locks = 1
> [ 401.511012] 7 locks held by test-ecmp-add-v/7576:
> [ 401.511015] #0: 00000000081d71a5 (sb_writers#4){.+.+}, at: vfs_write+0x166/0x1d0
> [ 401.511037] #1: 000000002bd338c3 (&of->mutex){+.+.}, at: kernfs_fop_write+0xef/0x1b0
> [ 401.511051] #2: 00000000c921c634 (kn->count#317){.+.+}, at: kernfs_fop_write+0xf7/0x1b0
> [ 401.511062] #3: 00000000a19cdd56 (&dev->mutex){....}, at: sriov_numvfs_store+0x6b/0x130
> [ 401.511079] #4: 000000005425fa52 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x30/0x140
> [ 401.511092] #5: 00000000c5822793 (rtnl_mutex){+.+.}, at: unregister_netdevice_notifier+0x35/0x140
> [ 401.511101] #6: 00000000c2f3507e (rcu_read_lock){....}, at: flow_block_ing_cmd+0x5/0x130
> [ 401.511115]
> stack backtrace:
> [ 401.511121] CPU: 21 PID: 7576 Comm: test-ecmp-add-v Not tainted 5.3.0-rc3+ #589
> [ 401.511124] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
> [ 401.511127] Call Trace:
> [ 401.511138] dump_stack+0x85/0xc0
> [ 401.511146] ___might_sleep+0x100/0x180
> [ 401.511154] __mutex_lock+0x5b/0x960
> [ 401.511162] ? find_held_lock+0x2b/0x80
> [ 401.511173] ? __tcf_get_next_chain+0x1d/0xb0
> [ 401.511179] ? mark_held_locks+0x49/0x70
> [ 401.511194] ? __tcf_get_next_chain+0x1d/0xb0
> [ 401.511198] __tcf_get_next_chain+0x1d/0xb0
> [ 401.511251] ? uplink_rep_async_event+0x70/0x70 [mlx5_core]
> [ 401.511261] tcf_block_playback_offloads+0x39/0x160
> [ 401.511276] tcf_block_setup+0x1b0/0x240
> [ 401.511312] ? mlx5e_rep_indr_setup_tc_cb+0xca/0x290 [mlx5_core]
> [ 401.511347] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
> [ 401.511359] tc_indr_block_get_and_ing_cmd+0x11b/0x1e0
> [ 401.511404] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
> [ 401.511414] flow_block_ing_cmd+0x7e/0x130
> [ 401.511453] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
> [ 401.511462] __flow_indr_block_cb_unregister+0x7f/0xf0
> [ 401.511502] mlx5e_nic_rep_netdevice_event+0x75/0xb0 [mlx5_core]
> [ 401.511513] unregister_netdevice_notifier+0xe9/0x140
> [ 401.511554] mlx5e_cleanup_rep_tx+0x6f/0xe0 [mlx5_core]
> [ 401.511597] mlx5e_detach_netdev+0x4b/0x60 [mlx5_core]
> [ 401.511637] mlx5e_vport_rep_unload+0x71/0xc0 [mlx5_core]
> [ 401.511679] esw_offloads_disable+0x5b/0x90 [mlx5_core]
> [ 401.511724] mlx5_eswitch_disable.cold+0xdf/0x176 [mlx5_core]
> [ 401.511759] mlx5_device_disable_sriov+0xab/0xb0 [mlx5_core]
> [ 401.511794] mlx5_core_sriov_configure+0xaf/0xd0 [mlx5_core]
> [ 401.511805] sriov_numvfs_store+0xf8/0x130
> [ 401.511817] kernfs_fop_write+0x122/0x1b0
> [ 401.511826] vfs_write+0xdb/0x1d0
> [ 401.511835] ksys_write+0x65/0xe0
> [ 401.511847] do_syscall_64+0x5c/0xb0
> [ 401.511857] entry_SYSCALL_64_after_hwframe+0x49/0xbe
> [ 401.511862] RIP: 0033:0x7fad892d30f8
> [ 401.511868] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 96 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 60 c3 0f 1f 80 00 00 00 00 48 83
> ec 28 48 89
> [ 401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> [ 401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
> [ 401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
> [ 401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
> [ 401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
> [ 401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
>
> I don't think it is correct approach to try to call these callbacks with
> rcu protection because:
>
> - Cls API uses sleeping locks that cannot be used in rcu read section
> (hence the included trace).
>
> - It assumes that all implementation of classifier ops reoffload() don't
> sleep.
>
> - And that all driver offload callbacks (both block and classifier
> setup) don't sleep, which is not the case.
>
> I don't see any straightforward way to fix this, besides using some
> other locking mechanism to protect block_ing_cb_list.
>
> Regards,
> Vlad
Maybe get the mutex flow_indr_block_ing_cb_lock for both lookup, add, delete?
the callbacks_lists. the add and delete is work only on modules init case. So the
lookup is also not frequently(ony [un]register) and can protect with the locks.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
2019-08-14 2:50 ` wenxu
@ 2019-08-16 15:04 ` Vlad Buslov
2019-08-16 17:56 ` Jakub Kicinski
0 siblings, 1 reply; 15+ messages in thread
From: Vlad Buslov @ 2019-08-16 15:04 UTC (permalink / raw)
To: wenxu
Cc: Vlad Buslov, Jakub Kicinski, David Miller, Jiri Pirko, pablo,
netfilter-devel, netdev
On Wed 14 Aug 2019 at 05:50, wenxu <wenxu@ucloud.cn> wrote:
> On 8/12/2019 10:11 PM, Vlad Buslov wrote:
>>
>>> +static void flow_block_ing_cmd(struct net_device *dev,
>>> + flow_indr_block_bind_cb_t *cb,
>>> + void *cb_priv,
>>> + enum flow_block_command command)
>>> +{
>>> + struct flow_indr_block_ing_entry *entry;
>>> +
>>> + rcu_read_lock();
>>> + list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
>>> + entry->cb(dev, cb, cb_priv, command);
>>> + }
>>> + rcu_read_unlock();
>>> +}
>> Hi,
>>
>> I'm getting following incorrect rcu usage warnings with this patch
>> caused by rcu_read_lock in flow_block_ing_cmd:
>>
>> [ 401.510948] =============================
>> [ 401.510952] WARNING: suspicious RCU usage
>> [ 401.510993] 5.3.0-rc3+ #589 Not tainted
>> [ 401.510996] -----------------------------
>> [ 401.511001] include/linux/rcupdate.h:265 Illegal context switch in RCU read-side critical section!
>> [ 401.511004]
>> other info that might help us debug this:
>>
>> [ 401.511008]
>> rcu_scheduler_active = 2, debug_locks = 1
>> [ 401.511012] 7 locks held by test-ecmp-add-v/7576:
>> [ 401.511015] #0: 00000000081d71a5 (sb_writers#4){.+.+}, at: vfs_write+0x166/0x1d0
>> [ 401.511037] #1: 000000002bd338c3 (&of->mutex){+.+.}, at: kernfs_fop_write+0xef/0x1b0
>> [ 401.511051] #2: 00000000c921c634 (kn->count#317){.+.+}, at: kernfs_fop_write+0xf7/0x1b0
>> [ 401.511062] #3: 00000000a19cdd56 (&dev->mutex){....}, at: sriov_numvfs_store+0x6b/0x130
>> [ 401.511079] #4: 000000005425fa52 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x30/0x140
>> [ 401.511092] #5: 00000000c5822793 (rtnl_mutex){+.+.}, at: unregister_netdevice_notifier+0x35/0x140
>> [ 401.511101] #6: 00000000c2f3507e (rcu_read_lock){....}, at: flow_block_ing_cmd+0x5/0x130
>> [ 401.511115]
>> stack backtrace:
>> [ 401.511121] CPU: 21 PID: 7576 Comm: test-ecmp-add-v Not tainted 5.3.0-rc3+ #589
>> [ 401.511124] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
>> [ 401.511127] Call Trace:
>> [ 401.511138] dump_stack+0x85/0xc0
>> [ 401.511146] ___might_sleep+0x100/0x180
>> [ 401.511154] __mutex_lock+0x5b/0x960
>> [ 401.511162] ? find_held_lock+0x2b/0x80
>> [ 401.511173] ? __tcf_get_next_chain+0x1d/0xb0
>> [ 401.511179] ? mark_held_locks+0x49/0x70
>> [ 401.511194] ? __tcf_get_next_chain+0x1d/0xb0
>> [ 401.511198] __tcf_get_next_chain+0x1d/0xb0
>> [ 401.511251] ? uplink_rep_async_event+0x70/0x70 [mlx5_core]
>> [ 401.511261] tcf_block_playback_offloads+0x39/0x160
>> [ 401.511276] tcf_block_setup+0x1b0/0x240
>> [ 401.511312] ? mlx5e_rep_indr_setup_tc_cb+0xca/0x290 [mlx5_core]
>> [ 401.511347] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>> [ 401.511359] tc_indr_block_get_and_ing_cmd+0x11b/0x1e0
>> [ 401.511404] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>> [ 401.511414] flow_block_ing_cmd+0x7e/0x130
>> [ 401.511453] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>> [ 401.511462] __flow_indr_block_cb_unregister+0x7f/0xf0
>> [ 401.511502] mlx5e_nic_rep_netdevice_event+0x75/0xb0 [mlx5_core]
>> [ 401.511513] unregister_netdevice_notifier+0xe9/0x140
>> [ 401.511554] mlx5e_cleanup_rep_tx+0x6f/0xe0 [mlx5_core]
>> [ 401.511597] mlx5e_detach_netdev+0x4b/0x60 [mlx5_core]
>> [ 401.511637] mlx5e_vport_rep_unload+0x71/0xc0 [mlx5_core]
>> [ 401.511679] esw_offloads_disable+0x5b/0x90 [mlx5_core]
>> [ 401.511724] mlx5_eswitch_disable.cold+0xdf/0x176 [mlx5_core]
>> [ 401.511759] mlx5_device_disable_sriov+0xab/0xb0 [mlx5_core]
>> [ 401.511794] mlx5_core_sriov_configure+0xaf/0xd0 [mlx5_core]
>> [ 401.511805] sriov_numvfs_store+0xf8/0x130
>> [ 401.511817] kernfs_fop_write+0x122/0x1b0
>> [ 401.511826] vfs_write+0xdb/0x1d0
>> [ 401.511835] ksys_write+0x65/0xe0
>> [ 401.511847] do_syscall_64+0x5c/0xb0
>> [ 401.511857] entry_SYSCALL_64_after_hwframe+0x49/0xbe
>> [ 401.511862] RIP: 0033:0x7fad892d30f8
>> [ 401.511868] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 96 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 60 c3 0f 1f 80 00 00 00 00 48 83
>> ec 28 48 89
>> [ 401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
>> [ 401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
>> [ 401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
>> [ 401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
>> [ 401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
>> [ 401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
>>
>> I don't think it is correct approach to try to call these callbacks with
>> rcu protection because:
>>
>> - Cls API uses sleeping locks that cannot be used in rcu read section
>> (hence the included trace).
>>
>> - It assumes that all implementation of classifier ops reoffload() don't
>> sleep.
>>
>> - And that all driver offload callbacks (both block and classifier
>> setup) don't sleep, which is not the case.
>>
>> I don't see any straightforward way to fix this, besides using some
>> other locking mechanism to protect block_ing_cb_list.
>>
>> Regards,
>> Vlad
>
> Maybe get the mutex flow_indr_block_ing_cb_lock for both lookup, add, delete?
>
> the callbacks_lists. the add and delete is work only on modules init case. So the
>
> lookup is also not frequently(ony [un]register) and can protect with the locks.
That should do the job. I'll send the patch.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
2019-08-16 15:04 ` Vlad Buslov
@ 2019-08-16 17:56 ` Jakub Kicinski
2019-08-16 18:44 ` Vlad Buslov
2019-08-19 7:26 ` Vlad Buslov
0 siblings, 2 replies; 15+ messages in thread
From: Jakub Kicinski @ 2019-08-16 17:56 UTC (permalink / raw)
To: Vlad Buslov
Cc: wenxu, David Miller, Jiri Pirko, pablo, netfilter-devel, netdev
On Fri, 16 Aug 2019 15:04:44 +0000, Vlad Buslov wrote:
> >> [ 401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> >> [ 401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
> >> [ 401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
> >> [ 401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
> >> [ 401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
> >> [ 401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
> >>
> >> I don't think it is correct approach to try to call these callbacks with
> >> rcu protection because:
> >>
> >> - Cls API uses sleeping locks that cannot be used in rcu read section
> >> (hence the included trace).
> >>
> >> - It assumes that all implementation of classifier ops reoffload() don't
> >> sleep.
> >>
> >> - And that all driver offload callbacks (both block and classifier
> >> setup) don't sleep, which is not the case.
> >>
> >> I don't see any straightforward way to fix this, besides using some
> >> other locking mechanism to protect block_ing_cb_list.
> >>
> >> Regards,
> >> Vlad
> >
> > Maybe get the mutex flow_indr_block_ing_cb_lock for both lookup, add, delete?
> >
> > the callbacks_lists. the add and delete is work only on modules init case. So the
> >
> > lookup is also not frequently(ony [un]register) and can protect with the locks.
>
> That should do the job. I'll send the patch.
Hi Vlad!
While looking into this, would you mind also add the missing
flow_block_cb_is_busy() calls in the indirect handlers in the drivers?
LMK if you're too busy, I don't want this to get forgotten :)
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
2019-08-16 17:56 ` Jakub Kicinski
@ 2019-08-16 18:44 ` Vlad Buslov
2019-08-19 7:26 ` Vlad Buslov
1 sibling, 0 replies; 15+ messages in thread
From: Vlad Buslov @ 2019-08-16 18:44 UTC (permalink / raw)
To: Jakub Kicinski
Cc: Vlad Buslov, wenxu, David Miller, Jiri Pirko, pablo,
netfilter-devel, netdev
On Fri 16 Aug 2019 at 20:56, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Fri, 16 Aug 2019 15:04:44 +0000, Vlad Buslov wrote:
>> >> [ 401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
>> >> [ 401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
>> >> [ 401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
>> >> [ 401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
>> >> [ 401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
>> >> [ 401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
>> >>
>> >> I don't think it is correct approach to try to call these callbacks with
>> >> rcu protection because:
>> >>
>> >> - Cls API uses sleeping locks that cannot be used in rcu read section
>> >> (hence the included trace).
>> >>
>> >> - It assumes that all implementation of classifier ops reoffload() don't
>> >> sleep.
>> >>
>> >> - And that all driver offload callbacks (both block and classifier
>> >> setup) don't sleep, which is not the case.
>> >>
>> >> I don't see any straightforward way to fix this, besides using some
>> >> other locking mechanism to protect block_ing_cb_list.
>> >>
>> >> Regards,
>> >> Vlad
>> >
>> > Maybe get the mutex flow_indr_block_ing_cb_lock for both lookup, add, delete?
>> >
>> > the callbacks_lists. the add and delete is work only on modules init case. So the
>> >
>> > lookup is also not frequently(ony [un]register) and can protect with the locks.
>>
>> That should do the job. I'll send the patch.
>
> Hi Vlad!
>
> While looking into this, would you mind also add the missing
> flow_block_cb_is_busy() calls in the indirect handlers in the drivers?
>
> LMK if you're too busy, I don't want this to get forgotten :)
Hi Jakub,
Will do!
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
2019-08-16 17:56 ` Jakub Kicinski
2019-08-16 18:44 ` Vlad Buslov
@ 2019-08-19 7:26 ` Vlad Buslov
2019-08-19 20:27 ` Jakub Kicinski
1 sibling, 1 reply; 15+ messages in thread
From: Vlad Buslov @ 2019-08-19 7:26 UTC (permalink / raw)
To: Jakub Kicinski
Cc: Vlad Buslov, wenxu, David Miller, Jiri Pirko, pablo,
netfilter-devel, netdev
On Fri 16 Aug 2019 at 20:56, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Fri, 16 Aug 2019 15:04:44 +0000, Vlad Buslov wrote:
>> >> [ 401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
>> >> [ 401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
>> >> [ 401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
>> >> [ 401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
>> >> [ 401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
>> >> [ 401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
>> >>
>> >> I don't think it is correct approach to try to call these callbacks with
>> >> rcu protection because:
>> >>
>> >> - Cls API uses sleeping locks that cannot be used in rcu read section
>> >> (hence the included trace).
>> >>
>> >> - It assumes that all implementation of classifier ops reoffload() don't
>> >> sleep.
>> >>
>> >> - And that all driver offload callbacks (both block and classifier
>> >> setup) don't sleep, which is not the case.
>> >>
>> >> I don't see any straightforward way to fix this, besides using some
>> >> other locking mechanism to protect block_ing_cb_list.
>> >>
>> >> Regards,
>> >> Vlad
>> >
>> > Maybe get the mutex flow_indr_block_ing_cb_lock for both lookup, add, delete?
>> >
>> > the callbacks_lists. the add and delete is work only on modules init case. So the
>> >
>> > lookup is also not frequently(ony [un]register) and can protect with the locks.
>>
>> That should do the job. I'll send the patch.
>
> Hi Vlad!
>
> While looking into this, would you mind also add the missing
> flow_block_cb_is_busy() calls in the indirect handlers in the drivers?
>
> LMK if you're too busy, I don't want this to get forgotten :)
Hi Jakub,
I've checked the code and it looks like only nfp driver is affected:
- I added check in nfp to lookup cb_priv with
nfp_flower_indr_block_cb_priv_lookup() and call
flow_block_cb_is_busy() if cb_priv exists.
- In mlx5 en_rep.c there is already a check that indr_priv exists, so
trying to lookup block_cb->cb_indent==indr_priv is redundant.
- Switch drivers (mlxsw and ocelot) take reference to block_cb on
FLOW_BLOCK_BIND, so they should not require any modifications.
Tell me if I missed anything. Sending the patch for nfp.
Regards,
Vlad
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
2019-08-19 7:26 ` Vlad Buslov
@ 2019-08-19 20:27 ` Jakub Kicinski
0 siblings, 0 replies; 15+ messages in thread
From: Jakub Kicinski @ 2019-08-19 20:27 UTC (permalink / raw)
To: Vlad Buslov
Cc: wenxu, David Miller, Jiri Pirko, pablo, netfilter-devel, netdev
On Mon, 19 Aug 2019 07:26:07 +0000, Vlad Buslov wrote:
> On Fri 16 Aug 2019 at 20:56, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> > Hi Vlad!
> >
> > While looking into this, would you mind also add the missing
> > flow_block_cb_is_busy() calls in the indirect handlers in the drivers?
> >
> > LMK if you're too busy, I don't want this to get forgotten :)
>
> Hi Jakub,
>
> I've checked the code and it looks like only nfp driver is affected:
>
> - I added check in nfp to lookup cb_priv with
> nfp_flower_indr_block_cb_priv_lookup() and call
> flow_block_cb_is_busy() if cb_priv exists.
>
> - In mlx5 en_rep.c there is already a check that indr_priv exists, so
> trying to lookup block_cb->cb_indent==indr_priv is redundant.
>
> - Switch drivers (mlxsw and ocelot) take reference to block_cb on
> FLOW_BLOCK_BIND, so they should not require any modifications.
>
> Tell me if I missed anything. Sending the patch for nfp.
Ah, that sounds plausible, I've only checked the nfp driver.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH net-next v7 6/6] netfilter: nf_tables_offload: support indr block call
2019-08-07 1:13 [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload wenxu
` (4 preceding siblings ...)
2019-08-07 1:13 ` [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block wenxu
@ 2019-08-07 1:13 ` wenxu
2019-08-09 1:44 ` [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload David Miller
6 siblings, 0 replies; 15+ messages in thread
From: wenxu @ 2019-08-07 1:13 UTC (permalink / raw)
To: jakub.kicinski, pablo; +Cc: netfilter-devel, netdev
From: wenxu <wenxu@ucloud.cn>
nftable support indr-block call. It makes nftable an offload vlan
and tunnel device.
nft add table netdev firewall
nft add chain netdev firewall aclout { type filter hook ingress offload device mlx_pf0vf0 priority - 300 \; }
nft add rule netdev firewall aclout ip daddr 10.0.0.1 fwd to vlan0
nft add chain netdev firewall aclin { type filter hook ingress device vlan0 priority - 300 \; }
nft add rule netdev firewall aclin ip daddr 10.0.0.7 fwd to mlx_pf0vf0
Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v7: no change
include/net/netfilter/nf_tables_offload.h | 4 +
net/netfilter/nf_tables_api.c | 7 ++
net/netfilter/nf_tables_offload.c | 148 +++++++++++++++++++++++++-----
3 files changed, 135 insertions(+), 24 deletions(-)
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 3196663..bffd51a 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -63,6 +63,10 @@ struct nft_flow_rule {
struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
void nft_flow_rule_destroy(struct nft_flow_rule *flow);
int nft_flow_rule_offload_commit(struct net *net);
+void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command);
#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
(__reg)->base_offset = \
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 605a7cf..fe3b7b0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7593,6 +7593,11 @@ static void __net_exit nf_tables_exit_net(struct net *net)
.exit = nf_tables_exit_net,
};
+static struct flow_indr_block_ing_entry block_ing_entry = {
+ .cb = nft_indr_block_get_and_ing_cmd,
+ .list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
static int __init nf_tables_module_init(void)
{
int err;
@@ -7624,6 +7629,7 @@ static int __init nf_tables_module_init(void)
goto err5;
nft_chain_route_init();
+ flow_indr_add_block_ing_cb(&block_ing_entry);
return err;
err5:
rhltable_destroy(&nft_objname_ht);
@@ -7640,6 +7646,7 @@ static int __init nf_tables_module_init(void)
static void __exit nf_tables_module_exit(void)
{
+ flow_indr_del_block_ing_cb(&block_ing_entry);
nfnetlink_subsys_unregister(&nf_tables_subsys);
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 64f5fd5..d3c4c9c 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -171,24 +171,110 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
return 0;
}
+static int nft_block_setup(struct nft_base_chain *basechain,
+ struct flow_block_offload *bo,
+ enum flow_block_command cmd)
+{
+ int err;
+
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ err = nft_flow_offload_bind(bo, basechain);
+ break;
+ case FLOW_BLOCK_UNBIND:
+ err = nft_flow_offload_unbind(bo, basechain);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int nft_block_offload_cmd(struct nft_base_chain *chain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo = {};
+ int err;
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ if (err < 0)
+ return err;
+
+ return nft_block_setup(chain, &bo, cmd);
+}
+
+static void nft_indr_block_ing_cmd(struct net_device *dev,
+ struct nft_base_chain *chain,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo = {};
+
+ if (!chain)
+ return;
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+ nft_block_setup(chain, &bo, cmd);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct flow_block_offload bo = {};
+ struct netlink_ext_ack extack = {};
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ flow_indr_block_call(dev, &bo, cmd);
+
+ if (list_empty(&bo.cb_list))
+ return -EOPNOTSUPP;
+
+ return nft_block_setup(chain, &bo, cmd);
+}
+
#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
static int nft_flow_offload_chain(struct nft_trans *trans,
enum flow_block_command cmd)
{
struct nft_chain *chain = trans->ctx.chain;
- struct netlink_ext_ack extack = {};
- struct flow_block_offload bo = {};
struct nft_base_chain *basechain;
struct net_device *dev;
- int err;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
dev = basechain->ops.dev;
- if (!dev || !dev->netdev_ops->ndo_setup_tc)
+ if (!dev)
return -EOPNOTSUPP;
/* Only default policy to accept is supported for now. */
@@ -197,26 +283,10 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
nft_trans_chain_policy(trans) != NF_ACCEPT)
return -EOPNOTSUPP;
- bo.command = cmd;
- bo.block = &basechain->flow_block;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
-
- err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
- if (err < 0)
- return err;
-
- switch (cmd) {
- case FLOW_BLOCK_BIND:
- err = nft_flow_offload_bind(&bo, basechain);
- break;
- case FLOW_BLOCK_UNBIND:
- err = nft_flow_offload_unbind(&bo, basechain);
- break;
- }
-
- return err;
+ if (dev->netdev_ops->ndo_setup_tc)
+ return nft_block_offload_cmd(basechain, dev, cmd);
+ else
+ return nft_indr_block_offload_cmd(basechain, dev, cmd);
}
int nft_flow_rule_offload_commit(struct net *net)
@@ -266,3 +336,33 @@ int nft_flow_rule_offload_commit(struct net *net)
return err;
}
+
+void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command)
+{
+ struct net *net = dev_net(dev);
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (table->family != NFPROTO_NETDEV)
+ continue;
+
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (nft_is_base_chain(chain)) {
+ struct nft_base_chain *basechain;
+
+ basechain = nft_base_chain(chain);
+ if (!strncmp(basechain->dev_name, dev->name,
+ IFNAMSIZ)) {
+ nft_indr_block_ing_cmd(dev, basechain,
+ cb, cb_priv,
+ command);
+ return;
+ }
+ }
+ }
+ }
+}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload
2019-08-07 1:13 [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload wenxu
` (5 preceding siblings ...)
2019-08-07 1:13 ` [PATCH net-next v7 6/6] netfilter: nf_tables_offload: support indr block call wenxu
@ 2019-08-09 1:44 ` David Miller
6 siblings, 0 replies; 15+ messages in thread
From: David Miller @ 2019-08-09 1:44 UTC (permalink / raw)
To: wenxu; +Cc: jakub.kicinski, pablo, netfilter-devel, netdev
From: wenxu@ucloud.cn
Date: Wed, 7 Aug 2019 09:13:48 +0800
> This series patch make nftables offload support the vlan and
> tunnel device offload through indr-block architecture.
>
> The first four patches mv tc indr block to flow offload and
> rename to flow-indr-block.
> Because the new flow-indr-block can't get the tcf_block
> directly. The fifth patch provide a callback list to get
> flow_block of each subsystem immediately when the device
> register and contain a block.
> The last patch make nf_tables_offload support flow-indr-block.
>
> This version add a mutex lock for add/del flow_indr_block_ing_cb
Series applied, thank you.
^ permalink raw reply [flat|nested] 15+ messages in thread