From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jakub Kicinski Subject: [RFC 1/2] net: sched: register callbacks for remote tc block binds Date: Wed, 3 Oct 2018 21:55:10 -0700 Message-ID: <20181004045511.27733-2-jakub.kicinski@netronome.com> References: <20181004045511.27733-1-jakub.kicinski@netronome.com> Cc: jiri@resnulli.us, gerlitz.or@gmail.com, oss-drivers@netronome.com, john.hurley@netronome.com, Jakub Kicinski To: netdev@vger.kernel.org Return-path: Received: from mail-qk1-f193.google.com ([209.85.222.193]:36727 "EHLO mail-qk1-f193.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727007AbeJDLrM (ORCPT ); Thu, 4 Oct 2018 07:47:12 -0400 Received: by mail-qk1-f193.google.com with SMTP id a85-v6so4961612qkg.3 for ; Wed, 03 Oct 2018 21:55:46 -0700 (PDT) In-Reply-To: <20181004045511.27733-1-jakub.kicinski@netronome.com> Sender: netdev-owner@vger.kernel.org List-ID: From: John Hurley Currently drivers can register for TC block binds/unbinds by implementing the setup_tc ndo. However, drivers may also be interested in binds to higher level devices (e.g. tunnel drivers) to potentially offload filters applied to them. Introduce indirect block setups which allows drivers to register callbacks for block binds on other devices. The calling driver is expected to allocate a struct containing an initialised list head to all its block setup callbacks. This is used to track the callbacks from a given driver and free them if the driver is removed while the upper level device is still active. Freeing a setup cb will also trigger an unbind event (if necessary) to direct the driver to unregister any block callbacks. Allow registering an indirect block setup cb for a device that is already bound to a block. In this case (if it is an ingress block), register and also trigger the callback - meaning that any already installed rules can be replayed to the calling driver if it chooses. Signed-off-by: John Hurley Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 56 +++++++ include/net/sch_generic.h | 3 + net/sched/cls_api.c | 297 +++++++++++++++++++++++++++++++++++++- 3 files changed, 355 insertions(+), 1 deletion(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 338ef054bf16..85e335162982 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -37,6 +37,7 @@ struct tcf_block_ext_info { }; struct tcf_block_cb; +struct tcf_indr_block_owner; bool tcf_queue_work(struct rcu_work *rwork, work_func_t func); #ifdef CONFIG_NET_CLS @@ -81,6 +82,20 @@ void __tcf_block_cb_unregister(struct tcf_block *block, struct tcf_block_cb *block_cb); void tcf_block_cb_unregister(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_ident); +int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident, + struct tcf_indr_block_owner *owner); +int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident, + struct tcf_indr_block_owner *owner); +void __tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident); +void tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, + void *cb_ident); + +struct tcf_indr_block_owner *tc_indr_block_owner_create(void); +void tc_indr_block_owner_clean(struct tcf_indr_block_owner *owner); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -183,6 +198,47 @@ void tcf_block_cb_unregister(struct tcf_block *block, { } +static inline +int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, + void *cb_ident, + struct tcf_indr_block_owner *owner) +{ + return 0; +} + +static inline +int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident, + struct tcf_indr_block_owner *owner) +{ + return 0; +} + +static inline +void __tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, + void *cb_ident) +{ +} + +static inline +void tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, + void *cb_ident) +{ +} + +static inline struct tcf_indr_block_owner *tc_indr_block_owner_create(void) +{ + /* NULL would mean an error, only CONFIG_NET_CLS can dereference this */ + return (void *)1; +} + +static inline void tc_indr_block_owner_clean(struct tcf_indr_block_owner *owner) +{ +} + static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index de972403d31e..da73864c001c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -24,6 +24,9 @@ struct bpf_flow_keys; typedef int tc_setup_cb_t(enum tc_setup_type type, void *type_data, void *cb_priv); +typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, + enum tc_setup_type type, void *type_data); + struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3de47e99b788..3acc103294b4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -363,6 +364,286 @@ static void tcf_chain_flush(struct tcf_chain *chain) } } +static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) +{ + const struct Qdisc_class_ops *cops; + struct Qdisc *qdisc; + + if (!dev_ingress_queue(dev)) + return NULL; + + qdisc = dev_ingress_queue(dev)->qdisc_sleeping; + if (!qdisc) + return NULL; + + cops = qdisc->ops->cl_ops; + if (!cops) + return NULL; + + if (!cops->tcf_block) + return NULL; + + return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); +} + +static struct rhashtable indr_setup_block_ht; + +struct tc_indr_block_dev { + struct rhash_head ht_node; + struct net_device *dev; + unsigned int refcnt; + struct list_head cb_list; + struct tcf_block *block; +}; + +struct tc_indr_block_cb { + struct tc_indr_block_dev *indr_dev; + struct list_head list; + void *cb_priv; + tc_indr_block_bind_cb_t *cb; + void *cb_ident; + struct list_head track_list; +}; + +struct tcf_indr_block_owner { + struct list_head cb_list; +}; + +static const struct rhashtable_params tc_indr_setup_block_ht_params = { + .key_offset = offsetof(struct tc_indr_block_dev, dev), + .head_offset = offsetof(struct tc_indr_block_dev, ht_node), + .key_len = sizeof(struct net_device *), +}; + +static struct tc_indr_block_dev * +tc_indr_block_dev_lookup(struct net_device *dev) +{ + return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, + tc_indr_setup_block_ht_params); +} + +static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev) +{ + struct tc_indr_block_dev *indr_dev; + + indr_dev = tc_indr_block_dev_lookup(dev); + if (indr_dev) + goto inc_ref; + + indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); + if (!indr_dev) + return NULL; + + INIT_LIST_HEAD(&indr_dev->cb_list); + indr_dev->dev = dev; + indr_dev->block = tc_dev_ingress_block(dev); + if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, + tc_indr_setup_block_ht_params)) { + kfree(indr_dev); + return NULL; + } + +inc_ref: + indr_dev->refcnt++; + return indr_dev; +} + +static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev) +{ + if (--indr_dev->refcnt) + return; + + rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, + tc_indr_setup_block_ht_params); + kfree(indr_dev); +} + +static struct tc_indr_block_cb * +tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct tc_indr_block_cb *indr_block_cb; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + if (indr_block_cb->cb == cb && + indr_block_cb->cb_ident == cb_ident) + return indr_block_cb; + return NULL; +} + +static struct tc_indr_block_cb * +tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, + void *cb_priv, tc_indr_block_bind_cb_t *cb, + void *cb_ident, struct tcf_indr_block_owner *owner) +{ + struct tc_indr_block_cb *indr_block_cb; + + indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (indr_block_cb) + return ERR_PTR(-EEXIST); + + indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); + if (!indr_block_cb) + return ERR_PTR(-ENOMEM); + + indr_block_cb->indr_dev = indr_dev; + indr_block_cb->cb_priv = cb_priv; + indr_block_cb->cb = cb; + indr_block_cb->cb_ident = cb_ident; + list_add(&indr_block_cb->list, &indr_dev->cb_list); + list_add(&indr_block_cb->track_list, &owner->cb_list); + + return indr_block_cb; +} + +static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb) +{ + list_del(&indr_block_cb->list); + list_del(&indr_block_cb->track_list); + kfree(indr_block_cb); +} + +static int tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, + struct tc_indr_block_cb *indr_block_cb, + enum tc_block_command command) +{ + struct tc_block_offload bo = { + .command = command, + .binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS, + .block = indr_dev->block, + }; + + if (!indr_dev->block) + return 0; + return indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, + TC_SETUP_BLOCK, &bo); +} + +int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident, + struct tcf_indr_block_owner *owner) +{ + struct tc_indr_block_cb *indr_block_cb; + struct tc_indr_block_dev *indr_dev; + int err; + + indr_dev = tc_indr_block_dev_get(dev); + if (!indr_dev) + return -ENOMEM; + + indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident, + owner); + err = PTR_ERR_OR_ZERO(indr_block_cb); + if (err) + goto err_dev_put; + + tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND); + return 0; + +err_dev_put: + tc_indr_block_dev_put(indr_dev); + return err; +} +EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register); + +int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident, + struct tcf_indr_block_owner *owner) +{ + int err; + + rtnl_lock(); + err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident, owner); + rtnl_unlock(); + + return err; +} +EXPORT_SYMBOL_GPL(tc_indr_block_cb_register); + +void __tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct tc_indr_block_cb *indr_block_cb; + struct tc_indr_block_dev *indr_dev; + + indr_dev = tc_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (!indr_block_cb) + return; + + /* Send unbind message if required to free any block cbs. */ + tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND); + tc_indr_block_cb_del(indr_block_cb); + tc_indr_block_dev_put(indr_dev); +} +EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister); + +void tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + rtnl_lock(); + __tc_indr_block_cb_unregister(dev, cb, cb_ident); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister); + +struct tcf_indr_block_owner *tc_indr_block_owner_create(void) +{ + struct tcf_indr_block_owner *owner; + + owner = kzalloc(sizeof(*owner), GFP_KERNEL); + if (!owner) + return NULL; + INIT_LIST_HEAD(&owner->cb_list); + return owner; +} +EXPORT_SYMBOL_GPL(tc_indr_block_owner_create); + +void tc_indr_block_owner_clean(struct tcf_indr_block_owner *owner) +{ + struct tc_indr_block_cb *indr_block_cb, *store; + struct tc_indr_block_dev *indr_dev; + + rtnl_lock(); + list_for_each_entry_safe(indr_block_cb, store, &owner->cb_list, + track_list) { + indr_dev = indr_block_cb->indr_dev; + tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND); + tc_indr_block_cb_del(indr_block_cb); + tc_indr_block_dev_put(indr_dev); + } + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(tc_indr_block_owner_clean); + +static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, + struct tcf_block_ext_info *ei, + enum tc_block_command command, + struct netlink_ext_ack *extack) +{ + struct tc_indr_block_cb *indr_block_cb; + struct tc_indr_block_dev *indr_dev; + struct tc_block_offload bo = { + .command = command, + .binder_type = ei->binder_type, + .block = block, + .extack = extack, + }; + + indr_dev = tc_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + indr_dev->block = command == TC_BLOCK_BIND ? block : NULL; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, + &bo); +} + static bool tcf_block_offload_in_use(struct tcf_block *block) { return block->offloadcnt; @@ -404,12 +685,17 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; - return err; + if (err) + return err; + + tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack); + return 0; no_offload_dev_inc: if (tcf_block_offload_in_use(block)) return -EOPNOTSUPP; block->nooffloaddevcnt++; + tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack); return 0; } @@ -419,6 +705,8 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL); + if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_dec; err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL); @@ -2349,6 +2637,11 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; + err = rhashtable_init(&indr_setup_block_ht, + &tc_indr_setup_block_ht_params); + if (err) + goto err_rhash_setup_block_ht; + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, @@ -2360,6 +2653,8 @@ static int __init tc_filter_init(void) return 0; +err_rhash_setup_block_ht: + unregister_pernet_subsys(&tcf_net_ops); err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; -- 2.17.1