All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V2
@ 2010-05-27 18:08 Jiri Pirko
  2010-05-27 20:08 ` Stephen Hemminger
  0 siblings, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2010-05-27 18:08 UTC (permalink / raw)
  To: netdev; +Cc: davem, shemminger, kaber, eric.dumazet

changelog:
v1->v2
	- writers are locked by rtnl_lock (removed unnecessary spinlock)
	- using call_rcu in unregister
	- nicer macvlan_port_create cleanup
	- struct rx_hanler is made const in funtion parameters

What this patch does is it removes two receive frame hooks (for bridge and for
macvlan) from __netif_receive_skb. These are replaced them with a general
list of rx_handlers which is iterated thru instead.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 drivers/net/macvlan.c      |   27 ++++++--
 include/linux/if_bridge.h  |    2 -
 include/linux/if_macvlan.h |    4 -
 include/linux/netdevice.h  |   18 +++++
 net/bridge/br.c            |    2 -
 net/bridge/br_if.c         |   14 ++++
 net/bridge/br_input.c      |   12 +++-
 net/bridge/br_private.h    |    3 +-
 net/core/dev.c             |  160 ++++++++++++++++++++++++++------------------
 9 files changed, 160 insertions(+), 82 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4c..f6b7924 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -511,10 +512,16 @@ static void macvlan_setup(struct net_device *dev)
 	dev->tx_queue_len	= 0;
 }
 
+static const struct netdev_rx_handler macvlan_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_MACVLAN,
+	.callback	= macvlan_handle_frame,
+};
+
 static int macvlan_port_create(struct net_device *dev)
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +535,26 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
+
+	err = netdev_rx_handler_register(dev, &macvlan_rx_handler);
+	if (err)
+		goto cleanup;
+
 	return 0;
+
+cleanup:
+	rcu_assign_pointer(dev->macvlan_port, NULL);
+	synchronize_rcu();
+	kfree(port);
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev, &macvlan_rx_handler);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +787,12 @@ static int __init macvlan_init_module(void)
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +800,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e8..0d241a5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047a..c26a0e4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a1bff65..f54b97d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -254,6 +254,16 @@ struct netdev_hw_addr_list {
 #define netdev_for_each_mc_addr(ha, dev) \
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
+
+struct netdev_rx_handler {
+	struct list_head	list;
+	unsigned int		order;
+#define NETDEV_RX_HANDLER_ORDER_BRIDGE	1
+#define NETDEV_RX_HANDLER_ORDER_MACVLAN	2
+	struct sk_buff		*(*callback)(struct sk_buff *skb);
+	struct rcu_head		rcu;
+};
+
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
@@ -1031,6 +1041,9 @@ struct net_device {
 	/* GARP */
 	struct garp_port	*garp_port;
 
+	/* receive handlers (hooks) list */
+	struct list_head	rx_handlers;
+
 	/* class/net/name entry */
 	struct device		dev;
 	/* space for optional device, statistics, and wireless sysfs groups */
@@ -1685,6 +1698,11 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      const struct netdev_rx_handler *rh);
+extern void netdev_rx_handler_unregister(struct net_device *dev,
+					 const struct netdev_rx_handler *rh);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b5..c8436fa 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e..45270e5 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -119,6 +119,11 @@ static void destroy_nbp_rcu(struct rcu_head *head)
 	destroy_nbp(p);
 }
 
+static const struct netdev_rx_handler br_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_BRIDGE,
+	.callback	= br_handle_frame,
+};
+
 /* Delete port(interface) from bridge is done in two steps.
  * via RCU. First step, marks device as down. That deletes
  * all the timers and stops new packets from flowing through.
@@ -147,6 +152,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev, &br_rx_handler);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +435,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, &br_rx_handler);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +462,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	rcu_assign_pointer(dev->br_port, NULL);
+	synchronize_rcu();
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700..99647d8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest)
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74b..c83519b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c82065..548104e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2585,70 +2585,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2744,6 +2688,85 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+static bool rx_handlers_equal(const struct netdev_rx_handler *rh1,
+			      const struct netdev_rx_handler *rh2)
+{
+	return (rh1->order == rh2->order) && (rh1->callback == rh2->callback);
+}
+
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rh: receive handler to register
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       const struct netdev_rx_handler *rh)
+{
+	struct list_head *iter, *add_after;
+	struct netdev_rx_handler *rh1;
+	int err = 0;
+
+	ASSERT_RTNL();
+	add_after = &dev->rx_handlers;
+	list_for_each(iter, &dev->rx_handlers) {
+		rh1 = list_entry(iter, struct netdev_rx_handler, list);
+		if (rx_handlers_equal(rh, rh1))
+			return -EEXIST;
+		if (rh1->order > rh->order)
+			break;
+		add_after = iter;
+	}
+	rh1 = kzalloc(sizeof(*rh), GFP_KERNEL);
+	if (!rh1)
+		return -ENOMEM;
+
+	rh1->order = rh->order;
+	rh1->callback = rh->callback;
+	list_add_rcu(&rh1->list, add_after);
+
+	return err;
+}
+EXPORT_SYMBOL(netdev_rx_handler_register);
+
+static void rx_handler_rcu_free(struct rcu_head *head)
+{
+	struct netdev_rx_handler *rh;
+
+	rh = container_of(head, struct netdev_rx_handler, rcu);
+	kfree(rh);
+}
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *	@rh: receive handler to unregister
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev,
+				  const struct netdev_rx_handler *rh)
+{
+	struct netdev_rx_handler *rh1;
+
+	ASSERT_RTNL();
+	list_for_each_entry(rh1, &dev->rx_handlers, list) {
+		if (rx_handlers_equal(rh, rh1)) {
+			list_del_rcu(&rh1->list);
+			call_rcu(&rh1->rcu, rx_handler_rcu_free);
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2796,6 +2819,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	struct netdev_rx_handler *rh;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2859,12 +2883,18 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/*
+	 * Go through various rx handlers, like bridge, macvlan etc.
+	 */
+	list_for_each_entry_rcu(rh, &skb->dev->rx_handlers, list) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rh->callback(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
@@ -5371,6 +5401,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev_mc_init(dev);
 	dev_uc_init(dev);
 
+	INIT_LIST_HEAD(&dev->rx_handlers);
+
 	dev_net_set(dev, &init_net);
 
 	dev->_tx = tx;
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V2
  2010-05-27 18:08 [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V2 Jiri Pirko
@ 2010-05-27 20:08 ` Stephen Hemminger
  2010-05-28  5:51   ` Jiri Pirko
  0 siblings, 1 reply; 19+ messages in thread
From: Stephen Hemminger @ 2010-05-27 20:08 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, kaber, eric.dumazet

On Thu, 27 May 2010 20:08:24 +0200
Jiri Pirko <jpirko@redhat.com> wrote:

> changelog:
> v1->v2
> 	- writers are locked by rtnl_lock (removed unnecessary spinlock)
> 	- using call_rcu in unregister
> 	- nicer macvlan_port_create cleanup
> 	- struct rx_hanler is made const in funtion parameters
> 
> What this patch does is it removes two receive frame hooks (for bridge and for
> macvlan) from __netif_receive_skb. These are replaced them with a general
> list of rx_handlers which is iterated thru instead.
> 
> Then a network driver (of virtual netdev like macvlan or bridge) can register
> an rx_handler for needed net device.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>

I wonder if we really need the chaining.  What about allowing only one
hook per device (and return -EBUSY)?  That also gets rid of the allocation,
and the extra overhead.

The handler hook, has to be export GPL, since we really don't want more
network stack abuse by 3rd parties.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V2
  2010-05-27 20:08 ` Stephen Hemminger
@ 2010-05-28  5:51   ` Jiri Pirko
  2010-05-28  6:12     ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V3 Jiri Pirko
  0 siblings, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2010-05-28  5:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, davem, kaber, eric.dumazet

Thu, May 27, 2010 at 10:08:22PM CEST, shemminger@vyatta.com wrote:
>On Thu, 27 May 2010 20:08:24 +0200
>Jiri Pirko <jpirko@redhat.com> wrote:
>
>> changelog:
>> v1->v2
>> 	- writers are locked by rtnl_lock (removed unnecessary spinlock)
>> 	- using call_rcu in unregister
>> 	- nicer macvlan_port_create cleanup
>> 	- struct rx_hanler is made const in funtion parameters
>> 
>> What this patch does is it removes two receive frame hooks (for bridge and for
>> macvlan) from __netif_receive_skb. These are replaced them with a general
>> list of rx_handlers which is iterated thru instead.
>> 
>> Then a network driver (of virtual netdev like macvlan or bridge) can register
>> an rx_handler for needed net device.
>> 
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>
>I wonder if we really need the chaining.  What about allowing only one
>hook per device (and return -EBUSY)?  That also gets rid of the allocation,
>and the extra overhead.

Hmm, that's a good question. I thought about it already. But I'm also wondering
if there is a possible scenario, when the chaining can be necessary. Also I do
not see any -significant- downside of having multiple handlers in a list, so I
would probably go this way now. Opinions?

>
>The handler hook, has to be export GPL, since we really don't want more
>network stack abuse by 3rd parties.

Noted, will be in the next patch version.

Jirka

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V3
  2010-05-28  5:51   ` Jiri Pirko
@ 2010-05-28  6:12     ` Jiri Pirko
  2010-05-28  7:02       ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2010-05-28  6:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, shemminger, kaber, eric.dumazet

changelog:
v2->v3
	- used GPL-ed exports
v1->v2
	- writers are locked by rtnl_lock (removed unnecessary spinlock)
	- using call_rcu in unregister
	- nicer macvlan_port_create cleanup
	- struct rx_hanler is made const in funtion parameters

What this patch does is it removes two receive frame hooks (for bridge and for
macvlan) from __netif_receive_skb. These are replaced them with a general
list of rx_handlers which is iterated thru instead.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 drivers/net/macvlan.c      |   27 ++++++--
 include/linux/if_bridge.h  |    2 -
 include/linux/if_macvlan.h |    4 -
 include/linux/netdevice.h  |   18 +++++
 net/bridge/br.c            |    2 -
 net/bridge/br_if.c         |   14 ++++
 net/bridge/br_input.c      |   12 +++-
 net/bridge/br_private.h    |    3 +-
 net/core/dev.c             |  160 ++++++++++++++++++++++++++------------------
 9 files changed, 160 insertions(+), 82 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4c..f6b7924 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -511,10 +512,16 @@ static void macvlan_setup(struct net_device *dev)
 	dev->tx_queue_len	= 0;
 }
 
+static const struct netdev_rx_handler macvlan_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_MACVLAN,
+	.callback	= macvlan_handle_frame,
+};
+
 static int macvlan_port_create(struct net_device *dev)
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +535,26 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
+
+	err = netdev_rx_handler_register(dev, &macvlan_rx_handler);
+	if (err)
+		goto cleanup;
+
 	return 0;
+
+cleanup:
+	rcu_assign_pointer(dev->macvlan_port, NULL);
+	synchronize_rcu();
+	kfree(port);
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev, &macvlan_rx_handler);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +787,12 @@ static int __init macvlan_init_module(void)
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +800,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e8..0d241a5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047a..c26a0e4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a1bff65..f54b97d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -254,6 +254,16 @@ struct netdev_hw_addr_list {
 #define netdev_for_each_mc_addr(ha, dev) \
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
+
+struct netdev_rx_handler {
+	struct list_head	list;
+	unsigned int		order;
+#define NETDEV_RX_HANDLER_ORDER_BRIDGE	1
+#define NETDEV_RX_HANDLER_ORDER_MACVLAN	2
+	struct sk_buff		*(*callback)(struct sk_buff *skb);
+	struct rcu_head		rcu;
+};
+
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
@@ -1031,6 +1041,9 @@ struct net_device {
 	/* GARP */
 	struct garp_port	*garp_port;
 
+	/* receive handlers (hooks) list */
+	struct list_head	rx_handlers;
+
 	/* class/net/name entry */
 	struct device		dev;
 	/* space for optional device, statistics, and wireless sysfs groups */
@@ -1685,6 +1698,11 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      const struct netdev_rx_handler *rh);
+extern void netdev_rx_handler_unregister(struct net_device *dev,
+					 const struct netdev_rx_handler *rh);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b5..c8436fa 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e..45270e5 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -119,6 +119,11 @@ static void destroy_nbp_rcu(struct rcu_head *head)
 	destroy_nbp(p);
 }
 
+static const struct netdev_rx_handler br_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_BRIDGE,
+	.callback	= br_handle_frame,
+};
+
 /* Delete port(interface) from bridge is done in two steps.
  * via RCU. First step, marks device as down. That deletes
  * all the timers and stops new packets from flowing through.
@@ -147,6 +152,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev, &br_rx_handler);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +435,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, &br_rx_handler);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +462,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	rcu_assign_pointer(dev->br_port, NULL);
+	synchronize_rcu();
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700..99647d8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest)
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74b..c83519b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c82065..d2ab71a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2585,70 +2585,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2744,6 +2688,85 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+static bool rx_handlers_equal(const struct netdev_rx_handler *rh1,
+			      const struct netdev_rx_handler *rh2)
+{
+	return (rh1->order == rh2->order) && (rh1->callback == rh2->callback);
+}
+
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rh: receive handler to register
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       const struct netdev_rx_handler *rh)
+{
+	struct list_head *iter, *add_after;
+	struct netdev_rx_handler *rh1;
+	int err = 0;
+
+	ASSERT_RTNL();
+	add_after = &dev->rx_handlers;
+	list_for_each(iter, &dev->rx_handlers) {
+		rh1 = list_entry(iter, struct netdev_rx_handler, list);
+		if (rx_handlers_equal(rh, rh1))
+			return -EEXIST;
+		if (rh1->order > rh->order)
+			break;
+		add_after = iter;
+	}
+	rh1 = kzalloc(sizeof(*rh), GFP_KERNEL);
+	if (!rh1)
+		return -ENOMEM;
+
+	rh1->order = rh->order;
+	rh1->callback = rh->callback;
+	list_add_rcu(&rh1->list, add_after);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+static void rx_handler_rcu_free(struct rcu_head *head)
+{
+	struct netdev_rx_handler *rh;
+
+	rh = container_of(head, struct netdev_rx_handler, rcu);
+	kfree(rh);
+}
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *	@rh: receive handler to unregister
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev,
+				  const struct netdev_rx_handler *rh)
+{
+	struct netdev_rx_handler *rh1;
+
+	ASSERT_RTNL();
+	list_for_each_entry(rh1, &dev->rx_handlers, list) {
+		if (rx_handlers_equal(rh, rh1)) {
+			list_del_rcu(&rh1->list);
+			call_rcu(&rh1->rcu, rx_handler_rcu_free);
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2796,6 +2819,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	struct netdev_rx_handler *rh;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2859,12 +2883,18 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/*
+	 * Go through various rx handlers, like bridge, macvlan etc.
+	 */
+	list_for_each_entry_rcu(rh, &skb->dev->rx_handlers, list) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rh->callback(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
@@ -5371,6 +5401,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev_mc_init(dev);
 	dev_uc_init(dev);
 
+	INIT_LIST_HEAD(&dev->rx_handlers);
+
 	dev_net_set(dev, &init_net);
 
 	dev->_tx = tx;
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V3
  2010-05-28  6:12     ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V3 Jiri Pirko
@ 2010-05-28  7:02       ` Eric Dumazet
  2010-05-28  7:33         ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V4 Jiri Pirko
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2010-05-28  7:02 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, shemminger, kaber

Le vendredi 28 mai 2010 à 08:12 +0200, Jiri Pirko a écrit :

> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index a1bff65..f54b97d 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -254,6 +254,16 @@ struct netdev_hw_addr_list {
>  #define netdev_for_each_mc_addr(ha, dev) \
>  	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
>  
> +
> +struct netdev_rx_handler {
> +	struct list_head	list;
> +	unsigned int		order;
> +#define NETDEV_RX_HANDLER_ORDER_BRIDGE	1
> +#define NETDEV_RX_HANDLER_ORDER_MACVLAN	2
> +	struct sk_buff		*(*callback)(struct sk_buff *skb);
> +	struct rcu_head		rcu;
> +};
> +
>  struct hh_cache {
>  	struct hh_cache *hh_next;	/* Next entry			     */
>  	atomic_t	hh_refcnt;	/* number of users                   */
> @@ -1031,6 +1041,9 @@ struct net_device {
>  	/* GARP */
>  	struct garp_port	*garp_port;
>  
> +	/* receive handlers (hooks) list */
> +	struct list_head	rx_handlers;
> +
>  	/* class/net/name entry */
>  	struct device		dev;
>  	/* space for optional device, statistics, and wireless sysfs groups */
> @@ -1685,6 +1698,11 @@ static inline void napi_free_frags(struct napi_struct *napi)
>  	napi->skb = NULL;
>  }
>  

Please chose another place to hold rx_handlers, to keep rx path memory
needs to minimum cache lines. Somewhere in the following section :

/*
 * Cache line mostly used on receive path (including eth_type_trans())
 */
        unsigned long           last_rx;        /* Time of last Rx      */
        /* Interface address info used in eth_type_trans() */
        unsigned char           *dev_addr;      /* hw address, (before bcast
                                                   because most packets are
                                                   unicast) */

        struct netdev_hw_addr_list      dev_addrs; /* list of device
                                                      hw addresses */

        unsigned char           broadcast[MAX_ADDR_LEN];        /* hw bcast add */

#ifdef CONFIG_RPS
        struct kset             *queues_kset;

        struct netdev_rx_queue  *_rx;

        /* Number of RX queues allocated at alloc_netdev_mq() time  */
        unsigned int            num_rx_queues;
#endif

        struct netdev_queue     rx_queue;


and before the :

	struct netdev_queue     *_tx ____cacheline_aligned_in_smp;




^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V4
  2010-05-28  7:02       ` Eric Dumazet
@ 2010-05-28  7:33         ` Jiri Pirko
  2010-06-01 15:28           ` net: replace hooks in __netif_receive_skb (v4) Stephen Hemminger
  0 siblings, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2010-05-28  7:33 UTC (permalink / raw)
  To: netdev; +Cc: davem, shemminger, kaber, eric.dumazet

changelog:
v3->v4
	- moved rx_handlers in net_device structure to be near to other data
	  used in rx path
v2->v3
	- used GPL-ed exports
v1->v2
	- writers are locked by rtnl_lock (removed unnecessary spinlock)
	- using call_rcu in unregister
	- nicer macvlan_port_create cleanup
	- struct rx_hanler is made const in funtion parameters

What this patch does is it removes two receive frame hooks (for bridge and for
macvlan) from __netif_receive_skb. These are replaced them with a general
list of rx_handlers which is iterated thru instead.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 drivers/net/macvlan.c      |   27 ++++++--
 include/linux/if_bridge.h  |    2 -
 include/linux/if_macvlan.h |    4 -
 include/linux/netdevice.h  |   18 +++++
 net/bridge/br.c            |    2 -
 net/bridge/br_if.c         |   14 ++++
 net/bridge/br_input.c      |   12 +++-
 net/bridge/br_private.h    |    3 +-
 net/core/dev.c             |  160 ++++++++++++++++++++++++++------------------
 9 files changed, 160 insertions(+), 82 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4c..f6b7924 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -511,10 +512,16 @@ static void macvlan_setup(struct net_device *dev)
 	dev->tx_queue_len	= 0;
 }
 
+static const struct netdev_rx_handler macvlan_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_MACVLAN,
+	.callback	= macvlan_handle_frame,
+};
+
 static int macvlan_port_create(struct net_device *dev)
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +535,26 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
+
+	err = netdev_rx_handler_register(dev, &macvlan_rx_handler);
+	if (err)
+		goto cleanup;
+
 	return 0;
+
+cleanup:
+	rcu_assign_pointer(dev->macvlan_port, NULL);
+	synchronize_rcu();
+	kfree(port);
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev, &macvlan_rx_handler);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +787,12 @@ static int __init macvlan_init_module(void)
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +800,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e8..0d241a5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047a..c26a0e4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a1bff65..8c8bfe3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -254,6 +254,16 @@ struct netdev_hw_addr_list {
 #define netdev_for_each_mc_addr(ha, dev) \
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
+
+struct netdev_rx_handler {
+	struct list_head	list;
+	unsigned int		order;
+#define NETDEV_RX_HANDLER_ORDER_BRIDGE	1
+#define NETDEV_RX_HANDLER_ORDER_MACVLAN	2
+	struct sk_buff		*(*callback)(struct sk_buff *skb);
+	struct rcu_head		rcu;
+};
+
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
@@ -958,6 +968,9 @@ struct net_device {
 
 	struct netdev_queue	rx_queue;
 
+	/* receive handlers (hooks) list */
+	struct list_head	rx_handlers;
+
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 
 	/* Number of TX queues allocated at alloc_netdev_mq() time  */
@@ -1685,6 +1698,11 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      const struct netdev_rx_handler *rh);
+extern void netdev_rx_handler_unregister(struct net_device *dev,
+					 const struct netdev_rx_handler *rh);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b5..c8436fa 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e..45270e5 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -119,6 +119,11 @@ static void destroy_nbp_rcu(struct rcu_head *head)
 	destroy_nbp(p);
 }
 
+static const struct netdev_rx_handler br_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_BRIDGE,
+	.callback	= br_handle_frame,
+};
+
 /* Delete port(interface) from bridge is done in two steps.
  * via RCU. First step, marks device as down. That deletes
  * all the timers and stops new packets from flowing through.
@@ -147,6 +152,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev, &br_rx_handler);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +435,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, &br_rx_handler);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +462,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	rcu_assign_pointer(dev->br_port, NULL);
+	synchronize_rcu();
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700..99647d8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest)
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74b..c83519b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c82065..d2ab71a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2585,70 +2585,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2744,6 +2688,85 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+static bool rx_handlers_equal(const struct netdev_rx_handler *rh1,
+			      const struct netdev_rx_handler *rh2)
+{
+	return (rh1->order == rh2->order) && (rh1->callback == rh2->callback);
+}
+
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rh: receive handler to register
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       const struct netdev_rx_handler *rh)
+{
+	struct list_head *iter, *add_after;
+	struct netdev_rx_handler *rh1;
+	int err = 0;
+
+	ASSERT_RTNL();
+	add_after = &dev->rx_handlers;
+	list_for_each(iter, &dev->rx_handlers) {
+		rh1 = list_entry(iter, struct netdev_rx_handler, list);
+		if (rx_handlers_equal(rh, rh1))
+			return -EEXIST;
+		if (rh1->order > rh->order)
+			break;
+		add_after = iter;
+	}
+	rh1 = kzalloc(sizeof(*rh), GFP_KERNEL);
+	if (!rh1)
+		return -ENOMEM;
+
+	rh1->order = rh->order;
+	rh1->callback = rh->callback;
+	list_add_rcu(&rh1->list, add_after);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+static void rx_handler_rcu_free(struct rcu_head *head)
+{
+	struct netdev_rx_handler *rh;
+
+	rh = container_of(head, struct netdev_rx_handler, rcu);
+	kfree(rh);
+}
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *	@rh: receive handler to unregister
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev,
+				  const struct netdev_rx_handler *rh)
+{
+	struct netdev_rx_handler *rh1;
+
+	ASSERT_RTNL();
+	list_for_each_entry(rh1, &dev->rx_handlers, list) {
+		if (rx_handlers_equal(rh, rh1)) {
+			list_del_rcu(&rh1->list);
+			call_rcu(&rh1->rcu, rx_handler_rcu_free);
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2796,6 +2819,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	struct netdev_rx_handler *rh;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2859,12 +2883,18 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/*
+	 * Go through various rx handlers, like bridge, macvlan etc.
+	 */
+	list_for_each_entry_rcu(rh, &skb->dev->rx_handlers, list) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rh->callback(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
@@ -5371,6 +5401,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev_mc_init(dev);
 	dev_uc_init(dev);
 
+	INIT_LIST_HEAD(&dev->rx_handlers);
+
 	dev_net_set(dev, &init_net);
 
 	dev->_tx = tx;
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* net: replace hooks in __netif_receive_skb (v4)
  2010-05-28  7:33         ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V4 Jiri Pirko
@ 2010-06-01 15:28           ` Stephen Hemminger
  2010-06-01 15:41             ` Jiri Pirko
                               ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Stephen Hemminger @ 2010-06-01 15:28 UTC (permalink / raw)
  To: Jiri Pirko, davem; +Cc: netdev, kaber, eric.dumazet


From: Jiri Pirko <jpirko@redhat.com>

What this patch does is it removes two receive frame hooks (for bridge and for
macvlan) from __netif_receive_skb. These are replaced them with a single
hook for both. It only supports one hook per device because it makes no
sense to do bridging and macvlan on the same device.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
v3->v4 
       - only one hook per device.
v2->v3
	- used GPL-ed exports
v1->v2
	- writers are locked by rtnl_lock (removed unnecessary spinlock)
	- using call_rcu in unregister
	- nicer macvlan_port_create cleanup
	- struct rx_hanler is made const in funtion parameters

 drivers/net/macvlan.c      |   19 ++++---
 include/linux/if_bridge.h  |    2 
 include/linux/if_macvlan.h |    4 -
 include/linux/netdevice.h  |    8 +++
 net/bridge/br.c            |    2 
 net/bridge/br_if.c         |    8 +++
 net/bridge/br_input.c      |   12 +++-
 net/bridge/br_private.h    |    3 -
 net/core/dev.c             |  119 ++++++++++++++++++++-------------------------
 9 files changed, 94 insertions(+), 83 deletions(-)

--- a/drivers/net/macvlan.c	2010-05-28 08:41:38.248169422 -0700
+++ b/drivers/net/macvlan.c	2010-06-01 08:16:52.307206412 -0700
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -515,6 +516,7 @@ static int macvlan_port_create(struct ne
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +530,21 @@ static int macvlan_port_create(struct ne
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
-	return 0;
+
+	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
+	if (err) {
+		dev->macvlan_port = NULL;
+		kfree(port);
+	}
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +777,12 @@ static int __init macvlan_init_module(vo
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +790,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
--- a/include/linux/if_bridge.h	2010-05-28 08:35:11.628190230 -0700
+++ b/include/linux/if_bridge.h	2010-06-01 08:02:39.859736930 -0700
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
--- a/include/linux/if_macvlan.h	2010-05-28 08:35:11.628190230 -0700
+++ b/include/linux/if_macvlan.h	2010-06-01 08:02:39.869738723 -0700
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct 
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
--- a/include/linux/netdevice.h	2010-05-28 08:41:38.628169375 -0700
+++ b/include/linux/netdevice.h	2010-06-01 08:12:59.680035275 -0700
@@ -254,6 +254,7 @@ struct netdev_hw_addr_list {
 #define netdev_for_each_mc_addr(ha, dev) \
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
+
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
@@ -381,6 +382,8 @@ enum gro_result {
 };
 typedef enum gro_result gro_result_t;
 
+typedef struct sk_buff *rx_callback_func_t(struct sk_buff *skb);
+
 extern void __napi_schedule(struct napi_struct *n);
 
 static inline int napi_disable_pending(struct napi_struct *n)
@@ -957,6 +960,7 @@ struct net_device {
 #endif
 
 	struct netdev_queue	rx_queue;
+	rx_callback_func_t	*rx_handler;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 
@@ -1693,6 +1697,10 @@ static inline void napi_free_frags(struc
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      rx_callback_func_t *hook);
+extern void netdev_rx_handler_unregister(struct net_device *dev);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
--- a/net/bridge/br.c	2010-05-28 08:35:11.858189945 -0700
+++ b/net/bridge/br.c	2010-06-01 08:02:39.889741050 -0700
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
--- a/net/bridge/br_if.c	2010-05-28 08:35:11.858189945 -0700
+++ b/net/bridge/br_if.c	2010-06-01 08:14:30.521680943 -0700
@@ -147,6 +147,7 @@ static void del_nbp(struct net_bridge_po
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +430,11 @@ int br_add_if(struct net_bridge *br, str
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, br_handle_frame);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +457,8 @@ int br_add_if(struct net_bridge *br, str
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	dev->br_port = NULL;
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
--- a/net/bridge/br_input.c	2010-05-28 08:35:11.858189945 -0700
+++ b/net/bridge/br_input.c	2010-06-01 08:02:39.909740447 -0700
@@ -131,15 +131,19 @@ static inline int is_link_local(const un
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct n
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
--- a/net/bridge/br_private.h	2010-05-28 08:35:11.858189945 -0700
+++ b/net/bridge/br_private.h	2010-06-01 08:02:39.919725220 -0700
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
--- a/net/core/dev.c	2010-05-28 08:41:38.678169590 -0700
+++ b/net/core/dev.c	2010-06-01 08:16:38.563051750 -0700
@@ -2581,70 +2581,14 @@ static inline int deliver_skb(struct sk_
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2740,6 +2684,48 @@ void netif_nit_deliver(struct sk_buff *s
 	rcu_read_unlock();
 }
 
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rh: receive handler to register
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_callback_func_t *hook)
+{
+	ASSERT_RTNL();
+
+	if (dev->rx_handler)
+		return -EBUSY;
+
+	rcu_assign_pointer(dev->rx_handler, hook);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *	@rh: receive handler to unregister
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev)
+{
+
+	ASSERT_RTNL();
+	dev->rx_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2792,6 +2778,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	rx_callback_func_t *rh;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2855,12 +2842,16 @@ static int __netif_receive_skb(struct sk
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/* Handle special case of bridge or macvlan */
+	if ((rh = rcu_dereference(skb->dev->rx_handler)) != NULL) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rh(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: net: replace hooks in __netif_receive_skb (v4)
  2010-06-01 15:28           ` net: replace hooks in __netif_receive_skb (v4) Stephen Hemminger
@ 2010-06-01 15:41             ` Jiri Pirko
  2010-06-01 16:10               ` Stephen Hemminger
  2010-06-01 16:13             ` Fischer, Anna
                               ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2010-06-01 15:41 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: davem, netdev, kaber, eric.dumazet

Actually, I'm not happy about this (reducing to only one hook) and for two
reasons:

1) I think it's a good behaviour to "mask" one handler by another in case device
is for example used for macvlan and then added to bridge. Because when it's
again removed from the bridge, the original functionality is restored. And also,
this would be consistent with the current behaviour.

2) I would imagine a situation, when multiple handers are needed in cascade.
Actually I'm working on a virtual device draft which uses two handlers, although
in corner situation.

Regards,
	Jirka

Tue, Jun 01, 2010 at 05:28:05PM CEST, shemminger@vyatta.com wrote:
>
>From: Jiri Pirko <jpirko@redhat.com>
>
>What this patch does is it removes two receive frame hooks (for bridge and for
>macvlan) from __netif_receive_skb. These are replaced them with a single
>hook for both. It only supports one hook per device because it makes no
>sense to do bridging and macvlan on the same device.
>
>Then a network driver (of virtual netdev like macvlan or bridge) can register
>an rx_handler for needed net device.
>
>Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>---
>v3->v4 
>       - only one hook per device.
>v2->v3
>	- used GPL-ed exports
>v1->v2
>	- writers are locked by rtnl_lock (removed unnecessary spinlock)
>	- using call_rcu in unregister
>	- nicer macvlan_port_create cleanup
>	- struct rx_hanler is made const in funtion parameters
>
> drivers/net/macvlan.c      |   19 ++++---
> include/linux/if_bridge.h  |    2 
> include/linux/if_macvlan.h |    4 -
> include/linux/netdevice.h  |    8 +++
> net/bridge/br.c            |    2 
> net/bridge/br_if.c         |    8 +++
> net/bridge/br_input.c      |   12 +++-
> net/bridge/br_private.h    |    3 -
> net/core/dev.c             |  119 ++++++++++++++++++++-------------------------
> 9 files changed, 94 insertions(+), 83 deletions(-)
>
>--- a/drivers/net/macvlan.c	2010-05-28 08:41:38.248169422 -0700
>+++ b/drivers/net/macvlan.c	2010-06-01 08:16:52.307206412 -0700
>@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_
> }
> 
> /* called under rcu_read_lock() from netif_receive_skb */
>-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
>-					    struct sk_buff *skb)
>+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
> {
>+	struct macvlan_port *port;
> 	const struct ethhdr *eth = eth_hdr(skb);
> 	const struct macvlan_dev *vlan;
> 	const struct macvlan_dev *src;
> 	struct net_device *dev;
> 	unsigned int len;
> 
>+	port = rcu_dereference(skb->dev->macvlan_port);
> 	if (is_multicast_ether_addr(eth->h_dest)) {
> 		src = macvlan_hash_lookup(port, eth->h_source);
> 		if (!src)
>@@ -515,6 +516,7 @@ static int macvlan_port_create(struct ne
> {
> 	struct macvlan_port *port;
> 	unsigned int i;
>+	int err;
> 
> 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
> 		return -EINVAL;
>@@ -528,13 +530,21 @@ static int macvlan_port_create(struct ne
> 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
> 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
> 	rcu_assign_pointer(dev->macvlan_port, port);
>-	return 0;
>+
>+	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
>+	if (err) {
>+		dev->macvlan_port = NULL;
>+		kfree(port);
>+	}
>+
>+	return err;
> }
> 
> static void macvlan_port_destroy(struct net_device *dev)
> {
> 	struct macvlan_port *port = dev->macvlan_port;
> 
>+	netdev_rx_handler_unregister(dev);
> 	rcu_assign_pointer(dev->macvlan_port, NULL);
> 	synchronize_rcu();
> 	kfree(port);
>@@ -767,14 +777,12 @@ static int __init macvlan_init_module(vo
> 	int err;
> 
> 	register_netdevice_notifier(&macvlan_notifier_block);
>-	macvlan_handle_frame_hook = macvlan_handle_frame;
> 
> 	err = macvlan_link_register(&macvlan_link_ops);
> 	if (err < 0)
> 		goto err1;
> 	return 0;
> err1:
>-	macvlan_handle_frame_hook = NULL;
> 	unregister_netdevice_notifier(&macvlan_notifier_block);
> 	return err;
> }
>@@ -782,7 +790,6 @@ err1:
> static void __exit macvlan_cleanup_module(void)
> {
> 	rtnl_link_unregister(&macvlan_link_ops);
>-	macvlan_handle_frame_hook = NULL;
> 	unregister_netdevice_notifier(&macvlan_notifier_block);
> }
> 
>--- a/include/linux/if_bridge.h	2010-05-28 08:35:11.628190230 -0700
>+++ b/include/linux/if_bridge.h	2010-06-01 08:02:39.859736930 -0700
>@@ -102,8 +102,6 @@ struct __fdb_entry {
> #include <linux/netdevice.h>
> 
> extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
>-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
>-					       struct sk_buff *skb);
> extern int (*br_should_route_hook)(struct sk_buff *skb);
> 
> #endif
>--- a/include/linux/if_macvlan.h	2010-05-28 08:35:11.628190230 -0700
>+++ b/include/linux/if_macvlan.h	2010-06-01 08:02:39.869738723 -0700
>@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct 
> extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
> 				      struct net_device *dev);
> 
>-
>-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
>-						    struct sk_buff *);
>-
> #endif /* _LINUX_IF_MACVLAN_H */
>--- a/include/linux/netdevice.h	2010-05-28 08:41:38.628169375 -0700
>+++ b/include/linux/netdevice.h	2010-06-01 08:12:59.680035275 -0700
>@@ -254,6 +254,7 @@ struct netdev_hw_addr_list {
> #define netdev_for_each_mc_addr(ha, dev) \
> 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
> 
>+
> struct hh_cache {
> 	struct hh_cache *hh_next;	/* Next entry			     */
> 	atomic_t	hh_refcnt;	/* number of users                   */
>@@ -381,6 +382,8 @@ enum gro_result {
> };
> typedef enum gro_result gro_result_t;
> 
>+typedef struct sk_buff *rx_callback_func_t(struct sk_buff *skb);
>+
> extern void __napi_schedule(struct napi_struct *n);
> 
> static inline int napi_disable_pending(struct napi_struct *n)
>@@ -957,6 +960,7 @@ struct net_device {
> #endif
> 
> 	struct netdev_queue	rx_queue;
>+	rx_callback_func_t	*rx_handler;
> 
> 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
> 
>@@ -1693,6 +1697,10 @@ static inline void napi_free_frags(struc
> 	napi->skb = NULL;
> }
> 
>+extern int netdev_rx_handler_register(struct net_device *dev,
>+				      rx_callback_func_t *hook);
>+extern void netdev_rx_handler_unregister(struct net_device *dev);
>+
> extern void		netif_nit_deliver(struct sk_buff *skb);
> extern int		dev_valid_name(const char *name);
> extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
>--- a/net/bridge/br.c	2010-05-28 08:35:11.858189945 -0700
>+++ b/net/bridge/br.c	2010-06-01 08:02:39.889741050 -0700
>@@ -63,7 +63,6 @@ static int __init br_init(void)
> 		goto err_out4;
> 
> 	brioctl_set(br_ioctl_deviceless_stub);
>-	br_handle_frame_hook = br_handle_frame;
> 
> #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
> 	br_fdb_test_addr_hook = br_fdb_test_addr;
>@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
> 	br_fdb_test_addr_hook = NULL;
> #endif
> 
>-	br_handle_frame_hook = NULL;
> 	br_fdb_fini();
> }
> 
>--- a/net/bridge/br_if.c	2010-05-28 08:35:11.858189945 -0700
>+++ b/net/bridge/br_if.c	2010-06-01 08:14:30.521680943 -0700
>@@ -147,6 +147,7 @@ static void del_nbp(struct net_bridge_po
> 
> 	list_del_rcu(&p->list);
> 
>+	netdev_rx_handler_unregister(dev);
> 	rcu_assign_pointer(dev->br_port, NULL);
> 
> 	br_multicast_del_port(p);
>@@ -429,6 +430,11 @@ int br_add_if(struct net_bridge *br, str
> 		goto err2;
> 
> 	rcu_assign_pointer(dev->br_port, p);
>+
>+	err = netdev_rx_handler_register(dev, br_handle_frame);
>+	if (err)
>+		goto err3;
>+
> 	dev_disable_lro(dev);
> 
> 	list_add_rcu(&p->list, &br->port_list);
>@@ -451,6 +457,8 @@ int br_add_if(struct net_bridge *br, str
> 	br_netpoll_enable(br, dev);
> 
> 	return 0;
>+err3:
>+	dev->br_port = NULL;
> err2:
> 	br_fdb_delete_by_port(br, p, 1);
> err1:
>--- a/net/bridge/br_input.c	2010-05-28 08:35:11.858189945 -0700
>+++ b/net/bridge/br_input.c	2010-06-01 08:02:39.909740447 -0700
>@@ -131,15 +131,19 @@ static inline int is_link_local(const un
> }
> 
> /*
>- * Called via br_handle_frame_hook.
>  * Return NULL if skb is handled
>- * note: already called with rcu_read_lock (preempt_disabled)
>+ * note: already called with rcu_read_lock (preempt_disabled) from
>+ * netif_receive_skb
>  */
>-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
>+struct sk_buff *br_handle_frame(struct sk_buff *skb)
> {
>+	struct net_bridge_port *p;
> 	const unsigned char *dest = eth_hdr(skb)->h_dest;
> 	int (*rhook)(struct sk_buff *skb);
> 
>+	if (skb->pkt_type == PACKET_LOOPBACK)
>+		return skb;
>+
> 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
> 		goto drop;
> 
>@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct n
> 	if (!skb)
> 		return NULL;
> 
>+	p = rcu_dereference(skb->dev->br_port);
>+
> 	if (unlikely(is_link_local(dest))) {
> 		/* Pause frames shouldn't be passed up by driver anyway */
> 		if (skb->protocol == htons(ETH_P_PAUSE))
>--- a/net/bridge/br_private.h	2010-05-28 08:35:11.858189945 -0700
>+++ b/net/bridge/br_private.h	2010-06-01 08:02:39.919725220 -0700
>@@ -331,8 +331,7 @@ extern void br_features_recompute(struct
> 
> /* br_input.c */
> extern int br_handle_frame_finish(struct sk_buff *skb);
>-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
>-				       struct sk_buff *skb);
>+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
> 
> /* br_ioctl.c */
> extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
>--- a/net/core/dev.c	2010-05-28 08:41:38.678169590 -0700
>+++ b/net/core/dev.c	2010-06-01 08:16:38.563051750 -0700
>@@ -2581,70 +2581,14 @@ static inline int deliver_skb(struct sk_
> 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
> }
> 
>-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
>-
>-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
>+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
>+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
> /* This hook is defined here for ATM LANE */
> int (*br_fdb_test_addr_hook)(struct net_device *dev,
> 			     unsigned char *addr) __read_mostly;
> EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
> #endif
> 
>-/*
>- * If bridge module is loaded call bridging hook.
>- *  returns NULL if packet was consumed.
>- */
>-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
>-					struct sk_buff *skb) __read_mostly;
>-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
>-
>-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
>-					    struct packet_type **pt_prev, int *ret,
>-					    struct net_device *orig_dev)
>-{
>-	struct net_bridge_port *port;
>-
>-	if (skb->pkt_type == PACKET_LOOPBACK ||
>-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
>-		return skb;
>-
>-	if (*pt_prev) {
>-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
>-		*pt_prev = NULL;
>-	}
>-
>-	return br_handle_frame_hook(port, skb);
>-}
>-#else
>-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
>-#endif
>-
>-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
>-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
>-					     struct sk_buff *skb) __read_mostly;
>-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
>-
>-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
>-					     struct packet_type **pt_prev,
>-					     int *ret,
>-					     struct net_device *orig_dev)
>-{
>-	struct macvlan_port *port;
>-
>-	port = rcu_dereference(skb->dev->macvlan_port);
>-	if (!port)
>-		return skb;
>-
>-	if (*pt_prev) {
>-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
>-		*pt_prev = NULL;
>-	}
>-	return macvlan_handle_frame_hook(port, skb);
>-}
>-#else
>-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
>-#endif
>-
> #ifdef CONFIG_NET_CLS_ACT
> /* TODO: Maybe we should just force sch_ingress to be compiled in
>  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
>@@ -2740,6 +2684,48 @@ void netif_nit_deliver(struct sk_buff *s
> 	rcu_read_unlock();
> }
> 
>+/**
>+ *	netdev_rx_handler_register - register receive handler
>+ *	@dev: device to register a handler for
>+ *	@rh: receive handler to register
>+ *
>+ *	Register a receive hander for a device. This handler will then be
>+ *	called from __netif_receive_skb. A negative errno code is returned
>+ *	on a failure.
>+ *
>+ *	The caller must hold the rtnl_mutex.
>+ */
>+int netdev_rx_handler_register(struct net_device *dev,
>+			       rx_callback_func_t *hook)
>+{
>+	ASSERT_RTNL();
>+
>+	if (dev->rx_handler)
>+		return -EBUSY;
>+
>+	rcu_assign_pointer(dev->rx_handler, hook);
>+
>+	return 0;
>+}
>+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
>+
>+/**
>+ *	netdev_rx_handler_unregister - unregister receive handler
>+ *	@dev: device to unregister a handler from
>+ *	@rh: receive handler to unregister
>+ *
>+ *	Unregister a receive hander from a device.
>+ *
>+ *	The caller must hold the rtnl_mutex.
>+ */
>+void netdev_rx_handler_unregister(struct net_device *dev)
>+{
>+
>+	ASSERT_RTNL();
>+	dev->rx_handler = NULL;
>+}
>+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
>+
> static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
> 					      struct net_device *master)
> {
>@@ -2792,6 +2778,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
> static int __netif_receive_skb(struct sk_buff *skb)
> {
> 	struct packet_type *ptype, *pt_prev;
>+	rx_callback_func_t *rh;
> 	struct net_device *orig_dev;
> 	struct net_device *master;
> 	struct net_device *null_or_orig;
>@@ -2855,12 +2842,16 @@ static int __netif_receive_skb(struct sk
> ncls:
> #endif
> 
>-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
>-	if (!skb)
>-		goto out;
>-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
>-	if (!skb)
>-		goto out;
>+	/* Handle special case of bridge or macvlan */
>+	if ((rh = rcu_dereference(skb->dev->rx_handler)) != NULL) {
>+		if (pt_prev) {
>+			ret = deliver_skb(skb, pt_prev, orig_dev);
>+			pt_prev = NULL;
>+		}
>+		skb = rh(skb);
>+		if (!skb)
>+			goto out;
>+	}
> 
> 	/*
> 	 * Make sure frames received on VLAN interfaces stacked on

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: net: replace hooks in __netif_receive_skb (v4)
  2010-06-01 15:41             ` Jiri Pirko
@ 2010-06-01 16:10               ` Stephen Hemminger
  2010-06-02  7:02                 ` Jiri Pirko
  0 siblings, 1 reply; 19+ messages in thread
From: Stephen Hemminger @ 2010-06-01 16:10 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, kaber, eric.dumazet

On Tue, 1 Jun 2010 17:41:07 +0200
Jiri Pirko <jpirko@redhat.com> wrote:

> Actually, I'm not happy about this (reducing to only one hook) and for two
> reasons:
> 
> 1) I think it's a good behaviour to "mask" one handler by another in case device
> is for example used for macvlan and then added to bridge. Because when it's
> again removed from the bridge, the original functionality is restored. And also,
> this would be consistent with the current behaviour.
> 
> 2) I would imagine a situation, when multiple handers are needed in cascade.
> Actually I'm working on a virtual device draft which uses two handlers, although
> in corner situation.
> 
> Regards,
> 	Jirka

I don't like it because:

1) Adding macvlan to bridge makes no sense. The bridge is already in promicious mode.

2) I don't like to see functionality added when it is not needed.

3) The extra overhead of traversing list causes more cache activity in extreme
   hot path.

Wait and add the multiple handlers when your code is included.

-- 
http://www.extremeprogramming.org/rules/early.html

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: replace hooks in __netif_receive_skb (v4)
  2010-06-01 15:28           ` net: replace hooks in __netif_receive_skb (v4) Stephen Hemminger
  2010-06-01 15:41             ` Jiri Pirko
@ 2010-06-01 16:13             ` Fischer, Anna
  2010-06-02  6:50               ` Jiri Pirko
  2010-06-02  7:24             ` net: " Jiri Pirko
  2010-06-02  7:52             ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5 Jiri Pirko
  3 siblings, 1 reply; 19+ messages in thread
From: Fischer, Anna @ 2010-06-01 16:13 UTC (permalink / raw)
  To: Stephen Hemminger, Jiri Pirko, davem; +Cc: netdev, kaber, eric.dumazet

> Subject: net: replace hooks in __netif_receive_skb (v4)
> 
> 
> From: Jiri Pirko <jpirko@redhat.com>
> 
> What this patch does is it removes two receive frame hooks (for bridge
> and for
> macvlan) from __netif_receive_skb. These are replaced them with a
> single
> hook for both. It only supports one hook per device because it makes no
> sense to do bridging and macvlan on the same device.
> 
> Then a network driver (of virtual netdev like macvlan or bridge) can
> register
> an rx_handler for needed net device.


I think the idea of this is really good, and it has been long required to get rid of the bridging hook and the "hack" for macvlan to get into the network stack. 

However, I wonder, if this is to be used as a generic interface, then why the restriction of only having a single hook per device? Yes, it makes sense to do this for the bridge/macvlan combination, but in general there could be other cases where you would want to allow multiple receivers per device. 


> @@ -2792,6 +2778,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
>  static int __netif_receive_skb(struct sk_buff *skb)
>  {
>  	struct packet_type *ptype, *pt_prev;
> +	rx_callback_func_t *rh;
>  	struct net_device *orig_dev;
>  	struct net_device *master;
>  	struct net_device *null_or_orig;
> @@ -2855,12 +2842,16 @@ static int __netif_receive_skb(struct sk
>  ncls:
>  #endif
> 
> -	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
> -	if (!skb)
> -		goto out;
> -	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
> -	if (!skb)
> -		goto out;
> +	/* Handle special case of bridge or macvlan */
> +	if ((rh = rcu_dereference(skb->dev->rx_handler)) != NULL) {
> +		if (pt_prev) {
> +			ret = deliver_skb(skb, pt_prev, orig_dev);

What happens with 'ret' here? It is completely ignored, isn't it? Because you assume that there is only a single receiver per device? I think it would be much better to have return codes that indicate whether a packet has been consumed by a receive handler, or whether it is supposed to be processed further. The same actually applies for the packet handlers that are processed a bit further down in netif_receive_skb() - the return code is ignored and so a handler has no control over further processing of the packet.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: replace hooks in __netif_receive_skb (v4)
  2010-06-01 16:13             ` Fischer, Anna
@ 2010-06-02  6:50               ` Jiri Pirko
  0 siblings, 0 replies; 19+ messages in thread
From: Jiri Pirko @ 2010-06-02  6:50 UTC (permalink / raw)
  To: Fischer, Anna; +Cc: Stephen Hemminger, davem, netdev, kaber, eric.dumazet

Tue, Jun 01, 2010 at 06:13:51PM CEST, anna.fischer@hp.com wrote:
>> Subject: net: replace hooks in __netif_receive_skb (v4)
>> 
>> 
>> From: Jiri Pirko <jpirko@redhat.com>
>> 
>> What this patch does is it removes two receive frame hooks (for bridge
>> and for
>> macvlan) from __netif_receive_skb. These are replaced them with a
>> single
>> hook for both. It only supports one hook per device because it makes no
>> sense to do bridging and macvlan on the same device.
>> 
>> Then a network driver (of virtual netdev like macvlan or bridge) can
>> register
>> an rx_handler for needed net device.
>
>
>I think the idea of this is really good, and it has been long required to get rid of the bridging hook and the "hack" for macvlan to get into the network stack. 
>
>However, I wonder, if this is to be used as a generic interface, then why the restriction of only having a single hook per device? Yes, it makes sense to do this for the bridge/macvlan combination, but in general there could be other cases where you would want to allow multiple receivers per device. 

Right, I agree.

>
>
>> @@ -2792,6 +2778,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
>>  static int __netif_receive_skb(struct sk_buff *skb)
>>  {
>>  	struct packet_type *ptype, *pt_prev;
>> +	rx_callback_func_t *rh;
>>  	struct net_device *orig_dev;
>>  	struct net_device *master;
>>  	struct net_device *null_or_orig;
>> @@ -2855,12 +2842,16 @@ static int __netif_receive_skb(struct sk
>>  ncls:
>>  #endif
>> 
>> -	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
>> -	if (!skb)
>> -		goto out;
>> -	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
>> -	if (!skb)
>> -		goto out;
>> +	/* Handle special case of bridge or macvlan */
>> +	if ((rh = rcu_dereference(skb->dev->rx_handler)) != NULL) {
>> +		if (pt_prev) {
>> +			ret = deliver_skb(skb, pt_prev, orig_dev);
>
>What happens with 'ret' here? It is completely ignored, isn't it? Because you assume that there is only a single receiver per device? I think it would be much better to have return codes that indicate whether a packet has been consumed by a receive handler, or whether it is supposed to be processed further. The same actually applies for the packet handlers that are processed a bit further down in netif_receive_skb() - the return code is ignored and so a handler has no control over further processing of the packet.

It's not ignored. it's returned at the end of the function __netif_receive_skb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: net: replace hooks in __netif_receive_skb (v4)
  2010-06-01 16:10               ` Stephen Hemminger
@ 2010-06-02  7:02                 ` Jiri Pirko
  0 siblings, 0 replies; 19+ messages in thread
From: Jiri Pirko @ 2010-06-02  7:02 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: davem, netdev, kaber, eric.dumazet

Tue, Jun 01, 2010 at 06:10:04PM CEST, shemminger@vyatta.com wrote:
>On Tue, 1 Jun 2010 17:41:07 +0200
>Jiri Pirko <jpirko@redhat.com> wrote:
>
>> Actually, I'm not happy about this (reducing to only one hook) and for two
>> reasons:
>> 
>> 1) I think it's a good behaviour to "mask" one handler by another in case device
>> is for example used for macvlan and then added to bridge. Because when it's
>> again removed from the bridge, the original functionality is restored. And also,
>> this would be consistent with the current behaviour.
>> 
>> 2) I would imagine a situation, when multiple handers are needed in cascade.
>> Actually I'm working on a virtual device draft which uses two handlers, although
>> in corner situation.
>> 
>> Regards,
>> 	Jirka
>
>I don't like it because:
>
>1) Adding macvlan to bridge makes no sense. The bridge is already in promicious mode.

Right, I'm not saying it has sense to have it together in the same moment. But
you can to this:

# ip link add link eth0 type macvlan

You can use eth0 + macvlan0. Now you do:

# brctl addif br0 eth0

Direct use of eth0 and macvlan0 is not not possible now.

# brctl delif br0 eth0

Now the original functionality ot eth0 and macvlan0 is restored.

>
>2) I don't like to see functionality added when it is not needed.
>
>3) The extra overhead of traversing list causes more cache activity in extreme
>   hot path.
>

But ok, I hear your arguments, I thought about them myself before I did the
patch and I thought that added overhead (which is not too big, I see one more
dereference, rx_handler pointer) would be acceptible.

Maybe someone other then us has opinion too :)

>Wait and add the multiple handlers when your code is included.
>
>-- 
>http://www.extremeprogramming.org/rules/early.html

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: net: replace hooks in __netif_receive_skb (v4)
  2010-06-01 15:28           ` net: replace hooks in __netif_receive_skb (v4) Stephen Hemminger
  2010-06-01 15:41             ` Jiri Pirko
  2010-06-01 16:13             ` Fischer, Anna
@ 2010-06-02  7:24             ` Jiri Pirko
  2010-06-02  7:52             ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5 Jiri Pirko
  3 siblings, 0 replies; 19+ messages in thread
From: Jiri Pirko @ 2010-06-02  7:24 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: davem, netdev, kaber, eric.dumazet

Few nitpicks, I'm going to send patch reflecting this later.

Tue, Jun 01, 2010 at 05:28:05PM CEST, shemminger@vyatta.com wrote:
>
>From: Jiri Pirko <jpirko@redhat.com>
>
>What this patch does is it removes two receive frame hooks (for bridge and for
>macvlan) from __netif_receive_skb. These are replaced them with a single
>hook for both. It only supports one hook per device because it makes no
>sense to do bridging and macvlan on the same device.
>
>Then a network driver (of virtual netdev like macvlan or bridge) can register
>an rx_handler for needed net device.
>
>Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>---
>v3->v4 
>       - only one hook per device.
>v2->v3
>	- used GPL-ed exports
>v1->v2
>	- writers are locked by rtnl_lock (removed unnecessary spinlock)
>	- using call_rcu in unregister
>	- nicer macvlan_port_create cleanup
>	- struct rx_hanler is made const in funtion parameters
>
> drivers/net/macvlan.c      |   19 ++++---
> include/linux/if_bridge.h  |    2 
> include/linux/if_macvlan.h |    4 -
> include/linux/netdevice.h  |    8 +++
> net/bridge/br.c            |    2 
> net/bridge/br_if.c         |    8 +++
> net/bridge/br_input.c      |   12 +++-
> net/bridge/br_private.h    |    3 -
> net/core/dev.c             |  119 ++++++++++++++++++++-------------------------
> 9 files changed, 94 insertions(+), 83 deletions(-)
>
>--- a/drivers/net/macvlan.c	2010-05-28 08:41:38.248169422 -0700
>+++ b/drivers/net/macvlan.c	2010-06-01 08:16:52.307206412 -0700
>@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_
> }
> 
> /* called under rcu_read_lock() from netif_receive_skb */
>-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
>-					    struct sk_buff *skb)
>+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
> {
>+	struct macvlan_port *port;
> 	const struct ethhdr *eth = eth_hdr(skb);
> 	const struct macvlan_dev *vlan;
> 	const struct macvlan_dev *src;
> 	struct net_device *dev;
> 	unsigned int len;
> 
>+	port = rcu_dereference(skb->dev->macvlan_port);
> 	if (is_multicast_ether_addr(eth->h_dest)) {
> 		src = macvlan_hash_lookup(port, eth->h_source);
> 		if (!src)
>@@ -515,6 +516,7 @@ static int macvlan_port_create(struct ne
> {
> 	struct macvlan_port *port;
> 	unsigned int i;
>+	int err;
> 
> 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
> 		return -EINVAL;
>@@ -528,13 +530,21 @@ static int macvlan_port_create(struct ne
> 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
> 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
> 	rcu_assign_pointer(dev->macvlan_port, port);
>-	return 0;
>+
>+	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
>+	if (err) {
>+		dev->macvlan_port = NULL;

Why not to use rcu_assign_pointer here? since this is not hot path, that would be
nicer. It should be done for rcu-protected pointers (see rcu_assign_poiter
comment).

>+		kfree(port);
>+	}
>+
>+	return err;
> }
> 
> static void macvlan_port_destroy(struct net_device *dev)
> {
> 	struct macvlan_port *port = dev->macvlan_port;
> 
>+	netdev_rx_handler_unregister(dev);
> 	rcu_assign_pointer(dev->macvlan_port, NULL);
> 	synchronize_rcu();
> 	kfree(port);
>@@ -767,14 +777,12 @@ static int __init macvlan_init_module(vo
> 	int err;
> 
> 	register_netdevice_notifier(&macvlan_notifier_block);
>-	macvlan_handle_frame_hook = macvlan_handle_frame;
> 
> 	err = macvlan_link_register(&macvlan_link_ops);
> 	if (err < 0)
> 		goto err1;
> 	return 0;
> err1:
>-	macvlan_handle_frame_hook = NULL;
> 	unregister_netdevice_notifier(&macvlan_notifier_block);
> 	return err;
> }
>@@ -782,7 +790,6 @@ err1:
> static void __exit macvlan_cleanup_module(void)
> {
> 	rtnl_link_unregister(&macvlan_link_ops);
>-	macvlan_handle_frame_hook = NULL;
> 	unregister_netdevice_notifier(&macvlan_notifier_block);
> }
> 
>--- a/include/linux/if_bridge.h	2010-05-28 08:35:11.628190230 -0700
>+++ b/include/linux/if_bridge.h	2010-06-01 08:02:39.859736930 -0700
>@@ -102,8 +102,6 @@ struct __fdb_entry {
> #include <linux/netdevice.h>
> 
> extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
>-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
>-					       struct sk_buff *skb);
> extern int (*br_should_route_hook)(struct sk_buff *skb);
> 
> #endif
>--- a/include/linux/if_macvlan.h	2010-05-28 08:35:11.628190230 -0700
>+++ b/include/linux/if_macvlan.h	2010-06-01 08:02:39.869738723 -0700
>@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct 
> extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
> 				      struct net_device *dev);
> 
>-
>-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
>-						    struct sk_buff *);
>-
> #endif /* _LINUX_IF_MACVLAN_H */
>--- a/include/linux/netdevice.h	2010-05-28 08:41:38.628169375 -0700
>+++ b/include/linux/netdevice.h	2010-06-01 08:12:59.680035275 -0700
>@@ -254,6 +254,7 @@ struct netdev_hw_addr_list {
> #define netdev_for_each_mc_addr(ha, dev) \
> 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
> 
>+
Forgotten free line.

> struct hh_cache {
> 	struct hh_cache *hh_next;	/* Next entry			     */
> 	atomic_t	hh_refcnt;	/* number of users                   */
>@@ -381,6 +382,8 @@ enum gro_result {
> };
> typedef enum gro_result gro_result_t;
> 
>+typedef struct sk_buff *rx_callback_func_t(struct sk_buff *skb);
>+
> extern void __napi_schedule(struct napi_struct *n);
> 
> static inline int napi_disable_pending(struct napi_struct *n)
>@@ -957,6 +960,7 @@ struct net_device {
> #endif
> 
> 	struct netdev_queue	rx_queue;
>+	rx_callback_func_t	*rx_handler;
> 
> 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
> 
>@@ -1693,6 +1697,10 @@ static inline void napi_free_frags(struc
> 	napi->skb = NULL;
> }
> 
>+extern int netdev_rx_handler_register(struct net_device *dev,
>+				      rx_callback_func_t *hook);
>+extern void netdev_rx_handler_unregister(struct net_device *dev);
>+
> extern void		netif_nit_deliver(struct sk_buff *skb);
> extern int		dev_valid_name(const char *name);
> extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
>--- a/net/bridge/br.c	2010-05-28 08:35:11.858189945 -0700
>+++ b/net/bridge/br.c	2010-06-01 08:02:39.889741050 -0700
>@@ -63,7 +63,6 @@ static int __init br_init(void)
> 		goto err_out4;
> 
> 	brioctl_set(br_ioctl_deviceless_stub);
>-	br_handle_frame_hook = br_handle_frame;
> 
> #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
> 	br_fdb_test_addr_hook = br_fdb_test_addr;
>@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
> 	br_fdb_test_addr_hook = NULL;
> #endif
> 
>-	br_handle_frame_hook = NULL;
> 	br_fdb_fini();
> }
> 
>--- a/net/bridge/br_if.c	2010-05-28 08:35:11.858189945 -0700
>+++ b/net/bridge/br_if.c	2010-06-01 08:14:30.521680943 -0700
>@@ -147,6 +147,7 @@ static void del_nbp(struct net_bridge_po
> 
> 	list_del_rcu(&p->list);
> 
>+	netdev_rx_handler_unregister(dev);
> 	rcu_assign_pointer(dev->br_port, NULL);

> 
> 	br_multicast_del_port(p);
>@@ -429,6 +430,11 @@ int br_add_if(struct net_bridge *br, str
> 		goto err2;
> 
> 	rcu_assign_pointer(dev->br_port, p);
>+
>+	err = netdev_rx_handler_register(dev, br_handle_frame);
>+	if (err)
>+		goto err3;
>+
> 	dev_disable_lro(dev);
> 
> 	list_add_rcu(&p->list, &br->port_list);
>@@ -451,6 +457,8 @@ int br_add_if(struct net_bridge *br, str
> 	br_netpoll_enable(br, dev);
> 
> 	return 0;
>+err3:
>+	dev->br_port = NULL;

I would like to see rcu_assing_pointer here too

> err2:
> 	br_fdb_delete_by_port(br, p, 1);
> err1:
>--- a/net/bridge/br_input.c	2010-05-28 08:35:11.858189945 -0700
>+++ b/net/bridge/br_input.c	2010-06-01 08:02:39.909740447 -0700
>@@ -131,15 +131,19 @@ static inline int is_link_local(const un
> }
> 
> /*
>- * Called via br_handle_frame_hook.
>  * Return NULL if skb is handled
>- * note: already called with rcu_read_lock (preempt_disabled)
>+ * note: already called with rcu_read_lock (preempt_disabled) from
>+ * netif_receive_skb
>  */
>-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
>+struct sk_buff *br_handle_frame(struct sk_buff *skb)
> {
>+	struct net_bridge_port *p;
> 	const unsigned char *dest = eth_hdr(skb)->h_dest;
> 	int (*rhook)(struct sk_buff *skb);
> 
>+	if (skb->pkt_type == PACKET_LOOPBACK)
>+		return skb;
>+
> 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
> 		goto drop;
> 
>@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct n
> 	if (!skb)
> 		return NULL;
> 
>+	p = rcu_dereference(skb->dev->br_port);
>+
> 	if (unlikely(is_link_local(dest))) {
> 		/* Pause frames shouldn't be passed up by driver anyway */
> 		if (skb->protocol == htons(ETH_P_PAUSE))
>--- a/net/bridge/br_private.h	2010-05-28 08:35:11.858189945 -0700
>+++ b/net/bridge/br_private.h	2010-06-01 08:02:39.919725220 -0700
>@@ -331,8 +331,7 @@ extern void br_features_recompute(struct
> 
> /* br_input.c */
> extern int br_handle_frame_finish(struct sk_buff *skb);
>-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
>-				       struct sk_buff *skb);
>+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
> 
> /* br_ioctl.c */
> extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
>--- a/net/core/dev.c	2010-05-28 08:41:38.678169590 -0700
>+++ b/net/core/dev.c	2010-06-01 08:16:38.563051750 -0700
>@@ -2581,70 +2581,14 @@ static inline int deliver_skb(struct sk_
> 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
> }
> 
>-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
>-
>-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
>+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
>+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
> /* This hook is defined here for ATM LANE */
> int (*br_fdb_test_addr_hook)(struct net_device *dev,
> 			     unsigned char *addr) __read_mostly;
> EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
> #endif
> 
>-/*
>- * If bridge module is loaded call bridging hook.
>- *  returns NULL if packet was consumed.
>- */
>-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
>-					struct sk_buff *skb) __read_mostly;
>-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
>-
>-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
>-					    struct packet_type **pt_prev, int *ret,
>-					    struct net_device *orig_dev)
>-{
>-	struct net_bridge_port *port;
>-
>-	if (skb->pkt_type == PACKET_LOOPBACK ||
>-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
>-		return skb;
>-
>-	if (*pt_prev) {
>-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
>-		*pt_prev = NULL;
>-	}
>-
>-	return br_handle_frame_hook(port, skb);
>-}
>-#else
>-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
>-#endif
>-
>-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
>-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
>-					     struct sk_buff *skb) __read_mostly;
>-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
>-
>-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
>-					     struct packet_type **pt_prev,
>-					     int *ret,
>-					     struct net_device *orig_dev)
>-{
>-	struct macvlan_port *port;
>-
>-	port = rcu_dereference(skb->dev->macvlan_port);
>-	if (!port)
>-		return skb;
>-
>-	if (*pt_prev) {
>-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
>-		*pt_prev = NULL;
>-	}
>-	return macvlan_handle_frame_hook(port, skb);
>-}
>-#else
>-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
>-#endif
>-
> #ifdef CONFIG_NET_CLS_ACT
> /* TODO: Maybe we should just force sch_ingress to be compiled in
>  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
>@@ -2740,6 +2684,48 @@ void netif_nit_deliver(struct sk_buff *s
> 	rcu_read_unlock();
> }
> 
>+/**
>+ *	netdev_rx_handler_register - register receive handler
>+ *	@dev: device to register a handler for
>+ *	@rh: receive handler to register

Old comment

>+ *
>+ *	Register a receive hander for a device. This handler will then be
>+ *	called from __netif_receive_skb. A negative errno code is returned
>+ *	on a failure.
>+ *
>+ *	The caller must hold the rtnl_mutex.
>+ */
>+int netdev_rx_handler_register(struct net_device *dev,
>+			       rx_callback_func_t *hook)
>+{
>+	ASSERT_RTNL();
>+
>+	if (dev->rx_handler)
>+		return -EBUSY;
>+
>+	rcu_assign_pointer(dev->rx_handler, hook);
>+
>+	return 0;
>+}
>+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
>+
>+/**
>+ *	netdev_rx_handler_unregister - unregister receive handler
>+ *	@dev: device to unregister a handler from
>+ *	@rh: receive handler to unregister
old comment

>+ *
>+ *	Unregister a receive hander from a device.
>+ *
>+ *	The caller must hold the rtnl_mutex.
>+ */
>+void netdev_rx_handler_unregister(struct net_device *dev)
>+{
>+
>+	ASSERT_RTNL();
>+	dev->rx_handler = NULL;

Also rcu_assign pointer would be nicer here.
>+}
>+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
>+
> static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
> 					      struct net_device *master)
> {
>@@ -2792,6 +2778,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
> static int __netif_receive_skb(struct sk_buff *skb)
> {
> 	struct packet_type *ptype, *pt_prev;
>+	rx_callback_func_t *rh;
> 	struct net_device *orig_dev;
> 	struct net_device *master;
> 	struct net_device *null_or_orig;
>@@ -2855,12 +2842,16 @@ static int __netif_receive_skb(struct sk
> ncls:
> #endif
> 
>-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
>-	if (!skb)
>-		goto out;
>-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
>-	if (!skb)
>-		goto out;
>+	/* Handle special case of bridge or macvlan */
>+	if ((rh = rcu_dereference(skb->dev->rx_handler)) != NULL) {
>+		if (pt_prev) {
>+			ret = deliver_skb(skb, pt_prev, orig_dev);
>+			pt_prev = NULL;
>+		}
>+		skb = rh(skb);
>+		if (!skb)
>+			goto out;
>+	}
> 
> 	/*
> 	 * Make sure frames received on VLAN interfaces stacked on

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5
  2010-06-01 15:28           ` net: replace hooks in __netif_receive_skb (v4) Stephen Hemminger
                               ` (2 preceding siblings ...)
  2010-06-02  7:24             ` net: " Jiri Pirko
@ 2010-06-02  7:52             ` Jiri Pirko
  2010-06-02 14:11               ` David Miller
                                 ` (2 more replies)
  3 siblings, 3 replies; 19+ messages in thread
From: Jiri Pirko @ 2010-06-02  7:52 UTC (permalink / raw)
  To: netdev; +Cc: davem, kaber, eric.dumazet, shemminger

What this patch does is it removes two receive frame hooks (for bridge and for
macvlan) from __netif_receive_skb. These are replaced them with a single
hook for both. It only supports one hook per device because it makes no
sense to do bridging and macvlan on the same device.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
v4->v5
	- do rcu_assign_pointer even for assigning null for rcu-protected
	  pointers 
	- cosmetics (renames, comments and __netif_receive_skb if part)

 drivers/net/macvlan.c      |   19 +++++--
 include/linux/if_bridge.h  |    2 -
 include/linux/if_macvlan.h |    4 --
 include/linux/netdevice.h  |    7 +++
 net/bridge/br.c            |    2 -
 net/bridge/br_if.c         |    8 +++
 net/bridge/br_input.c      |   12 +++-
 net/bridge/br_private.h    |    3 +-
 net/core/dev.c             |  119 ++++++++++++++++++++-----------------------
 9 files changed, 93 insertions(+), 83 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4c..53422ce 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -515,6 +516,7 @@ static int macvlan_port_create(struct net_device *dev)
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +530,21 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
-	return 0;
+
+	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
+	if (err) {
+		rcu_assign_pointer(dev->macvlan_port, NULL);
+		kfree(port);
+	}
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +777,12 @@ static int __init macvlan_init_module(void)
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +790,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e8..0d241a5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047a..c26a0e4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a249161..244494f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -381,6 +381,8 @@ enum gro_result {
 };
 typedef enum gro_result gro_result_t;
 
+typedef struct sk_buff *rx_handler_func_t(struct sk_buff *skb);
+
 extern void __napi_schedule(struct napi_struct *n);
 
 static inline int napi_disable_pending(struct napi_struct *n)
@@ -957,6 +959,7 @@ struct net_device {
 #endif
 
 	struct netdev_queue	rx_queue;
+	rx_handler_func_t	*rx_handler;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 
@@ -1693,6 +1696,10 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      rx_handler_func_t *rx_handler);
+extern void netdev_rx_handler_unregister(struct net_device *dev);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b5..c8436fa 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e..d924234 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -147,6 +147,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +430,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, br_handle_frame);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +457,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	rcu_assign_pointer(dev->br_port, NULL);
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700..99647d8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest)
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74b..c83519b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index 983a3c1..4a02b58 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2583,70 +2583,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2742,6 +2686,47 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rx_handler: receive handler to register
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler)
+{
+	ASSERT_RTNL();
+
+	if (dev->rx_handler)
+		return -EBUSY;
+
+	rcu_assign_pointer(dev->rx_handler, rx_handler);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev)
+{
+
+	ASSERT_RTNL();
+	rcu_assign_pointer(dev->rx_handler, NULL);
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2794,6 +2779,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	rx_handler_func_t *rx_handler;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2856,12 +2842,17 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/* Handle special case of bridge or macvlan */
+	rx_handler = rcu_dereference(skb->dev->rx_handler);
+	if (rx_handler) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rx_handler(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5
  2010-06-02  7:52             ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5 Jiri Pirko
@ 2010-06-02 14:11               ` David Miller
  2010-06-02 15:07               ` Stephen Hemminger
  2010-06-02 16:20               ` Fischer, Anna
  2 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2010-06-02 14:11 UTC (permalink / raw)
  To: jpirko; +Cc: netdev, kaber, eric.dumazet, shemminger

From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 2 Jun 2010 09:52:08 +0200

> What this patch does is it removes two receive frame hooks (for bridge and for
> macvlan) from __netif_receive_skb. These are replaced them with a single
> hook for both. It only supports one hook per device because it makes no
> sense to do bridging and macvlan on the same device.
> 
> Then a network driver (of virtual netdev like macvlan or bridge) can register
> an rx_handler for needed net device.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Ok, this looks good to me.  Applied, thanks everyone.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5
  2010-06-02  7:52             ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5 Jiri Pirko
  2010-06-02 14:11               ` David Miller
@ 2010-06-02 15:07               ` Stephen Hemminger
  2010-06-02 15:15                 ` Eric Dumazet
  2010-06-02 16:20               ` Fischer, Anna
  2 siblings, 1 reply; 19+ messages in thread
From: Stephen Hemminger @ 2010-06-02 15:07 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, kaber, eric.dumazet

On Wed, 2 Jun 2010 09:52:08 +0200
Jiri Pirko <jpirko@redhat.com> wrote:

> +
> +	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
> +	if (err) {
> +		rcu_assign_pointer(dev->macvlan_port, NULL);
> +		kfree(port);
> +	}
> +
> +	return err;
>  }

Rcu assign is not necessary here for because the hook didn't
get registered so there is no way for other CPU to see it.

-- 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5
  2010-06-02 15:07               ` Stephen Hemminger
@ 2010-06-02 15:15                 ` Eric Dumazet
  2010-06-02 20:43                   ` Paul E. McKenney
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2010-06-02 15:15 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Jiri Pirko, netdev, davem, kaber, Paul E. McKenney

Le mercredi 02 juin 2010 à 08:07 -0700, Stephen Hemminger a écrit :
> On Wed, 2 Jun 2010 09:52:08 +0200
> Jiri Pirko <jpirko@redhat.com> wrote:
> 
> > +
> > +	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
> > +	if (err) {
> > +		rcu_assign_pointer(dev->macvlan_port, NULL);
> > +		kfree(port);
> > +	}
> > +
> > +	return err;
> >  }
> 
> Rcu assign is not necessary here for because the hook didn't
> get registered so there is no way for other CPU to see it.
> 

Thats a valid point, but we should use it, and not care of this litle
detail. Compiler generates same code anyway, since NULL value is tested
by rcu_assign_pointer().

If we dont use rcu_assign_pointer() ourself, Paul or Arnd will put it
one day or another :)

http://lkml.org/lkml/2010/6/1/290




^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5
  2010-06-02  7:52             ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5 Jiri Pirko
  2010-06-02 14:11               ` David Miller
  2010-06-02 15:07               ` Stephen Hemminger
@ 2010-06-02 16:20               ` Fischer, Anna
  2 siblings, 0 replies; 19+ messages in thread
From: Fischer, Anna @ 2010-06-02 16:20 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, kaber, eric.dumazet, shemminger

> Subject: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb
> V5
> 
> What this patch does is it removes two receive frame hooks (for bridge
> and for
> macvlan) from __netif_receive_skb. These are replaced them with a single
> hook for both. It only supports one hook per device because it makes no
> sense to do bridging and macvlan on the same device.
> 
> Then a network driver (of virtual netdev like macvlan or bridge) can
> register
> an rx_handler for needed net device.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

I would like to see this being accepted. As mentioned before I would love to be able to support multiple receive frame hooks per device, but I think this is a good start.

I think especially with virtualization coming into Linux and new network virtualization approaches being developed it would be nice to have a more generic function for packet receive handlers to hook into the network stack more cleanly. Current approaches rely on 'misusing' the bridging hook into the kernel, because the packet 'sniffing' hooks (via dev_pack_add()) do not give enough control over packet processing.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5
  2010-06-02 15:15                 ` Eric Dumazet
@ 2010-06-02 20:43                   ` Paul E. McKenney
  0 siblings, 0 replies; 19+ messages in thread
From: Paul E. McKenney @ 2010-06-02 20:43 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Stephen Hemminger, Jiri Pirko, netdev, davem, kaber

On Wed, Jun 02, 2010 at 05:15:22PM +0200, Eric Dumazet wrote:
> Le mercredi 02 juin 2010 à 08:07 -0700, Stephen Hemminger a écrit :
> > On Wed, 2 Jun 2010 09:52:08 +0200
> > Jiri Pirko <jpirko@redhat.com> wrote:
> > 
> > > +
> > > +	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
> > > +	if (err) {
> > > +		rcu_assign_pointer(dev->macvlan_port, NULL);
> > > +		kfree(port);
> > > +	}
> > > +
> > > +	return err;
> > >  }
> > 
> > Rcu assign is not necessary here for because the hook didn't
> > get registered so there is no way for other CPU to see it.
> > 
> 
> Thats a valid point, but we should use it, and not care of this litle
> detail. Compiler generates same code anyway, since NULL value is tested
> by rcu_assign_pointer().
> 
> If we dont use rcu_assign_pointer() ourself, Paul or Arnd will put it
> one day or another :)
> 
> http://lkml.org/lkml/2010/6/1/290

Some year soon, I hope.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2010-06-02 20:43 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-27 18:08 [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V2 Jiri Pirko
2010-05-27 20:08 ` Stephen Hemminger
2010-05-28  5:51   ` Jiri Pirko
2010-05-28  6:12     ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V3 Jiri Pirko
2010-05-28  7:02       ` Eric Dumazet
2010-05-28  7:33         ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V4 Jiri Pirko
2010-06-01 15:28           ` net: replace hooks in __netif_receive_skb (v4) Stephen Hemminger
2010-06-01 15:41             ` Jiri Pirko
2010-06-01 16:10               ` Stephen Hemminger
2010-06-02  7:02                 ` Jiri Pirko
2010-06-01 16:13             ` Fischer, Anna
2010-06-02  6:50               ` Jiri Pirko
2010-06-02  7:24             ` net: " Jiri Pirko
2010-06-02  7:52             ` [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V5 Jiri Pirko
2010-06-02 14:11               ` David Miller
2010-06-02 15:07               ` Stephen Hemminger
2010-06-02 15:15                 ` Eric Dumazet
2010-06-02 20:43                   ` Paul E. McKenney
2010-06-02 16:20               ` Fischer, Anna

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.