From: Edward Cree <ecree@solarflare.com>
To: <linux-net-drivers@solarflare.com>, <netdev@vger.kernel.org>
Cc: <davem@davemloft.net>
Subject: [RFC PATCH v2 net-next 06/12] net: core: propagate SKB lists through packet_type lookup
Date: Tue, 26 Jun 2018 19:19:36 +0100
Message-ID: <a665e6bc-176c-6c5a-282b-a3ed52e0a13f@solarflare.com>
In-Reply-To: <fa3d7e58-e7b6-ad0c-619f-824c25ed0d97@solarflare.com>

__netif_receive_skb_taps() does a depressingly large amount of per-packet
work that can't easily be listified, because the another_round loop makes
it nontrivial to slice up into smaller functions.
Fortunately, most of that work disappears in the fast path:
 * Hardware devices generally don't have an rx_handler
 * Unless you're tcpdumping or something, there is usually only one ptype
 * VLAN processing comes before the protocol ptype lookup, so it doesn't
   force a pt_prev delivery
So normally, __netif_receive_skb_taps() will run straight through and
return the one ptype found in ptype_base[hash of skb->protocol].
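
For reference, a minimal sketch of that fast-path lookup (not part of the
patch; fastpath_ptype() is a hypothetical name, and the sketch assumes the
ptype_base[]/PTYPE_HASH_MASK definitions already in net/core/dev.c):

/* Sketch only: resolve the single matching protocol handler from the
 * ptype_base hash table, the way the fast path does when exactly one
 * ptype matches.  Device-specific handlers live on separate per-device
 * lists (dev->ptype_specific) and are ignored here.
 */
static struct packet_type *fastpath_ptype(struct sk_buff *skb)
{
	__be16 type = skb->protocol;
	struct packet_type *ptype;

	list_for_each_entry_rcu(ptype,
				&ptype_base[ntohs(type) & PTYPE_HASH_MASK],
				list)
		if (ptype->type == type)
			return ptype;
	return NULL;
}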

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 include/trace/events/net.h |   7 +++
 net/core/dev.c             | 138 ++++++++++++++++++++++++++++++++-------------
 2 files changed, 105 insertions(+), 40 deletions(-)

diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 00aa72ce0e7c..3c9b262896c1 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -131,6 +131,13 @@ DEFINE_EVENT(net_dev_template, netif_receive_skb,
 	TP_ARGS(skb)
 );
 
+DEFINE_EVENT(net_dev_template, netif_receive_skb_list,
+
+	TP_PROTO(struct sk_buff *skb),
+
+	TP_ARGS(skb)
+);
+
 DEFINE_EVENT(net_dev_template, netif_rx,
 
 	TP_PROTO(struct sk_buff *skb),
diff --git a/net/core/dev.c b/net/core/dev.c
index 92d78b3de656..2f46ed07c8d8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4494,12 +4494,13 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
 	return 0;
 }
 
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
+static int __netif_receive_skb_taps(struct sk_buff *skb, bool pfmemalloc,
+				    struct packet_type **pt_prev)
 {
-	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
 	struct net_device *orig_dev;
 	bool deliver_exact = false;
+	struct packet_type *ptype;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
@@ -4514,7 +4515,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 		skb_reset_transport_header(skb);
 	skb_reset_mac_len(skb);
 
-	pt_prev = NULL;
+	*pt_prev = NULL;
 
 another_round:
 	skb->skb_iif = skb->dev->ifindex;
@@ -4535,25 +4536,25 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 		goto skip_taps;
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (pt_prev)
-			ret = deliver_skb(skb, pt_prev, orig_dev);
-		pt_prev = ptype;
+		if (*pt_prev)
+			ret = deliver_skb(skb, *pt_prev, orig_dev);
+		*pt_prev = ptype;
 	}
 
 	list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
-		if (pt_prev)
-			ret = deliver_skb(skb, pt_prev, orig_dev);
-		pt_prev = ptype;
+		if (*pt_prev)
+			ret = deliver_skb(skb, *pt_prev, orig_dev);
+		*pt_prev = ptype;
 	}
 
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
 	if (static_branch_unlikely(&ingress_needed_key)) {
-		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+		skb = sch_handle_ingress(skb, pt_prev, &ret, orig_dev);
 		if (!skb)
 			goto out;
 
-		if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+		if (nf_ingress(skb, pt_prev, &ret, orig_dev) < 0)
 			goto out;
 	}
 #endif
@@ -4563,9 +4564,9 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 		goto drop;
 
 	if (skb_vlan_tag_present(skb)) {
-		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
-			pt_prev = NULL;
+		if (*pt_prev) {
+			ret = deliver_skb(skb, *pt_prev, orig_dev);
+			*pt_prev = NULL;
 		}
 		if (vlan_do_receive(&skb))
 			goto another_round;
@@ -4575,9 +4576,9 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
 	if (rx_handler) {
-		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
-			pt_prev = NULL;
+		if (*pt_prev) {
+			ret = deliver_skb(skb, *pt_prev, orig_dev);
+			*pt_prev = NULL;
 		}
 		switch (rx_handler(&skb)) {
 		case RX_HANDLER_CONSUMED:
@@ -4608,38 +4609,45 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
 	/* deliver only exact match when indicated */
 	if (likely(!deliver_exact)) {
-		deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+		deliver_ptype_list_skb(skb, pt_prev, orig_dev, type,
 				       &ptype_base[ntohs(type) &
 						   PTYPE_HASH_MASK]);
 	}
 
-	deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+	deliver_ptype_list_skb(skb, pt_prev, orig_dev, type,
 			       &orig_dev->ptype_specific);
 
 	if (unlikely(skb->dev != orig_dev)) {
-		deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+		deliver_ptype_list_skb(skb, pt_prev, orig_dev, type,
 				       &skb->dev->ptype_specific);
 	}
-
-	if (pt_prev) {
-		if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
-			goto drop;
-		else
-			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-	} else {
+	if (*pt_prev && unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+		goto drop;
+	return ret;
 drop:
-		if (!deliver_exact)
-			atomic_long_inc(&skb->dev->rx_dropped);
-		else
-			atomic_long_inc(&skb->dev->rx_nohandler);
-		kfree_skb(skb);
-		/* Jamal, now you will not able to escape explaining
-		 * me how you were going to use this. :-)
-		 */
-		ret = NET_RX_DROP;
-	}
-
+	if (!deliver_exact)
+		atomic_long_inc(&skb->dev->rx_dropped);
+	else
+		atomic_long_inc(&skb->dev->rx_nohandler);
+	kfree_skb(skb);
+	/* Jamal, now you will not able to escape explaining
+	 * me how you were going to use this. :-)
+	 */
+	ret = NET_RX_DROP;
 out:
+	*pt_prev = NULL;
+	return ret;
+}
+
+static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
+{
+	struct net_device *orig_dev = skb->dev;
+	struct packet_type *pt_prev;
+	int ret;
+
+	ret = __netif_receive_skb_taps(skb, pfmemalloc, &pt_prev);
+	if (pt_prev)
+		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	return ret;
 }
 
@@ -4670,12 +4678,62 @@ int netif_receive_skb_core(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb_core);
 
-static void __netif_receive_skb_list_core(struct sk_buff_head *list, bool pfmemalloc)
+static inline void __netif_receive_skb_list_ptype(struct sk_buff_head *list,
+						  struct packet_type *pt_prev,
+						  struct net_device *orig_dev)
 {
 	struct sk_buff *skb;
 
 	while ((skb = __skb_dequeue(list)) != NULL)
-		__netif_receive_skb_core(skb, pfmemalloc);
+		pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
+static void __netif_receive_skb_list_core(struct sk_buff_head *list, bool pfmemalloc)
+{
+	/* Fast-path assumptions:
+	 * - There is no RX handler.
+	 * - Only one packet_type matches.
+	 * If either of these fails, we will end up doing some per-packet
+	 * processing in-line, then handling the 'last ptype' for the whole
+	 * sublist.  This can't cause out-of-order delivery to any single ptype,
+	 * because the 'last ptype' must be constant across the sublist, and all
+	 * other ptypes are handled per-packet.
+	 */
+	/* Current (common) ptype of sublist */
+	struct packet_type *pt_curr = NULL;
+	/* Current (common) orig_dev of sublist */
+	struct net_device *od_curr = NULL;
+	struct sk_buff_head sublist;
+	struct sk_buff *skb;
+
+	__skb_queue_head_init(&sublist);
+
+	while ((skb = __skb_dequeue(list)) != NULL) {
+		struct packet_type *pt_prev;
+		struct net_device *orig_dev = skb->dev;
+
+		__netif_receive_skb_taps(skb, pfmemalloc, &pt_prev);
+		if (pt_prev) {
+			if (skb_queue_empty(&sublist)) {
+				pt_curr = pt_prev;
+				od_curr = orig_dev;
+			} else if (!(pt_curr == pt_prev &&
+				     od_curr == orig_dev)) {
+				/* dispatch old sublist */
+				__netif_receive_skb_list_ptype(&sublist,
+							       pt_curr,
+							       od_curr);
+				/* start new sublist */
+				__skb_queue_head_init(&sublist);
+				pt_curr = pt_prev;
+				od_curr = orig_dev;
+			}
+			__skb_queue_tail(&sublist, skb);
+		}
+	}
+
+	/* dispatch final sublist */
+	__netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
 }
 
 static int __netif_receive_skb(struct sk_buff *skb)

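For context, a hypothetical driver-side caller (not taken from this patch;
patch 02/12 does the real sfc conversion, and the sk_buff_head-based
signature is the one this version of the series uses).  struct my_rx_ring,
my_napi_rx() and my_ring_next_rx() are assumed names for illustration:

/* Sketch only: a NAPI poll loop batching received skbs into a list and
 * handing the whole batch to the stack in one call, so that the
 * list-core path above can split it into per-ptype sublists.
 */
static void my_napi_rx(struct my_rx_ring *ring)
{
	struct sk_buff_head rx_list;
	struct sk_buff *skb;

	__skb_queue_head_init(&rx_list);
	while ((skb = my_ring_next_rx(ring)) != NULL)
		__skb_queue_tail(&rx_list, skb);

	netif_receive_skb_list(&rx_list);
}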