All of lore.kernel.org
 help / color / mirror / Atom feed
From: John Fastabend <john.fastabend@gmail.com>
To: xiyou.wangcong@gmail.com, jhs@mojatatu.com,
	alexei.starovoitov@gmail.com, eric.dumazet@gmail.com,
	brouer@redhat.com
Cc: john.r.fastabend@intel.com, netdev@vger.kernel.org,
	john.fastabend@gmail.com, davem@davemloft.net
Subject: [RFC PATCH 06/13] net: sched: per cpu gso handlers
Date: Wed, 17 Aug 2016 12:35:52 -0700	[thread overview]
Message-ID: <20160817193552.27032.79224.stgit@john-Precision-Tower-5810> (raw)
In-Reply-To: <20160817193120.27032.20918.stgit@john-Precision-Tower-5810>

The net sched infrastructure has a gso ptr that points to skb structs
that have failed to be enqueued by the device driver.

This can happen when multiple cores try to push an skb onto the same
underlying hardware queue, resulting in lock contention. This case is
handled by a cpu collision handler, handle_dev_cpu_collision(). Another
case occurs when the stack overruns the capacity of the driver's
low-level tx queues. Ideally these should be rare occurrences in a
well-tuned system, but they do happen.

To handle this in the lockless case, use a per-cpu gso field to park
the skb until the conflict can be resolved. Note that at this point the
skb has already been popped off the qdisc, so it has to be handled
by the infrastructure.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 include/net/sch_generic.h |   39 +++++++++++++++++++++++++
 net/sched/sch_api.c       |    7 ++++
 net/sched/sch_generic.c   |   71 ++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 112 insertions(+), 5 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 193cf8c..0864813 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,6 +36,10 @@ struct qdisc_size_table {
 	u16			data[];
 };
 
+struct gso_cell {
+	struct sk_buff *skb;
+};
+
 struct Qdisc {
 	int 			(*enqueue)(struct sk_buff *skb,
 					   struct Qdisc *sch,
@@ -73,6 +77,8 @@ struct Qdisc {
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	struct gnet_stats_queue	__percpu *cpu_qstats;
 
+	struct gso_cell __percpu *gso_cpu_skb;
+
 	/*
 	 * For performance sake on SMP, we put highly modified fields at the end
 	 */
@@ -744,6 +750,23 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
 	return sch->gso_skb;
 }
 
+static inline struct sk_buff *qdisc_peek_dequeued_cpu(struct Qdisc *sch)
+{
+	struct gso_cell *gso = this_cpu_ptr(sch->gso_cpu_skb);
+
+	if (!gso->skb) {
+		struct sk_buff *skb = sch->dequeue(sch);
+
+		if (skb) {
+			gso->skb = skb;
+			qdisc_qstats_cpu_backlog_inc(sch, skb);
+			qdisc_qstats_cpu_qlen_inc(sch);
+		}
+	}
+
+	return gso->skb;
+}
+
 /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
 static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
 {
@@ -760,6 +783,22 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
 	return skb;
 }
 
+static inline struct sk_buff *qdisc_dequeue_peeked_skb(struct Qdisc *sch)
+{
+	struct gso_cell *gso = this_cpu_ptr(sch->gso_cpu_skb);
+	struct sk_buff *skb = gso->skb;
+
+	if (skb) {
+		gso->skb = NULL;
+		qdisc_qstats_cpu_backlog_dec(sch, skb);
+		qdisc_qstats_cpu_qlen_dec(sch);
+	} else {
+		skb = sch->dequeue(sch);
+	}
+
+	return skb;
+}
+
 static inline void __qdisc_reset_queue(struct sk_buff_head *list)
 {
 	/*
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 12ebde8..d713052 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -966,6 +966,12 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				goto err_out4;
 		}
 
+		if (sch->flags & TCQ_F_NOLOCK) {
+			sch->gso_cpu_skb = alloc_percpu(struct gso_cell);
+			if (!sch->gso_cpu_skb)
+				goto err_out4;
+		}
+
 		if (tca[TCA_STAB]) {
 			stab = qdisc_get_stab(tca[TCA_STAB]);
 			if (IS_ERR(stab)) {
@@ -1014,6 +1020,7 @@ err_out:
 err_out4:
 	free_percpu(sch->cpu_bstats);
 	free_percpu(sch->cpu_qstats);
+	free_percpu(sch->gso_cpu_skb);
 	/*
 	 * Any broken qdiscs that would require a ops->reset() here?
 	 * The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f8fec81..3b9a21f 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,8 +44,25 @@ EXPORT_SYMBOL(default_qdisc_ops);
  * - ingress filtering is also serialized via qdisc root lock
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
+static inline struct sk_buff *qdisc_dequeue_gso_skb(struct Qdisc *sch)
+{
+	if (sch->gso_cpu_skb)
+		return (this_cpu_ptr(sch->gso_cpu_skb))->skb;
 
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+	return sch->gso_skb;
+}
+
+static inline void qdisc_null_gso_skb(struct Qdisc *sch)
+{
+	if (sch->gso_cpu_skb) {
+		(this_cpu_ptr(sch->gso_cpu_skb))->skb = NULL;
+		return;
+	}
+
+	sch->gso_skb = NULL;
+}
+
+static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	q->gso_skb = skb;
 	q->qstats.requeues++;
@@ -56,6 +73,25 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 	return 0;
 }
 
+static inline int dev_requeue_cpu_skb(struct sk_buff *skb, struct Qdisc *q)
+{
+	this_cpu_ptr(q->gso_cpu_skb)->skb = skb;
+	qdisc_qstats_cpu_requeues_inc(q);
+	qdisc_qstats_cpu_backlog_inc(q, skb);
+	qdisc_qstats_cpu_qlen_inc(q);
+	__netif_schedule(q);
+
+	return 0;
+}
+
+static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+{
+	if (q->flags & TCQ_F_NOLOCK)
+		return dev_requeue_cpu_skb(skb, q);
+	else
+		return __dev_requeue_skb(skb, q);
+}
+
 static void try_bulk_dequeue_skb(struct Qdisc *q,
 				 struct sk_buff *skb,
 				 const struct netdev_queue *txq,
@@ -111,7 +147,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
 static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 				   int *packets)
 {
-	struct sk_buff *skb = q->gso_skb;
+	struct sk_buff *skb = qdisc_dequeue_gso_skb(q);
 	const struct netdev_queue *txq = q->dev_queue;
 
 	*packets = 1;
@@ -121,9 +157,15 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 		/* check the reason of requeuing without tx lock first */
 		txq = skb_get_tx_queue(txq->dev, skb);
 		if (!netif_xmit_frozen_or_stopped(txq)) {
-			q->gso_skb = NULL;
-			qdisc_qstats_backlog_dec(q, skb);
-			q->q.qlen--;
+			qdisc_null_gso_skb(q);
+
+			if (qdisc_is_percpu_stats(q)) {
+				qdisc_qstats_cpu_backlog_dec(q, skb);
+				qdisc_qstats_cpu_qlen_dec(q);
+			} else {
+				qdisc_qstats_backlog_dec(q, skb);
+				q->q.qlen--;
+			}
 		} else
 			skb = NULL;
 		return skb;
@@ -670,6 +712,12 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 			goto errout;
 	}
 
+	if (sch->flags & TCQ_F_NOLOCK) {
+		sch->gso_cpu_skb = alloc_percpu(struct gso_cell);
+		if (!sch->gso_cpu_skb)
+			goto errout;
+	}
+
 	return sch;
 errout:
 	qdisc_destroy(sch);
@@ -706,6 +754,19 @@ static void qdisc_rcu_free(struct rcu_head *head)
 		free_percpu(qdisc->cpu_qstats);
 	}
 
+	if (qdisc->gso_cpu_skb) {
+		int i;
+
+		for_each_possible_cpu(i) {
+			struct gso_cell *cell;
+
+			cell = per_cpu_ptr(qdisc->gso_cpu_skb, i);
+			kfree_skb_list(cell->skb);
+		}
+
+		free_percpu(qdisc->gso_cpu_skb);
+	}
+
 	kfree((char *) qdisc - qdisc->padded);
 }
 

  parent reply	other threads:[~2016-08-17 19:36 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-17 19:33 [RFC PATCH 00/13] Series short description John Fastabend
2016-08-17 19:33 ` [RFC PATCH 01/13] net: sched: allow qdiscs to handle locking John Fastabend
2016-08-17 22:33   ` Eric Dumazet
2016-08-17 22:49     ` John Fastabend
2016-08-17 22:34   ` Eric Dumazet
2016-08-17 22:48     ` John Fastabend
2016-08-17 19:34 ` [RFC PATCH 02/13] net: sched: qdisc_qlen for per cpu logic John Fastabend
2016-08-17 19:34 ` [RFC PATCH 03/13] net: sched: provide per cpu qstat helpers John Fastabend
2016-08-17 19:35 ` [RFC PATCH 04/13] net: sched: provide atomic qlen helpers for bypass case John Fastabend
2016-08-17 19:35 ` [RFC PATCH 05/13] net: sched: a dflt qdisc may be used with per cpu stats John Fastabend
2016-08-17 19:35 ` John Fastabend [this message]
2016-08-17 19:36 ` [RFC PATCH 07/13] net: sched: support qdisc_reset on NOLOCK qdisc John Fastabend
2016-08-17 22:53   ` Eric Dumazet
2016-08-17 22:59     ` John Fastabend
2016-08-17 19:36 ` [RFC PATCH 08/13] net: sched: support skb_bad_tx with lockless qdisc John Fastabend
2016-08-17 22:58   ` Eric Dumazet
2016-08-17 23:00     ` John Fastabend
2016-08-23 20:11       ` John Fastabend
2016-08-17 19:37 ` [RFC PATCH 09/13] net: sched: helper to sum qlen John Fastabend
2016-08-17 19:37 ` [RFC PATCH 10/13] net: sched: lockless support for netif_schedule John Fastabend
2016-08-17 19:46   ` John Fastabend
2016-08-17 23:01   ` Eric Dumazet
2016-08-17 23:17     ` John Fastabend
2016-08-17 23:33       ` Eric Dumazet
2016-08-17 19:38 ` [RFC PATCH 11/13] net: sched: pfifo_fast use alf_queue John Fastabend
2016-08-19 10:13   ` Jesper Dangaard Brouer
2016-08-19 15:44     ` John Fastabend
2016-08-17 19:38 ` [RFC PATCH 12/13] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mq John Fastabend
2016-08-17 19:49   ` John Fastabend
2016-08-17 23:04   ` Eric Dumazet
2016-08-17 23:18     ` John Fastabend
2016-08-17 19:39 ` [RFC PATCH 13/13] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio John Fastabend

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160817193552.27032.79224.stgit@john-Precision-Tower-5810 \
    --to=john.fastabend@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=brouer@redhat.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=jhs@mojatatu.com \
    --cc=john.r.fastabend@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.