All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next] net: netem: use a list in addition to rbtree
@ 2018-12-04  1:07 Peter Oskolkov
  2018-12-04 15:57 ` Eric Dumazet
  2018-12-04 17:56 ` Stephen Hemminger
  0 siblings, 2 replies; 6+ messages in thread
From: Peter Oskolkov @ 2018-12-04  1:07 UTC (permalink / raw)
  To: David Miller, netdev; +Cc: Peter Oskolkov, Eric Dumazet, Peter Oskolkov

When testing high-bandwidth TCP streams with large windows,
high latency, and low jitter, netem consumes a lot of CPU cycles
doing rbtree rebalancing.

This patch uses a linear list/queue in addition to the rbtree:
if an incoming packet is past the tail of the linear queue, it is
added there, otherwise it is inserted into the rbtree.

Without this patch, perf shows netem_enqueue, netem_dequeue,
and rb_* functions among the top offenders. With this patch,
only netem_enqueue is noticeable if jitter is low/absent.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
---
 net/sched/sch_netem.c | 86 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 68 insertions(+), 18 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 2c38e3d07924..f1099486ecd3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,6 +77,10 @@ struct netem_sched_data {
 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
 	struct rb_root t_root;
 
+	/* a linear queue; reduces rbtree rebalancing when jitter is low */
+	struct sk_buff	*t_head;
+	struct sk_buff	*t_tail;
+
 	/* optional qdisc for classful handling (NULL at netem init) */
 	struct Qdisc	*qdisc;
 
@@ -369,26 +373,39 @@ static void tfifo_reset(struct Qdisc *sch)
 		rb_erase(&skb->rbnode, &q->t_root);
 		rtnl_kfree_skbs(skb, skb);
 	}
+
+	rtnl_kfree_skbs(q->t_head, q->t_tail);
+	q->t_head = NULL;
+	q->t_tail = NULL;
 }
 
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
-	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
 
-	while (*p) {
-		struct sk_buff *skb;
-
-		parent = *p;
-		skb = rb_to_skb(parent);
-		if (tnext >= netem_skb_cb(skb)->time_to_send)
-			p = &parent->rb_right;
+	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
+		if (q->t_tail)
+			q->t_tail->next = nskb;
 		else
-			p = &parent->rb_left;
+			q->t_head = nskb;
+		q->t_tail = nskb;
+	} else {
+		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
+
+		while (*p) {
+			struct sk_buff *skb;
+
+			parent = *p;
+			skb = rb_to_skb(parent);
+			if (tnext >= netem_skb_cb(skb)->time_to_send)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+		rb_link_node(&nskb->rbnode, parent, p);
+		rb_insert_color(&nskb->rbnode, &q->t_root);
 	}
-	rb_link_node(&nskb->rbnode, parent, p);
-	rb_insert_color(&nskb->rbnode, &q->t_root);
 	sch->q.qlen++;
 }
 
@@ -534,6 +551,15 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 					last = t_last;
 				}
 			}
+			if (q->t_tail) {
+				struct netem_skb_cb *t_last =
+					netem_skb_cb(q->t_tail);
+
+				if (!last ||
+				    t_last->time_to_send > last->time_to_send) {
+					last = t_last;
+				}
+			}
 
 			if (last) {
 				/*
@@ -611,11 +637,37 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
 	q->slot.bytes_left = q->slot_config.max_bytes;
 }
 
+static struct sk_buff *netem_peek(struct netem_sched_data *q)
+{
+	struct sk_buff *skb = skb_rb_first(&q->t_root);
+	u64 t1, t2;
+
+	if (!skb)
+		return q->t_head;
+	if (!q->t_head)
+		return skb;
+
+	t1 = netem_skb_cb(skb)->time_to_send;
+	t2 = netem_skb_cb(q->t_head)->time_to_send;
+	if (t1 < t2)
+		return skb;
+	return q->t_head;
+}
+
+static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
+{
+	if (skb == q->t_head) {
+		q->t_head = skb->next;
+		if (!q->t_head)
+			q->t_tail = NULL;
+	} else
+		rb_erase(&skb->rbnode, &q->t_root);
+}
+
 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	struct rb_node *p;
 
 tfifo_dequeue:
 	skb = __qdisc_dequeue_head(&sch->q);
@@ -625,20 +677,18 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 		qdisc_bstats_update(sch, skb);
 		return skb;
 	}
-	p = rb_first(&q->t_root);
-	if (p) {
+	skb = netem_peek(q);
+	if (skb) {
 		u64 time_to_send;
 		u64 now = ktime_get_ns();
 
-		skb = rb_to_skb(p);
-
 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;
 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
 			get_slot_next(q, now);
 
-		if (time_to_send <= now &&  q->slot.slot_next <= now) {
-			rb_erase(p, &q->t_root);
+		if (time_to_send <= now && q->slot.slot_next <= now) {
+			netem_erase_head(q, skb);
 			sch->q.qlen--;
 			qdisc_qstats_backlog_dec(sch, skb);
 			skb->next = NULL;
-- 
2.20.0.rc1.387.gf8505762e3-goog

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] net: netem: use a list in addition to rbtree
  2018-12-04  1:07 [PATCH net-next] net: netem: use a list in addition to rbtree Peter Oskolkov
@ 2018-12-04 15:57 ` Eric Dumazet
  2018-12-04 17:56 ` Stephen Hemminger
  1 sibling, 0 replies; 6+ messages in thread
From: Eric Dumazet @ 2018-12-04 15:57 UTC (permalink / raw)
  To: Peter Oskolkov, David Miller, netdev; +Cc: Peter Oskolkov, Eric Dumazet



On 12/03/2018 05:07 PM, Peter Oskolkov wrote:
> When testing high-bandwidth TCP streams with large windows,
> high latency, and low jitter, netem consumes a lot of CPU cycles
> doing rbtree rebalancing.
> 
> This patch uses a linear list/queue in addition to the rbtree:
> if an incoming packet is past the tail of the linear queue, it is
> added there, otherwise it is inserted into the rbtree.
> 
> Without this patch, perf shows netem_enqueue, netem_dequeue,
> and rb_* functions among the top offenders. With this patch,
> only netem_enqueue is noticeable if jitter is low/absent.
> 
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Peter Oskolkov <posk@google.com>
> ---

Reviewed-by: Eric Dumazet <edumazet@google.com>

Thanks Peter !

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] net: netem: use a list in addition to rbtree
  2018-12-04  1:07 [PATCH net-next] net: netem: use a list in addition to rbtree Peter Oskolkov
  2018-12-04 15:57 ` Eric Dumazet
@ 2018-12-04 17:56 ` Stephen Hemminger
  2018-12-04 19:10   ` Peter Oskolkov
  1 sibling, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2018-12-04 17:56 UTC (permalink / raw)
  To: Peter Oskolkov; +Cc: David Miller, netdev, Peter Oskolkov, Eric Dumazet

I like this, it makes a lot of sense since packets are almost
always queued in order.

Minor style stuff you might want to fix (but don't have to).

> +				if (!last ||
> +				    t_last->time_to_send > last->time_to_send) {
> +					last = t_last;
> +				}

I don't think you need braces here for single assignment.

> +static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
> +{
> +	if (skb == q->t_head) {
> +		q->t_head = skb->next;
> +		if (!q->t_head)
> +			q->t_tail = NULL;
> +	} else
> +		rb_erase(&skb->rbnode, &q->t_root);

Checkpatch wants both sides of if/else to have brackets.
Personally, don't care.

Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] net: netem: use a list in addition to rbtree
  2018-12-04 17:56 ` Stephen Hemminger
@ 2018-12-04 19:10   ` Peter Oskolkov
  2018-12-04 19:58     ` Peter Oskolkov
  2018-12-04 20:07     ` David Miller
  0 siblings, 2 replies; 6+ messages in thread
From: Peter Oskolkov @ 2018-12-04 19:10 UTC (permalink / raw)
  To: stephen; +Cc: davem, netdev, posk.devel, Eric Dumazet

Thanks, Stephen!

I don't care much about braces either. David, do you want me to send a
new patch with braces moved around?

On Tue, Dec 4, 2018 at 9:56 AM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> I like this, it makes a lot of sense since packets are almost
> always queued in order.
>
> Minor style stuff you might want to fix (but don't have to).
>
> > +                             if (!last ||
> > +                                 t_last->time_to_send > last->time_to_send) {
> > +                                     last = t_last;
> > +                             }
>
> I don't think you need braces here for single assignment.
>
> > +static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
> > +{
> > +     if (skb == q->t_head) {
> > +             q->t_head = skb->next;
> > +             if (!q->t_head)
> > +                     q->t_tail = NULL;
> > +     } else
> > +             rb_erase(&skb->rbnode, &q->t_root);
>
> Checkpatch wants both sides of if/else to have brackets.
> Personally, don't care.
>
> Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] net: netem: use a list in addition to rbtree
  2018-12-04 19:10   ` Peter Oskolkov
@ 2018-12-04 19:58     ` Peter Oskolkov
  2018-12-04 20:07     ` David Miller
  1 sibling, 0 replies; 6+ messages in thread
From: Peter Oskolkov @ 2018-12-04 19:58 UTC (permalink / raw)
  To: Peter Oskolkov; +Cc: Stephen Hemminger, davem, netdev, edumazet

On Tue, Dec 4, 2018 at 11:11 AM Peter Oskolkov <posk@google.com> wrote:
>
> Thanks, Stephen!
>
> I don't care much about braces either. David, do you want me to send a
> new patch with braces moved around?

Sent a v2 with style fixes, just in case.

>
> On Tue, Dec 4, 2018 at 9:56 AM Stephen Hemminger
> <stephen@networkplumber.org> wrote:
> >
> > I like this, it makes a lot of sense since packets are almost
> > always queued in order.
> >
> > Minor style stuff you might want to fix (but don't have to).
> >
> > > +                             if (!last ||
> > > +                                 t_last->time_to_send > last->time_to_send) {
> > > +                                     last = t_last;
> > > +                             }
> >
> > I don't think you need braces here for single assignment.
> >
> > > +static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
> > > +{
> > > +     if (skb == q->t_head) {
> > > +             q->t_head = skb->next;
> > > +             if (!q->t_head)
> > > +                     q->t_tail = NULL;
> > > +     } else
> > > +             rb_erase(&skb->rbnode, &q->t_root);
> >
> > Checkpatch wants both sides of if/else to have brackets.
> > Personally, don't care.
> >
> > Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] net: netem: use a list in addition to rbtree
  2018-12-04 19:10   ` Peter Oskolkov
  2018-12-04 19:58     ` Peter Oskolkov
@ 2018-12-04 20:07     ` David Miller
  1 sibling, 0 replies; 6+ messages in thread
From: David Miller @ 2018-12-04 20:07 UTC (permalink / raw)
  To: posk; +Cc: stephen, netdev, posk.devel, edumazet

From: Peter Oskolkov <posk@google.com>
Date: Tue, 4 Dec 2018 11:10:55 -0800

> Thanks, Stephen!
> 
> I don't care much about braces either. David, do you want me to send a
> new patch with braces moved around?

Single statement basic blocks definitely must not have curly braces,
please remove them and repost.

Thank you.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-12-04 20:08 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-04  1:07 [PATCH net-next] net: netem: use a list in addition to rbtree Peter Oskolkov
2018-12-04 15:57 ` Eric Dumazet
2018-12-04 17:56 ` Stephen Hemminger
2018-12-04 19:10   ` Peter Oskolkov
2018-12-04 19:58     ` Peter Oskolkov
2018-12-04 20:07     ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.