All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: Patrick McHardy <kaber@trash.net>
Cc: David Miller <davem@davemloft.net>,
	netdev <netdev@vger.kernel.org>,
	Jarek Poplawski <jarkao2@gmail.com>
Subject: [RFC PATCH] net_sched: sch_sfq: better struct layouts
Date: Fri, 17 Dec 2010 17:52:46 +0100	[thread overview]
Message-ID: <1292604766.2906.51.camel@edumazet-laptop> (raw)
In-Reply-To: <1292504932.2883.110.camel@edumazet-laptop>

Le jeudi 16 décembre 2010 à 14:08 +0100, Eric Dumazet a écrit :
> Le mercredi 15 décembre 2010 à 18:03 +0100, Patrick McHardy a écrit :
> > On 15.12.2010 17:55, Eric Dumazet wrote:
> 
> > > I was thinking in allowing more packets per SFQ (but keep the 126 active
> > > flows limit), what do you think ?
> > 
> > I keep forgetting why this limit exists, let me try to figure
> > it out once more :)
> 
> I took a look, and found we already are at max, unless we change
> sfq_index type (from u8 to u16)
> 
> SFQ_SLOTS is 128  (not really exist, but this is the number of slots)
> SFQ_DEPTH is 128 (max depth for one slot)
> Constraints are :
>  SFQ_SLOTS < 256
>  SFQ_DEPTH < 256
>  SFQ_SLOTS + SFQ_DEPTH < 256, because of the dep[] array.
> 
> We could avoid the "struct sk_buff_head" structure with its un-needed
> spinlock, and eventually group data for a given slot in a structure to
> reduce number of cache lines we touch...
> 
> struct sfq_slot {
> 	struct sk_buff *first;
> 	struct sk_buff *last;
> 	u8 qlen;
> 	sfq_index next; /* dequeue chain */
> 	u16 hash;
> 	short allot;
> 	/* 16bit hole */
> };
> 
> This would save 768 bytes on x86_64 (and much more if LOCKDEP is used)
> 
> 

Here is a preliminary patch, shrinking sizeof(struct sfq_sched_data)
from 0x14f8 (or more if spinlocks are bigger) to 0x1180 bytes, and
reduce text size as well.

   text	   data	    bss	    dec	    hex	filename
   4821	    152	      0	   4973	   136d	old/net/sched/sch_sfq.o
   4651	    136	      0	   4787	   12b3	new/net/sched/sch_sfq.o


All data for a slot/flow is now grouped in a compact and cache friendly
structure :

struct sfq_slot {
        struct sk_buff  *skblist_next;
        struct sk_buff  *skblist_prev;
        sfq_index       qlen; /* number of skbs in skblist */
        sfq_index       next; /* next slot in sfq chain */
        unsigned short  hash; /* hash value (index in ht[]) */
        short           allot; /* credit for this slot */
        struct sfq_head anchor; /* anchor in dep[] chains */
};



 net/sched/sch_sfq.c |  223 +++++++++++++++++++++++-------------------
 1 file changed, 125 insertions(+), 98 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3cf478d..28968eb 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -69,25 +69,40 @@
 	This implementation limits maximal queue length to 128;
 	maximal mtu to 2^15-1; number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
-	fit into one 4K page :-). Struct sfq_sched_data is
-	organized in anti-cache manner: all the data for a bucket
-	are scattered over different locations. This is not good,
-	but it allowed me to put it into 4K.
+	fit into one 4K page on 32bit arches.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128
+#define SFQ_DEPTH		128 /* max number of packets per slot (per flow) */
+#define SFQ_SLOTS		128 /* max number of flows */
+#define EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
 
-/* This type should contain at least SFQ_DEPTH*2 values */
+/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 
+/*
+ * We dont use pointers to save space.
+ * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * are 'pointers' to dep[] array
+ */
 struct sfq_head
 {
 	sfq_index	next;
 	sfq_index	prev;
 };
 
+struct sfq_slot {
+	struct sk_buff	*skblist_next;
+	struct sk_buff	*skblist_prev;
+	sfq_index	qlen; /* number of skbs in skblist */
+	sfq_index	next; /* next slot in sfq chain */
+	unsigned short	hash; /* hash value (index in ht[]) */
+	short		allot; /* credit for this slot */
+	struct sfq_head anchor; /* anchor in dep[] chains */
+};
+
 struct sfq_sched_data
 {
 /* Parameters */
@@ -99,17 +114,24 @@ struct sfq_sched_data
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	u32		perturbation;
-	sfq_index	tail;		/* Index of current slot in round */
-	sfq_index	max_depth;	/* Maximal depth */
+	sfq_index	max_depth;	/* depth of longest slot */
 
+	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
-	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
-	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
-	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
-	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+	struct sfq_slot	slots[SFQ_SLOTS];
+	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
 
+/*
+ * sfq_head are either in a sfq_slot or in dep[] array
+ */
+static inline struct sfq_head *get_head(struct sfq_sched_data *q, sfq_index val)
+{
+	if (val < SFQ_SLOTS)
+		return &q->slots[val].anchor;
+	return &q->dep[val - SFQ_SLOTS];
+}
+
 static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
 	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
@@ -200,30 +222,37 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	return 0;
 }
 
+/*
+ * x : slot number [0 .. SFQ_SLOTS - 1]
+ */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d = q->qs[x].qlen + SFQ_DEPTH;
+	int qlen = q->slots[x].qlen;
 
-	p = d;
-	n = q->dep[d].next;
-	q->dep[x].next = n;
-	q->dep[x].prev = p;
-	q->dep[p].next = q->dep[n].prev = x;
+	p = qlen + SFQ_SLOTS;
+	n = q->dep[qlen].next;
+
+	q->slots[x].anchor.next = n;
+	q->slots[x].anchor.prev = p;
+
+	q->dep[qlen].next = x;		/* get_head(q, p)->next = x */
+	get_head(q, n)->prev = x;
 }
 
 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
+	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
+	n = q->slots[x].anchor.next;
+	p = q->slots[x].anchor.prev;
+	get_head(q, p)->next = n;
+	get_head(q, n)->prev = p;
 
-	if (n == p && q->max_depth == q->qs[x].qlen + 1)
+	d = q->slots[x].qlen--;
+	if (n == p && q->max_depth == d)
 		q->max_depth--;
-
 	sfq_link(q, x);
 }
 
@@ -232,34 +261,36 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 	sfq_index p, n;
 	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-	d = q->qs[x].qlen;
+	n = q->slots[x].anchor.next;
+	p = q->slots[x].anchor.prev;
+	get_head(q, p)->next = n;
+	get_head(q, n)->prev = p;
+
+	d = ++q->slots[x].qlen;
 	if (q->max_depth < d)
 		q->max_depth = d;
-
 	sfq_link(q, x);
 }
 
 static unsigned int sfq_drop(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index d = q->max_depth;
+	sfq_index x, d = q->max_depth;
 	struct sk_buff *skb;
 	unsigned int len;
+	struct sfq_slot *slot;
 
-	/* Queue is full! Find the longest slot and
-	   drop a packet from it */
-
+	/* Queue is full! Find the longest slot and drop tail packet from it */
 	if (d > 1) {
-		sfq_index x = q->dep[d + SFQ_DEPTH].next;
-		skb = q->qs[x].prev;
+		x = q->dep[d].next;
+		slot = &q->slots[x];
+drop:
+		skb = slot->skblist_prev;
+		slot->skblist_prev = skb->prev;
 		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[x]);
-		kfree_skb(skb);
 		sfq_dec(q, x);
+		skb->next = skb->prev = NULL;
+		kfree_skb(skb);
 		sch->q.qlen--;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
@@ -268,19 +299,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 
 	if (d == 1) {
 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
-		d = q->next[q->tail];
-		q->next[q->tail] = q->next[d];
-		q->allot[q->next[d]] += q->quantum;
-		skb = q->qs[d].prev;
-		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[d]);
-		kfree_skb(skb);
-		sfq_dec(q, d);
-		sch->q.qlen--;
-		q->ht[q->hash[d]] = SFQ_DEPTH;
-		sch->qstats.drops++;
-		sch->qstats.backlog -= len;
-		return len;
+		x = q->tail->next;
+		slot = &q->slots[x];
+		q->tail->next = slot->next;
+		q->ht[slot->hash] = EMPTY_SLOT;
+		goto drop;
 	}
 
 	return 0;
@@ -292,6 +315,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
 	sfq_index x;
+	struct sfq_slot *slot;
 	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
@@ -304,31 +328,36 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	hash--;
 
 	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
+	slot = &q->slots[x];
+	if (x == EMPTY_SLOT) {
+		x = q->dep[0].next; /* get a free slot */
+		q->ht[hash] = x;
+		slot = &q->slots[x];
+		slot->hash = hash;
+		slot->skblist_next = slot->skblist_prev = (struct sk_buff *)slot;
 	}
 
-	/* If selected queue has length q->limit, this means that
-	 * all another queues are empty and that we do simple tail drop,
+	/* If selected queue has length q->limit, do simple tail drop,
 	 * i.e. drop _this_ packet.
 	 */
-	if (q->qs[x].qlen >= q->limit)
+	if (slot->qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_tail(&q->qs[x], skb);
+	skb->prev = slot->skblist_prev;
+	skb->next = (struct sk_buff *)slot;
+	slot->skblist_prev->next = skb;
+	slot->skblist_prev = skb;
 	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->tail = x;
-			q->next[x] = x;
-			q->allot[x] = q->quantum;
+	if (slot->qlen == 1) {		/* The flow is new */
+		if (q->tail == NULL) {	/* It is the first flow */
+			slot->next = x;
 		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
-			q->tail = x;
+			slot->next = q->tail->next;
+			q->tail->next = x;
 		}
+		q->tail = slot;
+		slot->allot = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -344,14 +373,12 @@ static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index a;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-	return skb_peek(&q->qs[a]);
+	return q->slots[q->tail->next].skblist_next;
 }
 
 static struct sk_buff *
@@ -359,34 +386,35 @@ sfq_dequeue(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	sfq_index a, old_a;
+	sfq_index a, next_a;
+	struct sfq_slot *slot;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = old_a = q->next[q->tail];
-
+	a = q->tail->next;
+	slot = &q->slots[a];
 	/* Grab packet */
-	skb = __skb_dequeue(&q->qs[a]);
+	skb = slot->skblist_next;
+	slot->skblist_next = skb->next;
+	skb->next = skb->prev = NULL;
 	sfq_dec(q, a);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
-	/* Is the slot empty? */
-	if (q->qs[a].qlen == 0) {
-		q->ht[q->hash[a]] = SFQ_DEPTH;
-		a = q->next[a];
-		if (a == old_a) {
-			q->tail = SFQ_DEPTH;
+	/* Is the slot now empty? */
+	if (slot->qlen == 0) {
+		q->ht[slot->hash] = EMPTY_SLOT;
+		next_a = slot->next;
+		if (a == next_a) {
+			q->tail = NULL; /* no more active slots */
 			return skb;
 		}
-		q->next[q->tail] = a;
-		q->allot[a] += q->quantum;
-	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = a;
-		a = q->next[a];
-		q->allot[a] += q->quantum;
+		q->tail->next = next_a;
+	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
+		q->tail = slot;
+		slot->allot += q->quantum;
 	}
 	return skb;
 }
@@ -450,17 +478,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	init_timer_deferrable(&q->perturb_timer);
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
+		q->ht[i] = EMPTY_SLOT;
 
 	for (i = 0; i < SFQ_DEPTH; i++) {
-		skb_queue_head_init(&q->qs[i]);
-		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
-		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+		q->dep[i].next = i + SFQ_SLOTS;
+		q->dep[i].prev = i + SFQ_SLOTS;
 	}
 
 	q->limit = SFQ_DEPTH - 1;
 	q->max_depth = 0;
-	q->tail = SFQ_DEPTH;
+	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->perturb_period = 0;
@@ -471,7 +498,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_DEPTH; i++)
+	for (i = 0; i < SFQ_SLOTS; i++)
 		sfq_link(q, i);
 	return 0;
 }
@@ -547,9 +574,9 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index idx = q->ht[cl-1];
-	struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
-	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
+	struct gnet_stats_queue qs = { .qlen = slot->qlen };
+	struct tc_sfq_xstats xstats = { .allot = slot->allot };
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
@@ -565,7 +592,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
-		if (q->ht[i] == SFQ_DEPTH ||
+		if (q->ht[i] == EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
 			continue;



  reply	other threads:[~2010-12-17 16:52 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-15 14:03 [PATCH] net_sched: sch_sfq: fix allot handling Eric Dumazet
2010-12-15 16:03 ` Patrick McHardy
2010-12-15 16:27   ` Eric Dumazet
2010-12-15 16:40     ` [PATCH v2] " Eric Dumazet
2010-12-15 16:43       ` Patrick McHardy
2010-12-15 16:55         ` Eric Dumazet
2010-12-15 17:03           ` Patrick McHardy
2010-12-15 17:09             ` Eric Dumazet
2010-12-15 17:21               ` Patrick McHardy
2010-12-15 17:30                 ` [PATCH v3] " Eric Dumazet
2010-12-15 18:18                   ` [PATCH net-next-2.6] net_sched: sch_sfq: add backlog info in sfq_dump_class_stats() Eric Dumazet
2010-12-15 19:10                     ` Eric Dumazet
2010-12-16  8:16                     ` Jarek Poplawski
2010-12-16 10:18                       ` [PATCH v2 " Eric Dumazet
2010-12-16 11:03                       ` [PATCH " Eric Dumazet
2010-12-16 13:09                         ` Jarek Poplawski
2010-12-20 21:14                     ` David Miller
2010-12-20 21:18                   ` [PATCH v3] net_sched: sch_sfq: fix allot handling David Miller
2010-12-16 13:08             ` [PATCH v2] " Eric Dumazet
2010-12-17 16:52               ` Eric Dumazet [this message]
2010-12-19 21:22                 ` [RFC PATCH] net_sched: sch_sfq: better struct layouts Jarek Poplawski
2010-12-20 17:02                   ` [PATCH v2] " Eric Dumazet
2010-12-20 21:33                     ` David Miller
2010-12-20 21:42                       ` Eric Dumazet
2010-12-20 22:54                         ` [PATCH v3 net-next-2.6] " Eric Dumazet
2010-12-21  5:33                           ` David Miller
2010-12-20 22:55                     ` [PATCH v2] " Jarek Poplawski
2010-12-20 23:16                     ` [PATCH net-next-2.6] sch_sfq: allow big packets and be fair Eric Dumazet
2010-12-21 10:15                       ` Jarek Poplawski
2010-12-21 10:30                         ` Jarek Poplawski
2010-12-21 10:44                           ` Eric Dumazet
2010-12-21 10:56                             ` Jarek Poplawski
2010-12-21 10:57                         ` Eric Dumazet
2010-12-21 11:39                           ` Jarek Poplawski
2010-12-21 12:17                             ` Jarek Poplawski
2010-12-21 13:04                               ` [PATCH v2 " Eric Dumazet
2010-12-21 13:47                                 ` Jarek Poplawski
2010-12-28 21:46                                 ` David Miller
2010-12-29  7:53                                   ` [PATCH v3 " Eric Dumazet
2010-12-31 20:48                                     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1292604766.2906.51.camel@edumazet-laptop \
    --to=eric.dumazet@gmail.com \
    --cc=davem@davemloft.net \
    --cc=jarkao2@gmail.com \
    --cc=kaber@trash.net \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.