* [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO
@ 2012-10-29  8:51 Paolo Valente
  2012-10-29 11:08 ` Cong Wang
  0 siblings, 1 reply; 8+ messages in thread
From: Paolo Valente @ 2012-10-29  8:51 UTC (permalink / raw)
  To: jhs, davem, shemminger
  Cc: linux-kernel, netdev, rizzo, fchecconi, paolo.valente

Hi,
if the max packet size for some class (configured through tc) is
violated by the actual size of the packets of that class, then QFQ
would not schedule classes correctly, and the data structures
implementing the bucket lists may get corrupted. This problem occurs
with TSO/GSO even if the max packet size is set to the MTU, and is,
e.g., the cause of the failure reported in [1]. Two patches have been
proposed to solve this problem in [2], one of them is a preliminary
version of this patch.

This patch addresses the above issues by: 1) setting QFQ parameters to
proper values for supporting TSO/GSO (in particular, setting the
maximum possible packet size to 64KB), 2) automatically increasing the
max packet size for a class, lmax, when a packet with a larger size
than the current value of lmax arrives (a notification is also appended
to the log).

The drawback of the first point is that the maximum weight for a class
is now limited to 4096, which is equal to 1/16 of the maximum weight
sum.
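
For reference, the arithmetic behind these limits can be checked with the
small userspace sketch below. The macro values are copied from the patch;
the main() harness itself is only illustrative and is not part of the patch.

#include <stdio.h>

/* Values as set by the patch below. */
#define QFQ_MTU_SHIFT	16			/* max packet size: 64KB */
#define QFQ_MAX_WSHIFT	12			/* max class weight: 4096 */
#define QFQ_MAX_WEIGHT	(1 << QFQ_MAX_WSHIFT)
#define QFQ_MAX_WSUM	(16 * QFQ_MAX_WEIGHT)

int main(void)
{
	printf("max packet size: %u bytes\n", 1U << QFQ_MTU_SHIFT);
	printf("max class weight: %u = wsum/%u\n",
	       QFQ_MAX_WEIGHT, QFQ_MAX_WSUM / QFQ_MAX_WEIGHT);
	return 0;
}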

Finally, this patch also forcibly caps the timestamps of a class if
they are too high to be stored in the bucket list. This capping, taken
from QFQ+ [3], handles the infrequent case described in the comment to
the function slot_insert.

[1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2
[2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2
[3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2
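
For reference, the capping described above can be illustrated with the
standalone sketch below. cap_slot() mirrors the new lines added to
qfq_slot_insert in the diff; QFQ_MAX_SLOTS is assumed to be 32 (as implied
by the old comment in sch_qfq.c), and the helper name and the sample
timestamps in main() are purely illustrative.

#include <stdio.h>
#include <stdint.h>

#define QFQ_MTU_SHIFT	16
#define QFQ_MIN_LMAX	256
#define QFQ_MAX_SLOTS	32	/* assumed, as implied by the old comment */

/*
 * Illustrative copy of the capping added to qfq_slot_insert(): clamp the
 * slot index to QFQ_MAX_SLOTS - 2 and shift the class timestamps S and F
 * back by the excess.
 */
static uint64_t cap_slot(uint64_t roundedS, uint64_t grp_S,
			 unsigned int slot_shift, uint64_t *S, uint64_t *F)
{
	uint64_t slot = (roundedS - grp_S) >> slot_shift;

	if (slot > QFQ_MAX_SLOTS - 2) {
		uint64_t deltaS = roundedS - grp_S -
			((uint64_t)(QFQ_MAX_SLOTS - 2) << slot_shift);
		*S -= deltaS;
		*F -= deltaS;
		slot = QFQ_MAX_SLOTS - 2;
	}
	return slot;
}

int main(void)
{
	/* Steady-state bound from the new comment: 2 + (64KB/256) * (1/16) = 18 */
	unsigned int bound = 2 + ((1U << QFQ_MTU_SHIFT) / QFQ_MIN_LMAX) / 16;
	uint64_t S = 5000, F = 5100;	/* made-up class timestamps */
	uint64_t slot = cap_slot(4992, 0, 5, &S, &F);

	printf("steady-state slot bound: %u (stays below %u)\n",
	       bound, QFQ_MAX_SLOTS - 2);
	printf("capped slot = %llu, shifted S = %llu, F = %llu\n",
	       (unsigned long long)slot, (unsigned long long)S,
	       (unsigned long long)F);
	return 0;
}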

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
---
 net/sched/sch_qfq.c |  109 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 79 insertions(+), 30 deletions(-)

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f0dd83c..4092470 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -84,18 +84,19 @@
  * grp->index is the index of the group; and grp->slot_shift
  * is the shift for the corresponding (scaled) sigma_i.
  */
-#define QFQ_MAX_INDEX		19
-#define QFQ_MAX_WSHIFT		16
+#define QFQ_MAX_INDEX		24
+#define QFQ_MAX_WSHIFT		12
 
 #define	QFQ_MAX_WEIGHT		(1<<QFQ_MAX_WSHIFT)
-#define QFQ_MAX_WSUM		(2*QFQ_MAX_WEIGHT)
+#define QFQ_MAX_WSUM		(16*QFQ_MAX_WEIGHT)
 
 #define FRAC_BITS		30	/* fixed point arithmetic */
 #define ONE_FP			(1UL << FRAC_BITS)
 #define IWSUM			(ONE_FP/QFQ_MAX_WSUM)
 
-#define QFQ_MTU_SHIFT		11
+#define QFQ_MTU_SHIFT		16	/* to support TSO/GSO */
 #define QFQ_MIN_SLOT_SHIFT	(FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
+#define QFQ_MIN_LMAX		256	/* min possible lmax for a class */
 
 /*
  * Possible group states.  These values are used as indexes for the bitmaps
@@ -231,6 +232,32 @@ static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
 	q->wsum += delta_w;
 }
 
+static void qfq_update_reactivate_class(struct qfq_sched *q,
+					struct qfq_class *cl,
+					u32 inv_w, u32 lmax, int delta_w)
+{
+	bool need_reactivation = false;
+	int i = qfq_calc_index(inv_w, lmax);
+
+	if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
+		/*
+		 * shift cl->F back, to not charge the
+		 * class for the not-yet-served head
+		 * packet
+		 */
+		cl->F = cl->S;
+		/* remove class from its slot in the old group */
+		qfq_deactivate_class(q, cl);
+		need_reactivation = true;
+	}
+
+	qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
+
+	if (need_reactivation) /* activate in new group */
+		qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+}
+
+
 static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			    struct nlattr **tca, unsigned long *arg)
 {
@@ -238,7 +265,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	struct qfq_class *cl = (struct qfq_class *)*arg;
 	struct nlattr *tb[TCA_QFQ_MAX + 1];
 	u32 weight, lmax, inv_w;
-	int i, err;
+	int err;
 	int delta_w;
 
 	if (tca[TCA_OPTIONS] == NULL) {
@@ -270,16 +297,14 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tb[TCA_QFQ_LMAX]) {
 		lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
-		if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
+		if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
 			pr_notice("qfq: invalid max length %u\n", lmax);
 			return -EINVAL;
 		}
 	} else
-		lmax = 1UL << QFQ_MTU_SHIFT;
+		lmax = psched_mtu(qdisc_dev(sch));
 
 	if (cl != NULL) {
-		bool need_reactivation = false;
-
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
 						    qdisc_root_sleeping_lock(sch),
@@ -291,24 +316,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		if (lmax == cl->lmax && inv_w == cl->inv_w)
 			return 0; /* nothing to update */
 
-		i = qfq_calc_index(inv_w, lmax);
 		sch_tree_lock(sch);
-		if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
-			/*
-			 * shift cl->F back, to not charge the
-			 * class for the not-yet-served head
-			 * packet
-			 */
-			cl->F = cl->S;
-			/* remove class from its slot in the old group */
-			qfq_deactivate_class(q, cl);
-			need_reactivation = true;
-		}
-
-		qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
-
-		if (need_reactivation) /* activate in new group */
-			qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+		qfq_update_reactivate_class(q, cl, inv_w, lmax, delta_w);
 		sch_tree_unlock(sch);
 
 		return 0;
@@ -663,15 +672,48 @@ static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
 
 
 /*
- * XXX we should make sure that slot becomes less than 32.
- * This is guaranteed by the input values.
- * roundedS is always cl->S rounded on grp->slot_shift bits.
+ * If the weight and lmax (max_pkt_size) of the classes do not change,
+ * then QFQ guarantees that the slot index is never higher than
+ * 2 + ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM).
+ *
+ * With the current values of the above constants, the index is
+ * then guaranteed to never be higher than 2 + 256 * (1 / 16) = 18.
+ *
+ * When the weight of a class is increased or the lmax of the class is
+ * decreased, a new class with smaller slot size may happen to be
+ * activated. The activation of this class should be properly delayed
+ * to when the service of the class has finished in the ideal system
+ * tracked by QFQ. If the activation of the class is not delayed to
+ * this reference time instant, then this class may be unjustly served
+ * before other classes waiting for service. This may cause
+ * (infrequently) the above bound to the slot index to be violated for
+ * some of these unlucky classes.
+ *
+ * Instead of delaying the activation of the new class, which is quite
+ * complex, the following inaccurate but simple solution is used: if
+ * the slot index is higher than QFQ_MAX_SLOTS-2, then the timestamps
+ * of the class are shifted backward so as to let the slot index
+ * become equal to QFQ_MAX_SLOTS-2. This threshold is used because, if
+ * the slot index is above it, then the data structure implementing
+ * the bucket list either gets immediately corrupted or may get
+ * corrupted on a possible next packet arrival that causes the start
+ * time of the group to be shifted backward.
  */
 static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
 			    u64 roundedS)
 {
 	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
-	unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
+	unsigned int i; /* slot index in the bucket list */
+
+	if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
+		u64 deltaS = roundedS - grp->S -
+			((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
+		cl->S -= deltaS;
+		cl->F -= deltaS;
+		slot = QFQ_MAX_SLOTS - 2;
+	}
+
+	i = (grp->front + slot) % QFQ_MAX_SLOTS;
 
 	hlist_add_head(&cl->next, &grp->slots[i]);
 	__set_bit(slot, &grp->full_slots);
@@ -892,6 +934,13 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
 
+	if (unlikely(cl->lmax < qdisc_pkt_len(skb))) {
+		pr_notice("qfq: increasing maxpkt from %u to %u for class %u",
+			  cl->lmax, qdisc_pkt_len(skb), cl->common.classid);
+		qfq_update_reactivate_class(q, cl, cl->inv_w,
+					    qdisc_pkt_len(skb), 0);
+	}
+
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO
  2012-10-29  8:51 [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO Paolo Valente
@ 2012-10-29 11:08 ` Cong Wang
  2012-10-29 11:24   ` Paolo Valente
  0 siblings, 1 reply; 8+ messages in thread
From: Cong Wang @ 2012-10-29 11:08 UTC (permalink / raw)
  To: Paolo Valente
  Cc: jhs, davem, shemminger, linux-kernel, netdev, rizzo, fchecconi

On 10/29/2012 04:51 PM, Paolo Valente wrote:
> Hi,
> if the max packet size for some class (configured through tc) is
> violated by the actual size of the packets of that class, then QFQ
> would not schedule classes correctly, and the data structures
> implementing the bucket lists may get corrupted. This problem occurs
> with TSO/GSO even if the max packet size is set to the MTU, and is,
> e.g., the cause of the failure reported in [1]. Two patches have been
> proposed to solve this problem in [2], one of them is a preliminary
> version of this patch.
>
> This patch addresses the above issues by: 1) setting QFQ parameters to
> proper values for supporting TSO/GSO (in particular, setting the
> maximum possible packet size to 64KB), 2) automatically increasing the
> max packet size for a class, lmax, when a packet with a larger size
> than the current value of lmax arrives (a notification is also appended
> to the log).
>
> The drawback of the first point is that the maximum weight for a class
> is now limited to 4096, which is equal to 1/16 of the maximum weight
> sum.
>
> Finally, this patch also forcibly caps the timestamps of a class if
> they are too high to be stored in the bucket list. This capping, taken
> from QFQ+ [3], handles the infrequent case described in the comment to
> the function slot_insert.
>
> [1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2
> [2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2
> [3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2
>
> Signed-off-by: Paolo Valente <paolo.valente@unimore.it>


Please respect people who help you to test it:

Tested-by: Cong Wang <amwang@redhat.com>

I tested it again just in case...

By the way, one nit below:


> +	if (unlikely(cl->lmax < qdisc_pkt_len(skb))) {
> +		pr_notice("qfq: increasing maxpkt from %u to %u for class %u",
> +			  cl->lmax, qdisc_pkt_len(skb), cl->common.classid);
> +		qfq_update_reactivate_class(q, cl, cl->inv_w,
> +					    qdisc_pkt_len(skb), 0);
> +	}


The log level KERN_NOTICE seems overkill; normal users don't care 
about this information, so s/pr_notice/pr_debug/.


Thanks!


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO
  2012-10-29 11:08 ` Cong Wang
@ 2012-10-29 11:24   ` Paolo Valente
  2012-10-29 11:31     ` Cong Wang
  0 siblings, 1 reply; 8+ messages in thread
From: Paolo Valente @ 2012-10-29 11:24 UTC (permalink / raw)
  To: Cong Wang; +Cc: jhs, davem, shemminger, linux-kernel, netdev, rizzo, fchecconi

On 29/10/2012 12:08, Cong Wang wrote:
> On 10/29/2012 04:51 PM, Paolo Valente wrote:
>> Hi,
>> if the max packet size for some class (configured through tc) is
>> violated by the actual size of the packets of that class, then QFQ
>> would not schedule classes correctly, and the data structures
>> implementing the bucket lists may get corrupted. This problem occurs
>> with TSO/GSO even if the max packet size is set to the MTU, and is,
>> e.g., the cause of the failure reported in [1]. Two patches have been
>> proposed to solve this problem in [2], one of them is a preliminary
>> version of this patch.
>>
>> This patch addresses the above issues by: 1) setting QFQ parameters to
>> proper values for supporting TSO/GSO (in particular, setting the
>> maximum possible packet size to 64KB), 2) automatically increasing the
>> max packet size for a class, lmax, when a packet with a larger size
>> than the current value of lmax arrives (a notification is also appended
>> to the log).
>>
>> The drawback of the first point is that the maximum weight for a class
>> is now limited to 4096, which is equal to 1/16 of the maximum weight
>> sum.
>>
>> Finally, this patch also forcibly caps the timestamps of a class if
>> they are too high to be stored in the bucket list. This capping, taken
>> from QFQ+ [3], handles the infrequent case described in the comment to
>> the function slot_insert.
>>
>> [1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2
>> [2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2
>> [3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2
>>
>> Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
>
>
> Please respect people who help you to test it:
>
> Tested-by: Cong Wang <amwang@redhat.com>
Oops, really sorry about that. I did not mean to hide your contribution. 
I am just not familiar with the process.
>
> I tested it again just in case...
Thanks again
>
> By the way, one nit below:
>
>
>> +    if (unlikely(cl->lmax < qdisc_pkt_len(skb))) {
>> +        pr_notice("qfq: increasing maxpkt from %u to %u for class %u",
>> +              cl->lmax, qdisc_pkt_len(skb), cl->common.classid);
>> +        qfq_update_reactivate_class(q, cl, cl->inv_w,
>> +                        qdisc_pkt_len(skb), 0);
>> +    }
>
>
> The log level KERN_NOTICE seems overkill; normal users don't care
> about this information, so s/pr_notice/pr_debug/.
>
Thanks, I will integrate this and possible other suggestions in a new 
version of the patch.
>
> Thanks!
>
>


-- 
-----------------------------------------------------------
| Paolo Valente              |                            |
| Algogroup                  |                            |
| Dip. Ing. Informazione     | tel:   +39 059 2056318     |
| Via Vignolese 905/b        | fax:   +39 059 2056129     |
| 41125 Modena - Italy       |                            |
|     home:  http://algo.ing.unimo.it/people/paolo/       |
-----------------------------------------------------------

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO
  2012-10-29 11:24   ` Paolo Valente
@ 2012-10-29 11:31     ` Cong Wang
  0 siblings, 0 replies; 8+ messages in thread
From: Cong Wang @ 2012-10-29 11:31 UTC (permalink / raw)
  To: Paolo Valente
  Cc: jhs, davem, shemminger, linux-kernel, netdev, rizzo, fchecconi

On 10/29/2012 07:24 PM, Paolo Valente wrote:
> On 29/10/2012 12:08, Cong Wang wrote:
>>
>> Please respect people who help you to test it:
>>
>> Tested-by: Cong Wang <amwang@redhat.com>
> Oops, really sorry about that. I did not mean to hide your contribution.
> I am just not familiar with the process.

No problem. :)


> Thanks, I will integrate this and possible other suggestions in a new
> version of the patch.

While you are at it, please check your patch with
./scripts/checkpatch.pl; it complains about some trailing
whitespace in your patch. ;)


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO
  2012-11-05  8:41   ` Cong Wang
@ 2012-11-05 16:46     ` David Miller
  0 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2012-11-05 16:46 UTC (permalink / raw)
  To: xiyou.wangcong
  Cc: shemminger, paolo.valente, jhs, linux-kernel, netdev, rizzo, fchecconi

From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 5 Nov 2012 16:41:49 +0800

> On Tue, Oct 30, 2012 at 10:24 PM, Stephen Hemminger
> <shemminger@vyatta.com> wrote:
>> On Tue, 30 Oct 2012 07:00:56 +0100
>> Paolo Valente <paolo.valente@unimore.it> wrote:
>>
>>> Hi,
>>> if the max packet size for some class (configured through tc) is
>>> violated by the actual size of the packets of that class, then QFQ
>>> would not schedule classes correctly, and the data structures
>>> implementing the bucket lists may get corrupted. This problem occurs
>>> with TSO/GSO even if the max packet size is set to the MTU, and is,
>>> e.g., the cause of the failure reported in [1]. Two patches have been
>>> proposed to solve this problem in [2], one of them is a preliminary
>>> version of this patch.
>>>
>>> This patch addresses the above issues by: 1) setting QFQ parameters to
>>> proper values for supporting TSO/GSO (in particular, setting the
>>> maximum possible packet size to 64KB), 2) automatically increasing the
>>> max packet size for a class, lmax, when a packet with a larger size
>>> than the current value of lmax arrives.
>>>
>>> The drawback of the first point is that the maximum weight for a class
>>> is now limited to 4096, which is equal to 1/16 of the maximum weight
>>> sum.
>>>
>>> Finally, this patch also forcibly caps the timestamps of a class if
>>> they are too high to be stored in the bucket list. This capping, taken
>>> from QFQ+ [3], handles the infrequent case described in the comment to
>>> the function slot_insert.
>>>
>>> [1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2
>>> [2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2
>>> [3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2
>>>
>>> Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
>>> Tested-by: Cong Wang <amwang@redhat.com>
>>
>> Acked-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> 
> David, could you take this patch? Stephen acked it.

It's "RFC" so it needs to be explicitly reposted.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO
  2012-10-30 14:24 ` Stephen Hemminger
@ 2012-11-05  8:41   ` Cong Wang
  2012-11-05 16:46     ` David Miller
  0 siblings, 1 reply; 8+ messages in thread
From: Cong Wang @ 2012-11-05  8:41 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Paolo Valente, jhs, David Miller, LKML,
	Linux Kernel Network Developers, rizzo, fchecconi

On Tue, Oct 30, 2012 at 10:24 PM, Stephen Hemminger
<shemminger@vyatta.com> wrote:
> On Tue, 30 Oct 2012 07:00:56 +0100
> Paolo Valente <paolo.valente@unimore.it> wrote:
>
>> Hi,
>> if the max packet size for some class (configured through tc) is
>> violated by the actual size of the packets of that class, then QFQ
>> would not schedule classes correctly, and the data structures
>> implementing the bucket lists may get corrupted. This problem occurs
>> with TSO/GSO even if the max packet size is set to the MTU, and is,
>> e.g., the cause of the failure reported in [1]. Two patches have been
>> proposed to solve this problem in [2], one of them is a preliminary
>> version of this patch.
>>
>> This patch addresses the above issues by: 1) setting QFQ parameters to
>> proper values for supporting TSO/GSO (in particular, setting the
>> maximum possible packet size to 64KB), 2) automatically increasing the
>> max packet size for a class, lmax, when a packet with a larger size
>> than the current value of lmax arrives.
>>
>> The drawback of the first point is that the maximum weight for a class
>> is now limited to 4096, which is equal to 1/16 of the maximum weight
>> sum.
>>
>> Finally, this patch also forcibly caps the timestamps of a class if
>> they are too high to be stored in the bucket list. This capping, taken
>> from QFQ+ [3], handles the infrequent case described in the comment to
>> the function slot_insert.
>>
>> [1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2
>> [2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2
>> [3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2
>>
>> Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
>> Tested-by: Cong Wang <amwang@redhat.com>
>
> Acked-by: Stephen Hemminger <shemminger@vyatta.com>


David, could you take this patch? Stephen acked it.

Thanks!

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO
  2012-10-30  6:00 Paolo Valente
@ 2012-10-30 14:24 ` Stephen Hemminger
  2012-11-05  8:41   ` Cong Wang
  0 siblings, 1 reply; 8+ messages in thread
From: Stephen Hemminger @ 2012-10-30 14:24 UTC (permalink / raw)
  To: Paolo Valente; +Cc: jhs, davem, linux-kernel, netdev, rizzo, fchecconi

On Tue, 30 Oct 2012 07:00:56 +0100
Paolo Valente <paolo.valente@unimore.it> wrote:

> Hi,
> if the max packet size for some class (configured through tc) is
> violated by the actual size of the packets of that class, then QFQ
> would not schedule classes correctly, and the data structures
> implementing the bucket lists may get corrupted. This problem occurs
> with TSO/GSO even if the max packet size is set to the MTU, and is,
> e.g., the cause of the failure reported in [1]. Two patches have been
> proposed to solve this problem in [2], one of them is a preliminary
> version of this patch.
> 
> This patch addresses the above issues by: 1) setting QFQ parameters to
> proper values for supporting TSO/GSO (in particular, setting the
> maximum possible packet size to 64KB), 2) automatically increasing the
> max packet size for a class, lmax, when a packet with a larger size
> than the current value of lmax arrives.
> 
> The drawback of the first point is that the maximum weight for a class
> is now limited to 4096, which is equal to 1/16 of the maximum weight
> sum.
> 
> Finally, this patch also forcibly caps the timestamps of a class if
> they are too high to be stored in the bucket list. This capping, taken
> from QFQ+ [3], handles the infrequent case described in the comment to
> the function slot_insert.
> 
> [1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2
> [2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2
> [3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2
> 
> Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
> Tested-by: Cong Wang <amwang@redhat.com>

Acked-by: Stephen Hemminger <shemminger@vyatta.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH RFC] pkt_sched: enable QFQ to support TSO/GSO
@ 2012-10-30  6:00 Paolo Valente
  2012-10-30 14:24 ` Stephen Hemminger
  0 siblings, 1 reply; 8+ messages in thread
From: Paolo Valente @ 2012-10-30  6:00 UTC (permalink / raw)
  To: jhs, davem, shemminger
  Cc: linux-kernel, netdev, rizzo, fchecconi, paolo.valente

[new version modified according to the suggestions received]

Hi,
if the max packet size for some class (configured through tc) is
violated by the actual size of the packets of that class, then QFQ
would not schedule classes correctly, and the data structures
implementing the bucket lists may get corrupted. This problem occurs
with TSO/GSO even if the max packet size is set to the MTU, and is,
e.g., the cause of the failure reported in [1]. Two patches have been
proposed to solve this problem in [2], one of them is a preliminary
version of this patch.

This patch addresses the above issues by: 1) setting QFQ parameters to
proper values for supporting TSO/GSO (in particular, setting the
maximum possible packet size to 64KB), 2) automatically increasing the
max packet size for a class, lmax, when a packet with a larger size
than the current value of lmax arrives.

The drawback of the first point is that the maximum weight for a class
is now limited to 4096, which is equal to 1/16 of the maximum weight
sum.

Finally, this patch also forcibly caps the timestamps of a class if
they are too high to be stored in the bucket list. This capping, taken
from QFQ+ [3], handles the infrequent case described in the comment to
the function slot_insert.

[1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2
[2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2
[3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Tested-by: Cong Wang <amwang@redhat.com>
---
 net/sched/sch_qfq.c |  109 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 79 insertions(+), 30 deletions(-)

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f0dd83c..9687fa1 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -84,18 +84,19 @@
  * grp->index is the index of the group; and grp->slot_shift
  * is the shift for the corresponding (scaled) sigma_i.
  */
-#define QFQ_MAX_INDEX		19
-#define QFQ_MAX_WSHIFT		16
+#define QFQ_MAX_INDEX		24
+#define QFQ_MAX_WSHIFT		12
 
 #define	QFQ_MAX_WEIGHT		(1<<QFQ_MAX_WSHIFT)
-#define QFQ_MAX_WSUM		(2*QFQ_MAX_WEIGHT)
+#define QFQ_MAX_WSUM		(16*QFQ_MAX_WEIGHT)
 
 #define FRAC_BITS		30	/* fixed point arithmetic */
 #define ONE_FP			(1UL << FRAC_BITS)
 #define IWSUM			(ONE_FP/QFQ_MAX_WSUM)
 
-#define QFQ_MTU_SHIFT		11
+#define QFQ_MTU_SHIFT		16	/* to support TSO/GSO */
 #define QFQ_MIN_SLOT_SHIFT	(FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
+#define QFQ_MIN_LMAX		256	/* min possible lmax for a class */
 
 /*
  * Possible group states.  These values are used as indexes for the bitmaps
@@ -231,6 +232,32 @@ static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
 	q->wsum += delta_w;
 }
 
+static void qfq_update_reactivate_class(struct qfq_sched *q,
+					struct qfq_class *cl,
+					u32 inv_w, u32 lmax, int delta_w)
+{
+	bool need_reactivation = false;
+	int i = qfq_calc_index(inv_w, lmax);
+
+	if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
+		/*
+		 * shift cl->F back, to not charge the
+		 * class for the not-yet-served head
+		 * packet
+		 */
+		cl->F = cl->S;
+		/* remove class from its slot in the old group */
+		qfq_deactivate_class(q, cl);
+		need_reactivation = true;
+	}
+
+	qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
+
+	if (need_reactivation) /* activate in new group */
+		qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+}
+
+
 static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			    struct nlattr **tca, unsigned long *arg)
 {
@@ -238,7 +265,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	struct qfq_class *cl = (struct qfq_class *)*arg;
 	struct nlattr *tb[TCA_QFQ_MAX + 1];
 	u32 weight, lmax, inv_w;
-	int i, err;
+	int err;
 	int delta_w;
 
 	if (tca[TCA_OPTIONS] == NULL) {
@@ -270,16 +297,14 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tb[TCA_QFQ_LMAX]) {
 		lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
-		if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
+		if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
 			pr_notice("qfq: invalid max length %u\n", lmax);
 			return -EINVAL;
 		}
 	} else
-		lmax = 1UL << QFQ_MTU_SHIFT;
+		lmax = psched_mtu(qdisc_dev(sch));
 
 	if (cl != NULL) {
-		bool need_reactivation = false;
-
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
 						    qdisc_root_sleeping_lock(sch),
@@ -291,24 +316,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		if (lmax == cl->lmax && inv_w == cl->inv_w)
 			return 0; /* nothing to update */
 
-		i = qfq_calc_index(inv_w, lmax);
 		sch_tree_lock(sch);
-		if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
-			/*
-			 * shift cl->F back, to not charge the
-			 * class for the not-yet-served head
-			 * packet
-			 */
-			cl->F = cl->S;
-			/* remove class from its slot in the old group */
-			qfq_deactivate_class(q, cl);
-			need_reactivation = true;
-		}
-
-		qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
-
-		if (need_reactivation) /* activate in new group */
-			qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+		qfq_update_reactivate_class(q, cl, inv_w, lmax, delta_w);
 		sch_tree_unlock(sch);
 
 		return 0;
@@ -663,15 +672,48 @@ static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
 
 
 /*
- * XXX we should make sure that slot becomes less than 32.
- * This is guaranteed by the input values.
- * roundedS is always cl->S rounded on grp->slot_shift bits.
+ * If the weight and lmax (max_pkt_size) of the classes do not change,
+ * then QFQ guarantees that the slot index is never higher than
+ * 2 + ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM).
+ *
+ * With the current values of the above constants, the index is
+ * then guaranteed to never be higher than 2 + 256 * (1 / 16) = 18.
+ *
+ * When the weight of a class is increased or the lmax of the class is
+ * decreased, a new class with smaller slot size may happen to be
+ * activated. The activation of this class should be properly delayed
+ * to when the service of the class has finished in the ideal system
+ * tracked by QFQ. If the activation of the class is not delayed to
+ * this reference time instant, then this class may be unjustly served
+ * before other classes waiting for service. This may cause
+ * (infrequently) the above bound to the slot index to be violated for
+ * some of these unlucky classes.
+ *
+ * Instead of delaying the activation of the new class, which is quite
+ * complex, the following inaccurate but simple solution is used: if
+ * the slot index is higher than QFQ_MAX_SLOTS-2, then the timestamps
+ * of the class are shifted backward so as to let the slot index
+ * become equal to QFQ_MAX_SLOTS-2. This threshold is used because, if
+ * the slot index is above it, then the data structure implementing
+ * the bucket list either gets immediately corrupted or may get
+ * corrupted on a possible next packet arrival that causes the start
+ * time of the group to be shifted backward.
  */
 static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
 			    u64 roundedS)
 {
 	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
-	unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
+	unsigned int i; /* slot index in the bucket list */
+
+	if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
+		u64 deltaS = roundedS - grp->S -
+			((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
+		cl->S -= deltaS;
+		cl->F -= deltaS;
+		slot = QFQ_MAX_SLOTS - 2;
+	}
+
+	i = (grp->front + slot) % QFQ_MAX_SLOTS;
 
 	hlist_add_head(&cl->next, &grp->slots[i]);
 	__set_bit(slot, &grp->full_slots);
@@ -892,6 +934,13 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
 
+	if (unlikely(cl->lmax < qdisc_pkt_len(skb))) {
+		pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
+			  cl->lmax, qdisc_pkt_len(skb), cl->common.classid);
+		qfq_update_reactivate_class(q, cl, cl->inv_w,
+					    qdisc_pkt_len(skb), 0);
+	}
+
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread
