All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
@ 2013-03-10  3:20 Vimalkumar
  2013-03-10  4:03 ` Eric Dumazet
  0 siblings, 1 reply; 17+ messages in thread
From: Vimalkumar @ 2013-03-10  3:20 UTC (permalink / raw)
  To: netdev, eric.dumazet, shemminger; +Cc: Vimalkumar

Since rate values are passed around between kernel and tc
in bytes/sec, 2**32 bytes/sec is around 34Gb/s.  Beyond that
rate, htb, tbf, hfsc, etc. can never be configured correctly.

Signed-off-by: Vimalkumar <j.vimal@gmail.com>
---
 include/linux/gen_stats.h |    2 +-
 include/linux/pkt_sched.h |   10 +++++-----
 misc/ifstat.c             |    4 ++--
 tc/m_police.c             |    2 +-
 tc/q_cbq.c                |    2 +-
 tc/q_choke.c              |    2 +-
 tc/q_gred.c               |    2 +-
 tc/q_hfsc.c               |    6 ++++--
 tc/q_red.c                |    2 +-
 tc/tc_cbq.c               |    4 ++--
 tc/tc_cbq.h               |    4 ++--
 tc/tc_core.c              |    4 ++--
 tc/tc_core.h              |    4 ++--
 tc/tc_util.c              |    6 +++---
 tc/tc_util.h              |    6 +++---
 15 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h
index 552c8a0..5ca6015 100644
--- a/include/linux/gen_stats.h
+++ b/include/linux/gen_stats.h
@@ -33,7 +33,7 @@ struct gnet_stats_basic_packed {
  * @pps: current packet rate
  */
 struct gnet_stats_rate_est {
-	__u32	bps;
+	__u64	bps;
 	__u32	pps;
 };
 
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 32aef0a..d6bc658 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -35,7 +35,7 @@ struct tc_stats {
 	__u32	drops;			/* Packets dropped because of lack of resources */
 	__u32	overlimits;		/* Number of throttle events when this
 					 * flow goes out of allocated bandwidth */
-	__u32	bps;			/* Current flow byte rate */
+	__u64	bps;			/* Current flow byte rate */
 	__u32	pps;			/* Current flow packet rate */
 	__u32	qlen;
 	__u32	backlog;
@@ -79,7 +79,7 @@ struct tc_ratespec {
 	unsigned short	overhead;
 	short		cell_align;
 	unsigned short	mpu;
-	__u32		rate;
+	__u64		rate;
 };
 
 #define TC_RTAB_SIZE	1024
@@ -368,9 +368,9 @@ struct tc_hfsc_qopt {
 };
 
 struct tc_service_curve {
-	__u32	m1;		/* slope of the first segment in bps */
+	__u64	m1;		/* slope of the first segment in bps */
 	__u32	d;		/* x-projection of the first segment in us */
-	__u32	m2;		/* slope of the second segment in bps */
+	__u64	m2;		/* slope of the second segment in bps */
 };
 
 struct tc_hfsc_stats {
@@ -541,7 +541,7 @@ struct tc_netem_corrupt {
 };
 
 struct tc_netem_rate {
-	__u32	rate;	/* byte/s */
+	__u64	rate;	/* byte/s */
 	__s32	packet_overhead;
 	__u32	cell_size;
 	__s32	cell_overhead;
diff --git a/misc/ifstat.c b/misc/ifstat.c
index 6d0ad8c..67a97eb 100644
--- a/misc/ifstat.c
+++ b/misc/ifstat.c
@@ -181,7 +181,7 @@ static void load_raw_table(FILE *fp)
 		p = next;
 
 		for (i=0; i<MAXS; i++) {
-			unsigned rate;
+			__u64 rate;
 			if (!(next = strchr(p, ' ')))
 				abort();
 			*next++ = 0;
@@ -192,7 +192,7 @@ static void load_raw_table(FILE *fp)
 			if (!(next = strchr(p, ' ')))
 				abort();
 			*next++ = 0;
-			if (sscanf(p, "%u", &rate) != 1)
+			if (sscanf(p, "%llu", &rate) != 1)
 				abort();
 			n->rate[i] = rate;
 			p = next;
diff --git a/tc/m_police.c b/tc/m_police.c
index 53cbefc..b187440 100644
--- a/tc/m_police.c
+++ b/tc/m_police.c
@@ -131,7 +131,7 @@ int act_parse_police(struct action_util *a,int *argc_p, char ***argv_p, int tca_
 	struct tc_police p;
 	__u32 rtab[256];
 	__u32 ptab[256];
-	__u32 avrate = 0;
+	__u64 avrate = 0;
 	int presult = 0;
 	unsigned buffer=0, mtu=0, mpu=0;
 	unsigned short overhead=0;
diff --git a/tc/q_cbq.c b/tc/q_cbq.c
index 3c5e72c..a742248 100644
--- a/tc/q_cbq.c
+++ b/tc/q_cbq.c
@@ -190,7 +190,7 @@ static int cbq_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str
 	unsigned mpu=0;
 	int cell_log=-1;
 	int ewma_log=-1;
-	unsigned bndw = 0;
+	__u64 bndw = 0;
 	unsigned minburst=0, maxburst=0;
 	unsigned short overhead=0;
 	unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */
diff --git a/tc/q_choke.c b/tc/q_choke.c
index 6fbcadf..6cc691c 100644
--- a/tc/q_choke.c
+++ b/tc/q_choke.c
@@ -38,7 +38,7 @@ static int choke_parse_opt(struct qdisc_util *qu, int argc, char **argv,
 	unsigned burst = 0;
 	unsigned avpkt = 1000;
 	double probability = 0.02;
-	unsigned rate = 0;
+	__u64 rate = 0;
 	int ecn_ok = 0;
 	int wlog;
 	__u8 sbuf[256];
diff --git a/tc/q_gred.c b/tc/q_gred.c
index a4df3a6..3b9e079 100644
--- a/tc/q_gred.c
+++ b/tc/q_gred.c
@@ -122,7 +122,7 @@ static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct n
 	unsigned burst = 0;
 	unsigned avpkt = 0;
 	double probability = 0.02;
-	unsigned rate = 0;
+	__u64 rate = 0;
 	int wlog;
 	__u8 sbuf[256];
 	struct rtattr *tail;
diff --git a/tc/q_hfsc.c b/tc/q_hfsc.c
index 03539ec..ff3b8a0 100644
--- a/tc/q_hfsc.c
+++ b/tc/q_hfsc.c
@@ -294,7 +294,8 @@ hfsc_get_sc1(int *argcp, char ***argvp, struct tc_service_curve *sc)
 {
 	char **argv = *argvp;
 	int argc = *argcp;
-	unsigned int m1 = 0, d = 0, m2 = 0;
+	unsigned int d = 0;
+	__u64 m1 = 0, m2 = 0;
 
 	if (matches(*argv, "m1") == 0) {
 		NEXT_ARG();
@@ -337,7 +338,8 @@ hfsc_get_sc2(int *argcp, char ***argvp, struct tc_service_curve *sc)
 {
 	char **argv = *argvp;
 	int argc = *argcp;
-	unsigned int umax = 0, dmax = 0, rate = 0;
+	unsigned int umax = 0, dmax = 0;
+	__u64 rate;
 
 	if (matches(*argv, "umax") == 0) {
 		NEXT_ARG();
diff --git a/tc/q_red.c b/tc/q_red.c
index 89e7320..2fd9f61 100644
--- a/tc/q_red.c
+++ b/tc/q_red.c
@@ -39,7 +39,7 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl
 	unsigned burst = 0;
 	unsigned avpkt = 0;
 	double probability = 0.02;
-	unsigned rate = 0;
+	__u64 rate = 0;
 	int wlog;
 	__u8 sbuf[256];
 	__u32 max_P;
diff --git a/tc/tc_cbq.c b/tc/tc_cbq.c
index 0bb262e..46b9957 100644
--- a/tc/tc_cbq.c
+++ b/tc/tc_cbq.c
@@ -24,7 +24,7 @@
 #include "tc_core.h"
 #include "tc_cbq.h"
 
-unsigned tc_cbq_calc_maxidle(unsigned bndw, unsigned rate, unsigned avpkt,
+unsigned tc_cbq_calc_maxidle(__u64 bndw, __u64 rate, unsigned avpkt,
 			     int ewma_log, unsigned maxburst)
 {
 	double maxidle;
@@ -41,7 +41,7 @@ unsigned tc_cbq_calc_maxidle(unsigned bndw, unsigned rate, unsigned avpkt,
 	return tc_core_time2tick(maxidle*(1<<ewma_log)*TIME_UNITS_PER_SEC);
 }
 
-unsigned tc_cbq_calc_offtime(unsigned bndw, unsigned rate, unsigned avpkt,
+unsigned tc_cbq_calc_offtime(__u64 bndw, __u64 rate, unsigned avpkt,
 			     int ewma_log, unsigned minburst)
 {
 	double g = 1.0 - 1.0/(1<<ewma_log);
diff --git a/tc/tc_cbq.h b/tc/tc_cbq.h
index 8f95649..b485919 100644
--- a/tc/tc_cbq.h
+++ b/tc/tc_cbq.h
@@ -1,9 +1,9 @@
 #ifndef _TC_CBQ_H_
 #define _TC_CBQ_H_ 1
 
-unsigned tc_cbq_calc_maxidle(unsigned bndw, unsigned rate, unsigned avpkt,
+unsigned tc_cbq_calc_maxidle(__u64 bndw, __u64 rate, unsigned avpkt,
 			     int ewma_log, unsigned maxburst);
-unsigned tc_cbq_calc_offtime(unsigned bndw, unsigned rate, unsigned avpkt,
+unsigned tc_cbq_calc_offtime(__u64 bndw, __u64 rate, unsigned avpkt,
 			     int ewma_log, unsigned minburst);
 
 #endif
diff --git a/tc/tc_core.c b/tc/tc_core.c
index 85b072e..a3cacc1 100644
--- a/tc/tc_core.c
+++ b/tc/tc_core.c
@@ -56,12 +56,12 @@ unsigned tc_core_ktime2time(unsigned ktime)
 	return ktime / clock_factor;
 }
 
-unsigned tc_calc_xmittime(unsigned rate, unsigned size)
+unsigned tc_calc_xmittime(__u64 rate, unsigned size)
 {
 	return tc_core_time2tick(TIME_UNITS_PER_SEC*((double)size/rate));
 }
 
-unsigned tc_calc_xmitsize(unsigned rate, unsigned ticks)
+unsigned tc_calc_xmitsize(__u64 rate, unsigned ticks)
 {
 	return ((double)rate*tc_core_tick2time(ticks))/TIME_UNITS_PER_SEC;
 }
diff --git a/tc/tc_core.h b/tc/tc_core.h
index 5a693ba..8a63b79 100644
--- a/tc/tc_core.h
+++ b/tc/tc_core.h
@@ -18,8 +18,8 @@ unsigned tc_core_time2tick(unsigned time);
 unsigned tc_core_tick2time(unsigned tick);
 unsigned tc_core_time2ktime(unsigned time);
 unsigned tc_core_ktime2time(unsigned ktime);
-unsigned tc_calc_xmittime(unsigned rate, unsigned size);
-unsigned tc_calc_xmitsize(unsigned rate, unsigned ticks);
+unsigned tc_calc_xmittime(__u64 rate, unsigned size);
+unsigned tc_calc_xmitsize(__u64 rate, unsigned ticks);
 int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
 		   int cell_log, unsigned mtu, enum link_layer link_layer);
 int tc_calc_size_table(struct tc_sizespec *s, __u16 **stab);
diff --git a/tc/tc_util.c b/tc/tc_util.c
index 8e62a01..0939536 100644
--- a/tc/tc_util.c
+++ b/tc/tc_util.c
@@ -143,7 +143,7 @@ static const struct rate_suffix {
 };
 
 
-int get_rate(unsigned *rate, const char *str)
+int get_rate(__u64 *rate, const char *str)
 {
 	char *p;
 	double bps = strtod(str, &p);
@@ -167,7 +167,7 @@ int get_rate(unsigned *rate, const char *str)
 	return -1;
 }
 
-void print_rate(char *buf, int len, __u32 rate)
+void print_rate(char *buf, int len, __u64 rate)
 {
 	double tmp = (double)rate*8;
 	extern int use_iec;
@@ -189,7 +189,7 @@ void print_rate(char *buf, int len, __u32 rate)
 	}
 }
 
-char * sprint_rate(__u32 rate, char *buf)
+char * sprint_rate(__u64 rate, char *buf)
 {
 	print_rate(buf, SPRINT_BSIZE-1, rate);
 	return buf;
diff --git a/tc/tc_util.h b/tc/tc_util.h
index 4f54436..45d5d1a 100644
--- a/tc/tc_util.h
+++ b/tc/tc_util.h
@@ -57,18 +57,18 @@ extern struct qdisc_util *get_qdisc_kind(const char *str);
 extern struct filter_util *get_filter_kind(const char *str);
 
 extern int get_qdisc_handle(__u32 *h, const char *str);
-extern int get_rate(unsigned *rate, const char *str);
+extern int get_rate(__u64 *rate, const char *str);
 extern int get_size(unsigned *size, const char *str);
 extern int get_size_and_cell(unsigned *size, int *cell_log, char *str);
 extern int get_time(unsigned *time, const char *str);
 extern int get_linklayer(unsigned *val, const char *arg);
 
-extern void print_rate(char *buf, int len, __u32 rate);
+extern void print_rate(char *buf, int len, __u64 rate);
 extern void print_size(char *buf, int len, __u32 size);
 extern void print_qdisc_handle(char *buf, int len, __u32 h);
 extern void print_time(char *buf, int len, __u32 time);
 extern void print_linklayer(char *buf, int len, unsigned linklayer);
-extern char * sprint_rate(__u32 rate, char *buf);
+extern char * sprint_rate(__u64 rate, char *buf);
 extern char * sprint_size(__u32 size, char *buf);
 extern char * sprint_qdisc_handle(__u32 h, char *buf);
 extern char * sprint_tc_classid(__u32 h, char *buf);
-- 
1.7.5.3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-10  3:20 [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit) Vimalkumar
@ 2013-03-10  4:03 ` Eric Dumazet
  2013-03-10  4:53   ` Vimal
  0 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2013-03-10  4:03 UTC (permalink / raw)
  To: Vimalkumar; +Cc: netdev, shemminger

On Sat, 2013-03-09 at 19:20 -0800, Vimalkumar wrote:
> Since rate values are passed around between kernel and tc
> in bytes/sec, 2**32 bytes/sec is around 34Gb/s.  Beyond that
> rate, htb, tbf, hfsc, etc. can never be configured correctly.
> 
> Signed-off-by: Vimalkumar <j.vimal@gmail.com>
> ---
>  include/linux/gen_stats.h |    2 +-
>  include/linux/pkt_sched.h |   10 +++++-----
>  misc/ifstat.c             |    4 ++--
>  tc/m_police.c             |    2 +-
>  tc/q_cbq.c                |    2 +-
>  tc/q_choke.c              |    2 +-
>  tc/q_gred.c               |    2 +-
>  tc/q_hfsc.c               |    6 ++++--
>  tc/q_red.c                |    2 +-
>  tc/tc_cbq.c               |    4 ++--
>  tc/tc_cbq.h               |    4 ++--
>  tc/tc_core.c              |    4 ++--
>  tc/tc_core.h              |    4 ++--
>  tc/tc_util.c              |    6 +++---
>  tc/tc_util.h              |    6 +++---
>  15 files changed, 31 insertions(+), 29 deletions(-)
> 
> diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h
> index 552c8a0..5ca6015 100644
> --- a/include/linux/gen_stats.h
> +++ b/include/linux/gen_stats.h
> @@ -33,7 +33,7 @@ struct gnet_stats_basic_packed {
>   * @pps: current packet rate
>   */
>  struct gnet_stats_rate_est {
> -	__u32	bps;
> +	__u64	bps;
>  	__u32	pps;
>  };
>  
> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
> index 32aef0a..d6bc658 100644
> --- a/include/linux/pkt_sched.h
> +++ b/include/linux/pkt_sched.h
> @@ -35,7 +35,7 @@ struct tc_stats {
>  	__u32	drops;			/* Packets dropped because of lack of resources */
>  	__u32	overlimits;		/* Number of throttle events when this
>  					 * flow goes out of allocated bandwidth */
> -	__u32	bps;			/* Current flow byte rate */
> +	__u64	bps;			/* Current flow byte rate */
>  	__u32	pps;			/* Current flow packet rate */
>  	__u32	qlen;
>  	__u32	backlog;
> @@ -79,7 +79,7 @@ struct tc_ratespec {
>  	unsigned short	overhead;
>  	short		cell_align;
>  	unsigned short	mpu;
> -	__u32		rate;
> +	__u64		rate;
>  };
>  

You cannot do that without breaking user land tools.

Not only tc, but all user applications as well.

We need to support compatibility, before considering adding such
changes.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-10  4:03 ` Eric Dumazet
@ 2013-03-10  4:53   ` Vimal
  2013-03-10  5:05     ` Eric Dumazet
  0 siblings, 1 reply; 17+ messages in thread
From: Vimal @ 2013-03-10  4:53 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, shemminger

Ok, do you have suggestions on how to do this?  Maybe a better way to
do this would be to introduce an additional "multipler" option for
rates, which is set to 1 as default, so actual rate can be computed as
multipler * rate supplied.

On 9 March 2013 20:03, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Sat, 2013-03-09 at 19:20 -0800, Vimalkumar wrote:
>> Since rate values are passed around between kernel and tc
>> in bytes/sec, 2**32 bytes/sec is around 34Gb/s.  Beyond that
>> rate, htb, tbf, hfsc, etc. can never be configured correctly.
>>
>> Signed-off-by: Vimalkumar <j.vimal@gmail.com>
>> ---
>>  include/linux/gen_stats.h |    2 +-
>>  include/linux/pkt_sched.h |   10 +++++-----
>>  misc/ifstat.c             |    4 ++--
>>  tc/m_police.c             |    2 +-
>>  tc/q_cbq.c                |    2 +-
>>  tc/q_choke.c              |    2 +-
>>  tc/q_gred.c               |    2 +-
>>  tc/q_hfsc.c               |    6 ++++--
>>  tc/q_red.c                |    2 +-
>>  tc/tc_cbq.c               |    4 ++--
>>  tc/tc_cbq.h               |    4 ++--
>>  tc/tc_core.c              |    4 ++--
>>  tc/tc_core.h              |    4 ++--
>>  tc/tc_util.c              |    6 +++---
>>  tc/tc_util.h              |    6 +++---
>>  15 files changed, 31 insertions(+), 29 deletions(-)
>>
>> diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h
>> index 552c8a0..5ca6015 100644
>> --- a/include/linux/gen_stats.h
>> +++ b/include/linux/gen_stats.h
>> @@ -33,7 +33,7 @@ struct gnet_stats_basic_packed {
>>   * @pps: current packet rate
>>   */
>>  struct gnet_stats_rate_est {
>> -     __u32   bps;
>> +     __u64   bps;
>>       __u32   pps;
>>  };
>>
>> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
>> index 32aef0a..d6bc658 100644
>> --- a/include/linux/pkt_sched.h
>> +++ b/include/linux/pkt_sched.h
>> @@ -35,7 +35,7 @@ struct tc_stats {
>>       __u32   drops;                  /* Packets dropped because of lack of resources */
>>       __u32   overlimits;             /* Number of throttle events when this
>>                                        * flow goes out of allocated bandwidth */
>> -     __u32   bps;                    /* Current flow byte rate */
>> +     __u64   bps;                    /* Current flow byte rate */
>>       __u32   pps;                    /* Current flow packet rate */
>>       __u32   qlen;
>>       __u32   backlog;
>> @@ -79,7 +79,7 @@ struct tc_ratespec {
>>       unsigned short  overhead;
>>       short           cell_align;
>>       unsigned short  mpu;
>> -     __u32           rate;
>> +     __u64           rate;
>>  };
>>
>
> You cannot do that without breaking user land tools.
>
> Not only tc, but all user applications as well.
>
> We need to support compatibility, before considering adding such
> changes.
>
>
>



--
Vimal

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-10  4:53   ` Vimal
@ 2013-03-10  5:05     ` Eric Dumazet
  2013-03-10  5:49       ` Bill Fink
  0 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2013-03-10  5:05 UTC (permalink / raw)
  To: Vimal; +Cc: netdev, shemminger

On Sat, 2013-03-09 at 20:53 -0800, Vimal wrote:
> Ok, do you have suggestions on how to do this?  Maybe a better way to
> do this would be to introduce an additional "multipler" option for
> rates, which is set to 1 as default, so actual rate can be computed as
> multipler * rate supplied.

How an old program, in binary form, will automatically knows it has to
change its behavior to use an inexistent field ?

I can use an old distro, and update kernel to upstream kernel, it must
continue to work.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-10  5:05     ` Eric Dumazet
@ 2013-03-10  5:49       ` Bill Fink
  2013-03-10  5:54         ` Eric Dumazet
  0 siblings, 1 reply; 17+ messages in thread
From: Bill Fink @ 2013-03-10  5:49 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Vimal, netdev, shemminger

On Sun, 10 Mar 2013, Eric Dumazet wrote:

> On Sat, 2013-03-09 at 20:53 -0800, Vimal wrote:
> > Ok, do you have suggestions on how to do this?  Maybe a better way to
> > do this would be to introduce an additional "multipler" option for
> > rates, which is set to 1 as default, so actual rate can be computed as
> > multipler * rate supplied.
> 
> How an old program, in binary form, will automatically knows it has to
> change its behavior to use an inexistent field ?
> 
> I can use an old distro, and update kernel to upstream kernel, it must
> continue to work.

I don't see the problem.  An old program would not know about
the new multiplier, would thus get the default multiplier of 1,
and get the same behavior as always, with the same limitation
of ~34 Gbps.  But someone with a newer tc/kernel could for example
specify a multiplier of 10, which would then support rates up to
about 340 Gbps.  It sounds like a reasonable approach to me.

						-Bill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-10  5:49       ` Bill Fink
@ 2013-03-10  5:54         ` Eric Dumazet
  2013-03-12 14:29           ` Chris Friesen
  0 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2013-03-10  5:54 UTC (permalink / raw)
  To: Bill Fink; +Cc: Vimal, netdev, shemminger

On Sun, 2013-03-10 at 00:49 -0500, Bill Fink wrote:

> I don't see the problem.  An old program would not know about
> the new multiplier, would thus get the default multiplier of 1,
> and get the same behavior as always, with the same limitation
> of ~34 Gbps.  But someone with a newer tc/kernel could for example
> specify a multiplier of 10, which would then support rates up to
> about 340 Gbps.  It sounds like a reasonable approach to me.

Hopefully, some of us see the problem here and are able to reject
patches before breaking user land.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-10  5:54         ` Eric Dumazet
@ 2013-03-12 14:29           ` Chris Friesen
  2013-03-12 15:42             ` Thomas Graf
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Friesen @ 2013-03-12 14:29 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Bill Fink, Vimal, netdev, shemminger

On 03/09/2013 11:54 PM, Eric Dumazet wrote:
> On Sun, 2013-03-10 at 00:49 -0500, Bill Fink wrote:
>
>> I don't see the problem.  An old program would not know about
>> the new multiplier, would thus get the default multiplier of 1,
>> and get the same behavior as always, with the same limitation
>> of ~34 Gbps.  But someone with a newer tc/kernel could for example
>> specify a multiplier of 10, which would then support rates up to
>> about 340 Gbps.  It sounds like a reasonable approach to me.
>
> Hopefully, some of us see the problem here and are able to reject
> patches before breaking user land.

The only problem I see is that you can't set the multiplier with a new 
tool and then query the rate with old tools.

But you're going to run into that problem with the old tools no matter 
what you do--and not doing anything is a crappy option as well.

Some kind of multiplier or shift makes as much sense as anything else. 
With old tools you get current behaviour, with new tools you can specify 
a multiplying factor to trade off resolution vs precision.

Chris

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-12 14:29           ` Chris Friesen
@ 2013-03-12 15:42             ` Thomas Graf
  2013-03-12 15:44               ` Eric Dumazet
                                 ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Thomas Graf @ 2013-03-12 15:42 UTC (permalink / raw)
  To: Chris Friesen; +Cc: Eric Dumazet, Bill Fink, Vimal, netdev, shemminger

On 03/12/13 at 08:29am, Chris Friesen wrote:
> The only problem I see is that you can't set the multiplier with a
> new tool and then query the rate with old tools.
> 
> But you're going to run into that problem with the old tools no
> matter what you do--and not doing anything is a crappy option as
> well.
> 
> Some kind of multiplier or shift makes as much sense as anything
> else. With old tools you get current behaviour, with new tools you
> can specify a multiplying factor to trade off resolution vs
> precision.

The introduction of a shift operator or multiplier introduces
inprecision. I'd much rather see new 64bit Netlink attributes
that, if present, replace the old rate values and statistics.

You will need to add a new Netlink attribute anyway and we might
as well transfer the actual rate instead of a multiplier. Just
like we did with IFLA_STATS64.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-12 15:42             ` Thomas Graf
@ 2013-03-12 15:44               ` Eric Dumazet
  2013-03-12 15:53               ` Chris Friesen
  2013-03-13  6:01               ` Bill Fink
  2 siblings, 0 replies; 17+ messages in thread
From: Eric Dumazet @ 2013-03-12 15:44 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Chris Friesen, Bill Fink, Vimal, netdev, shemminger

On Tue, 2013-03-12 at 15:42 +0000, Thomas Graf wrote:

> The introduction of a shift operator or multiplier introduces
> inprecision. I'd much rather see new 64bit Netlink attributes
> that, if present, replace the old rate values and statistics.
> 
> You will need to add a new Netlink attribute anyway and we might
> as well transfer the actual rate instead of a multiplier. Just
> like we did with IFLA_STATS64.

Thanks Thomas !

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-12 15:42             ` Thomas Graf
  2013-03-12 15:44               ` Eric Dumazet
@ 2013-03-12 15:53               ` Chris Friesen
  2013-03-12 15:56                 ` Chris Friesen
  2013-03-13  6:01               ` Bill Fink
  2 siblings, 1 reply; 17+ messages in thread
From: Chris Friesen @ 2013-03-12 15:53 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Eric Dumazet, Bill Fink, Vimal, netdev, shemminger

On 03/12/2013 09:42 AM, Thomas Graf wrote:
> On 03/12/13 at 08:29am, Chris Friesen wrote:
>> The only problem I see is that you can't set the multiplier with a
>> new tool and then query the rate with old tools.
>>
>> But you're going to run into that problem with the old tools no
>> matter what you do--and not doing anything is a crappy option as
>> well.
>>
>> Some kind of multiplier or shift makes as much sense as anything
>> else. With old tools you get current behaviour, with new tools you
>> can specify a multiplying factor to trade off resolution vs
>> precision.
>
> The introduction of a shift operator or multiplier introduces
> inprecision. I'd much rather see new 64bit Netlink attributes
> that, if present, replace the old rate values and statistics.

I have strong opinions either way, but I don't think "imprecision" is a 
problem in practice.

With the shift/multiplier we would have 32 bits of precision.  Do you 
really expect that someone will care about the difference between 
8589934592 and 8589934593?  Or the difference between 40000000000 and 
40000000010?

Chris

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-12 15:53               ` Chris Friesen
@ 2013-03-12 15:56                 ` Chris Friesen
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Friesen @ 2013-03-12 15:56 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Eric Dumazet, Bill Fink, Vimal, netdev, shemminger

On 03/12/2013 09:53 AM, Chris Friesen wrote:

> I have strong opinions either way, but I don't think "imprecision" is a
> problem in practice.

Oops, make that "I don't have strong opinions...".   :)

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-12 15:42             ` Thomas Graf
  2013-03-12 15:44               ` Eric Dumazet
  2013-03-12 15:53               ` Chris Friesen
@ 2013-03-13  6:01               ` Bill Fink
  2013-03-13  6:13                 ` Eric Dumazet
  2 siblings, 1 reply; 17+ messages in thread
From: Bill Fink @ 2013-03-13  6:01 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Chris Friesen, Eric Dumazet, Vimal, netdev, shemminger

On Tue, 12 Mar 2013, Thomas Graf wrote:

> On 03/12/13 at 08:29am, Chris Friesen wrote:
> > The only problem I see is that you can't set the multiplier with a
> > new tool and then query the rate with old tools.
> > 
> > But you're going to run into that problem with the old tools no
> > matter what you do--and not doing anything is a crappy option as
> > well.
> > 
> > Some kind of multiplier or shift makes as much sense as anything
> > else. With old tools you get current behaviour, with new tools you
> > can specify a multiplying factor to trade off resolution vs
> > precision.
> 
> The introduction of a shift operator or multiplier introduces
> inprecision. I'd much rather see new 64bit Netlink attributes
> that, if present, replace the old rate values and statistics.
> 
> You will need to add a new Netlink attribute anyway and we might
> as well transfer the actual rate instead of a multiplier. Just
> like we did with IFLA_STATS64.

The last time this was discussed appears to be (on 2011-03-28):

	http://marc.info/?l=linux-netdev&m=130128741907282&w=2

where Maciej Żenczykowski argued that creating a new 64-bit
Netlink attribute for this would be much more complex than for
the IFLA_STATS64 support.  There was no reply.

Providing a new multiplier/shift parameter would be a simple
way to extend support for higher rates, and would not break
existing user space that doesn't require the higher rates.
I imagine the user would not explicitly specify the multiplier/
shift parameter, but would just normally specify the desired
rate, and a newer tc would figure out what multiplier/shift
to use if a high enough rate demanded it.  To maintain user
space compatibility, the kernel should report back the same
rate and multiplier/shift it was given, and the newer tc would
convert it back to the user's originally specified rate.  Older
user space that was fine with the ~34 Gbps rate limitation would
always have the default multiplier of 1 or shift of 0 bits, and
would see the exact same unmultiplied/unshifted rate it always
did.

I also believe 32 bits of precision is significant enough
at these higher data rates.

					-Bill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-13  6:01               ` Bill Fink
@ 2013-03-13  6:13                 ` Eric Dumazet
  2013-03-13 15:29                   ` Bill Fink
  0 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2013-03-13  6:13 UTC (permalink / raw)
  To: Bill Fink; +Cc: Thomas Graf, Chris Friesen, Vimal, netdev, shemminger

On Wed, 2013-03-13 at 02:01 -0400, Bill Fink wrote:

> The last time this was discussed appears to be (on 2011-03-28):
> 
> 	http://marc.info/?l=linux-netdev&m=130128741907282&w=2
> 
> where Maciej Żenczykowski argued that creating a new 64-bit
> Netlink attribute for this would be much more complex than for
> the IFLA_STATS64 support.  There was no reply.
> 
> Providing a new multiplier/shift parameter would be a simple
> way to extend support for higher rates, and would not break
> existing user space that doesn't require the higher rates.
> I imagine the user would not explicitly specify the multiplier/
> shift parameter, but would just normally specify the desired
> rate, and a newer tc would figure out what multiplier/shift
> to use if a high enough rate demanded it.  To maintain user
> space compatibility, the kernel should report back the same
> rate and multiplier/shift it was given, and the newer tc would
> convert it back to the user's originally specified rate.  Older
> user space that was fine with the ~34 Gbps rate limitation would
> always have the default multiplier of 1 or shift of 0 bits, and
> would see the exact same unmultiplied/unshifted rate it always
> did.


We already said no to such a hack. Maybe its not clear enough ?

netlink allows us to a proper way, and Thomas Graf explained how we
expect the thing to be done.

Yes, this is not a one liner patch, its a bit more of work, and its how
it will be done when someone does the job.


> I also believe 32 bits of precision is significant enough
> at these higher data rates.
> 

This has nothing to do with the issue. Thats an implementation detail in
the kernel. And frankly with 64bit arches we better use 64bit native
fields, instead of adding arbitrary limits.

We are speaking of the netlink message itself and old binaries.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-13  6:13                 ` Eric Dumazet
@ 2013-03-13 15:29                   ` Bill Fink
  2013-03-13 15:34                     ` Stephen Hemminger
  0 siblings, 1 reply; 17+ messages in thread
From: Bill Fink @ 2013-03-13 15:29 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Thomas Graf, Chris Friesen, Vimal, netdev, shemminger

On Wed, 13 Mar 2013, Eric Dumazet wrote:

> On Wed, 2013-03-13 at 02:01 -0400, Bill Fink wrote:
> 
> > The last time this was discussed appears to be (on 2011-03-28):
> > 
> > 	http://marc.info/?l=linux-netdev&m=130128741907282&w=2
> > 
> > where Maciej Żenczykowski argued that creating a new 64-bit
> > Netlink attribute for this would be much more complex than for
> > the IFLA_STATS64 support.  There was no reply.
> > 
> > Providing a new multiplier/shift parameter would be a simple
> > way to extend support for higher rates, and would not break
> > existing user space that doesn't require the higher rates.
> > I imagine the user would not explicitly specify the multiplier/
> > shift parameter, but would just normally specify the desired
> > rate, and a newer tc would figure out what multiplier/shift
> > to use if a high enough rate demanded it.  To maintain user
> > space compatibility, the kernel should report back the same
> > rate and multiplier/shift it was given, and the newer tc would
> > convert it back to the user's originally specified rate.  Older
> > user space that was fine with the ~34 Gbps rate limitation would
> > always have the default multiplier of 1 or shift of 0 bits, and
> > would see the exact same unmultiplied/unshifted rate it always
> > did.
> 
> We already said no to such a hack. Maybe its not clear enough ?
> 
> netlink allows us to a proper way, and Thomas Graf explained how we
> expect the thing to be done.
> 
> Yes, this is not a one liner patch, its a bit more of work, and its how
> it will be done when someone does the job.

I've no problem with that since it is a cleaner solution, but
one that requires significantly more work.  I was only arguing
that the multiplier/shift approach was also a workable solution
and should be simpler to implement.  But since there appears to
be developer consensus that it's not a desired method, I'm fine
with going along with that expert opinion.

						-Bill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-13 15:29                   ` Bill Fink
@ 2013-03-13 15:34                     ` Stephen Hemminger
  2013-03-13 16:57                       ` Chris Friesen
  2013-03-14  4:08                       ` Bill Fink
  0 siblings, 2 replies; 17+ messages in thread
From: Stephen Hemminger @ 2013-03-13 15:34 UTC (permalink / raw)
  To: Bill Fink
  Cc: Eric Dumazet, Thomas Graf, Chris Friesen, Vimal, netdev, shemminger

On Wed, 13 Mar 2013 11:29:50 -0400
Bill Fink <billfink@mindspring.com> wrote:

> On Wed, 13 Mar 2013, Eric Dumazet wrote:
> 
> > On Wed, 2013-03-13 at 02:01 -0400, Bill Fink wrote:
> > 
> > > The last time this was discussed appears to be (on 2011-03-28):
> > > 
> > > 	http://marc.info/?l=linux-netdev&m=130128741907282&w=2
> > > 
> > > where Maciej Żenczykowski argued that creating a new 64-bit
> > > Netlink attribute for this would be much more complex than for
> > > the IFLA_STATS64 support.  There was no reply.
> > > 
> > > Providing a new multiplier/shift parameter would be a simple
> > > way to extend support for higher rates, and would not break
> > > existing user space that doesn't require the higher rates.
> > > I imagine the user would not explicitly specify the multiplier/
> > > shift parameter, but would just normally specify the desired
> > > rate, and a newer tc would figure out what multiplier/shift
> > > to use if a high enough rate demanded it.  To maintain user
> > > space compatibility, the kernel should report back the same
> > > rate and multiplier/shift it was given, and the newer tc would
> > > convert it back to the user's originally specified rate.  Older
> > > user space that was fine with the ~34 Gbps rate limitation would
> > > always have the default multiplier of 1 or shift of 0 bits, and
> > > would see the exact same unmultiplied/unshifted rate it always
> > > did.
> > 
> > We already said no to such a hack. Maybe its not clear enough ?
> > 
> > netlink allows us to a proper way, and Thomas Graf explained how we
> > expect the thing to be done.
> > 
> > Yes, this is not a one liner patch, its a bit more of work, and its how
> > it will be done when someone does the job.
> 
> I've no problem with that since it is a cleaner solution, but
> one that requires significantly more work.  I was only arguing
> that the multiplier/shift approach was also a workable solution
> and should be simpler to implement.  But since there appears to
> be developer consensus that it's not a desired method, I'm fine
> with going along with that expert opinion.
> 
> 						-Bill

As others have said the multiplier shift approach is a not a workable
solution because it is likely to cause too many compatibility surprises.
Older kernels would ignore the multiplier and therefore not give the users
the effective rate they wanted.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-13 15:34                     ` Stephen Hemminger
@ 2013-03-13 16:57                       ` Chris Friesen
  2013-03-14  4:08                       ` Bill Fink
  1 sibling, 0 replies; 17+ messages in thread
From: Chris Friesen @ 2013-03-13 16:57 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Bill Fink, Eric Dumazet, Thomas Graf, Vimal, netdev, shemminger

On 03/13/2013 09:34 AM, Stephen Hemminger wrote:

> As others have said the multiplier shift approach is a not a workable
> solution because it is likely to cause too many compatibility surprises.
> Older kernels would ignore the multiplier and therefore not give the users
> the effective rate they wanted.

How is that any different than ignoring the 64-bit value and not giving 
the effective rate they wanted?

Chris

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit)
  2013-03-13 15:34                     ` Stephen Hemminger
  2013-03-13 16:57                       ` Chris Friesen
@ 2013-03-14  4:08                       ` Bill Fink
  1 sibling, 0 replies; 17+ messages in thread
From: Bill Fink @ 2013-03-14  4:08 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Eric Dumazet, Thomas Graf, Chris Friesen, Vimal, netdev, shemminger

On Wed, 13 Mar 2013, Stephen Hemminger wrote:

> On Wed, 13 Mar 2013 11:29:50 -0400
> Bill Fink <billfink@mindspring.com> wrote:
> 
> > On Wed, 13 Mar 2013, Eric Dumazet wrote:
> > 
> > > On Wed, 2013-03-13 at 02:01 -0400, Bill Fink wrote:
> > > 
> > > > The last time this was discussed appears to be (on 2011-03-28):
> > > > 
> > > > 	http://marc.info/?l=linux-netdev&m=130128741907282&w=2
> > > > 
> > > > where Maciej Żenczykowski argued that creating a new 64-bit
> > > > Netlink attribute for this would be much more complex than for
> > > > the IFLA_STATS64 support.  There was no reply.
> > > > 
> > > > Providing a new multiplier/shift parameter would be a simple
> > > > way to extend support for higher rates, and would not break
> > > > existing user space that doesn't require the higher rates.
> > > > I imagine the user would not explicitly specify the multiplier/
> > > > shift parameter, but would just normally specify the desired
> > > > rate, and a newer tc would figure out what multiplier/shift
> > > > to use if a high enough rate demanded it.  To maintain user
> > > > space compatibility, the kernel should report back the same
> > > > rate and multiplier/shift it was given, and the newer tc would
> > > > convert it back to the user's originally specified rate.  Older
> > > > user space that was fine with the ~34 Gbps rate limitation would
> > > > always have the default multiplier of 1 or shift of 0 bits, and
> > > > would see the exact same unmultiplied/unshifted rate it always
> > > > did.
> > > 
> > > We already said no to such a hack. Maybe its not clear enough ?
> > > 
> > > netlink allows us to a proper way, and Thomas Graf explained how we
> > > expect the thing to be done.
> > > 
> > > Yes, this is not a one liner patch, its a bit more of work, and its how
> > > it will be done when someone does the job.
> > 
> > I've no problem with that since it is a cleaner solution, but
> > one that requires significantly more work.  I was only arguing
> > that the multiplier/shift approach was also a workable solution
> > and should be simpler to implement.  But since there appears to
> > be developer consensus that it's not a desired method, I'm fine
> > with going along with that expert opinion.
> > 
> > 						-Bill
> 
> As others have said the multiplier shift approach is a not a workable
> solution because it is likely to cause too many compatibility surprises.
> Older kernels would ignore the multiplier and therefore not give the users
> the effective rate they wanted.

Hopefully they would get an error saying that the rate was not
supported by the running kernel, from a failure of trying to
set the multiplier/shift.  You can't get new features from an
old kernel.  But anything working today should still work since
if your rate is less than or equal to ~34 Gbps, your multiplier
would be 1 (or shift of 0 bits), and thus the effective rate is
unmodified from what the user specified (and thus no need to even
use the new interface).

Today if you specify a rate greater than ~34 Gbps, you don't get
what you expected, since from what I understand the value just
gets silently truncated so 40 Gbps results in 5.64 Gbps.  See:

	http://marc.info/?l=linux-netdev&m=130103727012841&w=2

So I don't think anything would get broken that isn't already
broken (or just currently impossible to do).

But I've already said that I'm fine with a more proper solution.
I just hope someone will implement it before another 2 years
passes, as I suspect 100G NICs will become available in that
timeframe.

					-Bill

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2013-03-14  4:08 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-10  3:20 [PATCH] Rate should be u64 to avoid integer overflow at high speeds (>= ~35Gbit) Vimalkumar
2013-03-10  4:03 ` Eric Dumazet
2013-03-10  4:53   ` Vimal
2013-03-10  5:05     ` Eric Dumazet
2013-03-10  5:49       ` Bill Fink
2013-03-10  5:54         ` Eric Dumazet
2013-03-12 14:29           ` Chris Friesen
2013-03-12 15:42             ` Thomas Graf
2013-03-12 15:44               ` Eric Dumazet
2013-03-12 15:53               ` Chris Friesen
2013-03-12 15:56                 ` Chris Friesen
2013-03-13  6:01               ` Bill Fink
2013-03-13  6:13                 ` Eric Dumazet
2013-03-13 15:29                   ` Bill Fink
2013-03-13 15:34                     ` Stephen Hemminger
2013-03-13 16:57                       ` Chris Friesen
2013-03-14  4:08                       ` Bill Fink

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.