All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next] net_sched: add 64bit rate estimators
@ 2013-06-05  5:00 Eric Dumazet
  2013-06-05 18:55 ` Ben Hutchings
  2013-06-06 15:43 ` [PATCH v2 " Eric Dumazet
  0 siblings, 2 replies; 8+ messages in thread
From: Eric Dumazet @ 2013-06-05  5:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

From: Eric Dumazet <edumazet@google.com>

struct gnet_stats_rate_est contains u32 fields, so the bytes per second
field can wrap at 34360Mbit.

Add a new gnet_stats_rate_est64 structure to get 64bit bps field, and
switch the kernel to use this structure natively.

This structure is dumped to user space as a new attribute :

TCA_STATS_RATE_EST64

Old tc command will now display the capped bps (to 34360Mbit), instead
of wrapped values, and updated tc command will display correct
information.

Old tc command output, after patch :

eric:~# tc -s -d qd sh dev lo
qdisc pfifo 8001: root refcnt 2 limit 1000p
 Sent 291983404374 bytes 8543320 pkt (dropped 0, overlimits 0 requeues 0) 
 rate 34360Mbit 246746pps backlog 0b 0p requeues 0 

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/act_api.h              |    2 +-
 include/net/gen_stats.h            |   10 +++++-----
 include/net/netfilter/xt_rateest.h |    4 ++--
 include/net/sch_generic.h          |    2 +-
 include/uapi/linux/gen_stats.h     |   11 +++++++++++
 net/core/gen_estimator.c           |   12 ++++++------
 net/core/gen_stats.c               |   20 +++++++++++++++-----
 net/netfilter/xt_rateest.c         |    2 +-
 net/sched/sch_cbq.c                |    2 +-
 net/sched/sch_drr.c                |    2 +-
 net/sched/sch_hfsc.c               |    2 +-
 net/sched/sch_htb.c                |    2 +-
 net/sched/sch_qfq.c                |    2 +-
 13 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 06ef7e9..b8ffac7 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -18,7 +18,7 @@ struct tcf_common {
 	struct tcf_t			tcfc_tm;
 	struct gnet_stats_basic_packed	tcfc_bstats;
 	struct gnet_stats_queue		tcfc_qstats;
-	struct gnet_stats_rate_est	tcfc_rate_est;
+	struct gnet_stats_rate_est64	tcfc_rate_est;
 	spinlock_t			tcfc_lock;
 	struct rcu_head			tcfc_rcu;
 };
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index a79b6cf..cf8439b 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -30,7 +30,7 @@ extern int gnet_stats_copy_basic(struct gnet_dump *d,
 				 struct gnet_stats_basic_packed *b);
 extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
 				    const struct gnet_stats_basic_packed *b,
-				    struct gnet_stats_rate_est *r);
+				    struct gnet_stats_rate_est64 *r);
 extern int gnet_stats_copy_queue(struct gnet_dump *d,
 				 struct gnet_stats_queue *q);
 extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
@@ -38,13 +38,13 @@ extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
 extern int gnet_stats_finish_copy(struct gnet_dump *d);
 
 extern int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-			     struct gnet_stats_rate_est *rate_est,
+			     struct gnet_stats_rate_est64 *rate_est,
 			     spinlock_t *stats_lock, struct nlattr *opt);
 extern void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
-			       struct gnet_stats_rate_est *rate_est);
+			       struct gnet_stats_rate_est64 *rate_est);
 extern int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-				 struct gnet_stats_rate_est *rate_est,
+				 struct gnet_stats_rate_est64 *rate_est,
 				 spinlock_t *stats_lock, struct nlattr *opt);
 extern bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
-				 const struct gnet_stats_rate_est *rate_est);
+				 const struct gnet_stats_rate_est64 *rate_est);
 #endif
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index 5a2978d..66bd154 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -6,12 +6,12 @@ struct xt_rateest {
 	struct gnet_stats_basic_packed	bstats;
 	spinlock_t			lock;
 	/* keep rstats and lock on same cache line to speedup xt_rateest_mt() */
-	struct gnet_stats_rate_est	rstats;
+	struct gnet_stats_rate_est64	rstats;
 
 	/* following fields not accessed in hot path */
+	unsigned int			refcnt;
 	struct hlist_node		list;
 	char				name[IFNAMSIZ];
-	unsigned int			refcnt;
 	struct gnet_estimator		params;
 	struct rcu_head			rcu;
 };
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f10818f..6a0f147 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -64,8 +64,8 @@ struct Qdisc {
 	struct list_head	list;
 	u32			handle;
 	u32			parent;
+	struct gnet_stats_rate_est64	rate_est;
 	atomic_t		refcnt;
-	struct gnet_stats_rate_est	rate_est;
 	int			(*reshape_fail)(struct sk_buff *skb,
 					struct Qdisc *q);
 
diff --git a/include/uapi/linux/gen_stats.h b/include/uapi/linux/gen_stats.h
index 552c8a0..b4c5f60 100644
--- a/include/uapi/linux/gen_stats.h
+++ b/include/uapi/linux/gen_stats.h
@@ -9,6 +9,7 @@ enum {
 	TCA_STATS_RATE_EST,
 	TCA_STATS_QUEUE,
 	TCA_STATS_APP,
+	TCA_STATS_RATE_EST64,
 	__TCA_STATS_MAX,
 };
 #define TCA_STATS_MAX (__TCA_STATS_MAX - 1)
@@ -38,6 +39,16 @@ struct gnet_stats_rate_est {
 };
 
 /**
+ * struct gnet_stats_rate_est64 - rate estimator
+ * @bps: current byte rate
+ * @pps: current packet rate
+ */
+struct gnet_stats_rate_est64 {
+	__u64	bps;
+	__u32	pps;
+} __attribute__ ((packed));
+
+/**
  * struct gnet_stats_queue - queuing statistics
  * @qlen: queue length
  * @backlog: backlog size of queue
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index d9d198a..6b5b6e7 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -82,7 +82,7 @@ struct gen_estimator
 {
 	struct list_head	list;
 	struct gnet_stats_basic_packed	*bstats;
-	struct gnet_stats_rate_est	*rate_est;
+	struct gnet_stats_rate_est64	*rate_est;
 	spinlock_t		*stats_lock;
 	int			ewma_log;
 	u64			last_bytes;
@@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est)
 
 static
 struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
-				    const struct gnet_stats_rate_est *rate_est)
+				    const struct gnet_stats_rate_est64 *rate_est)
 {
 	struct rb_node *p = est_root.rb_node;
 
@@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  *
  */
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-		      struct gnet_stats_rate_est *rate_est,
+		      struct gnet_stats_rate_est64 *rate_est,
 		      spinlock_t *stats_lock,
 		      struct nlattr *opt)
 {
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator);
  * Note : Caller should respect an RCU grace period before freeing stats_lock
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
-			struct gnet_stats_rate_est *rate_est)
+			struct gnet_stats_rate_est64 *rate_est)
 {
 	struct gen_estimator *e;
 
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
  * Returns 0 on success or a negative error code.
  */
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-			  struct gnet_stats_rate_est *rate_est,
+			  struct gnet_stats_rate_est64 *rate_est,
 			  spinlock_t *stats_lock, struct nlattr *opt)
 {
 	gen_kill_estimator(bstats, rate_est);
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator);
  * Returns true if estimator is active, and false if not.
  */
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
-			  const struct gnet_stats_rate_est *rate_est)
+			  const struct gnet_stats_rate_est64 *rate_est)
 {
 	bool res;
 
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index ddedf21..20b306f 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -143,18 +143,28 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
 int
 gnet_stats_copy_rate_est(struct gnet_dump *d,
 			 const struct gnet_stats_basic_packed *b,
-			 struct gnet_stats_rate_est *r)
+			 struct gnet_stats_rate_est64 *r)
 {
+	struct gnet_stats_rate_est est;
+	int res;
+
 	if (b && !gen_estimator_active(b, r))
 		return 0;
 
+	est.bps = min_t(u64, UINT_MAX, r->bps);
+	est.pps = r->pps;
+
 	if (d->compat_tc_stats) {
-		d->tc_stats.bps = r->bps;
-		d->tc_stats.pps = r->pps;
+		d->tc_stats.bps = est.bps;
+		d->tc_stats.pps = est.pps;
 	}
 
-	if (d->tail)
-		return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+	if (d->tail) {
+		res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est));
+		if (res < 0)
+			return res;
+		return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r));
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index ed0db15..7720b03 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -18,7 +18,7 @@ static bool
 xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
-	struct gnet_stats_rate_est *r;
+	struct gnet_stats_rate_est64 *r;
 	u_int32_t bps1, bps2, pps1, pps2;
 	bool ret = true;
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1bc210f..71a5688 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -130,7 +130,7 @@ struct cbq_class {
 	psched_time_t		penalized;
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
-	struct gnet_stats_rate_est rate_est;
+	struct gnet_stats_rate_est64 rate_est;
 	struct tc_cbq_xstats	xstats;
 
 	struct tcf_proto	*filter_list;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 759b308..8302717 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -25,7 +25,7 @@ struct drr_class {
 
 	struct gnet_stats_basic_packed		bstats;
 	struct gnet_stats_queue		qstats;
-	struct gnet_stats_rate_est	rate_est;
+	struct gnet_stats_rate_est64	rate_est;
 	struct list_head		alist;
 	struct Qdisc			*qdisc;
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 9facea0..c407561 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -114,7 +114,7 @@ struct hfsc_class {
 
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
-	struct gnet_stats_rate_est rate_est;
+	struct gnet_stats_rate_est64 rate_est;
 	unsigned int	level;		/* class level in hierarchy */
 	struct tcf_proto *filter_list;	/* filter list */
 	unsigned int	filter_cnt;	/* filter count */
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 79b1876..a3ae7ab 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -78,7 +78,7 @@ struct htb_class {
 	/* general class parameters */
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
-	struct gnet_stats_rate_est rate_est;
+	struct gnet_stats_rate_est64 rate_est;
 	struct tc_htb_xstats xstats;	/* our special stats */
 	int refcnt;		/* usage count of this class */
 
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d51852b..7c195d9 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -138,7 +138,7 @@ struct qfq_class {
 
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
-	struct gnet_stats_rate_est rate_est;
+	struct gnet_stats_rate_est64 rate_est;
 	struct Qdisc *qdisc;
 	struct list_head alist;		/* Link for active-classes list. */
 	struct qfq_aggregate *agg;	/* Parent aggregate. */

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next] net_sched: add 64bit rate estimators
  2013-06-05  5:00 [PATCH net-next] net_sched: add 64bit rate estimators Eric Dumazet
@ 2013-06-05 18:55 ` Ben Hutchings
  2013-06-05 19:37   ` Eric Dumazet
  2013-06-06  8:39   ` David Laight
  2013-06-06 15:43 ` [PATCH v2 " Eric Dumazet
  1 sibling, 2 replies; 8+ messages in thread
From: Ben Hutchings @ 2013-06-05 18:55 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev

On Tue, 2013-06-04 at 22:00 -0700, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> struct gnet_stats_rate_est contains u32 fields, so the byte per second
> field can wrap at 34360Mbit.
> 
> Add a new gnet_stats_rate_est64 structure to get 64bit bps field, and
> switch the kernel to use this structure natively.
[...] 
>  /**
> + * struct gnet_stats_rate_est64 - rate estimator
> + * @bps: current byte rate
> + * @pps: current packet rate
> + */
> +struct gnet_stats_rate_est64 {
> +	__u64	bps;
> +	__u32	pps;
> +} __attribute__ ((packed));
[...]

Using attribute packed (which should be written as __packed) forces the
alignment of each member and the structure as a whole to be 1.  For an
architecture where unaligned access is expensive, gcc will make all
reads and writes use byte operations.

I think you have to add a dummy __u32 member instead.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next] net_sched: add 64bit rate estimators
  2013-06-05 18:55 ` Ben Hutchings
@ 2013-06-05 19:37   ` Eric Dumazet
  2013-06-05 20:06     ` Ben Hutchings
  2013-06-06  8:39   ` David Laight
  1 sibling, 1 reply; 8+ messages in thread
From: Eric Dumazet @ 2013-06-05 19:37 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: David Miller, netdev

On Wed, 2013-06-05 at 19:55 +0100, Ben Hutchings wrote:
> On Tue, 2013-06-04 at 22:00 -0700, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> > 
> > struct gnet_stats_rate_est contains u32 fields, so the byte per second
> > field can wrap at 34360Mbit.
> > 
> > Add a new gnet_stats_rate_est64 structure to get 64bit bps field, and
> > switch the kernel to use this structure natively.
> [...] 
> >  /**
> > + * struct gnet_stats_rate_est64 - rate estimator
> > + * @bps: current byte rate
> > + * @pps: current packet rate
> > + */
> > +struct gnet_stats_rate_est64 {
> > +	__u64	bps;
> > +	__u32	pps;
> > +} __attribute__ ((packed));
> [...]
> 
> Using attribute packed (which should be written as __packed) forces the
> alignment of each member and the structure as a whole to be 1.  For an
> architecture where unaligned access is expensive, gcc will make all
> reads and writes use byte operations.
> 
> I think you have to add a dummy __u32 member instead.

It's exported to userland so __packed is not available. Or has this been
changed lately?

This structure is only used in very slow path, in the rate estimator
thing, so the unaligned accesses are not a problem at all.

I made this choice to not increase sizeof(struct Qdisc) and the
alignments as well.

If you think there is an issue with userland, we could use a
gnet_stats_rate_est64_packed in kernel, and a gnet_stats_rate_est64 in
userland.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next] net_sched: add 64bit rate estimators
  2013-06-05 19:37   ` Eric Dumazet
@ 2013-06-05 20:06     ` Ben Hutchings
  2013-06-05 23:07       ` Eric Dumazet
  0 siblings, 1 reply; 8+ messages in thread
From: Ben Hutchings @ 2013-06-05 20:06 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev

On Wed, 2013-06-05 at 12:37 -0700, Eric Dumazet wrote:
> On Wed, 2013-06-05 at 19:55 +0100, Ben Hutchings wrote:
> > On Tue, 2013-06-04 at 22:00 -0700, Eric Dumazet wrote:
> > > From: Eric Dumazet <edumazet@google.com>
> > > 
> > > struct gnet_stats_rate_est contains u32 fields, so the byte per second
> > > field can wrap at 34360Mbit.
> > > 
> > > Add a new gnet_stats_rate_est64 structure to get 64bit bps field, and
> > > switch the kernel to use this structure natively.
> > [...] 
> > >  /**
> > > + * struct gnet_stats_rate_est64 - rate estimator
> > > + * @bps: current byte rate
> > > + * @pps: current packet rate
> > > + */
> > > +struct gnet_stats_rate_est64 {
> > > +	__u64	bps;
> > > +	__u32	pps;
> > > +} __attribute__ ((packed));
> > [...]
> > 
> > Using attribute packed (which should be written as __packed) forces the
> > alignment of each member and the structure as a whole to be 1.  For an
> > architecture where unaligned access is expensive, gcc will make all
> > reads and writes use byte operations.
> > 
> > I think you have to add a dummy __u32 member instead.
> 
> Its exported to userland so __packed is not available. Or has this been
> changed lately ?

Sorry, you're quite right.

> This structure is only used in very slow path, in the rate estimator
> thing, so the unaligned accesses are not a problem at all.
> 
> I made this choice to not increase sizeof(struct Qdisc) and the
> alignments as well.
> 
> If you think there is an issue with userland, we could use a
> gnet_stats_rate_est64_packed in kernel, and a gnet_stats_rate_est64 in
> userland.

If you're sure this is the right trade-off then I accept that.  I
thought you were using packed to ensure the structure layout was
consistent between userland and kernel, and might not have realised the
unfortunate effect on alignment.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next] net_sched: add 64bit rate estimators
  2013-06-05 20:06     ` Ben Hutchings
@ 2013-06-05 23:07       ` Eric Dumazet
  0 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2013-06-05 23:07 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: David Miller, netdev

On Wed, 2013-06-05 at 21:06 +0100, Ben Hutchings wrote:

> If you're sure this is the right trade-off then I accept that.  I
> thought you were using packed to ensure the structure layout was
> consistent between userland and kernel, and might not have realised the
> unfortunate effect on alignment.

I'll spin a v2 with clear distinction between kernel/user

And I will also dump the TCA_STATS_RATE_EST64 part only if needed (if
the bps rate is above the 32bit limit)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH net-next] net_sched: add 64bit rate estimators
  2013-06-05 18:55 ` Ben Hutchings
  2013-06-05 19:37   ` Eric Dumazet
@ 2013-06-06  8:39   ` David Laight
  1 sibling, 0 replies; 8+ messages in thread
From: David Laight @ 2013-06-06  8:39 UTC (permalink / raw)
  To: Ben Hutchings, Eric Dumazet; +Cc: David Miller, netdev

> > struct gnet_stats_rate_est contains u32 fields, so the byte per second
> > field can wrap at 34360Mbit.
> >
> > Add a new gnet_stats_rate_est64 structure to get 64bit bps field, and
> > switch the kernel to use this structure natively.
> [...]
> >  /**
> > + * struct gnet_stats_rate_est64 - rate estimator
> > + * @bps: current byte rate
> > + * @pps: current packet rate
> > + */
> > +struct gnet_stats_rate_est64 {
> > +	__u64	bps;
> > +	__u32	pps;
> > +} __attribute__ ((packed));
> [...]
> 
> Using attribute packed (which should be written as __packed) forces the
> alignment of each member and the structure as a whole to be 1.  For an
> architecture where unaligned access is expensive, gcc will make all
> reads and writes use byte operations.
> 
> I think you have to add a dummy __u32 member instead.

Or mark the __u64 with (IIRC) __attribute__((aligned,4))
so that gcc will generate two 32bit accesses rather than
the eight byte accesses if the structure is packed.

	David


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 net-next] net_sched: add 64bit rate estimators
  2013-06-05  5:00 [PATCH net-next] net_sched: add 64bit rate estimators Eric Dumazet
  2013-06-05 18:55 ` Ben Hutchings
@ 2013-06-06 15:43 ` Eric Dumazet
  2013-06-11  9:51   ` David Miller
  1 sibling, 1 reply; 8+ messages in thread
From: Eric Dumazet @ 2013-06-06 15:43 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ben Hutchings

From: Eric Dumazet <edumazet@google.com>

struct gnet_stats_rate_est contains u32 fields, so the bytes per second
field can wrap at 34360Mbit.

Add a new gnet_stats_rate_est64 structure to get 64bit bps/pps fields,
and switch the kernel to use this structure natively.

This structure is dumped to user space as a new attribute :

TCA_STATS_RATE_EST64

Old tc command will now display the capped bps (to 34360Mbit), instead
of wrapped values, and updated tc command will display correct
information.

Old tc command output, after patch :

eric:~# tc -s -d qd sh dev lo
qdisc pfifo 8001: root refcnt 2 limit 1000p
 Sent 80868245400 bytes 1978837 pkt (dropped 0, overlimits 0 requeues 0) 
 rate 34360Mbit 189696pps backlog 0b 0p requeues 0 

This patch carefully reorganizes "struct Qdisc" layout to get optimal
performance on SMP.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
---
 include/net/act_api.h              |    2 +-
 include/net/gen_stats.h            |   10 +++++-----
 include/net/netfilter/xt_rateest.h |    2 +-
 include/net/sch_generic.h          |   13 +++++++------
 include/uapi/linux/gen_stats.h     |   11 +++++++++++
 net/core/gen_estimator.c           |   12 ++++++------
 net/core/gen_stats.c               |   22 +++++++++++++++++-----
 net/netfilter/xt_rateest.c         |    2 +-
 net/sched/sch_cbq.c                |    2 +-
 net/sched/sch_drr.c                |    2 +-
 net/sched/sch_hfsc.c               |    2 +-
 net/sched/sch_htb.c                |    2 +-
 net/sched/sch_qfq.c                |    2 +-
 13 files changed, 54 insertions(+), 30 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 06ef7e9..b8ffac7 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -18,7 +18,7 @@ struct tcf_common {
 	struct tcf_t			tcfc_tm;
 	struct gnet_stats_basic_packed	tcfc_bstats;
 	struct gnet_stats_queue		tcfc_qstats;
-	struct gnet_stats_rate_est	tcfc_rate_est;
+	struct gnet_stats_rate_est64	tcfc_rate_est;
 	spinlock_t			tcfc_lock;
 	struct rcu_head			tcfc_rcu;
 };
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index a79b6cf..cf8439b 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -30,7 +30,7 @@ extern int gnet_stats_copy_basic(struct gnet_dump *d,
 				 struct gnet_stats_basic_packed *b);
 extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
 				    const struct gnet_stats_basic_packed *b,
-				    struct gnet_stats_rate_est *r);
+				    struct gnet_stats_rate_est64 *r);
 extern int gnet_stats_copy_queue(struct gnet_dump *d,
 				 struct gnet_stats_queue *q);
 extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
@@ -38,13 +38,13 @@ extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
 extern int gnet_stats_finish_copy(struct gnet_dump *d);
 
 extern int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-			     struct gnet_stats_rate_est *rate_est,
+			     struct gnet_stats_rate_est64 *rate_est,
 			     spinlock_t *stats_lock, struct nlattr *opt);
 extern void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
-			       struct gnet_stats_rate_est *rate_est);
+			       struct gnet_stats_rate_est64 *rate_est);
 extern int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-				 struct gnet_stats_rate_est *rate_est,
+				 struct gnet_stats_rate_est64 *rate_est,
 				 spinlock_t *stats_lock, struct nlattr *opt);
 extern bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
-				 const struct gnet_stats_rate_est *rate_est);
+				 const struct gnet_stats_rate_est64 *rate_est);
 #endif
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index 5a2978d..495c71f 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -6,7 +6,7 @@ struct xt_rateest {
 	struct gnet_stats_basic_packed	bstats;
 	spinlock_t			lock;
 	/* keep rstats and lock on same cache line to speedup xt_rateest_mt() */
-	struct gnet_stats_rate_est	rstats;
+	struct gnet_stats_rate_est64	rstats;
 
 	/* following fields not accessed in hot path */
 	struct hlist_node		list;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e7f4e21..df56760 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -58,14 +58,12 @@ struct Qdisc {
 				      * multiqueue device.
 				      */
 #define TCQ_F_WARN_NONWC	(1 << 16)
-	int			padded;
+	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
 	struct list_head	list;
 	u32			handle;
 	u32			parent;
-	atomic_t		refcnt;
-	struct gnet_stats_rate_est	rate_est;
 	int			(*reshape_fail)(struct sk_buff *skb,
 					struct Qdisc *q);
 
@@ -76,8 +74,9 @@ struct Qdisc {
 	 */
 	struct Qdisc		*__parent;
 	struct netdev_queue	*dev_queue;
-	struct Qdisc		*next_sched;
 
+	struct gnet_stats_rate_est64	rate_est;
+	struct Qdisc		*next_sched;
 	struct sk_buff		*gso_skb;
 	/*
 	 * For performance sake on SMP, we put highly modified fields at the end
@@ -88,8 +87,10 @@ struct Qdisc {
 	unsigned int		__state;
 	struct gnet_stats_queue	qstats;
 	struct rcu_head		rcu_head;
-	spinlock_t		busylock;
-	u32			limit;
+	int			padded;
+	atomic_t		refcnt;
+
+	spinlock_t		busylock ____cacheline_aligned_in_smp;
 };
 
 static inline bool qdisc_is_running(const struct Qdisc *qdisc)
diff --git a/include/uapi/linux/gen_stats.h b/include/uapi/linux/gen_stats.h
index 552c8a0..6487317 100644
--- a/include/uapi/linux/gen_stats.h
+++ b/include/uapi/linux/gen_stats.h
@@ -9,6 +9,7 @@ enum {
 	TCA_STATS_RATE_EST,
 	TCA_STATS_QUEUE,
 	TCA_STATS_APP,
+	TCA_STATS_RATE_EST64,
 	__TCA_STATS_MAX,
 };
 #define TCA_STATS_MAX (__TCA_STATS_MAX - 1)
@@ -38,6 +39,16 @@ struct gnet_stats_rate_est {
 };
 
 /**
+ * struct gnet_stats_rate_est64 - rate estimator
+ * @bps: current byte rate
+ * @pps: current packet rate
+ */
+struct gnet_stats_rate_est64 {
+	__u64	bps;
+	__u64	pps;
+};
+
+/**
  * struct gnet_stats_queue - queuing statistics
  * @qlen: queue length
  * @backlog: backlog size of queue
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index d9d198a..6b5b6e7 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -82,7 +82,7 @@ struct gen_estimator
 {
 	struct list_head	list;
 	struct gnet_stats_basic_packed	*bstats;
-	struct gnet_stats_rate_est	*rate_est;
+	struct gnet_stats_rate_est64	*rate_est;
 	spinlock_t		*stats_lock;
 	int			ewma_log;
 	u64			last_bytes;
@@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est)
 
 static
 struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
-				    const struct gnet_stats_rate_est *rate_est)
+				    const struct gnet_stats_rate_est64 *rate_est)
 {
 	struct rb_node *p = est_root.rb_node;
 
@@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  *
  */
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-		      struct gnet_stats_rate_est *rate_est,
+		      struct gnet_stats_rate_est64 *rate_est,
 		      spinlock_t *stats_lock,
 		      struct nlattr *opt)
 {
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator);
  * Note : Caller should respect an RCU grace period before freeing stats_lock
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
-			struct gnet_stats_rate_est *rate_est)
+			struct gnet_stats_rate_est64 *rate_est)
 {
 	struct gen_estimator *e;
 
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
  * Returns 0 on success or a negative error code.
  */
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-			  struct gnet_stats_rate_est *rate_est,
+			  struct gnet_stats_rate_est64 *rate_est,
 			  spinlock_t *stats_lock, struct nlattr *opt)
 {
 	gen_kill_estimator(bstats, rate_est);
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator);
  * Returns true if estimator is active, and false if not.
  */
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
-			  const struct gnet_stats_rate_est *rate_est)
+			  const struct gnet_stats_rate_est64 *rate_est)
 {
 	bool res;
 
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index ddedf21..9d3d9e7 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
 int
 gnet_stats_copy_rate_est(struct gnet_dump *d,
 			 const struct gnet_stats_basic_packed *b,
-			 struct gnet_stats_rate_est *r)
+			 struct gnet_stats_rate_est64 *r)
 {
+	struct gnet_stats_rate_est est;
+	int res;
+
 	if (b && !gen_estimator_active(b, r))
 		return 0;
 
+	est.bps = min_t(u64, UINT_MAX, r->bps);
+	/* we have some time before reaching 2^32 packets per second */
+	est.pps = r->pps;
+
 	if (d->compat_tc_stats) {
-		d->tc_stats.bps = r->bps;
-		d->tc_stats.pps = r->pps;
+		d->tc_stats.bps = est.bps;
+		d->tc_stats.pps = est.pps;
 	}
 
-	if (d->tail)
-		return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+	if (d->tail) {
+		res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est));
+		if (res < 0 || est.bps == r->bps)
+			return res;
+		/* emit 64bit stats only if needed */
+		return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r));
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index ed0db15..7720b03 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -18,7 +18,7 @@ static bool
 xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
-	struct gnet_stats_rate_est *r;
+	struct gnet_stats_rate_est64 *r;
 	u_int32_t bps1, bps2, pps1, pps2;
 	bool ret = true;
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1bc210f..71a5688 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -130,7 +130,7 @@ struct cbq_class {
 	psched_time_t		penalized;
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
-	struct gnet_stats_rate_est rate_est;
+	struct gnet_stats_rate_est64 rate_est;
 	struct tc_cbq_xstats	xstats;
 
 	struct tcf_proto	*filter_list;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 759b308..8302717 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -25,7 +25,7 @@ struct drr_class {
 
 	struct gnet_stats_basic_packed		bstats;
 	struct gnet_stats_queue		qstats;
-	struct gnet_stats_rate_est	rate_est;
+	struct gnet_stats_rate_est64	rate_est;
 	struct list_head		alist;
 	struct Qdisc			*qdisc;
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 9facea0..c407561 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -114,7 +114,7 @@ struct hfsc_class {
 
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
-	struct gnet_stats_rate_est rate_est;
+	struct gnet_stats_rate_est64 rate_est;
 	unsigned int	level;		/* class level in hierarchy */
 	struct tcf_proto *filter_list;	/* filter list */
 	unsigned int	filter_cnt;	/* filter count */
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index adaedd7..162fb80 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -78,7 +78,7 @@ struct htb_class {
 	/* general class parameters */
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
-	struct gnet_stats_rate_est rate_est;
+	struct gnet_stats_rate_est64 rate_est;
 	struct tc_htb_xstats xstats;	/* our special stats */
 	int refcnt;		/* usage count of this class */
 
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d51852b..7c195d9 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -138,7 +138,7 @@ struct qfq_class {
 
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
-	struct gnet_stats_rate_est rate_est;
+	struct gnet_stats_rate_est64 rate_est;
 	struct Qdisc *qdisc;
 	struct list_head alist;		/* Link for active-classes list. */
 	struct qfq_aggregate *agg;	/* Parent aggregate. */

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] net_sched: add 64bit rate estimators
  2013-06-06 15:43 ` [PATCH v2 " Eric Dumazet
@ 2013-06-11  9:51   ` David Miller
  0 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2013-06-11  9:51 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, bhutchings

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 06 Jun 2013 08:43:22 -0700

> From: Eric Dumazet <edumazet@google.com>
> 
> struct gnet_stats_rate_est contains u32 fields, so the bytes per second
> field can wrap at 34360Mbit.
> 
> Add a new gnet_stats_rate_est64 structure to get 64bit bps/pps fields,
> and switch the kernel to use this structure natively.
> 
> This structure is dumped to user space as a new attribute :
> 
> TCA_STATS_RATE_EST64
> 
> Old tc command will now display the capped bps (to 34360Mbit), instead
> of wrapped values, and updated tc command will display correct
> information.
> 
> Old tc command output, after patch :
> 
> eric:~# tc -s -d qd sh dev lo
> qdisc pfifo 8001: root refcnt 2 limit 1000p
>  Sent 80868245400 bytes 1978837 pkt (dropped 0, overlimits 0 requeues 0) 
>  rate 34360Mbit 189696pps backlog 0b 0p requeues 0 
> 
> This patch carefully reorganizes "struct Qdisc" layout to get optimal
> performance on SMP.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Ben Hutchings <bhutchings@solarflare.com>

Applied, thanks Eric.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-06-11  9:51 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-05  5:00 [PATCH net-next] net_sched: add 64bit rate estimators Eric Dumazet
2013-06-05 18:55 ` Ben Hutchings
2013-06-05 19:37   ` Eric Dumazet
2013-06-05 20:06     ` Ben Hutchings
2013-06-05 23:07       ` Eric Dumazet
2013-06-06  8:39   ` David Laight
2013-06-06 15:43 ` [PATCH v2 " Eric Dumazet
2013-06-11  9:51   ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.