linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] net/mlx5e: fix high stack usage
@ 2018-11-02 15:33 Arnd Bergmann
  2018-11-02 16:08 ` Jason Gunthorpe
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Arnd Bergmann @ 2018-11-02 15:33 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, David S. Miller
  Cc: Arnd Bergmann, Tariq Toukan, Eran Ben Elisha, Boris Pismenny,
	Ilya Lesokhin, Moshe Shemesh, Kamal Heib, netdev, linux-rdma,
	linux-kernel

A patch that looks harmless causes the stack usage of the mlx5e_grp_sw_update_stats()
function to drastically increase with x86 gcc-4.9 and higher (tested up to 8.1):

drivers/net/ethernet/mellanox/mlx5/core/en_stats.c: In function ‘mlx5e_grp_sw_update_stats’:
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c:216:1: warning: the frame size of 1276 bytes is larger than 500 bytes [-Wframe-larger-than=]

By splitting out the loop body into a non-inlined function, the stack size goes
back down to under 500 bytes.

Fixes: 779d986d60de ("net/mlx5e: Do not ignore netdevice TX/RX queues number")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 .../ethernet/mellanox/mlx5/core/en_stats.c    | 168 +++++++++---------
 1 file changed, 86 insertions(+), 82 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 1e55b9c27ffc..c270206f3475 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -126,93 +126,97 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
 	return idx;
 }
 
+static noinline_for_stack void
+mlx5e_grp_sw_collect_stat(struct mlx5e_priv *priv, struct mlx5e_sw_stats *s, int i)
+{
+	struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
+	struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
+	struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
+	struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
+	struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
+	int j;
+
+	s->rx_packets	+= rq_stats->packets;
+	s->rx_bytes	+= rq_stats->bytes;
+	s->rx_lro_packets += rq_stats->lro_packets;
+	s->rx_lro_bytes	+= rq_stats->lro_bytes;
+	s->rx_ecn_mark	+= rq_stats->ecn_mark;
+	s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
+	s->rx_csum_none	+= rq_stats->csum_none;
+	s->rx_csum_complete += rq_stats->csum_complete;
+	s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
+	s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+	s->rx_xdp_drop     += rq_stats->xdp_drop;
+	s->rx_xdp_redirect += rq_stats->xdp_redirect;
+	s->rx_xdp_tx_xmit  += xdpsq_stats->xmit;
+	s->rx_xdp_tx_full  += xdpsq_stats->full;
+	s->rx_xdp_tx_err   += xdpsq_stats->err;
+	s->rx_xdp_tx_cqe   += xdpsq_stats->cqes;
+	s->rx_wqe_err   += rq_stats->wqe_err;
+	s->rx_mpwqe_filler_cqes    += rq_stats->mpwqe_filler_cqes;
+	s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
+	s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
+	s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
+	s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
+	s->rx_page_reuse  += rq_stats->page_reuse;
+	s->rx_cache_reuse += rq_stats->cache_reuse;
+	s->rx_cache_full  += rq_stats->cache_full;
+	s->rx_cache_empty += rq_stats->cache_empty;
+	s->rx_cache_busy  += rq_stats->cache_busy;
+	s->rx_cache_waive += rq_stats->cache_waive;
+	s->rx_congst_umr  += rq_stats->congst_umr;
+	s->rx_arfs_err    += rq_stats->arfs_err;
+	s->ch_events      += ch_stats->events;
+	s->ch_poll        += ch_stats->poll;
+	s->ch_arm         += ch_stats->arm;
+	s->ch_aff_change  += ch_stats->aff_change;
+	s->ch_eq_rearm    += ch_stats->eq_rearm;
+	/* xdp redirect */
+	s->tx_xdp_xmit    += xdpsq_red_stats->xmit;
+	s->tx_xdp_full    += xdpsq_red_stats->full;
+	s->tx_xdp_err     += xdpsq_red_stats->err;
+	s->tx_xdp_cqes    += xdpsq_red_stats->cqes;
+
+	for (j = 0; j < priv->max_opened_tc; j++) {
+		struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+		s->tx_packets		+= sq_stats->packets;
+		s->tx_bytes		+= sq_stats->bytes;
+		s->tx_tso_packets	+= sq_stats->tso_packets;
+		s->tx_tso_bytes		+= sq_stats->tso_bytes;
+		s->tx_tso_inner_packets	+= sq_stats->tso_inner_packets;
+		s->tx_tso_inner_bytes	+= sq_stats->tso_inner_bytes;
+		s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
+		s->tx_nop               += sq_stats->nop;
+		s->tx_queue_stopped	+= sq_stats->stopped;
+		s->tx_queue_wake	+= sq_stats->wake;
+		s->tx_udp_seg_rem	+= sq_stats->udp_seg_rem;
+		s->tx_queue_dropped	+= sq_stats->dropped;
+		s->tx_cqe_err		+= sq_stats->cqe_err;
+		s->tx_recover		+= sq_stats->recover;
+		s->tx_xmit_more		+= sq_stats->xmit_more;
+		s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
+		s->tx_csum_none		+= sq_stats->csum_none;
+		s->tx_csum_partial	+= sq_stats->csum_partial;
+#ifdef CONFIG_MLX5_EN_TLS
+		s->tx_tls_ooo		+= sq_stats->tls_ooo;
+		s->tx_tls_resync_bytes	+= sq_stats->tls_resync_bytes;
+#endif
+		s->tx_cqes		+= sq_stats->cqes;
+	}
+}
+
 void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 {
-	struct mlx5e_sw_stats temp, *s = &temp;
+	struct mlx5e_sw_stats s;
 	int i;
 
-	memset(s, 0, sizeof(*s));
-
-	for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) {
-		struct mlx5e_channel_stats *channel_stats =
-			&priv->channel_stats[i];
-		struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
-		struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
-		struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
-		struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
-		int j;
-
-		s->rx_packets	+= rq_stats->packets;
-		s->rx_bytes	+= rq_stats->bytes;
-		s->rx_lro_packets += rq_stats->lro_packets;
-		s->rx_lro_bytes	+= rq_stats->lro_bytes;
-		s->rx_ecn_mark	+= rq_stats->ecn_mark;
-		s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
-		s->rx_csum_none	+= rq_stats->csum_none;
-		s->rx_csum_complete += rq_stats->csum_complete;
-		s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
-		s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
-		s->rx_xdp_drop     += rq_stats->xdp_drop;
-		s->rx_xdp_redirect += rq_stats->xdp_redirect;
-		s->rx_xdp_tx_xmit  += xdpsq_stats->xmit;
-		s->rx_xdp_tx_full  += xdpsq_stats->full;
-		s->rx_xdp_tx_err   += xdpsq_stats->err;
-		s->rx_xdp_tx_cqe   += xdpsq_stats->cqes;
-		s->rx_wqe_err   += rq_stats->wqe_err;
-		s->rx_mpwqe_filler_cqes    += rq_stats->mpwqe_filler_cqes;
-		s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
-		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
-		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
-		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
-		s->rx_page_reuse  += rq_stats->page_reuse;
-		s->rx_cache_reuse += rq_stats->cache_reuse;
-		s->rx_cache_full  += rq_stats->cache_full;
-		s->rx_cache_empty += rq_stats->cache_empty;
-		s->rx_cache_busy  += rq_stats->cache_busy;
-		s->rx_cache_waive += rq_stats->cache_waive;
-		s->rx_congst_umr  += rq_stats->congst_umr;
-		s->rx_arfs_err    += rq_stats->arfs_err;
-		s->ch_events      += ch_stats->events;
-		s->ch_poll        += ch_stats->poll;
-		s->ch_arm         += ch_stats->arm;
-		s->ch_aff_change  += ch_stats->aff_change;
-		s->ch_eq_rearm    += ch_stats->eq_rearm;
-		/* xdp redirect */
-		s->tx_xdp_xmit    += xdpsq_red_stats->xmit;
-		s->tx_xdp_full    += xdpsq_red_stats->full;
-		s->tx_xdp_err     += xdpsq_red_stats->err;
-		s->tx_xdp_cqes    += xdpsq_red_stats->cqes;
-
-		for (j = 0; j < priv->max_opened_tc; j++) {
-			struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
-
-			s->tx_packets		+= sq_stats->packets;
-			s->tx_bytes		+= sq_stats->bytes;
-			s->tx_tso_packets	+= sq_stats->tso_packets;
-			s->tx_tso_bytes		+= sq_stats->tso_bytes;
-			s->tx_tso_inner_packets	+= sq_stats->tso_inner_packets;
-			s->tx_tso_inner_bytes	+= sq_stats->tso_inner_bytes;
-			s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
-			s->tx_nop               += sq_stats->nop;
-			s->tx_queue_stopped	+= sq_stats->stopped;
-			s->tx_queue_wake	+= sq_stats->wake;
-			s->tx_udp_seg_rem	+= sq_stats->udp_seg_rem;
-			s->tx_queue_dropped	+= sq_stats->dropped;
-			s->tx_cqe_err		+= sq_stats->cqe_err;
-			s->tx_recover		+= sq_stats->recover;
-			s->tx_xmit_more		+= sq_stats->xmit_more;
-			s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
-			s->tx_csum_none		+= sq_stats->csum_none;
-			s->tx_csum_partial	+= sq_stats->csum_partial;
-#ifdef CONFIG_MLX5_EN_TLS
-			s->tx_tls_ooo		+= sq_stats->tls_ooo;
-			s->tx_tls_resync_bytes	+= sq_stats->tls_resync_bytes;
-#endif
-			s->tx_cqes		+= sq_stats->cqes;
-		}
-	}
+	memset(&s, 0, sizeof(s));
+
+	for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++)
+		mlx5e_grp_sw_collect_stat(priv, &s, i);
 
-	memcpy(&priv->stats.sw, s, sizeof(*s));
+	memcpy(&priv->stats.sw, &s, sizeof(s));
 }
 
 static const struct counter_desc q_stats_desc[] = {
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 15:33 [PATCH] net/mlx5e: fix high stack usage Arnd Bergmann
@ 2018-11-02 16:08 ` Jason Gunthorpe
  2018-11-02 16:23   ` Arnd Bergmann
  2018-11-02 21:05 ` Saeed Mahameed
  2018-11-05 11:24 ` David Laight
  2 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2018-11-02 16:08 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Saeed Mahameed, Leon Romanovsky, David S. Miller, Tariq Toukan,
	Eran Ben Elisha, Boris Pismenny, Ilya Lesokhin, Moshe Shemesh,
	Kamal Heib, netdev, linux-rdma, linux-kernel

On Fri, Nov 02, 2018 at 04:33:03PM +0100, Arnd Bergmann wrote:
> A patch that looks harmless causes the stack usage of the mlx5e_grp_sw_update_stats()
> function to drastically increase with x86 gcc-4.9 and higher (tested up to 8.1):
> 
> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c: In function ‘mlx5e_grp_sw_update_stats’:
> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c:216:1: warning: the frame size of 1276 bytes is larger than 500 bytes [-Wframe-larger-than=]

Why is the stack size so big here? The mlx5e_sw_stats is < 500 bytes
and all the other on-stack stuff looks pretty small?

> By splitting out the loop body into a non-inlined function, the stack size goes
> back down to under 500 bytes.

Does this actually reduce the stack consumed or does this just suppress
the warning?

Jason

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 16:08 ` Jason Gunthorpe
@ 2018-11-02 16:23   ` Arnd Bergmann
  2018-11-02 17:00     ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Arnd Bergmann @ 2018-11-02 16:23 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Saeed Mahameed, Leon Romanovsky, David S. Miller, Tariq Toukan,
	Eran Ben Elisha, Boris Pismenny, Ilya Lesokhin, Moshe Shemesh,
	Kamal Heib, netdev, linux-rdma, linux-kernel

On 11/2/18, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> On Fri, Nov 02, 2018 at 04:33:03PM +0100, Arnd Bergmann wrote:
>> A patch that looks harmless causes the stack usage of the
>> mlx5e_grp_sw_update_stats()
>> function to drastically increase with x86 gcc-4.9 and higher (tested up to
>> 8.1):
>>
>> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c: In function
>> ‘mlx5e_grp_sw_update_stats’:
>> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c:216:1: warning: the
>> frame size of 1276 bytes is larger than 500 bytes [-Wframe-larger-than=]
>
> Why is the stack size so big here? The mlx5e_sw_stats is < 500 bytes
> and all the other on-stack stuff looks pretty small?

I am not entirely sure, but my analysis indicates that gcc tries loop unrolling
or some other optimization that leads to two copies on the stack.

>> By splitting out the loop body into a non-inlined function, the stack size
>> goes
>> back down to under 500 bytes.
>
> Does this actually reduce the stack consumed or does this just suppress
> the warning?

It definitely reduces the total stack usage, the separate functions just
had the expected stack usage that was a few hundred bytes combined.

       Arnd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 16:23   ` Arnd Bergmann
@ 2018-11-02 17:00     ` Jason Gunthorpe
  0 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2018-11-02 17:00 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Saeed Mahameed, Leon Romanovsky, David S. Miller, Tariq Toukan,
	Eran Ben Elisha, Boris Pismenny, Ilya Lesokhin, Moshe Shemesh,
	Kamal Heib, netdev, linux-rdma, linux-kernel

On Fri, Nov 02, 2018 at 05:23:26PM +0100, Arnd Bergmann wrote:
> On 11/2/18, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > On Fri, Nov 02, 2018 at 04:33:03PM +0100, Arnd Bergmann wrote:
> >> A patch that looks harmless causes the stack usage of the
> >> mlx5e_grp_sw_update_stats()
> >> function to drastically increase with x86 gcc-4.9 and higher (tested up to
> >> 8.1):
> >>
> >> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c: In function
> >> ‘mlx5e_grp_sw_update_stats’:
> >> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c:216:1: warning: the
> >> frame size of 1276 bytes is larger than 500 bytes [-Wframe-larger-than=]
> >
> > Why is the stack size so big here? The mlx5e_sw_stats is < 500 bytes
> > and all the other on-stack stuff looks pretty small?
> 
> I am not entirely sure, but my analysis indicates that gcc tries loop unrolling
> or some other optimization that leads to two copies on the stack.

Wow how strange

Did you consider adding a
  __attribute__((optimize("no-unroll-loops"))) 

type macro?

Jason

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 15:33 [PATCH] net/mlx5e: fix high stack usage Arnd Bergmann
  2018-11-02 16:08 ` Jason Gunthorpe
@ 2018-11-02 21:05 ` Saeed Mahameed
  2018-11-02 21:39   ` Eric Dumazet
  2018-11-05 11:24 ` David Laight
  2 siblings, 1 reply; 12+ messages in thread
From: Saeed Mahameed @ 2018-11-02 21:05 UTC (permalink / raw)
  To: davem, arnd, leon
  Cc: linux-kernel, Moshe Shemesh, linux-rdma, Boris Pismenny,
	Tariq Toukan, akpm, Eran Ben Elisha, netdev, Ilya Lesokhin,
	Kamal Heib

On Fri, 2018-11-02 at 16:33 +0100, Arnd Bergmann wrote:
> A patch that looks harmless causes the stack usage of the
> mlx5e_grp_sw_update_stats()
> function to drastically increase with x86 gcc-4.9 and higher (tested
> up to 8.1):
> 
> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c: In function
> ‘mlx5e_grp_sw_update_stats’:
> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c:216:1: warning:
> the frame size of 1276 bytes is larger than 500 bytes [-Wframe-
> larger-than=]
> 
> By splitting out the loop body into a non-inlined function, the stack
> size goes
> back down to under 500 bytes.
> 
> Fixes: 779d986d60de ("net/mlx5e: Do not ignore netdevice TX/RX queues
> number")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  .../ethernet/mellanox/mlx5/core/en_stats.c    | 168 +++++++++-------
> --
>  1 file changed, 86 insertions(+), 82 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> index 1e55b9c27ffc..c270206f3475 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> @@ -126,93 +126,97 @@ static int mlx5e_grp_sw_fill_stats(struct
> mlx5e_priv *priv, u64 *data, int idx)
>  	return idx;
>  }
>  
> +static noinline_for_stack void
> +mlx5e_grp_sw_collect_stat(struct mlx5e_priv *priv, struct
> mlx5e_sw_stats *s, int i)
> +{
> +	struct mlx5e_channel_stats *channel_stats = &priv-
> >channel_stats[i];
> +	struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats-
> >xdpsq;
> +	struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats-
> >rq_xdpsq;
> +	struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
> +	struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
> +	int j;
> +
> +	s->rx_packets	+= rq_stats->packets;
> +	s->rx_bytes	+= rq_stats->bytes;
> +	s->rx_lro_packets += rq_stats->lro_packets;
> +	s->rx_lro_bytes	+= rq_stats->lro_bytes;
> +	s->rx_ecn_mark	+= rq_stats->ecn_mark;
> +	s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
> +	s->rx_csum_none	+= rq_stats->csum_none;
> +	s->rx_csum_complete += rq_stats->csum_complete;
> +	s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
> +	s->rx_csum_unnecessary_inner += rq_stats-
> >csum_unnecessary_inner;
> +	s->rx_xdp_drop     += rq_stats->xdp_drop;
> +	s->rx_xdp_redirect += rq_stats->xdp_redirect;
> +	s->rx_xdp_tx_xmit  += xdpsq_stats->xmit;
> +	s->rx_xdp_tx_full  += xdpsq_stats->full;
> +	s->rx_xdp_tx_err   += xdpsq_stats->err;
> +	s->rx_xdp_tx_cqe   += xdpsq_stats->cqes;
> +	s->rx_wqe_err   += rq_stats->wqe_err;
> +	s->rx_mpwqe_filler_cqes    += rq_stats->mpwqe_filler_cqes;
> +	s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
> +	s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
> +	s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
> +	s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
> +	s->rx_page_reuse  += rq_stats->page_reuse;
> +	s->rx_cache_reuse += rq_stats->cache_reuse;
> +	s->rx_cache_full  += rq_stats->cache_full;
> +	s->rx_cache_empty += rq_stats->cache_empty;
> +	s->rx_cache_busy  += rq_stats->cache_busy;
> +	s->rx_cache_waive += rq_stats->cache_waive;
> +	s->rx_congst_umr  += rq_stats->congst_umr;
> +	s->rx_arfs_err    += rq_stats->arfs_err;
> +	s->ch_events      += ch_stats->events;
> +	s->ch_poll        += ch_stats->poll;
> +	s->ch_arm         += ch_stats->arm;
> +	s->ch_aff_change  += ch_stats->aff_change;
> +	s->ch_eq_rearm    += ch_stats->eq_rearm;
> +	/* xdp redirect */
> +	s->tx_xdp_xmit    += xdpsq_red_stats->xmit;
> +	s->tx_xdp_full    += xdpsq_red_stats->full;
> +	s->tx_xdp_err     += xdpsq_red_stats->err;
> +	s->tx_xdp_cqes    += xdpsq_red_stats->cqes;
> +
> +	for (j = 0; j < priv->max_opened_tc; j++) {
> +		struct mlx5e_sq_stats *sq_stats = &channel_stats-
> >sq[j];
> +
> +		s->tx_packets		+= sq_stats->packets;
> +		s->tx_bytes		+= sq_stats->bytes;
> +		s->tx_tso_packets	+= sq_stats->tso_packets;
> +		s->tx_tso_bytes		+= sq_stats->tso_bytes;
> +		s->tx_tso_inner_packets	+= sq_stats-
> >tso_inner_packets;
> +		s->tx_tso_inner_bytes	+= sq_stats->tso_inner_bytes;
> +		s->tx_added_vlan_packets += sq_stats-
> >added_vlan_packets;
> +		s->tx_nop               += sq_stats->nop;
> +		s->tx_queue_stopped	+= sq_stats->stopped;
> +		s->tx_queue_wake	+= sq_stats->wake;
> +		s->tx_udp_seg_rem	+= sq_stats->udp_seg_rem;
> +		s->tx_queue_dropped	+= sq_stats->dropped;
> +		s->tx_cqe_err		+= sq_stats->cqe_err;
> +		s->tx_recover		+= sq_stats->recover;
> +		s->tx_xmit_more		+= sq_stats->xmit_more;
> +		s->tx_csum_partial_inner += sq_stats-
> >csum_partial_inner;
> +		s->tx_csum_none		+= sq_stats->csum_none;
> +		s->tx_csum_partial	+= sq_stats->csum_partial;
> +#ifdef CONFIG_MLX5_EN_TLS
> +		s->tx_tls_ooo		+= sq_stats->tls_ooo;
> +		s->tx_tls_resync_bytes	+= sq_stats-
> >tls_resync_bytes;
> +#endif
> +		s->tx_cqes		+= sq_stats->cqes;
> +	}
> +}
> +
>  void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
>  {
> -	struct mlx5e_sw_stats temp, *s = &temp;

temp will be mem copied to priv->stats.sw at the end,
memcpy(&priv->stats.sw, &s, sizeof(s)); 

one other way to solve this as suggested by Andrew, is to get rid of
the temp var and make it point directly to priv->stats.sw 


> +	struct mlx5e_sw_stats s;
>  	int i;
>  
> -	memset(s, 0, sizeof(*s));
> -
> -	for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev);
> i++) {
> -		struct mlx5e_channel_stats *channel_stats =
> -			&priv->channel_stats[i];
> -		struct mlx5e_xdpsq_stats *xdpsq_red_stats =
> &channel_stats->xdpsq;
> -		struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats-
> >rq_xdpsq;
> -		struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
> -		struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
> -		int j;
> -
> -		s->rx_packets	+= rq_stats->packets;
> -		s->rx_bytes	+= rq_stats->bytes;
> -		s->rx_lro_packets += rq_stats->lro_packets;
> -		s->rx_lro_bytes	+= rq_stats->lro_bytes;
> -		s->rx_ecn_mark	+= rq_stats->ecn_mark;
> -		s->rx_removed_vlan_packets += rq_stats-
> >removed_vlan_packets;
> -		s->rx_csum_none	+= rq_stats->csum_none;
> -		s->rx_csum_complete += rq_stats->csum_complete;
> -		s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
> -		s->rx_csum_unnecessary_inner += rq_stats-
> >csum_unnecessary_inner;
> -		s->rx_xdp_drop     += rq_stats->xdp_drop;
> -		s->rx_xdp_redirect += rq_stats->xdp_redirect;
> -		s->rx_xdp_tx_xmit  += xdpsq_stats->xmit;
> -		s->rx_xdp_tx_full  += xdpsq_stats->full;
> -		s->rx_xdp_tx_err   += xdpsq_stats->err;
> -		s->rx_xdp_tx_cqe   += xdpsq_stats->cqes;
> -		s->rx_wqe_err   += rq_stats->wqe_err;
> -		s->rx_mpwqe_filler_cqes    += rq_stats-
> >mpwqe_filler_cqes;
> -		s->rx_mpwqe_filler_strides += rq_stats-
> >mpwqe_filler_strides;
> -		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
> -		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
> -		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
> -		s->rx_page_reuse  += rq_stats->page_reuse;
> -		s->rx_cache_reuse += rq_stats->cache_reuse;
> -		s->rx_cache_full  += rq_stats->cache_full;
> -		s->rx_cache_empty += rq_stats->cache_empty;
> -		s->rx_cache_busy  += rq_stats->cache_busy;
> -		s->rx_cache_waive += rq_stats->cache_waive;
> -		s->rx_congst_umr  += rq_stats->congst_umr;
> -		s->rx_arfs_err    += rq_stats->arfs_err;
> -		s->ch_events      += ch_stats->events;
> -		s->ch_poll        += ch_stats->poll;
> -		s->ch_arm         += ch_stats->arm;
> -		s->ch_aff_change  += ch_stats->aff_change;
> -		s->ch_eq_rearm    += ch_stats->eq_rearm;
> -		/* xdp redirect */
> -		s->tx_xdp_xmit    += xdpsq_red_stats->xmit;
> -		s->tx_xdp_full    += xdpsq_red_stats->full;
> -		s->tx_xdp_err     += xdpsq_red_stats->err;
> -		s->tx_xdp_cqes    += xdpsq_red_stats->cqes;
> -
> -		for (j = 0; j < priv->max_opened_tc; j++) {
> -			struct mlx5e_sq_stats *sq_stats =
> &channel_stats->sq[j];
> -
> -			s->tx_packets		+= sq_stats->packets;
> -			s->tx_bytes		+= sq_stats->bytes;
> -			s->tx_tso_packets	+= sq_stats->tso_packets;
> -			s->tx_tso_bytes		+= sq_stats-
> >tso_bytes;
> -			s->tx_tso_inner_packets	+= sq_stats-
> >tso_inner_packets;
> -			s->tx_tso_inner_bytes	+= sq_stats-
> >tso_inner_bytes;
> -			s->tx_added_vlan_packets += sq_stats-
> >added_vlan_packets;
> -			s->tx_nop               += sq_stats->nop;
> -			s->tx_queue_stopped	+= sq_stats->stopped;
> -			s->tx_queue_wake	+= sq_stats->wake;
> -			s->tx_udp_seg_rem	+= sq_stats->udp_seg_rem;
> -			s->tx_queue_dropped	+= sq_stats->dropped;
> -			s->tx_cqe_err		+= sq_stats->cqe_err;
> -			s->tx_recover		+= sq_stats->recover;
> -			s->tx_xmit_more		+= sq_stats-
> >xmit_more;
> -			s->tx_csum_partial_inner += sq_stats-
> >csum_partial_inner;
> -			s->tx_csum_none		+= sq_stats-
> >csum_none;
> -			s->tx_csum_partial	+= sq_stats-
> >csum_partial;
> -#ifdef CONFIG_MLX5_EN_TLS
> -			s->tx_tls_ooo		+= sq_stats->tls_ooo;
> -			s->tx_tls_resync_bytes	+= sq_stats-
> >tls_resync_bytes;
> -#endif
> -			s->tx_cqes		+= sq_stats->cqes;
> -		}
> -	}
> +	memset(&s, 0, sizeof(s));
> +
> +	for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev);
> i++)
> +		mlx5e_grp_sw_collect_stat(priv, &s, i);
>  
> -	memcpy(&priv->stats.sw, s, sizeof(*s));
> +	memcpy(&priv->stats.sw, &s, sizeof(s));
>  }
>  
>  static const struct counter_desc q_stats_desc[] = {

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 21:05 ` Saeed Mahameed
@ 2018-11-02 21:39   ` Eric Dumazet
  2018-11-02 22:07     ` Saeed Mahameed
  0 siblings, 1 reply; 12+ messages in thread
From: Eric Dumazet @ 2018-11-02 21:39 UTC (permalink / raw)
  To: Saeed Mahameed, davem, arnd, leon
  Cc: linux-kernel, Moshe Shemesh, linux-rdma, Boris Pismenny,
	Tariq Toukan, akpm, Eran Ben Elisha, netdev, Ilya Lesokhin,
	Kamal Heib



On 11/02/2018 02:05 PM, Saeed Mahameed wrote:
 
> temp will be mem copied to priv->stats.sw at the end,
> memcpy(&priv->stats.sw, &s, sizeof(s)); 
> 
> one other way to solve this as suggested by Andrew, is to get rid of
> the temp var and make it point directly to priv->stats.sw 
> 

What about concurrency ?

This temp variable is there to make sure concurrent readers of stats might
not see mangled data (because another 'reader' just did a memset() and is doing the folding)


mlx5e_get_stats() can definitely be run at the same time by multiple threads.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 21:39   ` Eric Dumazet
@ 2018-11-02 22:07     ` Saeed Mahameed
  2018-11-02 22:15       ` Arnd Bergmann
  2018-11-02 22:15       ` Jason Gunthorpe
  0 siblings, 2 replies; 12+ messages in thread
From: Saeed Mahameed @ 2018-11-02 22:07 UTC (permalink / raw)
  To: eric.dumazet, davem, arnd, leon
  Cc: linux-kernel, Moshe Shemesh, linux-rdma, Boris Pismenny,
	Tariq Toukan, akpm, Eran Ben Elisha, netdev, Ilya Lesokhin,
	Kamal Heib

On Fri, 2018-11-02 at 14:39 -0700, Eric Dumazet wrote:
> 
> On 11/02/2018 02:05 PM, Saeed Mahameed wrote:
>  
> > temp will be mem copied to priv->stats.sw at the end,
> > memcpy(&priv->stats.sw, &s, sizeof(s)); 
> > 
> > one other way to solve this as suggested by Andrew, is to get rid
> > of
> > the temp var and make it point directly to priv->stats.sw 
> > 
> 
> What about concurrency ?
> 
> This temp variable is there to make sure concurrent readers of stats
> might
> not see mangle data (because another 'reader' just did a memset() and
> is doing the folding)
> 
> 
> mlx5e_get_stats() can definitely be run at the same time by multiple
> threads.
> 

Hmm, you are right, I was thinking that mlx5e_get_stats will trigger a
work to update stats and grab the state_lock, but for sw stats this is
not the case it is done in place.

BTW memcpy itself is not thread safe.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 22:07     ` Saeed Mahameed
@ 2018-11-02 22:15       ` Arnd Bergmann
  2018-11-03  0:52         ` Saeed Mahameed
  2018-11-02 22:15       ` Jason Gunthorpe
  1 sibling, 1 reply; 12+ messages in thread
From: Arnd Bergmann @ 2018-11-02 22:15 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: eric.dumazet, davem, leon, linux-kernel, Moshe Shemesh,
	linux-rdma, Boris Pismenny, Tariq Toukan, akpm, Eran Ben Elisha,
	netdev, Ilya Lesokhin, Kamal Heib

On 11/2/18, Saeed Mahameed <saeedm@mellanox.com> wrote:
> On Fri, 2018-11-02 at 14:39 -0700, Eric Dumazet wrote:
>>
>> On 11/02/2018 02:05 PM, Saeed Mahameed wrote:
>>
>> > temp will be mem copied to priv->stats.sw at the end,
>> > memcpy(&priv->stats.sw, &s, sizeof(s));
>> >
>> > one other way to solve this as suggested by Andrew, is to get rid
>> > of
>> > the temp var and make it point directly to priv->stats.sw
>> >
>>
>> What about concurrency ?
>>
>> This temp variable is there to make sure concurrent readers of stats
>> might
>> not see mangle data (because another 'reader' just did a memset() and
>> is doing the folding)
>>
>>
>> mlx5e_get_stats() can definitely be run at the same time by multiple
>> threads.
>>
>
> hmm, you are right, i was thinking that mlx5e_get_Stats will trigger a
> work to update stats and grab the state_lock, but for sw stats this is
> not the case it is done in place.
>
> BTW memcpy itself is not thread safe.

Before commit 6c63efe4cfab ("net/mlx5e: Remove redundant active_channels
indication"), there was a read_lock() in the function apparently intended to
made it thread safe. This got removed with the comment

commit 6c63efe4cfabf230a8ed4b1d880249875ffdac13
Author: Eran Ben Elisha <eranbe@mellanox.com>
Date:   Tue May 29 11:06:31 2018 +0300

    net/mlx5e: Remove redundant active_channels indication

    Now, when all channels stats are saved regardless of the channel's state
    {open, closed}, we can safely remove this indication and the stats spin
    lock which protects it.

    Fixes: 76c3810bade3 ("net/mlx5e: Avoid reset netdev stats on
configuration changes")

I don't really understand the reasoning, but maybe we can remove
the memcpy() if the code is thread safe, or we need the lock back if it's not.

         Arnd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 22:07     ` Saeed Mahameed
  2018-11-02 22:15       ` Arnd Bergmann
@ 2018-11-02 22:15       ` Jason Gunthorpe
  2018-11-03  0:37         ` Saeed Mahameed
  1 sibling, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2018-11-02 22:15 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: eric.dumazet, davem, arnd, leon, linux-kernel, Moshe Shemesh,
	linux-rdma, Boris Pismenny, Tariq Toukan, akpm, Eran Ben Elisha,
	netdev, Ilya Lesokhin, Kamal Heib

On Fri, Nov 02, 2018 at 10:07:02PM +0000, Saeed Mahameed wrote:
> On Fri, 2018-11-02 at 14:39 -0700, Eric Dumazet wrote:
> > 
> > On 11/02/2018 02:05 PM, Saeed Mahameed wrote:
> >  
> > > temp will be mem copied to priv->stats.sw at the end,
> > > memcpy(&priv->stats.sw, &s, sizeof(s)); 
> > > 
> > > one other way to solve this as suggested by Andrew, is to get rid
> > > of
> > > the temp var and make it point directly to priv->stats.sw 
> > > 
> > 
> > What about concurrency ?
> > 
> > This temp variable is there to make sure concurrent readers of stats
> > might
> > not see mangle data (because another 'reader' just did a memset() and
> > is doing the folding)
> > 
> > 
> > mlx5e_get_stats() can definitely be run at the same time by multiple
> > threads.
> > 
> 
> hmm, you are right, i was thinking that mlx5e_get_Stats will trigger a
> work to update stats and grab the state_lock, but for sw stats this is
> not the case it is done in place.

That was my guess when I saw this.. the confusing bit is why is there
s and temp, why not just s?

> BTW memcpy itself is not thread safe.

At least on 64 bit memcpy will do > 8 byte stores when copying so on
most architectures it will cause individual new or old u64 to be
returned and not a mess..

32 bit will always make a mess.

If the stats don't update that often then kmalloc'ing a new buffer and
RCU'ing it into view might be a reasonable alternative to this?

Jason

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 22:15       ` Jason Gunthorpe
@ 2018-11-03  0:37         ` Saeed Mahameed
  0 siblings, 0 replies; 12+ messages in thread
From: Saeed Mahameed @ 2018-11-03  0:37 UTC (permalink / raw)
  To: jgg
  Cc: linux-kernel, Moshe Shemesh, linux-rdma, Boris Pismenny,
	Tariq Toukan, akpm, Eran Ben Elisha, netdev, Ilya Lesokhin,
	eric.dumazet, davem, arnd, leon, Kamal Heib

On Fri, 2018-11-02 at 16:15 -0600, Jason Gunthorpe wrote:
> On Fri, Nov 02, 2018 at 10:07:02PM +0000, Saeed Mahameed wrote:
> > On Fri, 2018-11-02 at 14:39 -0700, Eric Dumazet wrote:
> > > 
> > > On 11/02/2018 02:05 PM, Saeed Mahameed wrote:
> > >  
> > > > temp will be mem copied to priv->stats.sw at the end,
> > > > memcpy(&priv->stats.sw, &s, sizeof(s)); 
> > > > 
> > > > one other way to solve this as suggested by Andrew, is to get
> > > > rid
> > > > of
> > > > the temp var and make it point directly to priv->stats.sw 
> > > > 
> > > 
> > > What about concurrency ?
> > > 
> > > This temp variable is there to make sure concurrent readers of
> > > stats
> > > might
> > > not see mangle data (because another 'reader' just did a memset()
> > > and
> > > is doing the folding)
> > > 
> > > 
> > > mlx5e_get_stats() can definitely be run at the same time by
> > > multiple
> > > threads.
> > > 
> > 
> > hmm, you are right, i was thinking that mlx5e_get_Stats will
> > trigger a
> > work to update stats and grab the state_lock, but for sw stats this
> > is
> > not the case it is done in place.
> 
> That was my guess when I saw this.. the confusing bit is why is there
> s and temp, why not just s?
> 

Silly cut and paste from mlx4!

> > BTW memcpy itself is not thread safe.
> 
> At least on 64 bit memcpy will do > 8 byte stores when copying so on
> most architectures it will cause individual new or old u64 to be
> returned and not a mess..
> 
> 32 bit will always make a mess.
> 
> If the stats don't update that often then kmalloc'ing a new buffer
> and
> RCU'ing it into view might be a reasonable alternative to this?
> 

kmalloc is not an option, stats update too often, priv->stats.sw is
already a temp, it is needed only to accumulate channels stats into it
and then report them to ethtool buffer or ndo_get_stats , both will
copy the priv->stats.sw to their own buffers, so there can only be two
use cases (two threads), maybe it is best if we maintain two
priv->stats.sw, one for each case, and use it as a temp for each thread.

> Jason

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 22:15       ` Arnd Bergmann
@ 2018-11-03  0:52         ` Saeed Mahameed
  0 siblings, 0 replies; 12+ messages in thread
From: Saeed Mahameed @ 2018-11-03  0:52 UTC (permalink / raw)
  To: arnd
  Cc: linux-kernel, Moshe Shemesh, linux-rdma, Boris Pismenny,
	Tariq Toukan, akpm, Eran Ben Elisha, netdev, Ilya Lesokhin,
	eric.dumazet, davem, leon, Kamal Heib

On Fri, 2018-11-02 at 23:15 +0100, Arnd Bergmann wrote:
> On 11/2/18, Saeed Mahameed <saeedm@mellanox.com> wrote:
> > On Fri, 2018-11-02 at 14:39 -0700, Eric Dumazet wrote:
> > > 
> > > On 11/02/2018 02:05 PM, Saeed Mahameed wrote:
> > > 
> > > > temp will be mem copied to priv->stats.sw at the end,
> > > > memcpy(&priv->stats.sw, &s, sizeof(s));
> > > > 
> > > > one other way to solve this as suggested by Andrew, is to get
> > > > rid
> > > > of
> > > > the temp var and make it point directly to priv->stats.sw
> > > > 
> > > 
> > > What about concurrency ?
> > > 
> > > This temp variable is there to make sure concurrent readers of
> > > stats
> > > might
> > > not see mangled data (because another 'reader' just did a memset()
> > > and
> > > is doing the folding)
> > > 
> > > 
> > > mlx5e_get_stats() can definitely be run at the same time by
> > > multiple
> > > threads.
> > > 
> > 
> > hmm, you are right, i was thinking that mlx5e_get_stats will
> > trigger a
> > work to update stats and grab the state_lock, but for sw stats this
> > is
> > not the case it is done in place.
> > 
> > BTW memcpy itself is not thread safe.
> 
> Before commit 6c63efe4cfab ("net/mlx5e: Remove redundant
> active_channels
> indication"), there was a read_lock() in the function apparently
> intended to
> made it thread safe. This got removed with the comment
> 
> commit 6c63efe4cfabf230a8ed4b1d880249875ffdac13
> Author: Eran Ben Elisha <eranbe@mellanox.com>
> Date:   Tue May 29 11:06:31 2018 +0300
> 
>     net/mlx5e: Remove redundant active_channels indication
> 
>     Now, when all channels stats are saved regardless of the
> channel's state
>     {open, closed}, we can safely remove this indication and the
> stats spin
>     lock which protects it.
> 
>     Fixes: 76c3810bade3 ("net/mlx5e: Avoid reset netdev stats on
> configuration changes")
> 
> I don't really understand the reasoning, but maybe we can remove
> the memcpy() if the code is thread safe, or we need the lock back if
> it's not.
> 

this lock was needed for a whole different purpose, it wasn't meant to
synchronize between two reader threads, it was meant to synchronize
between driver restarts and the reader for loop which ran over the open
channels, while they could be going through a destruction process.

I think all we need is to maintain two priv->stats.sw copies and use
them as a temp for each reader thread, since we can only have two
concurrent readers (mlx5e_ethtool_get_ethtool_stats and ndo_get_stats64) ..

>          Arnd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] net/mlx5e: fix high stack usage
  2018-11-02 15:33 [PATCH] net/mlx5e: fix high stack usage Arnd Bergmann
  2018-11-02 16:08 ` Jason Gunthorpe
  2018-11-02 21:05 ` Saeed Mahameed
@ 2018-11-05 11:24 ` David Laight
  2 siblings, 0 replies; 12+ messages in thread
From: David Laight @ 2018-11-05 11:24 UTC (permalink / raw)
  To: 'Arnd Bergmann',
	Saeed Mahameed, Leon Romanovsky, David S. Miller
  Cc: Tariq Toukan, Eran Ben Elisha, Boris Pismenny, Ilya Lesokhin,
	Moshe Shemesh, Kamal Heib, netdev, linux-rdma, linux-kernel

From: Arnd Bergmann
> Sent: 02 November 2018 15:33
> 
> A patch that looks harmless causes the stack usage of the mlx5e_grp_sw_update_stats()
> function to drastically increase with x86 gcc-4.9 and higher (tested up to 8.1):
> 
> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c: In function ‘mlx5e_grp_sw_update_stats’:
> drivers/net/ethernet/mellanox/mlx5/core/en_stats.c:216:1: warning: the frame size of 1276 bytes is
> larger than 500 bytes [-Wframe-larger-than=]
> 
> By splitting out the loop body into a non-inlined function, the stack size goes
> back down to under 500 bytes.

I'd look at the generated code for the function.
It might be truly horrid.

I suspect that gcc allocates 'virtual registers' for all the
s->tx_... members and then writes them to 's' outside the loop.
Unfortunately there aren't enough real registers so all the
virtual ones get spilled to stack.

I've seen it do something similar (extra spills to stack) in the
ixgbe byte and packet counting.

I think it is really a gcc bug.

...
> +	for (j = 0; j < priv->max_opened_tc; j++) {
> +		struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];

Try adding barrier() here.

> +
> +		s->tx_packets		+= sq_stats->packets;

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2018-11-05 11:24 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-02 15:33 [PATCH] net/mlx5e: fix high stack usage Arnd Bergmann
2018-11-02 16:08 ` Jason Gunthorpe
2018-11-02 16:23   ` Arnd Bergmann
2018-11-02 17:00     ` Jason Gunthorpe
2018-11-02 21:05 ` Saeed Mahameed
2018-11-02 21:39   ` Eric Dumazet
2018-11-02 22:07     ` Saeed Mahameed
2018-11-02 22:15       ` Arnd Bergmann
2018-11-03  0:52         ` Saeed Mahameed
2018-11-02 22:15       ` Jason Gunthorpe
2018-11-03  0:37         ` Saeed Mahameed
2018-11-05 11:24 ` David Laight

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).