linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] [net-next] mlx4: avoid large stack usage in mlx4_init_hca()
@ 2019-07-22 15:01 Arnd Bergmann
  2019-07-23 22:09 ` Saeed Mahameed
  2019-07-24 22:47 ` David Miller
  0 siblings, 2 replies; 3+ messages in thread
From: Arnd Bergmann @ 2019-07-22 15:01 UTC (permalink / raw)
  To: Tariq Toukan, David S. Miller
  Cc: Arnd Bergmann, Erez Alfasi, Jack Morgenstein, Eli Cohen,
	Moshe Shemesh, Jiri Pirko, netdev, linux-rdma, linux-kernel,
	clang-built-linux

The mlx4_dev_cap and mlx4_init_hca_param are really too large
to be put on the kernel stack, as shown by this clang warning:

drivers/net/ethernet/mellanox/mlx4/main.c:3304:12: error: stack frame size of 1088 bytes in function 'mlx4_load_one' [-Werror,-Wframe-larger-than=]

With gcc, the problem is the same, but it does not warn because
it does not inline this function, and therefore stays just below
the warning limit, while clang is just above it.

Use kzalloc for dynamic allocation instead of putting them
on stack. This gets the combined stack frame down to 424 bytes.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 66 +++++++++++++----------
 1 file changed, 39 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 1f6e16d5ea6b..07c204bd3fc4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2292,23 +2292,31 @@ static int mlx4_init_fw(struct mlx4_dev *dev)
 static int mlx4_init_hca(struct mlx4_dev *dev)
 {
 	struct mlx4_priv	  *priv = mlx4_priv(dev);
+	struct mlx4_init_hca_param *init_hca = NULL;
+	struct mlx4_dev_cap	  *dev_cap = NULL;
 	struct mlx4_adapter	   adapter;
-	struct mlx4_dev_cap	   dev_cap;
 	struct mlx4_profile	   profile;
-	struct mlx4_init_hca_param init_hca;
 	u64 icm_size;
 	struct mlx4_config_dev_params params;
 	int err;
 
 	if (!mlx4_is_slave(dev)) {
-		err = mlx4_dev_cap(dev, &dev_cap);
+		dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
+		init_hca = kzalloc(sizeof(*init_hca), GFP_KERNEL);
+
+		if (!dev_cap || !init_hca) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+
+		err = mlx4_dev_cap(dev, dev_cap);
 		if (err) {
 			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
-			return err;
+			goto out_free;
 		}
 
-		choose_steering_mode(dev, &dev_cap);
-		choose_tunnel_offload_mode(dev, &dev_cap);
+		choose_steering_mode(dev, dev_cap);
+		choose_tunnel_offload_mode(dev, dev_cap);
 
 		if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
 		    mlx4_is_master(dev))
@@ -2331,48 +2339,48 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 		    MLX4_STEERING_MODE_DEVICE_MANAGED)
 			profile.num_mcg = MLX4_FS_NUM_MCG;
 
-		icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
-					     &init_hca);
+		icm_size = mlx4_make_profile(dev, &profile, dev_cap,
+					     init_hca);
 		if ((long long) icm_size < 0) {
 			err = icm_size;
-			return err;
+			goto out_free;
 		}
 
 		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
 
 		if (enable_4k_uar || !dev->persist->num_vfs) {
-			init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
+			init_hca->log_uar_sz = ilog2(dev->caps.num_uars) +
 						    PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
-			init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
+			init_hca->uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
 		} else {
-			init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
-			init_hca.uar_page_sz = PAGE_SHIFT - 12;
+			init_hca->log_uar_sz = ilog2(dev->caps.num_uars);
+			init_hca->uar_page_sz = PAGE_SHIFT - 12;
 		}
 
-		init_hca.mw_enabled = 0;
+		init_hca->mw_enabled = 0;
 		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
 		    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
-			init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
+			init_hca->mw_enabled = INIT_HCA_TPT_MW_ENABLE;
 
-		err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
+		err = mlx4_init_icm(dev, dev_cap, init_hca, icm_size);
 		if (err)
-			return err;
+			goto out_free;
 
-		err = mlx4_INIT_HCA(dev, &init_hca);
+		err = mlx4_INIT_HCA(dev, init_hca);
 		if (err) {
 			mlx4_err(dev, "INIT_HCA command failed, aborting\n");
 			goto err_free_icm;
 		}
 
-		if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
-			err = mlx4_query_func(dev, &dev_cap);
+		if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+			err = mlx4_query_func(dev, dev_cap);
 			if (err < 0) {
 				mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
 				goto err_close;
 			} else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
-				dev->caps.num_eqs = dev_cap.max_eqs;
-				dev->caps.reserved_eqs = dev_cap.reserved_eqs;
-				dev->caps.reserved_uars = dev_cap.reserved_uars;
+				dev->caps.num_eqs = dev_cap->max_eqs;
+				dev->caps.reserved_eqs = dev_cap->reserved_eqs;
+				dev->caps.reserved_uars = dev_cap->reserved_uars;
 			}
 		}
 
@@ -2381,14 +2389,13 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 		 * read HCA frequency by QUERY_HCA command
 		 */
 		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
-			memset(&init_hca, 0, sizeof(init_hca));
-			err = mlx4_QUERY_HCA(dev, &init_hca);
+			err = mlx4_QUERY_HCA(dev, init_hca);
 			if (err) {
 				mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
 				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
 			} else {
 				dev->caps.hca_core_clock =
-					init_hca.hca_core_clock;
+					init_hca->hca_core_clock;
 			}
 
 			/* In case we got HCA frequency 0 - disable timestamping
@@ -2464,7 +2471,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 	priv->eq_table.inta_pin = adapter.inta_pin;
 	memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
 
-	return 0;
+	err = 0;
+	goto out_free;
 
 unmap_bf:
 	unmap_internal_clock(dev);
@@ -2483,6 +2491,10 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 	if (!mlx4_is_slave(dev))
 		mlx4_free_icms(dev);
 
+out_free:
+	kfree(dev_cap);
+	kfree(init_hca);
+
 	return err;
 }
 
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next] [net-next] mlx4: avoid large stack usage in mlx4_init_hca()
  2019-07-22 15:01 [PATCH net-next] [net-next] mlx4: avoid large stack usage in mlx4_init_hca() Arnd Bergmann
@ 2019-07-23 22:09 ` Saeed Mahameed
  2019-07-24 22:47 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: Saeed Mahameed @ 2019-07-23 22:09 UTC (permalink / raw)
  To: davem, Tariq Toukan, arnd
  Cc: linux-rdma, jackm, Erez Alfasi, Moshe Shemesh, Eli Cohen,
	linux-kernel, Jiri Pirko, netdev, clang-built-linux

On Mon, 2019-07-22 at 17:01 +0200, Arnd Bergmann wrote:
> The mlx4_dev_cap and mlx4_init_hca_param are really too large
> to be put on the kernel stack, as shown by this clang warning:
> 
> drivers/net/ethernet/mellanox/mlx4/main.c:3304:12: error: stack frame
> size of 1088 bytes in function 'mlx4_load_one' [-Werror,-Wframe-
> larger-than=]
> 
> With gcc, the problem is the same, but it does not warn because
> it does not inline this function, and therefore stays just below
> the warning limit, while clang is just above it.
> 
> Use kzalloc for dynamic allocation instead of putting them
> on stack. This gets the combined stack frame down to 424 bytes.
> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>

> ---
>  drivers/net/ethernet/mellanox/mlx4/main.c | 66 +++++++++++++------
> ----
>  1 file changed, 39 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c
> b/drivers/net/ethernet/mellanox/mlx4/main.c
> index 1f6e16d5ea6b..07c204bd3fc4 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c
> @@ -2292,23 +2292,31 @@ static int mlx4_init_fw(struct mlx4_dev *dev)
>  static int mlx4_init_hca(struct mlx4_dev *dev)
>  {
>  	struct mlx4_priv	  *priv = mlx4_priv(dev);
> +	struct mlx4_init_hca_param *init_hca = NULL;
> +	struct mlx4_dev_cap	  *dev_cap = NULL;
>  	struct mlx4_adapter	   adapter;
> -	struct mlx4_dev_cap	   dev_cap;
>  	struct mlx4_profile	   profile;
> -	struct mlx4_init_hca_param init_hca;
>  	u64 icm_size;
>  	struct mlx4_config_dev_params params;
>  	int err;
>  
>  	if (!mlx4_is_slave(dev)) {
> -		err = mlx4_dev_cap(dev, &dev_cap);
> +		dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
> +		init_hca = kzalloc(sizeof(*init_hca), GFP_KERNEL);
> +
> +		if (!dev_cap || !init_hca) {
> +			err = -ENOMEM;
> +			goto out_free;
> +		}
> +
> +		err = mlx4_dev_cap(dev, dev_cap);
>  		if (err) {
>  			mlx4_err(dev, "QUERY_DEV_CAP command failed,
> aborting\n");
> -			return err;
> +			goto out_free;
>  		}
>  
> -		choose_steering_mode(dev, &dev_cap);
> -		choose_tunnel_offload_mode(dev, &dev_cap);
> +		choose_steering_mode(dev, dev_cap);
> +		choose_tunnel_offload_mode(dev, dev_cap);
>  
>  		if (dev->caps.dmfs_high_steer_mode ==
> MLX4_STEERING_DMFS_A0_STATIC &&
>  		    mlx4_is_master(dev))
> @@ -2331,48 +2339,48 @@ static int mlx4_init_hca(struct mlx4_dev
> *dev)
>  		    MLX4_STEERING_MODE_DEVICE_MANAGED)
>  			profile.num_mcg = MLX4_FS_NUM_MCG;
>  
> -		icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
> -					     &init_hca);
> +		icm_size = mlx4_make_profile(dev, &profile, dev_cap,
> +					     init_hca);
>  		if ((long long) icm_size < 0) {
>  			err = icm_size;
> -			return err;
> +			goto out_free;
>  		}
>  
>  		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev-
> >caps.num_mpts))) - 1;
>  
>  		if (enable_4k_uar || !dev->persist->num_vfs) {
> -			init_hca.log_uar_sz = ilog2(dev->caps.num_uars) 
> +
> +			init_hca->log_uar_sz = ilog2(dev-
> >caps.num_uars) +
>  						    PAGE_SHIFT -
> DEFAULT_UAR_PAGE_SHIFT;
> -			init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT -
> 12;
> +			init_hca->uar_page_sz = DEFAULT_UAR_PAGE_SHIFT
> - 12;
>  		} else {
> -			init_hca.log_uar_sz = ilog2(dev-
> >caps.num_uars);
> -			init_hca.uar_page_sz = PAGE_SHIFT - 12;
> +			init_hca->log_uar_sz = ilog2(dev-
> >caps.num_uars);
> +			init_hca->uar_page_sz = PAGE_SHIFT - 12;
>  		}
>  
> -		init_hca.mw_enabled = 0;
> +		init_hca->mw_enabled = 0;
>  		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
>  		    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
> -			init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
> +			init_hca->mw_enabled = INIT_HCA_TPT_MW_ENABLE;
>  
> -		err = mlx4_init_icm(dev, &dev_cap, &init_hca,
> icm_size);
> +		err = mlx4_init_icm(dev, dev_cap, init_hca, icm_size);
>  		if (err)
> -			return err;
> +			goto out_free;
>  
> -		err = mlx4_INIT_HCA(dev, &init_hca);
> +		err = mlx4_INIT_HCA(dev, init_hca);
>  		if (err) {
>  			mlx4_err(dev, "INIT_HCA command failed,
> aborting\n");
>  			goto err_free_icm;
>  		}
>  
> -		if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
> -			err = mlx4_query_func(dev, &dev_cap);
> +		if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
> +			err = mlx4_query_func(dev, dev_cap);
>  			if (err < 0) {
>  				mlx4_err(dev, "QUERY_FUNC command
> failed, aborting.\n");
>  				goto err_close;
>  			} else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
> -				dev->caps.num_eqs = dev_cap.max_eqs;
> -				dev->caps.reserved_eqs =
> dev_cap.reserved_eqs;
> -				dev->caps.reserved_uars =
> dev_cap.reserved_uars;
> +				dev->caps.num_eqs = dev_cap->max_eqs;
> +				dev->caps.reserved_eqs = dev_cap-
> >reserved_eqs;
> +				dev->caps.reserved_uars = dev_cap-
> >reserved_uars;
>  			}
>  		}
>  
> @@ -2381,14 +2389,13 @@ static int mlx4_init_hca(struct mlx4_dev
> *dev)
>  		 * read HCA frequency by QUERY_HCA command
>  		 */
>  		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
> -			memset(&init_hca, 0, sizeof(init_hca));
> -			err = mlx4_QUERY_HCA(dev, &init_hca);
> +			err = mlx4_QUERY_HCA(dev, init_hca);
>  			if (err) {
>  				mlx4_err(dev, "QUERY_HCA command
> failed, disable timestamp\n");
>  				dev->caps.flags2 &=
> ~MLX4_DEV_CAP_FLAG2_TS;
>  			} else {
>  				dev->caps.hca_core_clock =
> -					init_hca.hca_core_clock;
> +					init_hca->hca_core_clock;
>  			}
>  
>  			/* In case we got HCA frequency 0 - disable
> timestamping
> @@ -2464,7 +2471,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
>  	priv->eq_table.inta_pin = adapter.inta_pin;
>  	memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
>  
> -	return 0;
> +	err = 0;
> +	goto out_free;
>  
>  unmap_bf:
>  	unmap_internal_clock(dev);
> @@ -2483,6 +2491,10 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
>  	if (!mlx4_is_slave(dev))
>  		mlx4_free_icms(dev);
>  
> +out_free:
> +	kfree(dev_cap);
> +	kfree(init_hca);
> +
>  	return err;
>  }
>  

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next] [net-next] mlx4: avoid large stack usage in mlx4_init_hca()
  2019-07-22 15:01 [PATCH net-next] [net-next] mlx4: avoid large stack usage in mlx4_init_hca() Arnd Bergmann
  2019-07-23 22:09 ` Saeed Mahameed
@ 2019-07-24 22:47 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: David Miller @ 2019-07-24 22:47 UTC (permalink / raw)
  To: arnd
  Cc: tariqt, ereza, jackm, eli, moshe, jiri, netdev, linux-rdma,
	linux-kernel, clang-built-linux

From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Jul 2019 17:01:55 +0200

> The mlx4_dev_cap and mlx4_init_hca_param are really too large
> to be put on the kernel stack, as shown by this clang warning:
> 
> drivers/net/ethernet/mellanox/mlx4/main.c:3304:12: error: stack frame size of 1088 bytes in function 'mlx4_load_one' [-Werror,-Wframe-larger-than=]
> 
> With gcc, the problem is the same, but it does not warn because
> it does not inline this function, and therefore stays just below
> the warning limit, while clang is just above it.
> 
> Use kzalloc for dynamic allocation instead of putting them
> on stack. This gets the combined stack frame down to 424 bytes.
> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Applied.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-07-24 22:47 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-22 15:01 [PATCH net-next] [net-next] mlx4: avoid large stack usage in mlx4_init_hca() Arnd Bergmann
2019-07-23 22:09 ` Saeed Mahameed
2019-07-24 22:47 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).