linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH mlx5-next 0/2] Mellanox, mlx5 new device events
@ 2018-05-30  0:19 Saeed Mahameed
  2018-05-30  0:19 ` [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log Saeed Mahameed
  2018-05-30  0:19 ` [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event Saeed Mahameed
  0 siblings, 2 replies; 11+ messages in thread
From: Saeed Mahameed @ 2018-05-30  0:19 UTC (permalink / raw)
  To: netdev, linux-rdma; +Cc: Leon Romanovsky, Jason Gunthorpe, Saeed Mahameed

Hi, 

The following series is for the mlx5-next tree [1]. It adds support for two
new device events, from Ilan Tayari:

1. High temperature warnings.
2. FPGA QP error event.

In case of no objection this series will be applied to mlx5-next tree
and will be sent later as a pull request to both rdma and net trees.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux.git/log/?h=mlx5-next

Thanks,
Saeed.

Ilan Tayari (2):
  net/mlx5: Add temperature warning event to log
  net/mlx5: Add FPGA QP error event

 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 28 +++++++++++++++++++-
 include/linux/mlx5/device.h                  |  8 ++++++
 include/linux/mlx5/mlx5_ifc.h                |  3 ++-
 include/linux/mlx5/mlx5_ifc_fpga.h           | 16 +++++++++++
 4 files changed, 53 insertions(+), 2 deletions(-)

-- 
2.17.0

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log
  2018-05-30  0:19 [PATCH mlx5-next 0/2] Mellanox, mlx5 new device events Saeed Mahameed
@ 2018-05-30  0:19 ` Saeed Mahameed
  2018-05-30  1:04   ` Andrew Lunn
  2018-05-30  0:19 ` [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event Saeed Mahameed
  1 sibling, 1 reply; 11+ messages in thread
From: Saeed Mahameed @ 2018-05-30  0:19 UTC (permalink / raw)
  To: netdev, linux-rdma
  Cc: Leon Romanovsky, Jason Gunthorpe, Ilan Tayari, Adi Nissim,
	Saeed Mahameed

From: Ilan Tayari <ilant@mellanox.com>

Temperature warning event is sent by FW to indicate high temperature
as detected by one of the sensors on the board.
Add handling of this event by writing the numbers of the alert sensors
to the kernel log.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Adi Nissim <adin@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 23 ++++++++++++++++++++
 include/linux/mlx5/device.h                  |  7 ++++++
 include/linux/mlx5/mlx5_ifc.h                |  2 +-
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1c94974e16b..4bd4f011f0a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -141,6 +141,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_GPIO_EVENT";
 	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
 		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
 	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
 		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
 	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
@@ -393,6 +395,20 @@ static void general_event_handler(struct mlx5_core_dev *dev,
 	}
 }
 
+static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
+				    struct mlx5_eqe *eqe)
+{
+	u64 value_lsb;
+	u64 value_msb;
+
+	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+	mlx5_core_warn(dev,
+		       "High temperature on sensors with bit set %llx %llx",
+		       value_msb, value_lsb);
+}
+
 /* caller must eventually call mlx5_cq_put on the returned cq */
 static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
 {
@@ -547,6 +563,10 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
 			break;
 
+		case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+			mlx5_temp_warning_event(dev, eqe);
+			break;
+
 		case MLX5_EVENT_TYPE_GENERAL_EVENT:
 			general_event_handler(dev, eqe);
 			break;
@@ -824,6 +844,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
 
 
+	if (MLX5_CAP_GEN(dev, temp_warn_event))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 2bc27f8c5b87..eddacee5cf61 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -314,6 +314,7 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_PORT_CHANGE	   = 0x09,
 	MLX5_EVENT_TYPE_GPIO_EVENT	   = 0x15,
 	MLX5_EVENT_TYPE_PORT_MODULE_EVENT  = 0x16,
+	MLX5_EVENT_TYPE_TEMP_WARN_EVENT    = 0x17,
 	MLX5_EVENT_TYPE_REMOTE_CONFIG	   = 0x19,
 	MLX5_EVENT_TYPE_GENERAL_EVENT	   = 0x22,
 	MLX5_EVENT_TYPE_PPS_EVENT          = 0x25,
@@ -626,6 +627,11 @@ struct mlx5_eqe_dct {
 	__be32  dctn;
 };
 
+struct mlx5_eqe_temp_warning {
+	__be64 sensor_warning_msb;
+	__be64 sensor_warning_lsb;
+} __packed;
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -642,6 +648,7 @@ union ev_data {
 	struct mlx5_eqe_port_module	port_module;
 	struct mlx5_eqe_pps		pps;
 	struct mlx5_eqe_dct             dct;
+	struct mlx5_eqe_temp_warning	temp_warning;
 } __packed;
 
 struct mlx5_eqe {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 10c1613d9434..ba30c26aa6eb 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -926,7 +926,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_msg[0x5];
 	u8         reserved_at_1c8[0x4];
 	u8         max_tc[0x4];
-	u8         reserved_at_1d0[0x1];
+	u8         temp_warn_event[0x1];
 	u8         dcbx[0x1];
 	u8         general_notification_event[0x1];
 	u8         reserved_at_1d3[0x2];
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event
  2018-05-30  0:19 [PATCH mlx5-next 0/2] Mellanox, mlx5 new device events Saeed Mahameed
  2018-05-30  0:19 ` [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log Saeed Mahameed
@ 2018-05-30  0:19 ` Saeed Mahameed
  2018-05-30  1:07   ` Andrew Lunn
  1 sibling, 1 reply; 11+ messages in thread
From: Saeed Mahameed @ 2018-05-30  0:19 UTC (permalink / raw)
  To: netdev, linux-rdma
  Cc: Leon Romanovsky, Jason Gunthorpe, Ilan Tayari, Adi Nissim,
	Saeed Mahameed

From: Ilan Tayari <ilant@mellanox.com>

The FPGA QP event fires whenever a QP on the FPGA transitions
to the error state.

At this stage, this event is unrecoverable, it may become recoverable
in the future.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Adi Nissim <adin@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c |  7 +++++--
 include/linux/mlx5/device.h                  |  1 +
 include/linux/mlx5/mlx5_ifc.h                |  1 +
 include/linux/mlx5/mlx5_ifc_fpga.h           | 16 ++++++++++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 4bd4f011f0a9..77c685645c66 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -161,6 +161,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
 	case MLX5_EVENT_TYPE_FPGA_ERROR:
 		return "MLX5_EVENT_TYPE_FPGA_ERROR";
+	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
 	case MLX5_EVENT_TYPE_GENERAL_EVENT:
 		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
 	default:
@@ -560,6 +562,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			break;
 
 		case MLX5_EVENT_TYPE_FPGA_ERROR:
+		case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
 			mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
 			break;
 
@@ -839,11 +842,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
 
 	if (MLX5_CAP_GEN(dev, fpga))
-		async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR);
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
+				    (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
 	if (MLX5_CAP_GEN_MAX(dev, dct))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
 
-
 	if (MLX5_CAP_GEN(dev, temp_warn_event))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
 
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index eddacee5cf61..71e1dc2523a6 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -331,6 +331,7 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_DCT_DRAINED        = 0x1c,
 
 	MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
+	MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
 };
 
 enum {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ba30c26aa6eb..3e8845dc85fe 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -60,6 +60,7 @@ enum {
 	MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION        = 0xa,
 	MLX5_EVENT_TYPE_CODING_PAGE_REQUEST                        = 0xb,
 	MLX5_EVENT_TYPE_CODING_FPGA_ERROR                          = 0x20,
+	MLX5_EVENT_TYPE_CODING_FPGA_QP_ERROR                       = 0x21
 };
 
 enum {
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index ec052491ba3d..7ddca31fa05d 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -432,6 +432,22 @@ struct mlx5_ifc_ipsec_counters_bits {
 	u8         dropped_cmd[0x40];
 };
 
+enum {
+	MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RETRY_COUNTER_EXPIRED  = 0x1,
+	MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RNR_EXPIRED            = 0x2,
+};
+
+struct mlx5_ifc_fpga_qp_error_event_bits {
+	u8         reserved_at_0[0x40];
+
+	u8         reserved_at_40[0x18];
+	u8         syndrome[0x8];
+
+	u8         reserved_at_60[0x60];
+
+	u8         reserved_at_c0[0x8];
+	u8         fpga_qpn[0x18];
+};
 enum mlx5_ifc_fpga_ipsec_response_syndrome {
 	MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0,
 	MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log
  2018-05-30  0:19 ` [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log Saeed Mahameed
@ 2018-05-30  1:04   ` Andrew Lunn
  2018-05-30 15:08     ` Saeed Mahameed
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Lunn @ 2018-05-30  1:04 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: netdev, linux-rdma, Leon Romanovsky, Jason Gunthorpe,
	Ilan Tayari, Adi Nissim

On Tue, May 29, 2018 at 05:19:53PM -0700, Saeed Mahameed wrote:
> From: Ilan Tayari <ilant@mellanox.com>
> 
> Temperature warning event is sent by FW to indicate high temperature
> as detected by one of the sensors on the board.
> Add handling of this event by writing the numbers of the alert sensors
> to the kernel log.

Hi Saeed

Is the temperature itself available? If so, it would be better to
expose this as a hwmon device per temperature sensor.

       Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event
  2018-05-30  0:19 ` [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event Saeed Mahameed
@ 2018-05-30  1:07   ` Andrew Lunn
  2018-05-30 15:14     ` Saeed Mahameed
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Lunn @ 2018-05-30  1:07 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: netdev, linux-rdma, Leon Romanovsky, Jason Gunthorpe,
	Ilan Tayari, Adi Nissim

On Tue, May 29, 2018 at 05:19:54PM -0700, Saeed Mahameed wrote:
> From: Ilan Tayari <ilant@mellanox.com>
> 
> The FPGA QP event fires whenever a QP on the FPGA trasitions
> to the error state.

FPGA I know, field programmable gate array. Could you offer some clue
as to what QP means?

   Thanks
	Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log
  2018-05-30  1:04   ` Andrew Lunn
@ 2018-05-30 15:08     ` Saeed Mahameed
  2018-05-30 16:17       ` Andrew Lunn
  0 siblings, 1 reply; 11+ messages in thread
From: Saeed Mahameed @ 2018-05-30 15:08 UTC (permalink / raw)
  To: andrew
  Cc: Jason Gunthorpe, netdev, Ilan Tayari, linux-rdma,
	Leon Romanovsky, Adi Nissim

On Wed, 2018-05-30 at 03:04 +0200, Andrew Lunn wrote:
> On Tue, May 29, 2018 at 05:19:53PM -0700, Saeed Mahameed wrote:
> > From: Ilan Tayari <ilant@mellanox.com>
> > 
> > Temperature warning event is sent by FW to indicate high
> > temperature
> > as detected by one of the sensors on the board.
> > Add handling of this event by writing the numbers of the alert
> > sensors
> > to the kernel log.
> 
> Hi Saaed
> 
> Is the temperature itself available? If so, it would be better to
> expose this as a hwmon device per temperature sensor.
> 

Hi Andrew, yes the temperature is available by other means, this patch
is needed for alert information reasons in order to know which internal
sensors triggered the alarm.
We are working in parallel to expose temperature sensor to hwmon, but
this is still WIP.


Is it ok to have both ?

>        Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event
  2018-05-30  1:07   ` Andrew Lunn
@ 2018-05-30 15:14     ` Saeed Mahameed
  2018-05-30 16:21       ` Andrew Lunn
  0 siblings, 1 reply; 11+ messages in thread
From: Saeed Mahameed @ 2018-05-30 15:14 UTC (permalink / raw)
  To: andrew
  Cc: Jason Gunthorpe, netdev, Ilan Tayari, linux-rdma,
	Leon Romanovsky, Adi Nissim

On Wed, 2018-05-30 at 03:07 +0200, Andrew Lunn wrote:
> On Tue, May 29, 2018 at 05:19:54PM -0700, Saeed Mahameed wrote:
> > From: Ilan Tayari <ilant@mellanox.com>
> > 
> > The FPGA QP event fires whenever a QP on the FPGA trasitions
> > to the error state.
> 
> FPGA i know, field programmable gate array. Could you offer some clue
> as to what QP means?
> 

Hi Andrew, QP "Queue Pair" is a well-known rdma concept, widely used
in mlx drivers; it is used in the driver as a ring buffer to
communicate with the FPGA device.
 
>    Thanks
> 	Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log
  2018-05-30 15:08     ` Saeed Mahameed
@ 2018-05-30 16:17       ` Andrew Lunn
  2018-05-30 17:31         ` Saeed Mahameed
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Lunn @ 2018-05-30 16:17 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: Jason Gunthorpe, netdev, Ilan Tayari, linux-rdma,
	Leon Romanovsky, Adi Nissim

> Hi Andrew, yes the temperature is available by other means, this patch
> is needed for alert information reasons in order to know which internal
> sensors triggered the alarm.
> We are working in parallel to expose temperature sensor to hwmon, but
> this is still WIP.
> 
> 
> Is it ok to have both ?

Hi Saeed

Ideally no. hwmon has mechanisms for setting alarm thresholds, and
indicating the thresholds have been exceeded. There are also ways to
tie this to thermal zones, so the system can react on overheating,
slow down the CPU, drop voltages, ramp up fans, etc. hwmon should be
your primary interface, not dmesg.

But if you are stuck doing things in the wrong order, I guess it is
O.K. I don't think dmesg is a Binary API, so you can remove it later.

     Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event
  2018-05-30 15:14     ` Saeed Mahameed
@ 2018-05-30 16:21       ` Andrew Lunn
  2018-05-30 17:28         ` Saeed Mahameed
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Lunn @ 2018-05-30 16:21 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: Jason Gunthorpe, netdev, Ilan Tayari, linux-rdma,
	Leon Romanovsky, Adi Nissim

On Wed, May 30, 2018 at 03:14:20PM +0000, Saeed Mahameed wrote:
> On Wed, 2018-05-30 at 03:07 +0200, Andrew Lunn wrote:
> > On Tue, May 29, 2018 at 05:19:54PM -0700, Saeed Mahameed wrote:
> > > From: Ilan Tayari <ilant@mellanox.com>
> > > 
> > > The FPGA QP event fires whenever a QP on the FPGA trasitions
> > > to the error state.
> > 
> > FPGA i know, field programmable gate array. Could you offer some clue
> > as to what QP means?
> > 
> 
> Hi Andre, QP "Queue Pair" is well known rdma concept, and widely used
> in mlx drivers, it is used as in the driver as a ring buffer to
> communicate with the FPGA device.

O.K. Thanks.

It is hard to get the right balance between assuming people know
everything, and know nothing. But you can help teach people these
terms in commit messages:

      The FPGA queue pair event fires whenever a QP on the FPGA
      transitions to the error state.

   Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event
  2018-05-30 16:21       ` Andrew Lunn
@ 2018-05-30 17:28         ` Saeed Mahameed
  0 siblings, 0 replies; 11+ messages in thread
From: Saeed Mahameed @ 2018-05-30 17:28 UTC (permalink / raw)
  To: andrew
  Cc: Jason Gunthorpe, netdev, Ilan Tayari, linux-rdma,
	Leon Romanovsky, Adi Nissim

On Wed, 2018-05-30 at 18:21 +0200, Andrew Lunn wrote:
> On Wed, May 30, 2018 at 03:14:20PM +0000, Saeed Mahameed wrote:
> > On Wed, 2018-05-30 at 03:07 +0200, Andrew Lunn wrote:
> > > On Tue, May 29, 2018 at 05:19:54PM -0700, Saeed Mahameed wrote:
> > > > From: Ilan Tayari <ilant@mellanox.com>
> > > > 
> > > > The FPGA QP event fires whenever a QP on the FPGA trasitions
> > > > to the error state.
> > > 
> > > FPGA i know, field programmable gate array. Could you offer some
> > > clue
> > > as to what QP means?
> > > 
> > 
> > Hi Andre, QP "Queue Pair" is well known rdma concept, and widely
> > used
> > in mlx drivers, it is used as in the driver as a ring buffer to
> > communicate with the FPGA device.
> 
> O.K. Thanks.
> 
> It is hard to get the right balance between assuming people know
> everything, and know nothing. But you can help teach people these
> terms i commit messages:
> 
>       The FPGA queue pair event fires whenever a QP on the FPGA
>       transitions to the error state.
> 

Sure will fix the commit message in V2,

Thanks a lot for the feedback.

>    Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log
  2018-05-30 16:17       ` Andrew Lunn
@ 2018-05-30 17:31         ` Saeed Mahameed
  0 siblings, 0 replies; 11+ messages in thread
From: Saeed Mahameed @ 2018-05-30 17:31 UTC (permalink / raw)
  To: andrew
  Cc: Jason Gunthorpe, netdev, Ilan Tayari, linux-rdma,
	Leon Romanovsky, Adi Nissim

On Wed, 2018-05-30 at 18:17 +0200, Andrew Lunn wrote:
> > Hi Andrew, yes the temperature is available by other means, this
> > patch
> > is needed for alert information reasons in order to know which
> > internal
> > sensors triggered the alarm.
> > We are working in parallel to expose temperature sensor to hwmon,
> > but
> > this is still WIP.
> > 
> > 
> > Is it ok to have both ?
> 
> Hi Saeed
> 
> Ideally no. hwmon has mechanisms for setting alarm thresholds, and
> indicating the thresholds have been exceeded. There are also ways to
> tie this to thermal zones, so the system can react on overheating,
> slow down the CPU, drop voltages, ramp up fans, etc. hwmon should be
> your primary interface, not dmesg.
> 

Yes we are working on this, but it is not something that can happen
soon since we need to define the correct Firmware APIs which are still
WIP.

> But if you are stuck doing things in the wrong order, i guess it is
> O.K. I don't think dmesg is a Binary API, so you can remove it later.
> 

Yes this is the plan, once the hwmon is supported we will remove the
extra dmesg warnings.

>      Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-05-30 17:31 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-30  0:19 [PATCH mlx5-next 0/2] Mellanox, mlx5 new device events Saeed Mahameed
2018-05-30  0:19 ` [PATCH mlx5-next 1/2] net/mlx5: Add temperature warning event to log Saeed Mahameed
2018-05-30  1:04   ` Andrew Lunn
2018-05-30 15:08     ` Saeed Mahameed
2018-05-30 16:17       ` Andrew Lunn
2018-05-30 17:31         ` Saeed Mahameed
2018-05-30  0:19 ` [PATCH mlx5-next 2/2] net/mlx5: Add FPGA QP error event Saeed Mahameed
2018-05-30  1:07   ` Andrew Lunn
2018-05-30 15:14     ` Saeed Mahameed
2018-05-30 16:21       ` Andrew Lunn
2018-05-30 17:28         ` Saeed Mahameed

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).